/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin.           */
/* All rights reserved.                                              */
/*                                                                   */
/* Redistribution and use in source and binary forms, with or        */
/* without modification, are permitted provided that the following   */
/* conditions are met:                                               */
/*                                                                   */
/*   1. Redistributions of source code must retain the above         */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer.                                                  */
/*                                                                   */
/*   2. Redistributions in binary form must reproduce the above      */
/*      copyright notice, this list of conditions and the following  */
/*      disclaimer in the documentation and/or other materials       */
/*      provided with the distribution.                              */
/*                                                                   */
/*    THIS  SOFTWARE IS PROVIDED  BY THE  UNIVERSITY OF  TEXAS AT    */
/*    AUSTIN  ``AS IS''  AND ANY  EXPRESS OR  IMPLIED WARRANTIES,    */
/*    INCLUDING, BUT  NOT LIMITED  TO, THE IMPLIED  WARRANTIES OF    */
/*    MERCHANTABILITY  AND FITNESS FOR  A PARTICULAR  PURPOSE ARE    */
/*    DISCLAIMED.  IN  NO EVENT SHALL THE UNIVERSITY  OF TEXAS AT    */
/*    AUSTIN OR CONTRIBUTORS BE  LIABLE FOR ANY DIRECT, INDIRECT,    */
/*    INCIDENTAL,  SPECIAL, EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES    */
/*    (INCLUDING, BUT  NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE    */
/*    GOODS  OR  SERVICES; LOSS  OF  USE,  DATA,  OR PROFITS;  OR    */
/*    BUSINESS INTERRUPTION) HOWEVER CAUSED  AND ON ANY THEORY OF    */
/*    LIABILITY, WHETHER  IN CONTRACT, STRICT  LIABILITY, OR TORT    */
/*    (INCLUDING NEGLIGENCE OR OTHERWISE)  ARISING IN ANY WAY OUT    */
/*    OF  THE  USE OF  THIS  SOFTWARE,  EVEN  IF ADVISED  OF  THE    */
/*    POSSIBILITY OF SUCH DAMAGE.                                    */
/*                                                                   */
/* The views and conclusions contained in the software and           */
/* documentation are those of the authors and should not be          */
/* interpreted as representing official policies, either expressed   */
/* or implied, of The University of Texas at Austin.                 */
/*********************************************************************/

#include <stdio.h>
#include <string.h>
#include "common.h"

#ifdef BUILD_KERNEL
#include "kernel_BANIAS.h"
#endif

#undef DEBUG

static void init_parameter(void);

/*
 * Kernel dispatch table for the BANIAS target.
 *
 * This is a purely positional initializer: every entry has to appear in the
 * same order as the corresponding member of gotoblas_t, which is declared
 * outside this file (not visible here).  Because of that, both branches of
 * each #if/#else below must contribute the same number of entries.
 *
 * Recurring pattern: when the M and N unroll factors of a GEMM family are
 * equal, the "o" (oncopy/otcopy/ou../ol..) routines are placed in the slots
 * that otherwise hold the "i" (incopy/itcopy/iu../il..) routines, and the
 * "o" routines are then listed again for their own slots.
 *
 * The leading "0, 0, 0" of each section are placeholders; presumably they
 * are the blocking parameters that init_parameter() assigns at run time
 * (it visibly sets the *gemm_p and *gemm_q members) -- confirm against the
 * gotoblas_t declaration.
 */
gotoblas_t TABLE_NAME = {
  DTB_DEFAULT_ENTRIES ,

  GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,

  /* ---- s-prefixed kernels (single precision real in BLAS naming) ---- */
  0, 0, 0,
  SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
#ifdef SGEMM_DEFAULT_UNROLL_MN
 SGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif


  /* Single flag entry: 1 when HAVE_EXCLUSIVE_CACHE is defined, else 0. */
#ifdef HAVE_EXCLUSIVE_CACHE
  1,
#else
  0,
#endif

  samax_k_BANIAS,  samin_k_BANIAS,  smax_k_BANIAS,  smin_k_BANIAS,
  isamax_k_BANIAS, isamin_k_BANIAS, ismax_k_BANIAS, ismin_k_BANIAS,
  snrm2_k_BANIAS,  sasum_k_BANIAS,  scopy_k_BANIAS, sdot_k_BANIAS,
  dsdot_k_BANIAS,
  srot_k_BANIAS,   saxpy_k_BANIAS,  sscal_k_BANIAS, sswap_k_BANIAS,
  sgemv_n_BANIAS,  sgemv_t_BANIAS, sger_k_BANIAS,
  ssymv_L_BANIAS, ssymv_U_BANIAS,

  sgemm_kernel_BANIAS, sgemm_beta_BANIAS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  sgemm_incopy_BANIAS, sgemm_itcopy_BANIAS,
#else
  sgemm_oncopy_BANIAS, sgemm_otcopy_BANIAS,
#endif
  sgemm_oncopy_BANIAS, sgemm_otcopy_BANIAS,
  strsm_kernel_LN_BANIAS, strsm_kernel_LT_BANIAS, strsm_kernel_RN_BANIAS, strsm_kernel_RT_BANIAS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  strsm_iunucopy_BANIAS, strsm_iunncopy_BANIAS, strsm_iutucopy_BANIAS, strsm_iutncopy_BANIAS,
  strsm_ilnucopy_BANIAS, strsm_ilnncopy_BANIAS, strsm_iltucopy_BANIAS, strsm_iltncopy_BANIAS,
#else
  strsm_ounucopy_BANIAS, strsm_ounncopy_BANIAS, strsm_outucopy_BANIAS, strsm_outncopy_BANIAS,
  strsm_olnucopy_BANIAS, strsm_olnncopy_BANIAS, strsm_oltucopy_BANIAS, strsm_oltncopy_BANIAS,
#endif
  strsm_ounucopy_BANIAS, strsm_ounncopy_BANIAS, strsm_outucopy_BANIAS, strsm_outncopy_BANIAS,
  strsm_olnucopy_BANIAS, strsm_olnncopy_BANIAS, strsm_oltucopy_BANIAS, strsm_oltncopy_BANIAS,
  strmm_kernel_RN_BANIAS, strmm_kernel_RT_BANIAS, strmm_kernel_LN_BANIAS, strmm_kernel_LT_BANIAS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  strmm_iunucopy_BANIAS, strmm_iunncopy_BANIAS, strmm_iutucopy_BANIAS, strmm_iutncopy_BANIAS,
  strmm_ilnucopy_BANIAS, strmm_ilnncopy_BANIAS, strmm_iltucopy_BANIAS, strmm_iltncopy_BANIAS,
#else
  strmm_ounucopy_BANIAS, strmm_ounncopy_BANIAS, strmm_outucopy_BANIAS, strmm_outncopy_BANIAS,
  strmm_olnucopy_BANIAS, strmm_olnncopy_BANIAS, strmm_oltucopy_BANIAS, strmm_oltncopy_BANIAS,
#endif
  strmm_ounucopy_BANIAS, strmm_ounncopy_BANIAS, strmm_outucopy_BANIAS, strmm_outncopy_BANIAS,
  strmm_olnucopy_BANIAS, strmm_olnncopy_BANIAS, strmm_oltucopy_BANIAS, strmm_oltncopy_BANIAS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
  ssymm_iutcopy_BANIAS, ssymm_iltcopy_BANIAS,
#else
  ssymm_outcopy_BANIAS, ssymm_oltcopy_BANIAS,
#endif
  ssymm_outcopy_BANIAS, ssymm_oltcopy_BANIAS,

  /* The two LAPACK helper slots are filled with NULL when NO_LAPACK is set. */
#ifndef NO_LAPACK
  sneg_tcopy_BANIAS, slaswp_ncopy_BANIAS,
#else
  NULL,NULL,
#endif

  /* ---- d-prefixed kernels (double precision real in BLAS naming) ---- */
  0, 0, 0,
  DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN
 DGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif

  damax_k_BANIAS,  damin_k_BANIAS,  dmax_k_BANIAS,  dmin_k_BANIAS,
  idamax_k_BANIAS, idamin_k_BANIAS, idmax_k_BANIAS, idmin_k_BANIAS,
  dnrm2_k_BANIAS,  dasum_k_BANIAS,  dcopy_k_BANIAS, ddot_k_BANIAS,
  drot_k_BANIAS,   daxpy_k_BANIAS,  dscal_k_BANIAS, dswap_k_BANIAS,
  dgemv_n_BANIAS,  dgemv_t_BANIAS,  dger_k_BANIAS,
  dsymv_L_BANIAS,  dsymv_U_BANIAS,

  dgemm_kernel_BANIAS, dgemm_beta_BANIAS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dgemm_incopy_BANIAS, dgemm_itcopy_BANIAS,
#else
  dgemm_oncopy_BANIAS, dgemm_otcopy_BANIAS,
#endif
  dgemm_oncopy_BANIAS, dgemm_otcopy_BANIAS,
  dtrsm_kernel_LN_BANIAS, dtrsm_kernel_LT_BANIAS, dtrsm_kernel_RN_BANIAS, dtrsm_kernel_RT_BANIAS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dtrsm_iunucopy_BANIAS, dtrsm_iunncopy_BANIAS, dtrsm_iutucopy_BANIAS, dtrsm_iutncopy_BANIAS,
  dtrsm_ilnucopy_BANIAS, dtrsm_ilnncopy_BANIAS, dtrsm_iltucopy_BANIAS, dtrsm_iltncopy_BANIAS,
#else
  dtrsm_ounucopy_BANIAS, dtrsm_ounncopy_BANIAS, dtrsm_outucopy_BANIAS, dtrsm_outncopy_BANIAS,
  dtrsm_olnucopy_BANIAS, dtrsm_olnncopy_BANIAS, dtrsm_oltucopy_BANIAS, dtrsm_oltncopy_BANIAS,
#endif
  dtrsm_ounucopy_BANIAS, dtrsm_ounncopy_BANIAS, dtrsm_outucopy_BANIAS, dtrsm_outncopy_BANIAS,
  dtrsm_olnucopy_BANIAS, dtrsm_olnncopy_BANIAS, dtrsm_oltucopy_BANIAS, dtrsm_oltncopy_BANIAS,
  dtrmm_kernel_RN_BANIAS, dtrmm_kernel_RT_BANIAS, dtrmm_kernel_LN_BANIAS, dtrmm_kernel_LT_BANIAS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dtrmm_iunucopy_BANIAS, dtrmm_iunncopy_BANIAS, dtrmm_iutucopy_BANIAS, dtrmm_iutncopy_BANIAS,
  dtrmm_ilnucopy_BANIAS, dtrmm_ilnncopy_BANIAS, dtrmm_iltucopy_BANIAS, dtrmm_iltncopy_BANIAS,
#else
  dtrmm_ounucopy_BANIAS, dtrmm_ounncopy_BANIAS, dtrmm_outucopy_BANIAS, dtrmm_outncopy_BANIAS,
  dtrmm_olnucopy_BANIAS, dtrmm_olnncopy_BANIAS, dtrmm_oltucopy_BANIAS, dtrmm_oltncopy_BANIAS,
#endif
  dtrmm_ounucopy_BANIAS, dtrmm_ounncopy_BANIAS, dtrmm_outucopy_BANIAS, dtrmm_outncopy_BANIAS,
  dtrmm_olnucopy_BANIAS, dtrmm_olnncopy_BANIAS, dtrmm_oltucopy_BANIAS, dtrmm_oltncopy_BANIAS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
  dsymm_iutcopy_BANIAS, dsymm_iltcopy_BANIAS,
#else
  dsymm_outcopy_BANIAS, dsymm_oltcopy_BANIAS,
#endif
  dsymm_outcopy_BANIAS, dsymm_oltcopy_BANIAS,

#ifndef NO_LAPACK
  dneg_tcopy_BANIAS, dlaswp_ncopy_BANIAS,
#else
  NULL, NULL,
#endif

  /* ---- q-prefixed kernels; present only when EXPRECISION is defined ---- */
#ifdef EXPRECISION

  0, 0, 0,
  QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

  qamax_k_BANIAS,  qamin_k_BANIAS,  qmax_k_BANIAS,  qmin_k_BANIAS,
  iqamax_k_BANIAS, iqamin_k_BANIAS, iqmax_k_BANIAS, iqmin_k_BANIAS,
  qnrm2_k_BANIAS,  qasum_k_BANIAS,  qcopy_k_BANIAS, qdot_k_BANIAS,
  qrot_k_BANIAS,   qaxpy_k_BANIAS,  qscal_k_BANIAS, qswap_k_BANIAS,
  qgemv_n_BANIAS,  qgemv_t_BANIAS,  qger_k_BANIAS,
  qsymv_L_BANIAS,  qsymv_U_BANIAS,

  qgemm_kernel_BANIAS, qgemm_beta_BANIAS,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qgemm_incopy_BANIAS, qgemm_itcopy_BANIAS,
#else
  qgemm_oncopy_BANIAS, qgemm_otcopy_BANIAS,
#endif
  qgemm_oncopy_BANIAS, qgemm_otcopy_BANIAS,
  qtrsm_kernel_LN_BANIAS, qtrsm_kernel_LT_BANIAS, qtrsm_kernel_RN_BANIAS, qtrsm_kernel_RT_BANIAS,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qtrsm_iunucopy_BANIAS, qtrsm_iunncopy_BANIAS, qtrsm_iutucopy_BANIAS, qtrsm_iutncopy_BANIAS,
  qtrsm_ilnucopy_BANIAS, qtrsm_ilnncopy_BANIAS, qtrsm_iltucopy_BANIAS, qtrsm_iltncopy_BANIAS,
#else
  qtrsm_ounucopy_BANIAS, qtrsm_ounncopy_BANIAS, qtrsm_outucopy_BANIAS, qtrsm_outncopy_BANIAS,
  qtrsm_olnucopy_BANIAS, qtrsm_olnncopy_BANIAS, qtrsm_oltucopy_BANIAS, qtrsm_oltncopy_BANIAS,
#endif
  qtrsm_ounucopy_BANIAS, qtrsm_ounncopy_BANIAS, qtrsm_outucopy_BANIAS, qtrsm_outncopy_BANIAS,
  qtrsm_olnucopy_BANIAS, qtrsm_olnncopy_BANIAS, qtrsm_oltucopy_BANIAS, qtrsm_oltncopy_BANIAS,
  qtrmm_kernel_RN_BANIAS, qtrmm_kernel_RT_BANIAS, qtrmm_kernel_LN_BANIAS, qtrmm_kernel_LT_BANIAS,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qtrmm_iunucopy_BANIAS, qtrmm_iunncopy_BANIAS, qtrmm_iutucopy_BANIAS, qtrmm_iutncopy_BANIAS,
  qtrmm_ilnucopy_BANIAS, qtrmm_ilnncopy_BANIAS, qtrmm_iltucopy_BANIAS, qtrmm_iltncopy_BANIAS,
#else
  qtrmm_ounucopy_BANIAS, qtrmm_ounncopy_BANIAS, qtrmm_outucopy_BANIAS, qtrmm_outncopy_BANIAS,
  qtrmm_olnucopy_BANIAS, qtrmm_olnncopy_BANIAS, qtrmm_oltucopy_BANIAS, qtrmm_oltncopy_BANIAS,
#endif
  qtrmm_ounucopy_BANIAS, qtrmm_ounncopy_BANIAS, qtrmm_outucopy_BANIAS, qtrmm_outncopy_BANIAS,
  qtrmm_olnucopy_BANIAS, qtrmm_olnncopy_BANIAS, qtrmm_oltucopy_BANIAS, qtrmm_oltncopy_BANIAS,
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N
  qsymm_iutcopy_BANIAS, qsymm_iltcopy_BANIAS,
#else
  qsymm_outcopy_BANIAS, qsymm_oltcopy_BANIAS,
#endif
  qsymm_outcopy_BANIAS, qsymm_oltcopy_BANIAS,

#ifndef NO_LAPACK
  qneg_tcopy_BANIAS, qlaswp_ncopy_BANIAS,
#else
  NULL, NULL,
#endif

#endif

  /* ---- c-prefixed kernels (single precision complex in BLAS naming) ---- */
  0, 0, 0,
  CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN
 CGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif

  camax_k_BANIAS, camin_k_BANIAS, icamax_k_BANIAS, icamin_k_BANIAS,
  cnrm2_k_BANIAS, casum_k_BANIAS, ccopy_k_BANIAS,
  cdotu_k_BANIAS, cdotc_k_BANIAS, csrot_k_BANIAS,
  caxpy_k_BANIAS, caxpyc_k_BANIAS, cscal_k_BANIAS, cswap_k_BANIAS,

  cgemv_n_BANIAS, cgemv_t_BANIAS, cgemv_r_BANIAS, cgemv_c_BANIAS,
  cgemv_o_BANIAS, cgemv_u_BANIAS, cgemv_s_BANIAS, cgemv_d_BANIAS,
  cgeru_k_BANIAS, cgerc_k_BANIAS, cgerv_k_BANIAS, cgerd_k_BANIAS,
  csymv_L_BANIAS, csymv_U_BANIAS,
  chemv_L_BANIAS, chemv_U_BANIAS, chemv_M_BANIAS, chemv_V_BANIAS,

  cgemm_kernel_n_BANIAS, cgemm_kernel_l_BANIAS, cgemm_kernel_r_BANIAS, cgemm_kernel_b_BANIAS,
  cgemm_beta_BANIAS,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  cgemm_incopy_BANIAS, cgemm_itcopy_BANIAS,
#else
  cgemm_oncopy_BANIAS, cgemm_otcopy_BANIAS,
#endif
  cgemm_oncopy_BANIAS, cgemm_otcopy_BANIAS,

  ctrsm_kernel_LN_BANIAS, ctrsm_kernel_LT_BANIAS, ctrsm_kernel_LR_BANIAS, ctrsm_kernel_LC_BANIAS,
  ctrsm_kernel_RN_BANIAS, ctrsm_kernel_RT_BANIAS, ctrsm_kernel_RR_BANIAS, ctrsm_kernel_RC_BANIAS,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  ctrsm_iunucopy_BANIAS,  ctrsm_iunncopy_BANIAS,  ctrsm_iutucopy_BANIAS,  ctrsm_iutncopy_BANIAS,
  ctrsm_ilnucopy_BANIAS,  ctrsm_ilnncopy_BANIAS,  ctrsm_iltucopy_BANIAS,  ctrsm_iltncopy_BANIAS,
#else
  ctrsm_ounucopy_BANIAS,  ctrsm_ounncopy_BANIAS,  ctrsm_outucopy_BANIAS,  ctrsm_outncopy_BANIAS,
  ctrsm_olnucopy_BANIAS,  ctrsm_olnncopy_BANIAS,  ctrsm_oltucopy_BANIAS,  ctrsm_oltncopy_BANIAS,
#endif
  ctrsm_ounucopy_BANIAS,  ctrsm_ounncopy_BANIAS,  ctrsm_outucopy_BANIAS,  ctrsm_outncopy_BANIAS,
  ctrsm_olnucopy_BANIAS,  ctrsm_olnncopy_BANIAS,  ctrsm_oltucopy_BANIAS,  ctrsm_oltncopy_BANIAS,

  ctrmm_kernel_RN_BANIAS,  ctrmm_kernel_RT_BANIAS,  ctrmm_kernel_RR_BANIAS,  ctrmm_kernel_RC_BANIAS,
  ctrmm_kernel_LN_BANIAS,  ctrmm_kernel_LT_BANIAS,  ctrmm_kernel_LR_BANIAS,  ctrmm_kernel_LC_BANIAS,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  ctrmm_iunucopy_BANIAS,  ctrmm_iunncopy_BANIAS,  ctrmm_iutucopy_BANIAS,  ctrmm_iutncopy_BANIAS,
  ctrmm_ilnucopy_BANIAS,  ctrmm_ilnncopy_BANIAS,  ctrmm_iltucopy_BANIAS,  ctrmm_iltncopy_BANIAS,
#else
  ctrmm_ounucopy_BANIAS,  ctrmm_ounncopy_BANIAS,  ctrmm_outucopy_BANIAS,  ctrmm_outncopy_BANIAS,
  ctrmm_olnucopy_BANIAS,  ctrmm_olnncopy_BANIAS,  ctrmm_oltucopy_BANIAS,  ctrmm_oltncopy_BANIAS,
#endif
  ctrmm_ounucopy_BANIAS,  ctrmm_ounncopy_BANIAS,  ctrmm_outucopy_BANIAS,  ctrmm_outncopy_BANIAS,
  ctrmm_olnucopy_BANIAS,  ctrmm_olnncopy_BANIAS,  ctrmm_oltucopy_BANIAS,  ctrmm_oltncopy_BANIAS,

#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  csymm_iutcopy_BANIAS,  csymm_iltcopy_BANIAS,
#else
  csymm_outcopy_BANIAS,  csymm_oltcopy_BANIAS,
#endif
  csymm_outcopy_BANIAS,  csymm_oltcopy_BANIAS,
#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
  chemm_iutcopy_BANIAS,  chemm_iltcopy_BANIAS,
#else
  chemm_outcopy_BANIAS,  chemm_oltcopy_BANIAS,
#endif
  chemm_outcopy_BANIAS,  chemm_oltcopy_BANIAS,

  /* cgemm3m section: falls back to the SGEMM unroll factors when no
     CGEMM3M-specific ones are defined. */
  0, 0, 0,
#ifdef CGEMM3M_DEFAULT_UNROLL_M
  CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else
  SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif


  cgemm3m_kernel_BANIAS,

  cgemm3m_incopyb_BANIAS,  cgemm3m_incopyr_BANIAS,
  cgemm3m_incopyi_BANIAS,  cgemm3m_itcopyb_BANIAS,
  cgemm3m_itcopyr_BANIAS,  cgemm3m_itcopyi_BANIAS,
  cgemm3m_oncopyb_BANIAS,  cgemm3m_oncopyr_BANIAS,
  cgemm3m_oncopyi_BANIAS,  cgemm3m_otcopyb_BANIAS,
  cgemm3m_otcopyr_BANIAS,  cgemm3m_otcopyi_BANIAS,

  csymm3m_iucopyb_BANIAS,  csymm3m_ilcopyb_BANIAS,
  csymm3m_iucopyr_BANIAS,  csymm3m_ilcopyr_BANIAS,
  csymm3m_iucopyi_BANIAS,  csymm3m_ilcopyi_BANIAS,
  csymm3m_oucopyb_BANIAS,  csymm3m_olcopyb_BANIAS,
  csymm3m_oucopyr_BANIAS,  csymm3m_olcopyr_BANIAS,
  csymm3m_oucopyi_BANIAS,  csymm3m_olcopyi_BANIAS,

  chemm3m_iucopyb_BANIAS,  chemm3m_ilcopyb_BANIAS,
  chemm3m_iucopyr_BANIAS,  chemm3m_ilcopyr_BANIAS,
  chemm3m_iucopyi_BANIAS,  chemm3m_ilcopyi_BANIAS,

  chemm3m_oucopyb_BANIAS,  chemm3m_olcopyb_BANIAS,
  chemm3m_oucopyr_BANIAS,  chemm3m_olcopyr_BANIAS,
  chemm3m_oucopyi_BANIAS,  chemm3m_olcopyi_BANIAS,

#ifndef NO_LAPACK
  cneg_tcopy_BANIAS, claswp_ncopy_BANIAS,
#else
  NULL, NULL,
#endif

  /* ---- z-prefixed kernels (double precision complex in BLAS naming) ---- */
  0, 0, 0,
  ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
#ifdef ZGEMM_DEFAULT_UNROLL_MN
 ZGEMM_DEFAULT_UNROLL_MN,
#else
 MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N),
#endif

  zamax_k_BANIAS, zamin_k_BANIAS, izamax_k_BANIAS, izamin_k_BANIAS,
  znrm2_k_BANIAS, zasum_k_BANIAS, zcopy_k_BANIAS,
  zdotu_k_BANIAS, zdotc_k_BANIAS, zdrot_k_BANIAS,
  zaxpy_k_BANIAS, zaxpyc_k_BANIAS, zscal_k_BANIAS, zswap_k_BANIAS,

  zgemv_n_BANIAS, zgemv_t_BANIAS, zgemv_r_BANIAS, zgemv_c_BANIAS,
  zgemv_o_BANIAS, zgemv_u_BANIAS, zgemv_s_BANIAS, zgemv_d_BANIAS,
  zgeru_k_BANIAS, zgerc_k_BANIAS, zgerv_k_BANIAS, zgerd_k_BANIAS,
  zsymv_L_BANIAS, zsymv_U_BANIAS,
  zhemv_L_BANIAS, zhemv_U_BANIAS, zhemv_M_BANIAS, zhemv_V_BANIAS,

  zgemm_kernel_n_BANIAS, zgemm_kernel_l_BANIAS, zgemm_kernel_r_BANIAS, zgemm_kernel_b_BANIAS,
  zgemm_beta_BANIAS,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zgemm_incopy_BANIAS, zgemm_itcopy_BANIAS,
#else
  zgemm_oncopy_BANIAS, zgemm_otcopy_BANIAS,
#endif
  zgemm_oncopy_BANIAS, zgemm_otcopy_BANIAS,

  ztrsm_kernel_LN_BANIAS, ztrsm_kernel_LT_BANIAS, ztrsm_kernel_LR_BANIAS, ztrsm_kernel_LC_BANIAS,
  ztrsm_kernel_RN_BANIAS, ztrsm_kernel_RT_BANIAS, ztrsm_kernel_RR_BANIAS, ztrsm_kernel_RC_BANIAS,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  ztrsm_iunucopy_BANIAS,  ztrsm_iunncopy_BANIAS,  ztrsm_iutucopy_BANIAS,  ztrsm_iutncopy_BANIAS,
  ztrsm_ilnucopy_BANIAS,  ztrsm_ilnncopy_BANIAS,  ztrsm_iltucopy_BANIAS,  ztrsm_iltncopy_BANIAS,
#else
  ztrsm_ounucopy_BANIAS,  ztrsm_ounncopy_BANIAS,  ztrsm_outucopy_BANIAS,  ztrsm_outncopy_BANIAS,
  ztrsm_olnucopy_BANIAS,  ztrsm_olnncopy_BANIAS,  ztrsm_oltucopy_BANIAS,  ztrsm_oltncopy_BANIAS,
#endif
  ztrsm_ounucopy_BANIAS,  ztrsm_ounncopy_BANIAS,  ztrsm_outucopy_BANIAS,  ztrsm_outncopy_BANIAS,
  ztrsm_olnucopy_BANIAS,  ztrsm_olnncopy_BANIAS,  ztrsm_oltucopy_BANIAS,  ztrsm_oltncopy_BANIAS,

  ztrmm_kernel_RN_BANIAS,  ztrmm_kernel_RT_BANIAS,  ztrmm_kernel_RR_BANIAS,  ztrmm_kernel_RC_BANIAS,
  ztrmm_kernel_LN_BANIAS,  ztrmm_kernel_LT_BANIAS,  ztrmm_kernel_LR_BANIAS,  ztrmm_kernel_LC_BANIAS,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  ztrmm_iunucopy_BANIAS,  ztrmm_iunncopy_BANIAS,  ztrmm_iutucopy_BANIAS,  ztrmm_iutncopy_BANIAS,
  ztrmm_ilnucopy_BANIAS,  ztrmm_ilnncopy_BANIAS,  ztrmm_iltucopy_BANIAS,  ztrmm_iltncopy_BANIAS,
#else
  ztrmm_ounucopy_BANIAS,  ztrmm_ounncopy_BANIAS,  ztrmm_outucopy_BANIAS,  ztrmm_outncopy_BANIAS,
  ztrmm_olnucopy_BANIAS,  ztrmm_olnncopy_BANIAS,  ztrmm_oltucopy_BANIAS,  ztrmm_oltncopy_BANIAS,
#endif
  ztrmm_ounucopy_BANIAS,  ztrmm_ounncopy_BANIAS,  ztrmm_outucopy_BANIAS,  ztrmm_outncopy_BANIAS,
  ztrmm_olnucopy_BANIAS,  ztrmm_olnncopy_BANIAS,  ztrmm_oltucopy_BANIAS,  ztrmm_oltncopy_BANIAS,

#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zsymm_iutcopy_BANIAS,  zsymm_iltcopy_BANIAS,
#else
  zsymm_outcopy_BANIAS,  zsymm_oltcopy_BANIAS,
#endif
  zsymm_outcopy_BANIAS,  zsymm_oltcopy_BANIAS,
#if ZGEMM_DEFAULT_UNROLL_M != ZGEMM_DEFAULT_UNROLL_N
  zhemm_iutcopy_BANIAS,  zhemm_iltcopy_BANIAS,
#else
  zhemm_outcopy_BANIAS,  zhemm_oltcopy_BANIAS,
#endif
  zhemm_outcopy_BANIAS,  zhemm_oltcopy_BANIAS,

  /* zgemm3m section: falls back to the DGEMM unroll factors when no
     ZGEMM3M-specific ones are defined. */
  0, 0, 0,
#ifdef ZGEMM3M_DEFAULT_UNROLL_M
  ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else
  DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif


  zgemm3m_kernel_BANIAS,

  zgemm3m_incopyb_BANIAS,  zgemm3m_incopyr_BANIAS,
  zgemm3m_incopyi_BANIAS,  zgemm3m_itcopyb_BANIAS,
  zgemm3m_itcopyr_BANIAS,  zgemm3m_itcopyi_BANIAS,
  zgemm3m_oncopyb_BANIAS,  zgemm3m_oncopyr_BANIAS,
  zgemm3m_oncopyi_BANIAS,  zgemm3m_otcopyb_BANIAS,
  zgemm3m_otcopyr_BANIAS,  zgemm3m_otcopyi_BANIAS,

  zsymm3m_iucopyb_BANIAS,  zsymm3m_ilcopyb_BANIAS,
  zsymm3m_iucopyr_BANIAS,  zsymm3m_ilcopyr_BANIAS,
  zsymm3m_iucopyi_BANIAS,  zsymm3m_ilcopyi_BANIAS,
  zsymm3m_oucopyb_BANIAS,  zsymm3m_olcopyb_BANIAS,
  zsymm3m_oucopyr_BANIAS,  zsymm3m_olcopyr_BANIAS,
  zsymm3m_oucopyi_BANIAS,  zsymm3m_olcopyi_BANIAS,

  zhemm3m_iucopyb_BANIAS,  zhemm3m_ilcopyb_BANIAS,
  zhemm3m_iucopyr_BANIAS,  zhemm3m_ilcopyr_BANIAS,
  zhemm3m_iucopyi_BANIAS,  zhemm3m_ilcopyi_BANIAS,

  zhemm3m_oucopyb_BANIAS,  zhemm3m_olcopyb_BANIAS,
  zhemm3m_oucopyr_BANIAS,  zhemm3m_olcopyr_BANIAS,
  zhemm3m_oucopyi_BANIAS,  zhemm3m_olcopyi_BANIAS,

#ifndef NO_LAPACK
  zneg_tcopy_BANIAS, zlaswp_ncopy_BANIAS,
#else
  NULL, NULL,
#endif

  /* ---- x-prefixed kernels; present only when EXPRECISION is defined ---- */
#ifdef EXPRECISION

  0, 0, 0,
  XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N, MAX(XGEMM_DEFAULT_UNROLL_M, XGEMM_DEFAULT_UNROLL_N),

  xamax_k_BANIAS, xamin_k_BANIAS, ixamax_k_BANIAS, ixamin_k_BANIAS,
  xnrm2_k_BANIAS, xasum_k_BANIAS, xcopy_k_BANIAS,
  xdotu_k_BANIAS, xdotc_k_BANIAS, xqrot_k_BANIAS,
  xaxpy_k_BANIAS, xaxpyc_k_BANIAS, xscal_k_BANIAS, xswap_k_BANIAS,

  xgemv_n_BANIAS, xgemv_t_BANIAS, xgemv_r_BANIAS, xgemv_c_BANIAS,
  xgemv_o_BANIAS, xgemv_u_BANIAS, xgemv_s_BANIAS, xgemv_d_BANIAS,
  xgeru_k_BANIAS, xgerc_k_BANIAS, xgerv_k_BANIAS, xgerd_k_BANIAS,
  xsymv_L_BANIAS, xsymv_U_BANIAS,
  xhemv_L_BANIAS, xhemv_U_BANIAS, xhemv_M_BANIAS, xhemv_V_BANIAS,

  xgemm_kernel_n_BANIAS, xgemm_kernel_l_BANIAS, xgemm_kernel_r_BANIAS, xgemm_kernel_b_BANIAS,
  xgemm_beta_BANIAS,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xgemm_incopy_BANIAS, xgemm_itcopy_BANIAS,
#else
  xgemm_oncopy_BANIAS, xgemm_otcopy_BANIAS,
#endif
  xgemm_oncopy_BANIAS, xgemm_otcopy_BANIAS,

  xtrsm_kernel_LN_BANIAS, xtrsm_kernel_LT_BANIAS, xtrsm_kernel_LR_BANIAS, xtrsm_kernel_LC_BANIAS,
  xtrsm_kernel_RN_BANIAS, xtrsm_kernel_RT_BANIAS, xtrsm_kernel_RR_BANIAS, xtrsm_kernel_RC_BANIAS,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xtrsm_iunucopy_BANIAS,  xtrsm_iunncopy_BANIAS,  xtrsm_iutucopy_BANIAS,  xtrsm_iutncopy_BANIAS,
  xtrsm_ilnucopy_BANIAS,  xtrsm_ilnncopy_BANIAS,  xtrsm_iltucopy_BANIAS,  xtrsm_iltncopy_BANIAS,
#else
  xtrsm_ounucopy_BANIAS,  xtrsm_ounncopy_BANIAS,  xtrsm_outucopy_BANIAS,  xtrsm_outncopy_BANIAS,
  xtrsm_olnucopy_BANIAS,  xtrsm_olnncopy_BANIAS,  xtrsm_oltucopy_BANIAS,  xtrsm_oltncopy_BANIAS,
#endif
  xtrsm_ounucopy_BANIAS,  xtrsm_ounncopy_BANIAS,  xtrsm_outucopy_BANIAS,  xtrsm_outncopy_BANIAS,
  xtrsm_olnucopy_BANIAS,  xtrsm_olnncopy_BANIAS,  xtrsm_oltucopy_BANIAS,  xtrsm_oltncopy_BANIAS,

  xtrmm_kernel_RN_BANIAS,  xtrmm_kernel_RT_BANIAS,  xtrmm_kernel_RR_BANIAS,  xtrmm_kernel_RC_BANIAS,
  xtrmm_kernel_LN_BANIAS,  xtrmm_kernel_LT_BANIAS,  xtrmm_kernel_LR_BANIAS,  xtrmm_kernel_LC_BANIAS,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xtrmm_iunucopy_BANIAS,  xtrmm_iunncopy_BANIAS,  xtrmm_iutucopy_BANIAS,  xtrmm_iutncopy_BANIAS,
  xtrmm_ilnucopy_BANIAS,  xtrmm_ilnncopy_BANIAS,  xtrmm_iltucopy_BANIAS,  xtrmm_iltncopy_BANIAS,
#else
  xtrmm_ounucopy_BANIAS,  xtrmm_ounncopy_BANIAS,  xtrmm_outucopy_BANIAS,  xtrmm_outncopy_BANIAS,
  xtrmm_olnucopy_BANIAS,  xtrmm_olnncopy_BANIAS,  xtrmm_oltucopy_BANIAS,  xtrmm_oltncopy_BANIAS,
#endif
  xtrmm_ounucopy_BANIAS,  xtrmm_ounncopy_BANIAS,  xtrmm_outucopy_BANIAS,  xtrmm_outncopy_BANIAS,
  xtrmm_olnucopy_BANIAS,  xtrmm_olnncopy_BANIAS,  xtrmm_oltucopy_BANIAS,  xtrmm_oltncopy_BANIAS,

#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xsymm_iutcopy_BANIAS,  xsymm_iltcopy_BANIAS,
#else
  xsymm_outcopy_BANIAS,  xsymm_oltcopy_BANIAS,
#endif
  xsymm_outcopy_BANIAS,  xsymm_oltcopy_BANIAS,
#if XGEMM_DEFAULT_UNROLL_M != XGEMM_DEFAULT_UNROLL_N
  xhemm_iutcopy_BANIAS,  xhemm_iltcopy_BANIAS,
#else
  xhemm_outcopy_BANIAS,  xhemm_oltcopy_BANIAS,
#endif
  xhemm_outcopy_BANIAS,  xhemm_oltcopy_BANIAS,

  /* xgemm3m section: uses the QGEMM unroll factors. */
  0, 0, 0,
  QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),

  xgemm3m_kernel_BANIAS,

  xgemm3m_incopyb_BANIAS,  xgemm3m_incopyr_BANIAS,
  xgemm3m_incopyi_BANIAS,  xgemm3m_itcopyb_BANIAS,
  xgemm3m_itcopyr_BANIAS,  xgemm3m_itcopyi_BANIAS,
  xgemm3m_oncopyb_BANIAS,  xgemm3m_oncopyr_BANIAS,
  xgemm3m_oncopyi_BANIAS,  xgemm3m_otcopyb_BANIAS,
  xgemm3m_otcopyr_BANIAS,  xgemm3m_otcopyi_BANIAS,

  xsymm3m_iucopyb_BANIAS,  xsymm3m_ilcopyb_BANIAS,
  xsymm3m_iucopyr_BANIAS,  xsymm3m_ilcopyr_BANIAS,
  xsymm3m_iucopyi_BANIAS,  xsymm3m_ilcopyi_BANIAS,
  xsymm3m_oucopyb_BANIAS,  xsymm3m_olcopyb_BANIAS,
  xsymm3m_oucopyr_BANIAS,  xsymm3m_olcopyr_BANIAS,
  xsymm3m_oucopyi_BANIAS,  xsymm3m_olcopyi_BANIAS,

  xhemm3m_iucopyb_BANIAS,  xhemm3m_ilcopyb_BANIAS,
  xhemm3m_iucopyr_BANIAS,  xhemm3m_ilcopyr_BANIAS,
  xhemm3m_iucopyi_BANIAS,  xhemm3m_ilcopyi_BANIAS,

  xhemm3m_oucopyb_BANIAS,  xhemm3m_olcopyb_BANIAS,
  xhemm3m_oucopyr_BANIAS,  xhemm3m_olcopyr_BANIAS,
  xhemm3m_oucopyi_BANIAS,  xhemm3m_olcopyi_BANIAS,

#ifndef NO_LAPACK
  xneg_tcopy_BANIAS, xlaswp_ncopy_BANIAS,
#else
  NULL, NULL,
#endif

#endif

  /* Run-time tuning hook defined later in this file. */
  init_parameter,

  SNUMOPT, DNUMOPT, QNUMOPT,

  /* ---- axpby, out-of-place/in-place matrix copy and geadd kernels ---- */
  saxpby_k_BANIAS, daxpby_k_BANIAS, caxpby_k_BANIAS, zaxpby_k_BANIAS,

  somatcopy_k_cn_BANIAS, somatcopy_k_ct_BANIAS, somatcopy_k_rn_BANIAS, somatcopy_k_rt_BANIAS,
  domatcopy_k_cn_BANIAS, domatcopy_k_ct_BANIAS, domatcopy_k_rn_BANIAS, domatcopy_k_rt_BANIAS,
  comatcopy_k_cn_BANIAS, comatcopy_k_ct_BANIAS, comatcopy_k_rn_BANIAS, comatcopy_k_rt_BANIAS,
  comatcopy_k_cnc_BANIAS, comatcopy_k_ctc_BANIAS, comatcopy_k_rnc_BANIAS, comatcopy_k_rtc_BANIAS,
  zomatcopy_k_cn_BANIAS, zomatcopy_k_ct_BANIAS, zomatcopy_k_rn_BANIAS, zomatcopy_k_rt_BANIAS,
  zomatcopy_k_cnc_BANIAS, zomatcopy_k_ctc_BANIAS, zomatcopy_k_rnc_BANIAS, zomatcopy_k_rtc_BANIAS,

  simatcopy_k_cn_BANIAS, simatcopy_k_ct_BANIAS, simatcopy_k_rn_BANIAS, simatcopy_k_rt_BANIAS,
  dimatcopy_k_cn_BANIAS, dimatcopy_k_ct_BANIAS, dimatcopy_k_rn_BANIAS, dimatcopy_k_rt_BANIAS,
  cimatcopy_k_cn_BANIAS, cimatcopy_k_ct_BANIAS, cimatcopy_k_rn_BANIAS, cimatcopy_k_rt_BANIAS,
  cimatcopy_k_cnc_BANIAS, cimatcopy_k_ctc_BANIAS, cimatcopy_k_rnc_BANIAS, cimatcopy_k_rtc_BANIAS,
  zimatcopy_k_cn_BANIAS, zimatcopy_k_ct_BANIAS, zimatcopy_k_rn_BANIAS, zimatcopy_k_rt_BANIAS,
  zimatcopy_k_cnc_BANIAS, zimatcopy_k_ctc_BANIAS, zimatcopy_k_rnc_BANIAS, zimatcopy_k_rtc_BANIAS,

  sgeadd_k_BANIAS, dgeadd_k_BANIAS, cgeadd_k_BANIAS, zgeadd_k_BANIAS

};

#ifdef ARCH_X86
/*
 * Fallback L2 cache size lookup based on the CPUID leaf 2 descriptor bytes.
 *
 * Executes cpuid(2), splits EAX (upper three bytes only), EBX, ECX and EDX
 * into 15 one-byte descriptors, and returns the size associated with the
 * first descriptor that appears in the table below.  The sizes are
 * presumably in KB, matching what get_l2_size() reads from leaf 0x80000006
 * -- confirm against the callers.
 *
 * Returns 0 when none of the descriptors is recognised.
 */
static int get_l2_size_old(void){
  int eax, ebx, ecx, edx;
  int regs[4];
  int info[15];
  int r, b, i;
  int count = 0;

  cpuid(2, &eax, &ebx, &ecx, &edx);

  regs[0] = eax;
  regs[1] = ebx;
  regs[2] = ecx;
  regs[3] = edx;

  /* Unpack in the order EAX[15:8] .. EAX[31:24], then all four bytes of
     EBX, ECX and EDX.  The low byte of EAX is skipped; per the Intel SDM it
     is not a cache descriptor. */
  for (r = 0; r < 4; r++){
    int reg = regs[r];

    for (b = (r == 0) ? 1 : 0; b < 4; b++){
      int shift = 8 * b;

      info[count] = BITMASK(reg, shift, 0xff);
      count++;
    }
  }

  for (i = 0; i < count; i++){

    switch (info[i]){

      /* This table is from http://www.sandpile.org/ia32/cpuid.htm */

    case 0x1a :
      return 96;

    case 0x39 :
    case 0x3b :
    case 0x41 :
    case 0x79 :
    case 0x81 :
      return 128;

    case 0x3a :
      return 192;

    case 0x21 :
    case 0x3c :
    case 0x42 :
    case 0x7a :
    case 0x7e :
    case 0x82 :
      return 256;

    case 0x3d :
      return 384;

    case 0x3e :
    case 0x43 :
    case 0x7b :
    case 0x7f :
    case 0x83 :
    case 0x86 :
      return 512;

    case 0x44 :
    case 0x78 :
    case 0x7c :
    case 0x84 :
    case 0x87 :
      return 1024;

    case 0x45 :
    case 0x7d :
    case 0x85 :
      return 2048;

    case 0x48 :
      /* Descriptor 0x48 is a 3 MB L2 cache, i.e. 3 * 1024 = 3072
         (the previous value 3184 was a typo). */
      return 3072;

    case 0x49 :
      return 4096;

    case 0x4e :
      return 6144;

    default :
      /* Not an L2 descriptor known to this table; keep scanning. */
      break;
    }
  }
  return 0;
}
#endif

/*
 * Report the L2 cache size.
 *
 * Reads CPUID leaf 0x80000006 and takes the field BITMASK(ecx, 16, 0xffff)
 * (presumably ECX bits 31..16, the L2 size in KB -- confirm against the
 * CPUID documentation).  On ARCH_X86 builds a non-positive value falls back
 * to the descriptor-based lookup in get_l2_size_old(); on other builds the
 * field is returned unchanged.
 */
static __inline__ int get_l2_size(void){

  int eax, ebx, ecx, edx;
  int size;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  size = BITMASK(ecx, 16, 0xffff);

#ifdef ARCH_X86
  /* The extended leaf reported nothing usable: try the legacy leaf 2. */
  if (size <= 0) size = get_l2_size_old();
#endif

  return size;
}

/*
 * Report the L3 cache size.
 *
 * Reads CPUID leaf 0x80000006 and scales the field
 * BITMASK(edx, 18, 0x3fff) by 512 (presumably EDX bits 31..18 counted in
 * 512 KB units, giving a result in KB -- confirm against the CPUID
 * documentation).
 */
static __inline__ int get_l3_size(void){

  int l3;
  int eax, ebx, ecx, edx;

  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);

  l3 = BITMASK(edx, 18, 0x3fff) * 512;

  return l3;
}


static void init_parameter(void) {

  int l2 = get_l2_size();

  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;

#ifdef CGEMM3M_DEFAULT_Q
  TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else
  TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif

#ifdef ZGEMM3M_DEFAULT_Q
  TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else
  TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif

#ifdef EXPRECISION
  TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
  TABLE_NAME.xgemm_q = XGEMM_DEFAULT_Q;
  TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif

#if defined(CORE_KATMAI)  || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON)

#ifdef DEBUG
  fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
#endif

  TABLE_NAME.sgemm_p =  64 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  32 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  32 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  16 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  16 * (l2 >> 7);
  TABLE_NAME.xgemm_p =   8 * (l2 >> 7);
#endif
#endif

#ifdef CORE_NORTHWOOD

#ifdef DEBUG
  fprintf(stderr, "Northwood\n");
#endif

  TABLE_NAME.sgemm_p =  96 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  48 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  48 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  24 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  24 * (l2 >> 7);
  TABLE_NAME.xgemm_p =  12 * (l2 >> 7);
#endif
#endif

#ifdef ATOM

#ifdef DEBUG
  fprintf(stderr, "Atom\n");
#endif

  TABLE_NAME.sgemm_p = 256;
  TABLE_NAME.dgemm_p = 128;
  TABLE_NAME.cgemm_p = 128;
  TABLE_NAME.zgemm_p =  64;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  64;
  TABLE_NAME.xgemm_p =  32;
#endif
#endif

#ifdef CORE_PRESCOTT

#ifdef DEBUG
  fprintf(stderr, "Prescott\n");
#endif

  TABLE_NAME.sgemm_p =  56 * (l2 >> 7);
  TABLE_NAME.dgemm_p =  28 * (l2 >> 7);
  TABLE_NAME.cgemm_p =  28 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  14 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  14 * (l2 >> 7);
  TABLE_NAME.xgemm_p =   7 * (l2 >> 7);
#endif
#endif

#ifdef CORE2

#ifdef DEBUG
  fprintf(stderr, "Core2\n");
#endif

  TABLE_NAME.sgemm_p =  92 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  46 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  46 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  23 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  92 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  46 * (l2 >> 9) + 4;
#endif
#endif

#ifdef PENRYN

#ifdef DEBUG
  fprintf(stderr, "Penryn\n");
#endif

  TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
#endif
#endif

#ifdef DUNNINGTON

#ifdef DEBUG
  fprintf(stderr, "Dunnington\n");
#endif

  TABLE_NAME.sgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.dgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.cgemm_p =  21 * (l2 >> 9) + 4;
  TABLE_NAME.zgemm_p =  21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  42 * (l2 >> 9) + 8;
  TABLE_NAME.xgemm_p =  21 * (l2 >> 9) + 4;
#endif
#endif


#ifdef NEHALEM

#ifdef DEBUG
  fprintf(stderr, "Nehalem\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef SANDYBRIDGE

#ifdef DEBUG
  fprintf(stderr, "Sandybridge\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef HASWELL

#ifdef DEBUG
  fprintf(stderr, "Haswell\n");
#endif

  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef OPTERON

#ifdef DEBUG
  fprintf(stderr, "Opteron\n");
#endif

  TABLE_NAME.sgemm_p = 224 +  56 * (l2 >> 7);
  TABLE_NAME.dgemm_p = 112 +  28 * (l2 >> 7);
  TABLE_NAME.cgemm_p = 112 +  28 * (l2 >> 7);
  TABLE_NAME.zgemm_p =  56 +  14 * (l2 >> 7);
#ifdef EXPRECISION
  TABLE_NAME.qgemm_p =  56 +  14 * (l2 >> 7);
  TABLE_NAME.xgemm_p =  28 +   7 * (l2 >> 7);
#endif
#endif

#ifdef BARCELONA

  /* Compiled only when BARCELONA is defined.  With DEBUG also defined, the
     architecture name is written to stderr. */
#ifdef DEBUG
  fprintf(stderr, "Barcelona\n");
#endif

  /* Each gemm_p entry takes the value of its *_DEFAULT_P macro (the macros
     are defined outside this part of the file). */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  /* The q/x entries are only assigned when EXPRECISION is defined. */
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef BOBCAT

  /* Compiled only when BOBCAT is defined.  With DEBUG also defined, the
     architecture name is written to stderr (spelling matches the BOBCAT
     macro). */
#ifdef DEBUG
  fprintf(stderr, "Bobcat\n");
#endif

  /* Each gemm_p entry takes the value of its *_DEFAULT_P macro (the macros
     are defined outside this part of the file). */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  /* The q/x entries are only assigned when EXPRECISION is defined. */
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef BULLDOZER

  /* Compiled only when BULLDOZER is defined.  With DEBUG also defined, the
     architecture name is written to stderr. */
#ifdef DEBUG
  fprintf(stderr, "Bulldozer\n");
#endif

  /* Each gemm_p entry takes the value of its *_DEFAULT_P macro (the macros
     are defined outside this part of the file). */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  /* The q/x entries are only assigned when EXPRECISION is defined. */
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef PILEDRIVER

  /* Compiled only when PILEDRIVER is defined.  With DEBUG also defined, the
     architecture name is written to stderr. */
#ifdef DEBUG
  fprintf(stderr, "Piledriver\n");
#endif

  /* Each gemm_p entry takes the value of its *_DEFAULT_P macro (the macros
     are defined outside this part of the file). */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  /* The q/x entries are only assigned when EXPRECISION is defined. */
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif

#ifdef STEAMROLLER

  /* Compiled only when STEAMROLLER is defined.  With DEBUG also defined, the
     architecture name is written to stderr. */
#ifdef DEBUG
  fprintf(stderr, "Steamroller\n");
#endif

  /* Each gemm_p entry takes the value of its *_DEFAULT_P macro (the macros
     are defined outside this part of the file). */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
  /* The q/x entries are only assigned when EXPRECISION is defined. */
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif


#ifdef NANO

  /* Compiled only when NANO is defined.  With DEBUG also defined, the
     architecture name is written to stderr. */
#ifdef DEBUG
  fprintf(stderr, "NANO\n");
#endif

  /* Each gemm_p entry takes the value of its *_DEFAULT_P macro (the macros
     are defined outside this part of the file). */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;



#ifdef EXPRECISION
  /* The q/x entries are only assigned when EXPRECISION is defined. */
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif

#endif


#ifdef CGEMM3M_DEFAULT_P
  /* An explicit CGEMM3M_DEFAULT_P, when defined, takes precedence. */
  TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else
  /* Otherwise reuse sgemm_p.  This copy is taken before sgemm_p is rounded to
     its unroll factor further down, so cgemm3m_p starts from the unrounded
     value and is rounded separately. */
  TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif

#ifdef ZGEMM3M_DEFAULT_P
  /* An explicit ZGEMM3M_DEFAULT_P, when defined, takes precedence. */
  TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else
  /* Otherwise reuse the (not yet rounded) dgemm_p. */
  TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif

#ifdef EXPRECISION
  /* xgemm3m_p has no macro override here; it always copies qgemm_p. */
  TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif



  /* Round every gemm_p up to the next multiple of the matching
     *_DEFAULT_UNROLL_M.  The rounding is done as ((p + U - 1) / U) * U rather
     than with the bit mask (p + U - 1) & ~(U - 1): the mask form is only a
     round-up-to-multiple when U is a power of two, whereas the division form
     is correct for any positive U and gives the same result for non-negative
     p when U is a power of two. */
  TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1) / SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
  TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1) / DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
  TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1) / CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
  TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1) / ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;

  /* The 3M variants round to their own unroll factor when one is defined and
     otherwise to the real single/double precision factor. */
#ifdef CGEMM3M_DEFAULT_UNROLL_M
  TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1) / CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
#else
  TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1) / SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif

#ifdef ZGEMM3M_DEFAULT_UNROLL_M
  TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1) / ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
#else
  TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1) / DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif

  /* NOTE(review): this section is guarded by QUAD_PRECISION, while every
     other assignment to the q/x entries in this part of the function is
     guarded by EXPRECISION.  Guard kept as-is; confirm whether EXPRECISION
     was intended.  xgemm3m_p is rounded with the QGEMM unroll factor. */
#ifdef QUAD_PRECISION
  TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1) / QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
  TABLE_NAME.xgemm_p = ((TABLE_NAME.xgemm_p + XGEMM_DEFAULT_UNROLL_M - 1) / XGEMM_DEFAULT_UNROLL_M) * XGEMM_DEFAULT_UNROLL_M;
  TABLE_NAME.xgemm3m_p = ((TABLE_NAME.xgemm3m_p + QGEMM_DEFAULT_UNROLL_M - 1) / QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
#endif

  /* Debug builds report l2 and the rounded dgemm_p on stderr. */
#ifdef DEBUG
  fprintf(stderr, "L2 = %8d DGEMM_P  .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif

  /* Derive each gemm_r from the gemm_p / gemm_q values already stored in the
     table.  Every assignment below has the same shape:

       used = (p * q * size + offsetA + align) & ~align
       r    = (((BUFFER_SIZE - used) / (q * size)) - 15) & ~15

     i.e. the p*q*size footprint plus offsetA is rounded using the align mask,
     subtracted from BUFFER_SIZE, the remainder is divided by q*size, and the
     result is reduced by 15 and has its low four bits cleared, so it is a
     multiple of 16.
     The literal 'size' factor is 4 (s), 8 (d, c, c3m), 16 (q, z, z3m) and
     32 (x, x3m).  NOTE(review): presumably bytes per element -- confirm,
     in particular the 16/32 used for the q/x types.
     NOTE(review): the gemm_q fields are set outside the lines shown here; a
     zero gemm_q would make these divisions divide by zero. */
  TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q *  4 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.sgemm_q *  4) - 15) & ~15);

  TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q *  8 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.dgemm_q *  8) - 15) & ~15);

#ifdef EXPRECISION
  TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.qgemm_p * TABLE_NAME.qgemm_q * 16 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
#endif

  TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q *  8 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.cgemm_q *  8) - 15) & ~15);

  TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);

  /* The 3M variants use their own p/q fields but the same size factor as the
     corresponding complex type. */
  TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q *  8 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.cgemm3m_q *  8) - 15) & ~15);

  TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
			       ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);




#ifdef EXPRECISION
  /* The x entries are only computed when EXPRECISION is defined. */
  TABLE_NAME.xgemm_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.xgemm_p * TABLE_NAME.xgemm_q * 32 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
		       ) / (TABLE_NAME.xgemm_q * 32) - 15) & ~15);

  TABLE_NAME.xgemm3m_r = (((BUFFER_SIZE -
			       ((TABLE_NAME.xgemm3m_p * TABLE_NAME.xgemm3m_q * 32 + TABLE_NAME.offsetA
				 + TABLE_NAME.align) & ~TABLE_NAME.align)
		       ) / (TABLE_NAME.xgemm3m_q * 32) - 15) & ~15);

#endif



}
