/* @cond INNERDOC */
/*!
 @file
 @brief
 Performance kernels dispatching code, for each type, submatrix size, operation.
 For block coordinates format.
 Kernels unrolled, with no loops, for only user-specified blockings.
 */

/*

Copyright (C) 2008-2022 Michele Martone

This file is part of librsb.

librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.

*/
/*
 The code in this file was generated automatically by an M4 script. 
 It is not meant to be used as an API (Application Programming Interface).
 p.s.: right now, only row major matrix access is considered.

 */
/*!
 @file
 @brief
 Performance kernels dispatching code, for each type, submatrix size, operation.
 For block coordinates format.
 Kernels unrolled, with no loops, for only user-specified blockings.
 */

/*

Copyright (C) 2008-2022 Michele Martone

This file is part of librsb.

librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.

*/
/*
 The code in this file was generated automatically by an M4 script. 
 It is not meant to be used as an API (Application Programming Interface).
 p.s.: right now, only row major matrix access is considered.

 */
#include "rsb.h"
#include "rsb_common.h"
#include "rsb_internals.h"


#pragma GCC visibility push(hidden)


rsb_err_t rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[column(n)] is added to out[row(n)].
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k + 0]] += VA[k + 0] * rhs[cols[k + 0]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[rows[k]] += VA[k] * rhs[cols[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[column(n)] is added to out[row(n)].
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k + 0]] += VA[k + 0] * rhs[cols[k + 0]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[rows[k]] += VA[k] * rhs[cols[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[row(n)] is added to out[column(n)],
	 * after rhs has been shifted by (roff-coff) elements and out by (coff-roff) elements.
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Swap the roles of the row and column offsets, since rows index rhs and columns index out here. */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[cols[k + 0]] += VA[k + 0] * rhs[rows[k + 0]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[row(n)] is added to out[column(n)],
	 * after rhs has been shifted by (roff-coff) elements and out by (coff-roff) elements.
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Swap the roles of the row and column offsets, since rows index rhs and columns index out here. */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[cols[k + 0]] += VA[k + 0] * rhs[rows[k + 0]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The values are real (double), so VA[n] is used as stored, with no conjugation step.
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[row(n)] is added to out[column(n)],
	 * after rhs has been shifted by (roff-coff) elements and out by (coff-roff) elements.
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Swap the roles of the row and column offsets, since rows index rhs and columns index out here. */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[cols[k + 0]] += VA[k + 0] * rhs[rows[k + 0]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The values are real (double), so VA[n] is used as stored, with no conjugation step.
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[row(n)] is added to out[column(n)],
	 * after rhs has been shifted by (roff-coff) elements and out by (coff-roff) elements.
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Swap the roles of the row and column offsets, since rows index rhs and columns index out here. */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[cols[k + 0]] += VA[k + 0] * rhs[rows[k + 0]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Symmetric sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Every stored entry n contributes VA[n]*rhs[column(n)] to out[row(n)] and also a mirrored term:
	 * - if roff==coff, VA[n]*rhs[row(n)] is added to out[column(n)], unless row(n)==column(n);
	 * - otherwise the mirrored term is always added, reading rhs shifted by (roff-coff) and writing out shifted by (coff-roff).
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: entries with row==column must not be counted twice. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k)
		{
			const rsb_coo_idx_t r = rows[k];
			const rsb_coo_idx_t c = cols[k];

			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
		}
	}
	else
	{
		/* Different offsets: mirrored terms go through shifted views of rhs and out. */
		const double *trhs = rhs + (roff - coff);// S
		double *tout = out + (coff - roff);
		rsb_coo_idx_t r = 0, c = 0;

		/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k + 0]; c = cols[k + 0];
			out[r] += VA[k + 0] * rhs[c];
			tout[c] += VA[k + 0] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			tout[c] += VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			tout[c] += VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			tout[c] += VA[k + 3] * trhs[r];

			k += 4;
		}
		/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			tout[c] += VA[k] * trhs[r];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Symmetric sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Every stored entry n contributes VA[n]*rhs[column(n)] to out[row(n)] and also a mirrored term:
	 * - if roff==coff, VA[n]*rhs[row(n)] is added to out[column(n)], unless row(n)==column(n);
	 * - otherwise the mirrored term is always added, reading rhs shifted by (roff-coff) and writing out shifted by (coff-roff).
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: entries with row==column must not be counted twice. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k)
		{
			const rsb_half_idx_t r = rows[k];
			const rsb_half_idx_t c = cols[k];

			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
		}
	}
	else
	{
		/* Different offsets: mirrored terms go through shifted views of rhs and out. */
		const double *trhs = rhs + (roff - coff);// S
		double *tout = out + (coff - roff);
		rsb_half_idx_t r = 0, c = 0;

		/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k + 0]; c = cols[k + 0];
			out[r] += VA[k + 0] * rhs[c];
			tout[c] += VA[k + 0] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			tout[c] += VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			tout[c] += VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			tout[c] += VA[k + 3] * trhs[r];

			k += 4;
		}
		/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			tout[c] += VA[k] * trhs[r];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed symmetric sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* With a symmetric operand the transposed product is the plain product: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed symmetric sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* With a symmetric operand the transposed product is the plain product: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed symmetric sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* Symmetric transposed reverts to symmetric not transposed: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed symmetric sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* Symmetric transposed reverts to symmetric not transposed: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The values are real (double), so VA[n] is used as stored for the mirrored term too.
	 * Every stored entry n contributes VA[n]*rhs[column(n)] to out[row(n)] and also a mirrored term:
	 * - if roff==coff, VA[n]*rhs[row(n)] is added to out[column(n)], unless row(n)==column(n);
	 * - otherwise the mirrored term is always added, reading rhs shifted by (roff-coff) and writing out shifted by (coff-roff).
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: entries with row==column must not be counted twice. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k)
		{
			const rsb_coo_idx_t r = rows[k];
			const rsb_coo_idx_t c = cols[k];

			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
		}
	}
	else
	{
		/* Different offsets: mirrored terms go through shifted views of rhs and out. */
		const double *trhs = rhs + (roff - coff);// H
		double *tout = out + (coff - roff);
		rsb_coo_idx_t r = 0, c = 0;

		/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k + 0]; c = cols[k + 0];
			out[r] += VA[k + 0] * rhs[c];
			tout[c] += VA[k + 0] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			tout[c] += VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			tout[c] += VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			tout[c] += VA[k + 3] * trhs[r];

			k += 4;
		}
		/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			tout[c] += VA[k] * trhs[r];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The values are real (double), so VA[n] is used as stored for the mirrored term too.
	 * Every stored entry n contributes VA[n]*rhs[column(n)] to out[row(n)] and also a mirrored term:
	 * - if roff==coff, VA[n]*rhs[row(n)] is added to out[column(n)], unless row(n)==column(n);
	 * - otherwise the mirrored term is always added, reading rhs shifted by (roff-coff) and writing out shifted by (coff-roff).
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: entries with row==column must not be counted twice. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k)
		{
			const rsb_half_idx_t r = rows[k];
			const rsb_half_idx_t c = cols[k];

			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
		}
	}
	else
	{
		/* Different offsets: mirrored terms go through shifted views of rhs and out. */
		const double *trhs = rhs + (roff - coff);// H
		double *tout = out + (coff - roff);
		rsb_half_idx_t r = 0, c = 0;

		/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k + 0]; c = cols[k + 0];
			out[r] += VA[k + 0] * rhs[c];
			tout[c] += VA[k + 0] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			tout[c] += VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			tout[c] += VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			tout[c] += VA[k + 3] * trhs[r];

			k += 4;
		}
		/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			tout[c] += VA[k] * trhs[r];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed hermitian sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* Symmetric transposed reverts to symmetric not transposed: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed hermitian sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* Symmetric transposed reverts to symmetric not transposed: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed hermitian sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* Symmetric transposed reverts to symmetric not transposed: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed hermitian sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */
	rsb_err_t errval;

	/* Symmetric transposed reverts to symmetric not transposed: reuse that kernel. */
	errval = rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dE_uG(VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr, br, bc, roff, coff, flags, nnz);
	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[column(n)] is added to out[row(n)].
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k + 0]] += VA[k + 0] * rhs[cols[k + 0]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[rows[k]] += VA[k] * rhs[cols[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[column(n)] is added to out[row(n)].
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k + 0]] += VA[k + 0] * rhs[cols[k + 0]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[rows[k]] += VA[k] * rhs[cols[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[row(n)] is added to out[column(n)],
	 * after rhs has been shifted by (roff-coff) elements and out by (coff-roff) elements.
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Swap the roles of the row and column offsets, since rows index rhs and columns index out here. */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[cols[k + 0]] += VA[k + 0] * rhs[rows[k + 0]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed sparse matrix-vector multiply-accumulate kernel.
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored entries n, in storage order, VA[n]*rhs[row(n)] is added to out[column(n)],
	 * after rhs has been shifted by (roff-coff) elements and out by (coff-roff) elements.
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 *
	 * \return \rsb_errval_inp_param_msg (this implementation always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Swap the roles of the row and column offsets, since rows index rhs and columns index out here. */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* Unrolled main loop: four nonzeroes per pass, processed in storage order. */
	while (k + 3 < nnz)
	{
		out[cols[k + 0]] += VA[k + 0] * rhs[rows[k + 0]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* Tail loop: the nonzeroes left over by the unrolled loop (fewer than four). */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a conjugate-transposed product: \f$y \leftarrow y + {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The values are of a real type and are used as stored (no conjugation step appears in the code).
	 * For each of the \c nnz nonzeroes, VA[k]*rhs[row] is added to out[column]; row indices are
	 * read from \c bpntr, column indices from \c bindx, and both vectors are first shifted using
	 * \c roff and \c coff.
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	/* shifted views of the operands: same arithmetic as rebasing rhs and out in place */
	const double * const x=(rhs-coff)+roff;
	double * const y=(out-roff)+coff;
	rsb_nnz_idx_t k=0;

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		y[colidx[k  ]]+=VA[k  ]*x[rowidx[k  ]];
		y[colidx[k+1]]+=VA[k+1]*x[rowidx[k+1]];
		y[colidx[k+2]]+=VA[k+2]*x[rowidx[k+2]];
		y[colidx[k+3]]+=VA[k+3]*x[rowidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		y[colidx[k]]+=VA[k]*x[rowidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a conjugate-transposed product: \f$y \leftarrow y + {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The values are of a real type and are used as stored (no conjugation step appears in the code).
	 * For each of the \c nnz nonzeroes, VA[k]*rhs[row] is added to out[column]; row indices are
	 * read from \c bpntr (accessed as an array of rsb_half_idx_t), column indices from \c bindx,
	 * and both vectors are first shifted using \c roff and \c coff.
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	/* shifted views of the operands: same arithmetic as rebasing rhs and out in place */
	const double * const x=(rhs-coff)+roff;
	double * const y=(out-roff)+coff;
	rsb_nnz_idx_t k=0;

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		y[colidx[k  ]]+=VA[k  ]*x[rowidx[k  ]];
		y[colidx[k+1]]+=VA[k+1]*x[rowidx[k+1]];
		y[colidx[k+2]]+=VA[k+2]*x[rowidx[k+2]];
		y[colidx[k+3]]+=VA[k+3]*x[rowidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		y[colidx[k]]+=VA[k]*x[rowidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a symmetric product: \f$y \leftarrow y + {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Every stored nonzero (row from \c bpntr, column from \c bindx) is applied twice: once as
	 * stored and once mirrored. When \c roff equals \c coff the mirrored update is skipped for
	 * entries with row==column; otherwise the mirrored update goes through views of \c rhs and
	 * \c out shifted by (roff-coff) and (coff-roff) respectively.
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* equal offsets: entries with row==column must contribute only once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			r=rowidx[k];
			c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* different offsets: mirrored updates use the shifted views */
		const double * const xt=rhs+(roff-coff);
		double * const yt=out+(coff-roff);

		/* main part: four nonzeroes per iteration, visited in storage order */
		while(k+3<nnz)
		{
			r=rowidx[k  ]; c=colidx[k  ];
			out[r]+=VA[k  ]*rhs[c];
			yt[c]+=VA[k  ]*xt[r];
			r=rowidx[k+1]; c=colidx[k+1];
			out[r]+=VA[k+1]*rhs[c];
			yt[c]+=VA[k+1]*xt[r];
			r=rowidx[k+2]; c=colidx[k+2];
			out[r]+=VA[k+2]*rhs[c];
			yt[c]+=VA[k+2]*xt[r];
			r=rowidx[k+3]; c=colidx[k+3];
			out[r]+=VA[k+3]*rhs[c];
			yt[c]+=VA[k+3]*xt[r];
			k+=4;
		}
		/* tail: whatever is left (at most three nonzeroes) */
		while(k<nnz)
		{
			r=rowidx[k]; c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			yt[c]+=VA[k]*xt[r];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a symmetric product: \f$y \leftarrow y + {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Every stored nonzero (row from \c bpntr accessed as rsb_half_idx_t, column from \c bindx)
	 * is applied twice: once as stored and once mirrored. When \c roff equals \c coff the
	 * mirrored update is skipped for entries with row==column; otherwise the mirrored update
	 * goes through views of \c rhs and \c out shifted by (roff-coff) and (coff-roff) respectively.
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* equal offsets: entries with row==column must contribute only once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			r=rowidx[k];
			c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* different offsets: mirrored updates use the shifted views */
		const double * const xt=rhs+(roff-coff);
		double * const yt=out+(coff-roff);

		/* main part: four nonzeroes per iteration, visited in storage order */
		while(k+3<nnz)
		{
			r=rowidx[k  ]; c=colidx[k  ];
			out[r]+=VA[k  ]*rhs[c];
			yt[c]+=VA[k  ]*xt[r];
			r=rowidx[k+1]; c=colidx[k+1];
			out[r]+=VA[k+1]*rhs[c];
			yt[c]+=VA[k+1]*xt[r];
			r=rowidx[k+2]; c=colidx[k+2];
			out[r]+=VA[k+2]*rhs[c];
			yt[c]+=VA[k+2]*xt[r];
			r=rowidx[k+3]; c=colidx[k+3];
			out[r]+=VA[k+3]*rhs[c];
			yt[c]+=VA[k+3]*xt[r];
			k+=4;
		}
		/* tail: whatever is left (at most three nonzeroes) */
		while(k<nnz)
		{
			r=rowidx[k]; c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			yt[c]+=VA[k]*xt[r];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a transposed symmetric product: \f$y \leftarrow y + {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* With a symmetric matrix the transposed operation is the non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a transposed symmetric product: \f$y \leftarrow y + {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* With a symmetric matrix the transposed operation is the non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a conjugate-transposed symmetric product: \f$y \leftarrow y + {A^H} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* With a symmetric matrix the transposed operation is the non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a conjugate-transposed symmetric product: \f$y \leftarrow y + {A^H} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* With a symmetric matrix the transposed operation is the non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a hermitian product: \f$y \leftarrow y + {A} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The values are of a real type and are used as stored (no conjugation step appears in the code).
	 * Every stored nonzero (row from \c bpntr, column from \c bindx) is applied twice: once as
	 * stored and once mirrored. When \c roff equals \c coff the mirrored update is skipped for
	 * entries with row==column; otherwise the mirrored update goes through views of \c rhs and
	 * \c out shifted by (roff-coff) and (coff-roff) respectively.
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* equal offsets: entries with row==column must contribute only once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			r=rowidx[k];
			c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* different offsets: mirrored updates use the shifted views */
		const double * const xt=rhs+(roff-coff);
		double * const yt=out+(coff-roff);

		/* main part: four nonzeroes per iteration, visited in storage order */
		while(k+3<nnz)
		{
			r=rowidx[k  ]; c=colidx[k  ];
			out[r]+=VA[k  ]*rhs[c];
			yt[c]+=VA[k  ]*xt[r];
			r=rowidx[k+1]; c=colidx[k+1];
			out[r]+=VA[k+1]*rhs[c];
			yt[c]+=VA[k+1]*xt[r];
			r=rowidx[k+2]; c=colidx[k+2];
			out[r]+=VA[k+2]*rhs[c];
			yt[c]+=VA[k+2]*xt[r];
			r=rowidx[k+3]; c=colidx[k+3];
			out[r]+=VA[k+3]*rhs[c];
			yt[c]+=VA[k+3]*xt[r];
			k+=4;
		}
		/* tail: whatever is left (at most three nonzeroes) */
		while(k<nnz)
		{
			r=rowidx[k]; c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			yt[c]+=VA[k]*xt[r];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a hermitian product: \f$y \leftarrow y + {A} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The values are of a real type and are used as stored (no conjugation step appears in the code).
	 * Every stored nonzero (row from \c bpntr accessed as rsb_half_idx_t, column from \c bindx)
	 * is applied twice: once as stored and once mirrored. When \c roff equals \c coff the
	 * mirrored update is skipped for entries with row==column; otherwise the mirrored update
	 * goes through views of \c rhs and \c out shifted by (roff-coff) and (coff-roff) respectively.
	 * \c Mdim, \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* equal offsets: entries with row==column must contribute only once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			r=rowidx[k];
			c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* different offsets: mirrored updates use the shifted views */
		const double * const xt=rhs+(roff-coff);
		double * const yt=out+(coff-roff);

		/* main part: four nonzeroes per iteration, visited in storage order */
		while(k+3<nnz)
		{
			r=rowidx[k  ]; c=colidx[k  ];
			out[r]+=VA[k  ]*rhs[c];
			yt[c]+=VA[k  ]*xt[r];
			r=rowidx[k+1]; c=colidx[k+1];
			out[r]+=VA[k+1]*rhs[c];
			yt[c]+=VA[k+1]*xt[r];
			r=rowidx[k+2]; c=colidx[k+2];
			out[r]+=VA[k+2]*rhs[c];
			yt[c]+=VA[k+2]*xt[r];
			r=rowidx[k+3]; c=colidx[k+3];
			out[r]+=VA[k+3]*rhs[c];
			yt[c]+=VA[k+3]*xt[r];
			k+=4;
		}
		/* tail: whatever is left (at most three nonzeroes) */
		while(k<nnz)
		{
			r=rowidx[k]; c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			yt[c]+=VA[k]*xt[r];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a transposed hermitian product: \f$y \leftarrow y + {A^T} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* The symmetric transposed case reverts to the symmetric non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a transposed hermitian product: \f$y \leftarrow y + {A^T} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* The symmetric transposed case reverts to the symmetric non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_C__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a conjugate-transposed hermitian product: \f$y \leftarrow y + {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* The symmetric transposed case reverts to the symmetric non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_double_H__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates a conjugate-transposed hermitian product: \f$y \leftarrow y + {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel.
	 * \return \rsb_errval_inp_param_msg (the value returned by the non-transposed kernel)
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* The symmetric transposed case reverts to the symmetric non-transposed one: delegate. */
	const rsb_err_t errval=rsb__BCOR_spmv_uaua_double_H__tN_r1_c1_uu_sH_dI_uG(
		VA,rhs,out,
		Mdim,mdim,
		bindx,bpntr,indptr,
		rpntr,cpntr,
		br,bc,
		roff,coff,
		flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting product: \f$y \leftarrow {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * \c out is first passed to rsb__cblas_Xscal over \c Mdim elements with a NULL factor
	 * (presumably resetting it to zero, consistently with the formula above; confirm against
	 * rsb__cblas_Xscal); then, for each of the \c nnz nonzeroes, VA[k]*rhs[column] is added to
	 * out[row]. Row indices are read from \c bpntr, column indices from \c bindx.
	 * \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c roff, \c coff, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,Mdim,NULL,out,1);

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		out[rowidx[k  ]]+=VA[k  ]*rhs[colidx[k  ]];
		out[rowidx[k+1]]+=VA[k+1]*rhs[colidx[k+1]];
		out[rowidx[k+2]]+=VA[k+2]*rhs[colidx[k+2]];
		out[rowidx[k+3]]+=VA[k+3]*rhs[colidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		out[rowidx[k]]+=VA[k]*rhs[colidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting product: \f$y \leftarrow {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * \c out is first passed to rsb__cblas_Xscal over \c Mdim elements with a NULL factor
	 * (presumably resetting it to zero, consistently with the formula above; confirm against
	 * rsb__cblas_Xscal); then, for each of the \c nnz nonzeroes, VA[k]*rhs[column] is added to
	 * out[row]. Row indices are read from \c bpntr (accessed as an array of rsb_half_idx_t),
	 * column indices from \c bindx.
	 * \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c roff, \c coff, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,Mdim,NULL,out,1);

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		out[rowidx[k  ]]+=VA[k  ]*rhs[colidx[k  ]];
		out[rowidx[k+1]]+=VA[k+1]*rhs[colidx[k+1]];
		out[rowidx[k+2]]+=VA[k+2]*rhs[colidx[k+2]];
		out[rowidx[k+3]]+=VA[k+3]*rhs[colidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		out[rowidx[k]]+=VA[k]*rhs[colidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting transposed product: \f$y \leftarrow {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The unshifted \c out is first passed to rsb__cblas_Xscal over \c mdim elements with a
	 * NULL factor (presumably resetting it to zero; confirm against rsb__cblas_Xscal); then,
	 * for each of the \c nnz nonzeroes, VA[k]*rhs[row] is added to out[column], with both
	 * vectors shifted using \c roff and \c coff. Row indices are read from \c bpntr, column
	 * indices from \c bindx.
	 * \c Mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	/* shifted views of the operands: same arithmetic as rebasing rhs and out in place */
	const double * const x=(rhs-coff)+roff;
	double * const y=(out-roff)+coff;
	rsb_nnz_idx_t k=0;

	/* the scaling call receives the output vector as passed in, not the shifted view */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,mdim,NULL,out,1);

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		y[colidx[k  ]]+=VA[k  ]*x[rowidx[k  ]];
		y[colidx[k+1]]+=VA[k+1]*x[rowidx[k+1]];
		y[colidx[k+2]]+=VA[k+2]*x[rowidx[k+2]];
		y[colidx[k+3]]+=VA[k+3]*x[rowidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		y[colidx[k]]+=VA[k]*x[rowidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting transposed product: \f$y \leftarrow {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The unshifted \c out is first passed to rsb__cblas_Xscal over \c mdim elements with a
	 * NULL factor (presumably resetting it to zero; confirm against rsb__cblas_Xscal); then,
	 * for each of the \c nnz nonzeroes, VA[k]*rhs[row] is added to out[column], with both
	 * vectors shifted using \c roff and \c coff. Row indices are read from \c bpntr (accessed
	 * as an array of rsb_half_idx_t), column indices from \c bindx.
	 * \c Mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	/* shifted views of the operands: same arithmetic as rebasing rhs and out in place */
	const double * const x=(rhs-coff)+roff;
	double * const y=(out-roff)+coff;
	rsb_nnz_idx_t k=0;

	/* the scaling call receives the output vector as passed in, not the shifted view */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,mdim,NULL,out,1);

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		y[colidx[k  ]]+=VA[k  ]*x[rowidx[k  ]];
		y[colidx[k+1]]+=VA[k+1]*x[rowidx[k+1]];
		y[colidx[k+2]]+=VA[k+2]*x[rowidx[k+2]];
		y[colidx[k+3]]+=VA[k+3]*x[rowidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		y[colidx[k]]+=VA[k]*x[rowidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting conjugate-transposed product: \f$y \leftarrow {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The values are of a real type and are used as stored (no conjugation step appears in the code).
	 * The unshifted \c out is first passed to rsb__cblas_Xscal over \c mdim elements with a
	 * NULL factor (presumably resetting it to zero; confirm against rsb__cblas_Xscal); then,
	 * for each of the \c nnz nonzeroes, VA[k]*rhs[row] is added to out[column], with both
	 * vectors shifted using \c roff and \c coff. Row indices are read from \c bpntr, column
	 * indices from \c bindx.
	 * \c Mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	/* shifted views of the operands: same arithmetic as rebasing rhs and out in place */
	const double * const x=(rhs-coff)+roff;
	double * const y=(out-roff)+coff;
	rsb_nnz_idx_t k=0;

	/* the scaling call receives the output vector as passed in, not the shifted view */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,mdim,NULL,out,1);

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		y[colidx[k  ]]+=VA[k  ]*x[rowidx[k  ]];
		y[colidx[k+1]]+=VA[k+1]*x[rowidx[k+1]];
		y[colidx[k+2]]+=VA[k+2]*x[rowidx[k+2]];
		y[colidx[k+3]]+=VA[k+3]*x[rowidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		y[colidx[k]]+=VA[k]*x[rowidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting conjugate-transposed product: \f$y \leftarrow {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The values are of a real type and are used as stored (no conjugation step appears in the code).
	 * The unshifted \c out is first passed to rsb__cblas_Xscal over \c mdim elements with a
	 * NULL factor (presumably resetting it to zero; confirm against rsb__cblas_Xscal); then,
	 * for each of the \c nnz nonzeroes, VA[k]*rhs[row] is added to out[column], with both
	 * vectors shifted using \c roff and \c coff. Row indices are read from \c bpntr (accessed
	 * as an array of rsb_half_idx_t), column indices from \c bindx.
	 * \c Mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	/* shifted views of the operands: same arithmetic as rebasing rhs and out in place */
	const double * const x=(rhs-coff)+roff;
	double * const y=(out-roff)+coff;
	rsb_nnz_idx_t k=0;

	/* the scaling call receives the output vector as passed in, not the shifted view */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,mdim,NULL,out,1);

	/* main part: four nonzeroes per iteration, visited in storage order */
	while(k+3<nnz)
	{
		y[colidx[k  ]]+=VA[k  ]*x[rowidx[k  ]];
		y[colidx[k+1]]+=VA[k+1]*x[rowidx[k+1]];
		y[colidx[k+2]]+=VA[k+2]*x[rowidx[k+2]];
		y[colidx[k+3]]+=VA[k+3]*x[rowidx[k+3]];
		k+=4;
	}
	/* tail: whatever is left (at most three nonzeroes) */
	while(k<nnz)
	{
		y[colidx[k]]+=VA[k]*x[rowidx[k]];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting symmetric product: \f$y \leftarrow {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * \c out is first passed to rsb__cblas_Xscal over \c Mdim elements with a NULL factor
	 * (presumably resetting it to zero; confirm against rsb__cblas_Xscal). Then every stored
	 * nonzero (row from \c bpntr, column from \c bindx) is applied twice: once as stored and
	 * once mirrored. When \c roff equals \c coff the mirrored update is skipped for entries
	 * with row==column; otherwise the mirrored update goes through views of \c rhs and \c out
	 * shifted by (roff-coff) and (coff-roff) respectively.
	 * \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* equal offsets: entries with row==column must contribute only once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			r=rowidx[k];
			c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* different offsets: mirrored updates use the shifted views */
		const double * const xt=rhs+(roff-coff);
		double * const yt=out+(coff-roff);

		/* main part: four nonzeroes per iteration, visited in storage order */
		while(k+3<nnz)
		{
			r=rowidx[k  ]; c=colidx[k  ];
			out[r]+=VA[k  ]*rhs[c];
			yt[c]+=VA[k  ]*xt[r];
			r=rowidx[k+1]; c=colidx[k+1];
			out[r]+=VA[k+1]*rhs[c];
			yt[c]+=VA[k+1]*xt[r];
			r=rowidx[k+2]; c=colidx[k+2];
			out[r]+=VA[k+2]*rhs[c];
			yt[c]+=VA[k+2]*xt[r];
			r=rowidx[k+3]; c=colidx[k+3];
			out[r]+=VA[k+3]*rhs[c];
			yt[c]+=VA[k+3]*xt[r];
			k+=4;
		}
		/* tail: whatever is left (at most three nonzeroes) */
		while(k<nnz)
		{
			r=rowidx[k]; c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			yt[c]+=VA[k]*xt[r];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Overwriting symmetric product: \f$y \leftarrow {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * \c out is first passed to rsb__cblas_Xscal over \c Mdim elements with a NULL factor
	 * (presumably resetting it to zero; confirm against rsb__cblas_Xscal). Then every stored
	 * nonzero (row from \c bpntr accessed as rsb_half_idx_t, column from \c bindx) is applied
	 * twice: once as stored and once mirrored. When \c roff equals \c coff the mirrored update
	 * is skipped for entries with row==column; otherwise the mirrored update goes through
	 * views of \c rhs and \c out shifted by (roff-coff) and (coff-roff) respectively.
	 * \c mdim, \c indptr, \c rpntr, \c cpntr, \c br, \c bc, \c flags are not referenced here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* equal offsets: entries with row==column must contribute only once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			r=rowidx[k];
			c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* different offsets: mirrored updates use the shifted views */
		const double * const xt=rhs+(roff-coff);
		double * const yt=out+(coff-roff);

		/* main part: four nonzeroes per iteration, visited in storage order */
		while(k+3<nnz)
		{
			r=rowidx[k  ]; c=colidx[k  ];
			out[r]+=VA[k  ]*rhs[c];
			yt[c]+=VA[k  ]*xt[r];
			r=rowidx[k+1]; c=colidx[k+1];
			out[r]+=VA[k+1]*rhs[c];
			yt[c]+=VA[k+1]*xt[r];
			r=rowidx[k+2]; c=colidx[k+2];
			out[r]+=VA[k+2]*rhs[c];
			yt[c]+=VA[k+2]*xt[r];
			r=rowidx[k+3]; c=colidx[k+3];
			out[r]+=VA[k+3]*rhs[c];
			yt[c]+=VA[k+3]*xt[r];
			k+=4;
		}
		/* tail: whatever is left (at most three nonzeroes) */
		while(k<nnz)
		{
			r=rowidx[k]; c=colidx[k];
			out[r]+=VA[k]*rhs[c];
			yt[c]+=VA[k]*xt[r];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_coo_idx_t column indices.
	 * As the matrix is symmetric, the transposed product is obtained by forwarding
	 * all arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_half_idx_t column indices.
	 * As the matrix is symmetric, the transposed product is obtained by forwarding
	 * all arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_coo_idx_t column indices.
	 * The symmetric conjugate-transposed case is served by forwarding all
	 * arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_half_idx_t column indices.
	 * The symmetric conjugate-transposed case is served by forwarding all
	 * arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A = A^H\f$: each stored
	 * nonzero (i,j) updates the output at i and, mirrored, at j (entries with i==j
	 * in a diagonal submatrix are applied only once). No conjugation is performed
	 * on the double values.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_coo_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* Submatrix on the diagonal: mirrored update skipped for diagonal entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rows[k], c=cols[k];

			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* Off-diagonal submatrix: mirrored updates use pointers shifted by the offsets difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Main loop: four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
			tout[cols[k  ]]+=VA[k  ]*trhs[rows[k  ]];
			out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
			tout[cols[k+1]]+=VA[k+1]*trhs[rows[k+1]];
			out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
			tout[cols[k+2]]+=VA[k+2]*trhs[rows[k+2]];
			out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
			tout[cols[k+3]]+=VA[k+3]*trhs[rows[k+3]];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			out[rows[k]]+=VA[k]*rhs[cols[k]];
			tout[cols[k]]+=VA[k]*trhs[rows[k]];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A = A^H\f$: each stored
	 * nonzero (i,j) updates the output at i and, mirrored, at j (entries with i==j
	 * in a diagonal submatrix are applied only once). No conjugation is performed
	 * on the double values.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_half_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* Submatrix on the diagonal: mirrored update skipped for diagonal entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_half_idx_t r=rows[k], c=cols[k];

			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* Off-diagonal submatrix: mirrored updates use pointers shifted by the offsets difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Main loop: four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
			tout[cols[k  ]]+=VA[k  ]*trhs[rows[k  ]];
			out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
			tout[cols[k+1]]+=VA[k+1]*trhs[rows[k+1]];
			out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
			tout[cols[k+2]]+=VA[k+2]*trhs[rows[k+2]];
			out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
			tout[cols[k+3]]+=VA[k+3]*trhs[rows[k+3]];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			out[rows[k]]+=VA[k]*rhs[cols[k]];
			tout[cols[k]]+=VA[k]*trhs[rows[k]];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A = A^H\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_coo_idx_t column indices.
	 * The transposed case is served by forwarding all arguments unchanged to the
	 * non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A = A^H\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_half_idx_t column indices.
	 * The transposed case is served by forwarding all arguments unchanged to the
	 * non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A = A^H\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_coo_idx_t column indices.
	 * As the matrix equals its conjugate transpose, all arguments are forwarded
	 * unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A = A^H\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, explicit diagonal, double values,
	 * rsb_half_idx_t column indices.
	 * As the matrix equals its conjugate transpose, all arguments are forwarded
	 * unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A \neq A^T\f$ (general matrix).
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	/* Main loop: four nonzeroes per pass. */
	for(k=0;k+3<nnz;k+=4)
	{
		out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
		out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
		out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
		out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
	}
	/* Leftover nonzeroes (at most three). */
	for(;k<nnz;++k)
	{
		out[rows[k]]+=VA[k]*rhs[cols[k]];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A \neq A^T\f$ (general matrix).
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_half_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	/* Main loop: four nonzeroes per pass. */
	for(k=0;k+3<nnz;k+=4)
	{
		out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
		out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
		out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
		out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
	}
	/* Leftover nonzeroes (at most three). */
	for(;k<nnz;++k)
	{
		out[rows[k]]+=VA[k]*rhs[cols[k]];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A \neq A^T\f$ (general matrix).
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	/* Operand pointers re-based for the transposed access: rhs is read by row index, out written by column index. */
	const double * const xs=(rhs-coff*(1))+roff*(1);
	double * const ys=(out-roff*(1))+coff*(1);
	rsb_nnz_idx_t k=0;

	/* Rescale the unshifted out (mdim elements) with a NULL factor (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,mdim,NULL,out,1);

	/* Main loop: four nonzeroes per pass. */
	for(k=0;k+3<nnz;k+=4)
	{
		ys[cols[k  ]]+=VA[k  ]*xs[rows[k  ]];
		ys[cols[k+1]]+=VA[k+1]*xs[rows[k+1]];
		ys[cols[k+2]]+=VA[k+2]*xs[rows[k+2]];
		ys[cols[k+3]]+=VA[k+3]*xs[rows[k+3]];
	}
	/* Leftover nonzeroes (at most three). */
	for(;k<nnz;++k)
	{
		ys[cols[k]]+=VA[k]*xs[rows[k]];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A \neq A^T\f$ (general matrix).
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_half_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	/* Operand pointers re-based for the transposed access: rhs is read by row index, out written by column index. */
	const double * const xs=(rhs-coff*(1))+roff*(1);
	double * const ys=(out-roff*(1))+coff*(1);
	rsb_nnz_idx_t k=0;

	/* Rescale the unshifted out (mdim elements) with a NULL factor (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,mdim,NULL,out,1);

	/* Main loop: four nonzeroes per pass. */
	for(k=0;k+3<nnz;k+=4)
	{
		ys[cols[k  ]]+=VA[k  ]*xs[rows[k  ]];
		ys[cols[k+1]]+=VA[k+1]*xs[rows[k+1]];
		ys[cols[k+2]]+=VA[k+2]*xs[rows[k+2]];
		ys[cols[k+3]]+=VA[k+3]*xs[rows[k+3]];
	}
	/* Leftover nonzeroes (at most three). */
	for(;k<nnz;++k)
	{
		ys[cols[k]]+=VA[k]*xs[rows[k]];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A \neq A^T\f$ (general matrix).
	 * No conjugation is performed on the double values.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	/* Operand pointers re-based for the transposed access: rhs is read by row index, out written by column index. */
	const double * const xs=(rhs-coff*(1))+roff*(1);
	double * const ys=(out-roff*(1))+coff*(1);
	rsb_nnz_idx_t k=0;

	/* Rescale the unshifted out (mdim elements) with a NULL factor (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,mdim,NULL,out,1);

	/* Main loop: four nonzeroes per pass. */
	for(k=0;k+3<nnz;k+=4)
	{
		ys[cols[k  ]]+=VA[k  ]*xs[rows[k  ]];
		ys[cols[k+1]]+=VA[k+1]*xs[rows[k+1]];
		ys[cols[k+2]]+=VA[k+2]*xs[rows[k+2]];
		ys[cols[k+3]]+=VA[k+3]*xs[rows[k+3]];
	}
	/* Leftover nonzeroes (at most three). */
	for(;k<nnz;++k)
	{
		ys[cols[k]]+=VA[k]*xs[rows[k]];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A \neq A^T\f$ (general matrix).
	 * No conjugation is performed on the double values.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_half_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	/* Operand pointers re-based for the transposed access: rhs is read by row index, out written by column index. */
	const double * const xs=(rhs-coff*(1))+roff*(1);
	double * const ys=(out-roff*(1))+coff*(1);
	rsb_nnz_idx_t k=0;

	/* Rescale the unshifted out (mdim elements) with a NULL factor (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,mdim,NULL,out,1);

	/* Main loop: four nonzeroes per pass. */
	for(k=0;k+3<nnz;k+=4)
	{
		ys[cols[k  ]]+=VA[k  ]*xs[rows[k  ]];
		ys[cols[k+1]]+=VA[k+1]*xs[rows[k+1]];
		ys[cols[k+2]]+=VA[k+2]*xs[rows[k+2]];
		ys[cols[k+3]]+=VA[k+3]*xs[rows[k+3]];
	}
	/* Leftover nonzeroes (at most three). */
	for(;k<nnz;++k)
	{
		ys[cols[k]]+=VA[k]*xs[rows[k]];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A = A^T\f$: each stored
	 * nonzero (i,j) updates the output at i and, mirrored, at j (entries with i==j
	 * in a diagonal submatrix are applied only once).
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* Submatrix on the diagonal: mirrored update skipped for diagonal entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rows[k], c=cols[k];

			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* Off-diagonal submatrix: mirrored updates use pointers shifted by the offsets difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Main loop: four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
			tout[cols[k  ]]+=VA[k  ]*trhs[rows[k  ]];
			out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
			tout[cols[k+1]]+=VA[k+1]*trhs[rows[k+1]];
			out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
			tout[cols[k+2]]+=VA[k+2]*trhs[rows[k+2]];
			out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
			tout[cols[k+3]]+=VA[k+3]*trhs[rows[k+3]];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			out[rows[k]]+=VA[k]*rhs[cols[k]];
			tout[cols[k]]+=VA[k]*trhs[rows[k]];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A = A^T\f$: each stored
	 * nonzero (i,j) updates the output at i and, mirrored, at j (entries with i==j
	 * in a diagonal submatrix are applied only once).
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_half_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* Submatrix on the diagonal: mirrored update skipped for diagonal entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_half_idx_t r=rows[k], c=cols[k];

			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* Off-diagonal submatrix: mirrored updates use pointers shifted by the offsets difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Main loop: four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
			tout[cols[k  ]]+=VA[k  ]*trhs[rows[k  ]];
			out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
			tout[cols[k+1]]+=VA[k+1]*trhs[rows[k+1]];
			out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
			tout[cols[k+2]]+=VA[k+2]*trhs[rows[k+2]];
			out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
			tout[cols[k+3]]+=VA[k+3]*trhs[rows[k+3]];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			out[rows[k]]+=VA[k]*rhs[cols[k]];
			tout[cols[k]]+=VA[k]*trhs[rows[k]];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t column indices.
	 * The implicit diagonal is not applied at this level: that is left to the caller.
	 * As the matrix is symmetric, the transposed product is obtained by forwarding
	 * all arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^T} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_half_idx_t column indices.
	 * The implicit diagonal is not applied at this level: that is left to the caller.
	 * As the matrix is symmetric, the transposed product is obtained by forwarding
	 * all arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t column indices.
	 * The implicit diagonal is not applied at this level: that is left to the caller.
	 * The symmetric conjugate-transposed case is served by forwarding all
	 * arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for \f$y \leftarrow {A^H} \cdot x\f$ with \f$A = A^T\f$.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_half_idx_t column indices.
	 * The implicit diagonal is not applied at this level: that is left to the caller.
	 * The symmetric conjugate-transposed case is served by forwarding all
	 * arguments unchanged to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_err_t errval=rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * SpMV kernel for \f$y \leftarrow {A} \cdot x\f$ with \f$A = A^H\f$: each stored
	 * nonzero (i,j) updates the output at i and, mirrored, at j (entries with i==j
	 * in a diagonal submatrix are applied only once). No conjugation is performed
	 * on the double values.
	 * Matrix: 1 x 1 blocking, BCOR storage, implicit diagonal, double values,
	 * rsb_coo_idx_t coordinate indices (rows read from bpntr, columns from bindx).
	 * The implicit diagonal is not applied by this kernel: that is left to the caller.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Rescale out with a NULL factor before accumulating (presumably zeroes it; confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* Submatrix on the diagonal: mirrored update skipped for diagonal entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rows[k], c=cols[k];

			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
		}
	}
	else
	{
		/* Off-diagonal submatrix: mirrored updates use pointers shifted by the offsets difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Main loop: four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			out[rows[k  ]]+=VA[k  ]*rhs[cols[k  ]];
			tout[cols[k  ]]+=VA[k  ]*trhs[rows[k  ]];
			out[rows[k+1]]+=VA[k+1]*rhs[cols[k+1]];
			tout[cols[k+1]]+=VA[k+1]*trhs[rows[k+1]];
			out[rows[k+2]]+=VA[k+2]*rhs[cols[k+2]];
			tout[cols[k+2]]+=VA[k+2]*trhs[rows[k+2]];
			out[rows[k+3]]+=VA[k+3]*rhs[cols[k+3]];
			tout[cols[k+3]]+=VA[k+3]*trhs[rows[k+3]];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			out[rows[k]]+=VA[k]*rhs[cols[k]];
			tout[cols[k]]+=VA[k]*trhs[rows[k]];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, implicit diagonal, double values, rsb_half_idx_t column indices.
	 * Every stored nonzero is applied twice: as stored, and mirrored (row and column swapped).
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not really dealt with here; see the caller level. */
	const rsb_half_idx_t * const row_idx=(const rsb_half_idx_t*)bpntr;	/* bpntr is reinterpreted as half-width row coordinates */
	const rsb_half_idx_t * const col_idx=(const rsb_half_idx_t*)bindx;	/* column coordinates are read through bindx */
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Scale the first Mdim entries of out with a NULL factor (presumably this zeroes them -- confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_DOUBLE ,Mdim,NULL,out,1);

	if(roff==coff)
	{
		/* Equal offsets: both updates address out/rhs directly; entries with r==c are applied once only. */
		for(n=0;RSB_LIKELY(n<nnz);++n)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=VA[n]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[n]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update uses views of rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		n=0;
		while(n+3<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=VA[n]*rhs[c];
			tout[c]+=VA[n]*trhs[r];

			r=row_idx[n+1];
			c=col_idx[n+1];
			out[r]+=VA[n+1]*rhs[c];
			tout[c]+=VA[n+1]*trhs[r];

			r=row_idx[n+2];
			c=col_idx[n+2];
			out[r]+=VA[n+2]*rhs[c];
			tout[c]+=VA[n+2]*trhs[r];

			r=row_idx[n+3];
			c=col_idx[n+3];
			out[r]+=VA[n+3]*rhs[c];
			tout[c]+=VA[n+3]*trhs[r];

			n+=4;
		}
		/* Remainder: whatever the unrolled part left over. */
		while(n<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=VA[n]*rhs[c];
			tout[c]+=VA[n]*trhs[r];
			++n;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, implicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not really dealt with here; see the caller level. */
	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, implicit diagonal, double values, rsb_half_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not really dealt with here; see the caller level. */
	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_C__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, implicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not really dealt with here; see the caller level. */
	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_double_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_double_H__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, implicit diagonal, double values, rsb_half_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not really dealt with here; see the caller level. */
	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_double_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x, where A \neq A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * For each of the nnz stored entries, out[row] is incremented by alpha * value * rhs[column].
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx=(const rsb_coo_idx_t*)bpntr;	/* row coordinates are read through bpntr */
	const rsb_coo_idx_t * const col_idx=(const rsb_coo_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(n+3<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[r]+=alpha*VA[n]*rhs[c];

		r=row_idx[n+1];
		c=col_idx[n+1];
		out[r]+=alpha*VA[n+1]*rhs[c];

		r=row_idx[n+2];
		c=col_idx[n+2];
		out[r]+=alpha*VA[n+2]*rhs[c];

		r=row_idx[n+3];
		c=col_idx[n+3];
		out[r]+=alpha*VA[n+3]*rhs[c];

		n+=4;
	}
	/* Remainder: whatever the unrolled part left over. */
	while(n<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[r]+=alpha*VA[n]*rhs[c];
		++n;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x, where A \neq A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * For each of the nnz stored entries, out[row] is incremented by alpha * value * rhs[column].
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx=(const rsb_half_idx_t*)bpntr;	/* bpntr is reinterpreted as half-width row coordinates */
	const rsb_half_idx_t * const col_idx=(const rsb_half_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(n+3<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[r]+=alpha*VA[n]*rhs[c];

		r=row_idx[n+1];
		c=col_idx[n+1];
		out[r]+=alpha*VA[n+1]*rhs[c];

		r=row_idx[n+2];
		c=col_idx[n+2];
		out[r]+=alpha*VA[n+2]*rhs[c];

		r=row_idx[n+3];
		c=col_idx[n+3];
		out[r]+=alpha*VA[n+3]*rhs[c];

		n+=4;
	}
	/* Remainder: whatever the unrolled part left over. */
	while(n<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[r]+=alpha*VA[n]*rhs[c];
		++n;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x, where A \neq A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Row and column roles are swapped: out[column] is incremented by alpha * value * rhs[row].
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx=(const rsb_coo_idx_t*)bpntr;	/* row coordinates are read through bpntr */
	const rsb_coo_idx_t * const col_idx=(const rsb_coo_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Re-base both vectors: rhs moves back by coff and forward by roff, out the other way round. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(n+3<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];

		r=row_idx[n+1];
		c=col_idx[n+1];
		out[c]+=alpha*VA[n+1]*rhs[r];

		r=row_idx[n+2];
		c=col_idx[n+2];
		out[c]+=alpha*VA[n+2]*rhs[r];

		r=row_idx[n+3];
		c=col_idx[n+3];
		out[c]+=alpha*VA[n+3]*rhs[r];

		n+=4;
	}
	/* Remainder: whatever the unrolled part left over. */
	while(n<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];
		++n;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x, where A \neq A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Row and column roles are swapped: out[column] is incremented by alpha * value * rhs[row].
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx=(const rsb_half_idx_t*)bpntr;	/* bpntr is reinterpreted as half-width row coordinates */
	const rsb_half_idx_t * const col_idx=(const rsb_half_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Re-base both vectors: rhs moves back by coff and forward by roff, out the other way round. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(n+3<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];

		r=row_idx[n+1];
		c=col_idx[n+1];
		out[c]+=alpha*VA[n+1]*rhs[r];

		r=row_idx[n+2];
		c=col_idx[n+2];
		out[c]+=alpha*VA[n+2]*rhs[r];

		r=row_idx[n+3];
		c=col_idx[n+3];
		out[c]+=alpha*VA[n+3]*rhs[r];

		n+=4;
	}
	/* Remainder: whatever the unrolled part left over. */
	while(n<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];
		++n;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x, where A \neq A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Row and column roles are swapped: out[column] is incremented by alpha * value * rhs[row].
	 * The stored values are used as they are (no conjugation step appears in this double kernel).
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx=(const rsb_coo_idx_t*)bpntr;	/* row coordinates are read through bpntr */
	const rsb_coo_idx_t * const col_idx=(const rsb_coo_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Re-base both vectors: rhs moves back by coff and forward by roff, out the other way round. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(n+3<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];

		r=row_idx[n+1];
		c=col_idx[n+1];
		out[c]+=alpha*VA[n+1]*rhs[r];

		r=row_idx[n+2];
		c=col_idx[n+2];
		out[c]+=alpha*VA[n+2]*rhs[r];

		r=row_idx[n+3];
		c=col_idx[n+3];
		out[c]+=alpha*VA[n+3]*rhs[r];

		n+=4;
	}
	/* Remainder: whatever the unrolled part left over. */
	while(n<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];
		++n;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x, where A \neq A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Row and column roles are swapped: out[column] is incremented by alpha * value * rhs[row].
	 * The stored values are used as they are (no conjugation step appears in this double kernel).
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx=(const rsb_half_idx_t*)bpntr;	/* bpntr is reinterpreted as half-width row coordinates */
	const rsb_half_idx_t * const col_idx=(const rsb_half_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	/* Re-base both vectors: rhs moves back by coff and forward by roff, out the other way round. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(n+3<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];

		r=row_idx[n+1];
		c=col_idx[n+1];
		out[c]+=alpha*VA[n+1]*rhs[r];

		r=row_idx[n+2];
		c=col_idx[n+2];
		out[c]+=alpha*VA[n+2]*rhs[r];

		r=row_idx[n+3];
		c=col_idx[n+3];
		out[c]+=alpha*VA[n+3]*rhs[r];

		n+=4;
	}
	/* Remainder: whatever the unrolled part left over. */
	while(n<nnz)
	{
		r=row_idx[n];
		c=col_idx[n];
		out[c]+=alpha*VA[n]*rhs[r];
		++n;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x, where A == A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Every stored nonzero is applied twice: as stored, and mirrored (row and column swapped).
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx=(const rsb_coo_idx_t*)bpntr;	/* row coordinates are read through bpntr */
	const rsb_coo_idx_t * const col_idx=(const rsb_coo_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	if(roff==coff)
	{
		/* Equal offsets: both updates address out/rhs directly; entries with r==c are applied once only. */
		for(n=0;RSB_LIKELY(n<nnz);++n)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=alpha*VA[n]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update uses views of rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		n=0;
		while(n+3<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];

			r=row_idx[n+1];
			c=col_idx[n+1];
			out[r]+=alpha*VA[n+1]*rhs[c];
			tout[c]+=alpha*VA[n+1]*trhs[r];

			r=row_idx[n+2];
			c=col_idx[n+2];
			out[r]+=alpha*VA[n+2]*rhs[c];
			tout[c]+=alpha*VA[n+2]*trhs[r];

			r=row_idx[n+3];
			c=col_idx[n+3];
			out[r]+=alpha*VA[n+3]*rhs[c];
			tout[c]+=alpha*VA[n+3]*trhs[r];

			n+=4;
		}
		/* Remainder: whatever the unrolled part left over. */
		while(n<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];
			++n;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x, where A == A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Every stored nonzero is applied twice: as stored, and mirrored (row and column swapped).
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx=(const rsb_half_idx_t*)bpntr;	/* bpntr is reinterpreted as half-width row coordinates */
	const rsb_half_idx_t * const col_idx=(const rsb_half_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	if(roff==coff)
	{
		/* Equal offsets: both updates address out/rhs directly; entries with r==c are applied once only. */
		for(n=0;RSB_LIKELY(n<nnz);++n)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=alpha*VA[n]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update uses views of rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		n=0;
		while(n+3<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];

			r=row_idx[n+1];
			c=col_idx[n+1];
			out[r]+=alpha*VA[n+1]*rhs[c];
			tout[c]+=alpha*VA[n+1]*trhs[r];

			r=row_idx[n+2];
			c=col_idx[n+2];
			out[r]+=alpha*VA[n+2]*rhs[c];
			tout[c]+=alpha*VA[n+2]*trhs[r];

			r=row_idx[n+3];
			c=col_idx[n+3];
			out[r]+=alpha*VA[n+3]*rhs[c];
			tout[c]+=alpha*VA[n+3]*trhs[r];

			n+=4;
		}
		/* Remainder: whatever the unrolled part left over. */
		while(n<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];
			++n;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x, where A == A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x, where A == A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x, where A == A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x, where A == A^T. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Every stored nonzero is applied twice: as stored, and mirrored (row and column swapped).
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx=(const rsb_coo_idx_t*)bpntr;	/* row coordinates are read through bpntr */
	const rsb_coo_idx_t * const col_idx=(const rsb_coo_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	if(roff==coff)
	{
		/* Equal offsets: both updates address out/rhs directly; entries with r==c are applied once only. */
		for(n=0;RSB_LIKELY(n<nnz);++n)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=alpha*VA[n]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update uses views of rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		n=0;
		while(n+3<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];

			r=row_idx[n+1];
			c=col_idx[n+1];
			out[r]+=alpha*VA[n+1]*rhs[c];
			tout[c]+=alpha*VA[n+1]*trhs[r];

			r=row_idx[n+2];
			c=col_idx[n+2];
			out[r]+=alpha*VA[n+2]*rhs[c];
			tout[c]+=alpha*VA[n+2]*trhs[r];

			r=row_idx[n+3];
			c=col_idx[n+3];
			out[r]+=alpha*VA[n+3]*rhs[c];
			tout[c]+=alpha*VA[n+3]*trhs[r];

			n+=4;
		}
		/* Remainder: whatever the unrolled part left over. */
		while(n<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];
			++n;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Every stored nonzero is applied twice: as stored, and mirrored (row and column swapped).
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx=(const rsb_half_idx_t*)bpntr;	/* bpntr is reinterpreted as half-width row coordinates */
	const rsb_half_idx_t * const col_idx=(const rsb_half_idx_t*)bindx;	/* column coordinates are read through bindx */
	const double alpha=*alphap;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t n=0;

	if(roff==coff)
	{
		/* Equal offsets: both updates address out/rhs directly; entries with r==c are applied once only. */
		for(n=0;RSB_LIKELY(n<nnz);++n)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=alpha*VA[n]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update uses views of rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		n=0;
		while(n+3<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];

			r=row_idx[n+1];
			c=col_idx[n+1];
			out[r]+=alpha*VA[n+1]*rhs[c];
			tout[c]+=alpha*VA[n+1]*trhs[r];

			r=row_idx[n+2];
			c=col_idx[n+2];
			out[r]+=alpha*VA[n+2]*rhs[c];
			tout[c]+=alpha*VA[n+2]*trhs[r];

			r=row_idx[n+3];
			c=col_idx[n+3];
			out[r]+=alpha*VA[n+3]*rhs[c];
			tout[c]+=alpha*VA[n+3]*trhs[r];

			n+=4;
		}
		/* Remainder: whatever the unrolled part left over. */
		while(n<nnz)
		{
			r=row_idx[n];
			c=col_idx[n];
			out[r]+=alpha*VA[n]*rhs[c];
			tout[c]+=alpha*VA[n]*trhs[r];
			++n;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_half_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x, where A == A^H. \f$
	 * Matrix A: 1 x 1 blocking, BCOR storage, explicit diagonal, double values, rsb_coo_idx_t column indices.
	 * Thin forwarder: all arguments are handed unchanged to the non-transposed kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case falls back to the non-transposed symmetric one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with explicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* The conjugate transposed product on this symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * For each of the nnz stored entries, (*alphap)*VA[n]*rhs[column] is added to out[row];
	 * row indices are read through bpntr and column indices through bindx, both viewed as rsb_coo_idx_t arrays.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k]] += scale * VA[k] * rhs[cols[k]];
		out[rows[k + 1]] += scale * VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += scale * VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += scale * VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		out[rows[k]] += scale * VA[k] * rhs[cols[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * For each of the nnz stored entries, (*alphap)*VA[n]*rhs[column] is added to out[row];
	 * row indices are read through bpntr and column indices through bindx, both viewed as rsb_half_idx_t arrays.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k]] += scale * VA[k] * rhs[cols[k]];
		out[rows[k + 1]] += scale * VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += scale * VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += scale * VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		out[rows[k]] += scale * VA[k] * rhs[cols[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * The roles of row and column index are swapped with respect to the non transposed kernel:
	 * for each of the nnz stored entries, (*alphap)*VA[n]*x[row] is added to y[column], where
	 * x is rhs moved by (roff-coff) elements and y is out moved by (coff-roff) elements.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	/* Re-based views of the vectors: first the own offset is removed, then the other one is applied. */
	const double *x = (rhs - coff) + roff;
	double *y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		y[cols[k + 1]] += scale * VA[k + 1] * x[rows[k + 1]];
		y[cols[k + 2]] += scale * VA[k + 2] * x[rows[k + 2]];
		y[cols[k + 3]] += scale * VA[k + 3] * x[rows[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * The roles of row and column index are swapped with respect to the non transposed kernel:
	 * for each of the nnz stored entries, (*alphap)*VA[n]*x[row] is added to y[column], where
	 * x is rhs moved by (roff-coff) elements and y is out moved by (coff-roff) elements.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	/* Re-based views of the vectors: first the own offset is removed, then the other one is applied. */
	const double *x = (rhs - coff) + roff;
	double *y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		y[cols[k + 1]] += scale * VA[k + 1] * x[rows[k + 1]];
		y[cols[k + 2]] += scale * VA[k + 2] * x[rows[k + 2]];
		y[cols[k + 3]] += scale * VA[k + 3] * x[rows[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * The values are plain doubles, so no conjugation takes place and row and column index simply swap roles:
	 * for each of the nnz stored entries, (*alphap)*VA[n]*x[row] is added to y[column], where
	 * x is rhs moved by (roff-coff) elements and y is out moved by (coff-roff) elements.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	/* Re-based views of the vectors: first the own offset is removed, then the other one is applied. */
	const double *x = (rhs - coff) + roff;
	double *y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		y[cols[k + 1]] += scale * VA[k + 1] * x[rows[k + 1]];
		y[cols[k + 2]] += scale * VA[k + 2] * x[rows[k + 2]];
		y[cols[k + 3]] += scale * VA[k + 3] * x[rows[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * The values are plain doubles, so no conjugation takes place and row and column index simply swap roles:
	 * for each of the nnz stored entries, (*alphap)*VA[n]*x[row] is added to y[column], where
	 * x is rhs moved by (roff-coff) elements and y is out moved by (coff-roff) elements.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	/* Re-based views of the vectors: first the own offset is removed, then the other one is applied. */
	const double *x = (rhs - coff) + roff;
	double *y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		y[cols[k + 1]] += scale * VA[k + 1] * x[rows[k + 1]];
		y[cols[k + 2]] += scale * VA[k + 2] * x[rows[k + 2]];
		y[cols[k + 3]] += scale * VA[k + 3] * x[rows[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		y[cols[k]] += scale * VA[k] * x[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * Every stored entry is applied twice, once as is and once mirrored (row and column swapped).
	 * When roff equals coff, an entry whose row and column index coincide is applied only once.
	 * When they differ, the mirrored update goes through copies of rhs and out moved by
	 * (roff-coff) and (coff-roff) elements respectively, and is always performed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: out and rhs can be indexed directly for both updates. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += scale * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: shifted vector views serve the mirrored update. */
		const double * const trhs = rhs + (roff - coff);
		double * const tout = out + (coff - roff);

		/* Main part: four entries per pass, visited in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += scale * VA[k + 1] * rhs[c];
			tout[c] += scale * VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += scale * VA[k + 2] * rhs[c];
			tout[c] += scale * VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += scale * VA[k + 3] * rhs[c];
			tout[c] += scale * VA[k + 3] * trhs[r];

			k += 4;
		}

		/* Remainder: the at most three entries left over by the unrolled loop. */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * Every stored entry is applied twice, once as is and once mirrored (row and column swapped).
	 * When roff equals coff, an entry whose row and column index coincide is applied only once.
	 * When they differ, the mirrored update goes through copies of rhs and out moved by
	 * (roff-coff) and (coff-roff) elements respectively, and is always performed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: out and rhs can be indexed directly for both updates. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += scale * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: shifted vector views serve the mirrored update. */
		const double * const trhs = rhs + (roff - coff);
		double * const tout = out + (coff - roff);

		/* Main part: four entries per pass, visited in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += scale * VA[k + 1] * rhs[c];
			tout[c] += scale * VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += scale * VA[k + 2] * rhs[c];
			tout[c] += scale * VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += scale * VA[k + 3] * rhs[c];
			tout[c] += scale * VA[k + 3] * trhs[r];

			k += 4;
		}

		/* Remainder: the at most three entries left over by the unrolled loop. */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The transposed product on symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The transposed product on symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The conjugate transposed product on symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The conjugate transposed product on symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * Every stored entry is applied twice, once as is and once mirrored (row and column swapped);
	 * the values are plain doubles, so the mirrored value is used without conjugation.
	 * When roff equals coff, an entry whose row and column index coincide is applied only once.
	 * When they differ, the mirrored update goes through copies of rhs and out moved by
	 * (roff-coff) and (coff-roff) elements respectively, and is always performed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: out and rhs can be indexed directly for both updates. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += scale * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: shifted vector views serve the mirrored update. */
		const double * const trhs = rhs + (roff - coff);
		double * const tout = out + (coff - roff);

		/* Main part: four entries per pass, visited in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += scale * VA[k + 1] * rhs[c];
			tout[c] += scale * VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += scale * VA[k + 2] * rhs[c];
			tout[c] += scale * VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += scale * VA[k + 3] * rhs[c];
			tout[c] += scale * VA[k + 3] * trhs[r];

			k += 4;
		}

		/* Remainder: the at most three entries left over by the unrolled loop. */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * Every stored entry is applied twice, once as is and once mirrored (row and column swapped);
	 * the values are plain doubles, so the mirrored value is used without conjugation.
	 * When roff equals coff, an entry whose row and column index coincide is applied only once.
	 * When they differ, the mirrored update goes through copies of rhs and out moved by
	 * (roff-coff) and (coff-roff) elements respectively, and is always performed.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	const double scale = *alphap;
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: out and rhs can be indexed directly for both updates. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += scale * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: shifted vector views serve the mirrored update. */
		const double * const trhs = rhs + (roff - coff);
		double * const tout = out + (coff - roff);

		/* Main part: four entries per pass, visited in storage order. */
		while (k + 3 < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += scale * VA[k + 1] * rhs[c];
			tout[c] += scale * VA[k + 1] * trhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += scale * VA[k + 2] * rhs[c];
			tout[c] += scale * VA[k + 2] * trhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += scale * VA[k + 3] * rhs[c];
			tout[c] += scale * VA[k + 3] * trhs[r];

			k += 4;
		}

		/* Remainder: the at most three entries left over by the unrolled loop. */
		while (k < nnz)
		{
			r = rows[k]; c = cols[k];
			out[r] += scale * VA[k] * rhs[c];
			tout[c] += scale * VA[k] * trhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The transposed product on this symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The transposed product on this symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_C__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The conjugate transposed product on this symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_double_H__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with implicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * No computation happens here: every argument is handed over unchanged to the non transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller. */
	/* The conjugate transposed product on this symmetric storage is delegated to the non transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_double_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with explicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * For each of the nnz stored entries, (-1)*VA[n]*rhs[column] is added to out[row];
	 * row indices are read through bpntr and column indices through bindx, both viewed as rsb_coo_idx_t arrays.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k]] += (-1) * VA[k] * rhs[cols[k]];
		out[rows[k + 1]] += (-1) * VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += (-1) * VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += (-1) * VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		out[rows[k]] += (-1) * VA[k] * rhs[cols[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with explicit diagonal, is of type double, and uses rsb_half_idx_t column indices.
	 * For each of the nnz stored entries, (-1)*VA[n]*rhs[column] is added to out[row];
	 * row indices are read through bpntr and column indices through bindx, both viewed as rsb_half_idx_t arrays.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		out[rows[k]] += (-1) * VA[k] * rhs[cols[k]];
		out[rows[k + 1]] += (-1) * VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += (-1) * VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += (-1) * VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		out[rows[k]] += (-1) * VA[k] * rhs[cols[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A has 1 x 1 blocks, is stored in BCOR format with explicit diagonal, is of type double, and uses rsb_coo_idx_t column indices.
	 * The roles of row and column index are swapped with respect to the non transposed kernel:
	 * for each of the nnz stored entries, (-1)*VA[n]*x[row] is added to y[column], where
	 * x is rhs moved by (roff-coff) elements and y is out moved by (coff-roff) elements.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	/* Re-based views of the vectors: first the own offset is removed, then the other one is applied. */
	const double *x = (rhs - coff) + roff;
	double *y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, visited in storage order. */
	while (k + 3 < nnz)
	{
		y[cols[k]] += (-1) * VA[k] * x[rows[k]];
		y[cols[k + 1]] += (-1) * VA[k + 1] * x[rows[k + 1]];
		y[cols[k + 2]] += (-1) * VA[k + 2] * x[rows[k + 2]];
		y[cols[k + 3]] += (-1) * VA[k + 3] * x[rows[k + 3]];
		k += 4;
	}

	/* Remainder: the at most three entries left over by the unrolled loop. */
	while (k < nnz)
	{
		y[cols[k]] += (-1) * VA[k] * x[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * The values are real, so no conjugation is applied: the arithmetic is that
	 * of the plain transposed product.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * The values are real, so no conjugation is applied: the arithmetic is that
	 * of the plain transposed product.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * Each stored nonzero is applied both as itself and as its mirrored entry.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: an entry whose row equals its column is applied once only. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=(-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update goes through views of
		 * rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0], c0=colidx[k+0];
			out[r0]+=(-1)*VA[k+0]*rhs[c0];
			tout[c0]+=(-1)*VA[k+0]*trhs[r0];

			const rsb_coo_idx_t r1=rowidx[k+1], c1=colidx[k+1];
			out[r1]+=(-1)*VA[k+1]*rhs[c1];
			tout[c1]+=(-1)*VA[k+1]*trhs[r1];

			const rsb_coo_idx_t r2=rowidx[k+2], c2=colidx[k+2];
			out[r2]+=(-1)*VA[k+2]*rhs[c2];
			tout[c2]+=(-1)*VA[k+2]*trhs[r2];

			const rsb_coo_idx_t r3=rowidx[k+3], c3=colidx[k+3];
			out[r3]+=(-1)*VA[k+3]*rhs[c3];
			tout[c3]+=(-1)*VA[k+3]*trhs[r3];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			tout[c]+=(-1)*VA[k]*trhs[r];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * Each stored nonzero is applied both as itself and as its mirrored entry.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: an entry whose row equals its column is applied once only. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_half_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=(-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update goes through views of
		 * rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_half_idx_t r0=rowidx[k+0], c0=colidx[k+0];
			out[r0]+=(-1)*VA[k+0]*rhs[c0];
			tout[c0]+=(-1)*VA[k+0]*trhs[r0];

			const rsb_half_idx_t r1=rowidx[k+1], c1=colidx[k+1];
			out[r1]+=(-1)*VA[k+1]*rhs[c1];
			tout[c1]+=(-1)*VA[k+1]*trhs[r1];

			const rsb_half_idx_t r2=rowidx[k+2], c2=colidx[k+2];
			out[r2]+=(-1)*VA[k+2]*rhs[c2];
			tout[c2]+=(-1)*VA[k+2]*trhs[r2];

			const rsb_half_idx_t r3=rowidx[k+3], c3=colidx[k+3];
			out[r3]+=(-1)*VA[k+3]*rhs[c3];
			tout[c3]+=(-1)*VA[k+3]*trhs[r3];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			const rsb_half_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			tout[c]+=(-1)*VA[k]*trhs[r];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_coo_idx_t indices.
	 * Implemented by delegation to the untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A equal to its transpose, the transposed product is the untransposed
	 * one: every argument is forwarded unchanged to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_half_idx_t indices.
	 * Implemented by delegation to the untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A equal to its transpose, the transposed product is the untransposed
	 * one: every argument is forwarded unchanged to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_coo_idx_t indices.
	 * Implemented by delegation to the untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Values are real and A equals its transpose, so the conjugate-transposed
	 * product is the untransposed one: forward everything to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_half_idx_t indices.
	 * Implemented by delegation to the untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Values are real and A equals its transpose, so the conjugate-transposed
	 * product is the untransposed one: forward everything to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a Hermitian matrix (\f$A = A^H\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * Each stored nonzero is applied both as itself and as its mirrored entry;
	 * the values are real, so the mirrored entry is used without conjugation.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: an entry whose row equals its column is applied once only. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=(-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update goes through views of
		 * rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0], c0=colidx[k+0];
			out[r0]+=(-1)*VA[k+0]*rhs[c0];
			tout[c0]+=(-1)*VA[k+0]*trhs[r0];

			const rsb_coo_idx_t r1=rowidx[k+1], c1=colidx[k+1];
			out[r1]+=(-1)*VA[k+1]*rhs[c1];
			tout[c1]+=(-1)*VA[k+1]*trhs[r1];

			const rsb_coo_idx_t r2=rowidx[k+2], c2=colidx[k+2];
			out[r2]+=(-1)*VA[k+2]*rhs[c2];
			tout[c2]+=(-1)*VA[k+2]*trhs[r2];

			const rsb_coo_idx_t r3=rowidx[k+3], c3=colidx[k+3];
			out[r3]+=(-1)*VA[k+3]*rhs[c3];
			tout[c3]+=(-1)*VA[k+3]*trhs[r3];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			tout[c]+=(-1)*VA[k]*trhs[r];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a Hermitian matrix (\f$A = A^H\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * Each stored nonzero is applied both as itself and as its mirrored entry;
	 * the values are real, so the mirrored entry is used without conjugation.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: an entry whose row equals its column is applied once only. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_half_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=(-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update goes through views of
		 * rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_half_idx_t r0=rowidx[k+0], c0=colidx[k+0];
			out[r0]+=(-1)*VA[k+0]*rhs[c0];
			tout[c0]+=(-1)*VA[k+0]*trhs[r0];

			const rsb_half_idx_t r1=rowidx[k+1], c1=colidx[k+1];
			out[r1]+=(-1)*VA[k+1]*rhs[c1];
			tout[c1]+=(-1)*VA[k+1]*trhs[r1];

			const rsb_half_idx_t r2=rowidx[k+2], c2=colidx[k+2];
			out[r2]+=(-1)*VA[k+2]*rhs[c2];
			tout[c2]+=(-1)*VA[k+2]*trhs[r2];

			const rsb_half_idx_t r3=rowidx[k+3], c3=colidx[k+3];
			out[r3]+=(-1)*VA[k+3]*rhs[c3];
			tout[c3]+=(-1)*VA[k+3]*trhs[r3];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			const rsb_half_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			tout[c]+=(-1)*VA[k]*trhs[r];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a Hermitian matrix (\f$A = A^H\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_coo_idx_t indices.
	 * Implemented by delegation to the untransposed Hermitian kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Values are real, so Hermitian means symmetric and the transposed product
	 * is the untransposed one: forward everything to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a Hermitian matrix (\f$A = A^H\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_half_idx_t indices.
	 * Implemented by delegation to the untransposed Hermitian kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Values are real, so Hermitian means symmetric and the transposed product
	 * is the untransposed one: forward everything to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a Hermitian matrix (\f$A = A^H\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_coo_idx_t indices.
	 * Implemented by delegation to the untransposed Hermitian kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* A equals its conjugate transpose, so this product is the untransposed
	 * one: forward everything to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a Hermitian matrix (\f$A = A^H\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with explicit diagonal, values
	 * of type double, and rsb_half_idx_t indices.
	 * Implemented by delegation to the untransposed Hermitian kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* A equals its conjugate transpose, so this product is the untransposed
	 * one: forward everything to the tN kernel. */
	const rsb_err_t errval=rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[rowidx[k+0]]+=(-1)*VA[k+0]*rhs[colidx[k+0]];
		out[rowidx[k+1]]+=(-1)*VA[k+1]*rhs[colidx[k+1]];
		out[rowidx[k+2]]+=(-1)*VA[k+2]*rhs[colidx[k+2]];
		out[rowidx[k+3]]+=(-1)*VA[k+3]*rhs[colidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[rowidx[k]]+=(-1)*VA[k]*rhs[colidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[rowidx[k+0]]+=(-1)*VA[k+0]*rhs[colidx[k+0]];
		out[rowidx[k+1]]+=(-1)*VA[k+1]*rhs[colidx[k+1]];
		out[rowidx[k+2]]+=(-1)*VA[k+2]*rhs[colidx[k+2]];
		out[rowidx[k+3]]+=(-1)*VA[k+3]*rhs[colidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[rowidx[k]]+=(-1)*VA[k]*rhs[colidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * The values are real, so no conjugation is applied: the arithmetic is that
	 * of the plain transposed product.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_half_idx_t.
	 * The values are real, so no conjugation is applied: the arithmetic is that
	 * of the plain transposed product.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	/* Transposed access: rows address the input and columns the output,
	 * so both vectors are re-based by swapping the roff/coff shifts. */
	rhs=(rhs-coff)+roff;
	out=(out-roff)+coff;

	/* Unrolled part: four nonzeroes per pass, in storage order. */
	while(k+3<nnz)
	{
		out[colidx[k+0]]+=(-1)*VA[k+0]*rhs[rowidx[k+0]];
		out[colidx[k+1]]+=(-1)*VA[k+1]*rhs[rowidx[k+1]];
		out[colidx[k+2]]+=(-1)*VA[k+2]*rhs[rowidx[k+2]];
		out[colidx[k+3]]+=(-1)*VA[k+3]*rhs[rowidx[k+3]];
		k+=4;
	}
	/* Leftover nonzeroes (at most three). */
	while(k<nnz)
	{
		out[colidx[k]]+=(-1)*VA[k]*rhs[rowidx[k]];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y - {A} \cdot x\f$ for a symmetric matrix (\f$A = A^T\f$).
	 * The matrix is 1 x 1 blocked, in BCOR format, with implicit diagonal, values
	 * of type double, and row/column indices read as rsb_coo_idx_t.
	 * Each stored nonzero is applied both as itself and as its mirrored entry.
	 * This kernel returns RSB_ERR_NO_ERROR unconditionally.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; it is left to the caller level. */
	/* bpntr is read as the row coordinates, bindx as the column coordinates. */
	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: an entry whose row equals its column is applied once only. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=(-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: the mirrored update goes through views of
		 * rhs/out shifted by the offset difference. */
		const double * const trhs=rhs+(roff-coff);
		double * const tout=out+(coff-roff);

		/* Unrolled part: four nonzeroes per pass, in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0], c0=colidx[k+0];
			out[r0]+=(-1)*VA[k+0]*rhs[c0];
			tout[c0]+=(-1)*VA[k+0]*trhs[r0];

			const rsb_coo_idx_t r1=rowidx[k+1], c1=colidx[k+1];
			out[r1]+=(-1)*VA[k+1]*rhs[c1];
			tout[c1]+=(-1)*VA[k+1]*trhs[r1];

			const rsb_coo_idx_t r2=rowidx[k+2], c2=colidx[k+2];
			out[r2]+=(-1)*VA[k+2]*rhs[c2];
			tout[c2]+=(-1)*VA[k+2]*trhs[r2];

			const rsb_coo_idx_t r3=rowidx[k+3], c3=colidx[k+3];
			out[r3]+=(-1)*VA[k+3]*rhs[c3];
			tout[c3]+=(-1)*VA[k+3]*trhs[r3];
		}
		/* Leftover nonzeroes (at most three). */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k], c=colidx[k];

			out[r]+=(-1)*VA[k]*rhs[c];
			tout[c]+=(-1)*VA[k]*trhs[r];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) is subtracted into out[r] (times rhs[c]) and,
	 * mirrored, into out[c] (times rhs[r]). Both index arrays are read as
	 * rsb_half_idx_t: rows through bpntr, columns through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: an entry with r == c is its own mirror and must be applied once only. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_half_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += (-1) * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Distinct offsets: the mirrored update goes through re-based views of rhs and out. */
		const double * const mirror_rhs = rhs + 1 * (roff - coff);
		double * const mirror_out = out + 1 * (coff - roff);

		/* Bulk part: four nonzeroes per pass, in storage order. */
		while (k + 3 < nnz)
		{
			rsb_half_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			mirror_out[c] += (-1) * VA[k] * mirror_rhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += (-1) * VA[k + 1] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 1] * mirror_rhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += (-1) * VA[k + 2] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 2] * mirror_rhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += (-1) * VA[k + 3] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 3] * mirror_rhs[r];

			k += 4;
		}
		/* Tail part: the (at most three) nonzeroes left over. */
		while (k < nnz)
		{
			const rsb_half_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			mirror_out[c] += (-1) * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed symmetric kernel (values are real, so no conjugation is involved).
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed symmetric kernel (values are real, so no conjugation is involved).
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A == A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) is subtracted into out[r] (times rhs[c]) and,
	 * mirrored, into out[c] (times rhs[r]); values being real, the mirrored term
	 * uses the same value with no conjugation. Row indices are read through bpntr,
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: an entry with r == c is its own mirror and must be applied once only. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += (-1) * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Distinct offsets: the mirrored update goes through re-based views of rhs and out. */
		const double * const mirror_rhs = rhs + 1 * (roff - coff);
		double * const mirror_out = out + 1 * (coff - roff);

		/* Bulk part: four nonzeroes per pass, in storage order. */
		while (k + 3 < nnz)
		{
			rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			mirror_out[c] += (-1) * VA[k] * mirror_rhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += (-1) * VA[k + 1] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 1] * mirror_rhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += (-1) * VA[k + 2] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 2] * mirror_rhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += (-1) * VA[k + 3] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 3] * mirror_rhs[r];

			k += 4;
		}
		/* Tail part: the (at most three) nonzeroes left over. */
		while (k < nnz)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			mirror_out[c] += (-1) * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A == A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) is subtracted into out[r] (times rhs[c]) and,
	 * mirrored, into out[c] (times rhs[r]); values being real, the mirrored term
	 * uses the same value with no conjugation. Both index arrays are read as
	 * rsb_half_idx_t: rows through bpntr, columns through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: an entry with r == c is its own mirror and must be applied once only. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_half_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += (-1) * VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Distinct offsets: the mirrored update goes through re-based views of rhs and out. */
		const double * const mirror_rhs = rhs + 1 * (roff - coff);
		double * const mirror_out = out + 1 * (coff - roff);

		/* Bulk part: four nonzeroes per pass, in storage order. */
		while (k + 3 < nnz)
		{
			rsb_half_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			mirror_out[c] += (-1) * VA[k] * mirror_rhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += (-1) * VA[k + 1] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 1] * mirror_rhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += (-1) * VA[k + 2] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 2] * mirror_rhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += (-1) * VA[k + 3] * rhs[c];
			mirror_out[c] += (-1) * VA[k + 3] * mirror_rhs[r];

			k += 4;
		}
		/* Tail part: the (at most three) nonzeroes left over. */
		while (k < nnz)
		{
			const rsb_half_idx_t r = rows[k], c = cols[k];

			out[r] += (-1) * VA[k] * rhs[c];
			mirror_out[c] += (-1) * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A == A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed kernel of the same symmetry.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* Symmetric transposed reverts to symmetric untransposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A == A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed kernel of the same symmetry.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* Symmetric transposed reverts to symmetric untransposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_C__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A == A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed kernel of the same symmetry.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* Symmetric transposed reverts to symmetric untransposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_double_H__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A == A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Thin forwarder: all arguments are handed over, unchanged, to the
	 * untransposed kernel of the same symmetry.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* NOTE: the implicit diagonal is not applied in here; that is dealt with at caller level. */
	/* Symmetric transposed reverts to symmetric untransposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_double_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*rhs[c*incx] into out[r*incy].
	 * Row indices are read through bpntr, column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Bulk part: four nonzeroes per pass, in storage order. */
	while (k + 3 < nnz)
	{
		rsb_coo_idx_t r = rows[k], c = cols[k];

		out[r * incy] += VA[k] * rhs[c * incx];

		r = rows[k + 1]; c = cols[k + 1];
		out[r * incy] += VA[k + 1] * rhs[c * incx];

		r = rows[k + 2]; c = cols[k + 2];
		out[r * incy] += VA[k + 2] * rhs[c * incx];

		r = rows[k + 3]; c = cols[k + 3];
		out[r * incy] += VA[k + 3] * rhs[c * incx];

		k += 4;
	}
	/* Tail part: the (at most three) nonzeroes left over. */
	while (k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[r * incy] += VA[k] * rhs[c * incx];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*rhs[c*incx] into out[r*incy].
	 * Both index arrays are read as rsb_half_idx_t (rows through bpntr, columns
	 * through bindx) and widened to rsb_coo_idx_t before being scaled by the strides.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* Bulk part: four nonzeroes per pass, in storage order. */
	while (k + 3 < nnz)
	{
		rsb_coo_idx_t r = rows[k], c = cols[k];

		out[r * incy] += VA[k] * rhs[c * incx];

		r = rows[k + 1]; c = cols[k + 1];
		out[r * incy] += VA[k + 1] * rhs[c * incx];

		r = rows[k + 2]; c = cols[k + 2];
		out[r * incy] += VA[k + 2] * rhs[c * incx];

		r = rows[k + 3]; c = cols[k + 3];
		out[r * incy] += VA[k + 3] * rhs[c * incx];

		k += 4;
	}
	/* Tail part: the (at most three) nonzeroes left over. */
	while (k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[r * incy] += VA[k] * rhs[c * incx];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*x[r*incx] into y[c*incy], i.e. the
	 * roles of the row and column index are swapped with respect to the
	 * untransposed kernel. Row indices are read through bpntr, column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	/* Re-base the vectors: rhs moves from a coff-relative to a roff-relative origin, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out already offset for the untransposed case -- confirm. */
	const double * const src = (rhs - coff * (incx)) + roff * (incx);
	double * const dst = (out - roff * (incy)) + coff * (incy);
	rsb_nnz_idx_t k = 0;

	/* Bulk part: four nonzeroes per pass, in storage order. */
	while (k + 3 < nnz)
	{
		rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];

		r = rows[k + 1]; c = cols[k + 1];
		dst[c * incy] += VA[k + 1] * src[r * incx];

		r = rows[k + 2]; c = cols[k + 2];
		dst[c * incy] += VA[k + 2] * src[r * incx];

		r = rows[k + 3]; c = cols[k + 3];
		dst[c * incy] += VA[k + 3] * src[r * incx];

		k += 4;
	}
	/* Tail part: the (at most three) nonzeroes left over. */
	while (k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*x[r*incx] into y[c*incy], i.e. the
	 * roles of the row and column index are swapped with respect to the
	 * untransposed kernel. Both index arrays are read as rsb_half_idx_t (rows
	 * through bpntr, columns through bindx) and widened to rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	/* Re-base the vectors: rhs moves from a coff-relative to a roff-relative origin, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out already offset for the untransposed case -- confirm. */
	const double * const src = (rhs - coff * (incx)) + roff * (incx);
	double * const dst = (out - roff * (incy)) + coff * (incy);
	rsb_nnz_idx_t k = 0;

	/* Bulk part: four nonzeroes per pass, in storage order. */
	while (k + 3 < nnz)
	{
		rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];

		r = rows[k + 1]; c = cols[k + 1];
		dst[c * incy] += VA[k + 1] * src[r * incx];

		r = rows[k + 2]; c = cols[k + 2];
		dst[c * incy] += VA[k + 2] * src[r * incx];

		r = rows[k + 3]; c = cols[k + 3];
		dst[c * incy] += VA[k + 3] * src[r * incx];

		k += 4;
	}
	/* Tail part: the (at most three) nonzeroes left over. */
	while (k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*x[r*incx] into y[c*incy]; values
	 * being real, no conjugation is applied and the arithmetic is that of a plain
	 * transposed product. Row indices are read through bpntr, column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	/* Re-base the vectors: rhs moves from a coff-relative to a roff-relative origin, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out already offset for the untransposed case -- confirm. */
	const double * const src = (rhs - coff * (incx)) + roff * (incx);
	double * const dst = (out - roff * (incy)) + coff * (incy);
	rsb_nnz_idx_t k = 0;

	/* Bulk part: four nonzeroes per pass, in storage order. */
	while (k + 3 < nnz)
	{
		rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];

		r = rows[k + 1]; c = cols[k + 1];
		dst[c * incy] += VA[k + 1] * src[r * incx];

		r = rows[k + 2]; c = cols[k + 2];
		dst[c * incy] += VA[k + 2] * src[r * incx];

		r = rows[k + 3]; c = cols[k + 3];
		dst[c * incy] += VA[k + 3] * src[r * incx];

		k += 4;
	}
	/* Tail part: the (at most three) nonzeroes left over. */
	while (k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*x[r*incx] into y[c*incy]; values
	 * being real, no conjugation is applied and the arithmetic is that of a plain
	 * transposed product. Both index arrays are read as rsb_half_idx_t (rows
	 * through bpntr, columns through bindx) and widened to rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	/* Re-base the vectors: rhs moves from a coff-relative to a roff-relative origin, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out already offset for the untransposed case -- confirm. */
	const double * const src = (rhs - coff * (incx)) + roff * (incx);
	double * const dst = (out - roff * (incy)) + coff * (incy);
	rsb_nnz_idx_t k = 0;

	/* Bulk part: four nonzeroes per pass, in storage order. */
	while (k + 3 < nnz)
	{
		rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];

		r = rows[k + 1]; c = cols[k + 1];
		dst[c * incy] += VA[k + 1] * src[r * incx];

		r = rows[k + 2]; c = cols[k + 2];
		dst[c * incy] += VA[k + 2] * src[r * incx];

		r = rows[k + 3]; c = cols[k + 3];
		dst[c * incy] += VA[k + 3] * src[r * incx];

		k += 4;
	}
	/* Tail part: the (at most three) nonzeroes left over. */
	while (k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		dst[c * incy] += VA[k] * src[r * incx];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*rhs[c*incx] into out[r*incy] and,
	 * mirrored, a(r,c)*rhs[r*incx] into out[c*incy]. Row indices are read through
	 * bpntr, column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: an entry with r == c is its own mirror and must be applied once only. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r * incy] += VA[k] * rhs[c * incx];
			if (RSB_LIKELY(r != c))
				out[c * incy] += VA[k] * rhs[r * incx];
			++k;
		}
	}
	else
	{
		/* Distinct offsets: the mirrored update goes through re-based views of rhs and out. */
		const double * const mirror_rhs = rhs + incx * (roff - coff);
		double * const mirror_out = out + incy * (coff - roff);

		/* Bulk part: four nonzeroes per pass, in storage order. */
		while (k + 3 < nnz)
		{
			rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r * incy] += VA[k] * rhs[c * incx];
			mirror_out[c * incy] += VA[k] * mirror_rhs[r * incx];

			r = rows[k + 1]; c = cols[k + 1];
			out[r * incy] += VA[k + 1] * rhs[c * incx];
			mirror_out[c * incy] += VA[k + 1] * mirror_rhs[r * incx];

			r = rows[k + 2]; c = cols[k + 2];
			out[r * incy] += VA[k + 2] * rhs[c * incx];
			mirror_out[c * incy] += VA[k + 2] * mirror_rhs[r * incx];

			r = rows[k + 3]; c = cols[k + 3];
			out[r * incy] += VA[k + 3] * rhs[c * incx];
			mirror_out[c * incy] += VA[k + 3] * mirror_rhs[r * incx];

			k += 4;
		}
		/* Tail part: the (at most three) nonzeroes left over. */
		while (k < nnz)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r * incy] += VA[k] * rhs[c * incx];
			mirror_out[c * incy] += VA[k] * mirror_rhs[r * incx];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Every stored nonzero a(r,c) adds a(r,c)*rhs[c*incx] into out[r*incy] and,
	 * mirrored, a(r,c)*rhs[r*incx] into out[c*incy]. Both index arrays are read
	 * as rsb_half_idx_t (rows through bpntr, columns through bindx) and widened
	 * to rsb_coo_idx_t before being scaled by the strides.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read by this kernel.
	 * \return \rsb_errval_inp_param_msg (this kernel always yields RSB_ERR_NO_ERROR).
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		/* Equal offsets: an entry with r == c is its own mirror and must be applied once only. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r * incy] += VA[k] * rhs[c * incx];
			if (RSB_LIKELY(r != c))
				out[c * incy] += VA[k] * rhs[r * incx];
			++k;
		}
	}
	else
	{
		/* Distinct offsets: the mirrored update goes through re-based views of rhs and out. */
		const double * const mirror_rhs = rhs + incx * (roff - coff);
		double * const mirror_out = out + incy * (coff - roff);

		/* Bulk part: four nonzeroes per pass, in storage order. */
		while (k + 3 < nnz)
		{
			rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r * incy] += VA[k] * rhs[c * incx];
			mirror_out[c * incy] += VA[k] * mirror_rhs[r * incx];

			r = rows[k + 1]; c = cols[k + 1];
			out[r * incy] += VA[k + 1] * rhs[c * incx];
			mirror_out[c * incy] += VA[k + 1] * mirror_rhs[r * incx];

			r = rows[k + 2]; c = cols[k + 2];
			out[r * incy] += VA[k + 2] * rhs[c * incx];
			mirror_out[c * incy] += VA[k + 2] * mirror_rhs[r * incx];

			r = rows[k + 3]; c = cols[k + 3];
			out[r * incy] += VA[k + 3] * rhs[c * incx];
			mirror_out[c * incy] += VA[k + 3] * mirror_rhs[r * incx];

			k += 4;
		}
		/* Tail part: the (at most three) nonzeroes left over. */
		while (k < nnz)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r * incy] += VA[k] * rhs[c * incx];
			mirror_out[c * incy] += VA[k] * mirror_rhs[r * incx];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Thin forwarder: all arguments, strides included, are handed over unchanged
	 * to the untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Thin forwarder: all arguments, strides included, are handed over unchanged
	 * to the untransposed symmetric kernel.
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t indices.
	 *
	 * Thin forwarder: all arguments, strides included, are handed over unchanged
	 * to the untransposed symmetric kernel (values are real, so no conjugation is involved).
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A == A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t indices.
	 *
	 * Thin forwarder: all arguments, strides included, are handed over unchanged
	 * to the untransposed symmetric kernel (values are real, so no conjugation is involved).
	 * \return \rsb_errval_inp_param_msg (whatever the untransposed kernel returns).
	 */

	/* With A symmetric, the transposed product is the same as the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_coo_idx_t array) and the column index from bindx.
	 * Every value is applied as stored and then mirrored (row and column swapped);
	 * rhs is read with stride incx and out is updated with stride incy.
	 * The values are real doubles and no conjugation is applied to them.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	if(roff == coff)
	{
		/* Equal offsets: mirrored updates share the rhs/out bases; an entry
		 * whose row and column indices coincide is applied only once. */
		while(RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r != c))
				out[c*incy] += VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through bases shifted by the offset difference. */
		const double * const xm = rhs + incx*(roff-coff);// H
		double * const ym = out + incy*(coff-roff);

		/* Four entries per trip; updates are issued strictly in storage order. */
		for(k = 0; k+3 < nnz; k += 4)
		{
			const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
			const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
			const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
			const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

			out[r0*incy] += VA[k  ]*rhs[c0*incx];
			ym[c0*incy] += VA[k  ]*xm[r0*incx];
			out[r1*incy] += VA[k+1]*rhs[c1*incx];
			ym[c1*incy] += VA[k+1]*xm[r1*incx];
			out[r2*incy] += VA[k+2]*rhs[c2*incx];
			ym[c2*incy] += VA[k+2]*xm[r2*incx];
			out[r3*incy] += VA[k+3]*rhs[c3*incx];
			ym[c3*incy] += VA[k+3]*xm[r3*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			ym[c*incy] += VA[k]*xm[r*incx];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_half_idx_t array) and the column index from bindx;
	 * both are widened to rsb_coo_idx_t before being multiplied by a stride.
	 * Every value is applied as stored and then mirrored (row and column swapped);
	 * rhs is read with stride incx and out is updated with stride incy.
	 * The values are real doubles and no conjugation is applied to them.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	if(roff == coff)
	{
		/* Equal offsets: mirrored updates share the rhs/out bases; an entry
		 * whose row and column indices coincide is applied only once. */
		while(RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r != c))
				out[c*incy] += VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through bases shifted by the offset difference. */
		const double * const xm = rhs + incx*(roff-coff);// H
		double * const ym = out + incy*(coff-roff);

		/* Four entries per trip; updates are issued strictly in storage order. */
		for(k = 0; k+3 < nnz; k += 4)
		{
			const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
			const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
			const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
			const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

			out[r0*incy] += VA[k  ]*rhs[c0*incx];
			ym[c0*incy] += VA[k  ]*xm[r0*incx];
			out[r1*incy] += VA[k+1]*rhs[c1*incx];
			ym[c1*incy] += VA[k+1]*xm[r1*incx];
			out[r2*incy] += VA[k+2]*rhs[c2*incx];
			ym[c2*incy] += VA[k+2]*xm[r2*incx];
			out[r3*incy] += VA[k+3]*rhs[c3*incx];
			ym[c3*incy] += VA[k+3]*xm[r3*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			ym[c*incy] += VA[k]*xm[r*incx];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed case reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dE_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed case reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dE_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The conjugate-transposed case reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dE_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The conjugate-transposed case reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dE_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_coo_idx_t array) and the column index from bindx;
	 * the product of VA[k] with the rhs element at (column * incx) is added to
	 * the out element at (row * incy).
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; accumulations are issued in storage order. */
	while(k+3 < nnz)
	{
		const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
		const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
		const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
		const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

		out[r0*incy] += VA[k  ]*rhs[c0*incx];
		out[r1*incy] += VA[k+1]*rhs[c1*incx];
		out[r2*incy] += VA[k+2]*rhs[c2*incx];
		out[r3*incy] += VA[k+3]*rhs[c3*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while(k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[r*incy] += VA[k]*rhs[c*incx];
		++k;
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_half_idx_t array) and the column index from bindx,
	 * both widened to rsb_coo_idx_t; the product of VA[k] with the rhs element at
	 * (column * incx) is added to the out element at (row * incy).
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; accumulations are issued in storage order. */
	while(k+3 < nnz)
	{
		const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
		const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
		const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
		const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

		out[r0*incy] += VA[k  ]*rhs[c0*incx];
		out[r1*incy] += VA[k+1]*rhs[c1*incx];
		out[r2*incy] += VA[k+2]*rhs[c2*incx];
		out[r3*incy] += VA[k+3]*rhs[c3*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while(k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[r*incy] += VA[k]*rhs[c*incx];
		++k;
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Before the sweep, the rhs base is moved back by coff*incx and forward by
	 * roff*incx elements, and the out base back by roff*incy and forward by
	 * coff*incy elements. Then, for each of the nnz stored values VA[k], with
	 * the row index read from bpntr (reinterpreted as an rsb_coo_idx_t array)
	 * and the column index from bindx, the product of VA[k] with the rhs
	 * element at (row * incx) is added to the out element at (column * incy).
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Rebase the vectors: roles of row and column offsets are exchanged. */
	rhs -= coff*(incx);
	rhs += roff*(incx);
	out -= roff*(incy);
	out += coff*(incy);

	/* Four entries per trip; accumulations are issued in storage order. */
	while(k+3 < nnz)
	{
		const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
		const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
		const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
		const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

		out[c0*incy] += VA[k  ]*rhs[r0*incx];
		out[c1*incy] += VA[k+1]*rhs[r1*incx];
		out[c2*incy] += VA[k+2]*rhs[r2*incx];
		out[c3*incy] += VA[k+3]*rhs[r3*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while(k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[c*incy] += VA[k]*rhs[r*incx];
		++k;
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Before the sweep, the rhs base is moved back by coff*incx and forward by
	 * roff*incx elements, and the out base back by roff*incy and forward by
	 * coff*incy elements. Then, for each of the nnz stored values VA[k], with
	 * the row index read from bpntr (reinterpreted as an rsb_half_idx_t array)
	 * and the column index from bindx, both widened to rsb_coo_idx_t, the
	 * product of VA[k] with the rhs element at (row * incx) is added to the
	 * out element at (column * incy).
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Rebase the vectors: roles of row and column offsets are exchanged. */
	rhs -= coff*(incx);
	rhs += roff*(incx);
	out -= roff*(incy);
	out += coff*(incy);

	/* Four entries per trip; accumulations are issued in storage order. */
	while(k+3 < nnz)
	{
		const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
		const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
		const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
		const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

		out[c0*incy] += VA[k  ]*rhs[r0*incx];
		out[c1*incy] += VA[k+1]*rhs[r1*incx];
		out[c2*incy] += VA[k+2]*rhs[r2*incx];
		out[c3*incy] += VA[k+3]*rhs[r3*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while(k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[c*incy] += VA[k]*rhs[r*incx];
		++k;
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Before the sweep, the rhs base is moved back by coff*incx and forward by
	 * roff*incx elements, and the out base back by roff*incy and forward by
	 * coff*incy elements. Then, for each of the nnz stored values VA[k], with
	 * the row index read from bpntr (reinterpreted as an rsb_coo_idx_t array)
	 * and the column index from bindx, the product of VA[k] with the rhs
	 * element at (row * incx) is added to the out element at (column * incy).
	 * The values are real doubles and no conjugation is applied to them.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Rebase the vectors: roles of row and column offsets are exchanged. */
	rhs -= coff*(incx);
	rhs += roff*(incx);
	out -= roff*(incy);
	out += coff*(incy);

	/* Four entries per trip; accumulations are issued in storage order. */
	while(k+3 < nnz)
	{
		const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
		const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
		const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
		const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

		out[c0*incy] += VA[k  ]*rhs[r0*incx];
		out[c1*incy] += VA[k+1]*rhs[r1*incx];
		out[c2*incy] += VA[k+2]*rhs[r2*incx];
		out[c3*incy] += VA[k+3]*rhs[r3*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while(k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[c*incy] += VA[k]*rhs[r*incx];
		++k;
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Before the sweep, the rhs base is moved back by coff*incx and forward by
	 * roff*incx elements, and the out base back by roff*incy and forward by
	 * coff*incy elements. Then, for each of the nnz stored values VA[k], with
	 * the row index read from bpntr (reinterpreted as an rsb_half_idx_t array)
	 * and the column index from bindx, both widened to rsb_coo_idx_t, the
	 * product of VA[k] with the rhs element at (row * incx) is added to the
	 * out element at (column * incy).
	 * The values are real doubles and no conjugation is applied to them.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	/* Rebase the vectors: roles of row and column offsets are exchanged. */
	rhs -= coff*(incx);
	rhs += roff*(incx);
	out -= roff*(incy);
	out += coff*(incy);

	/* Four entries per trip; accumulations are issued in storage order. */
	while(k+3 < nnz)
	{
		const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
		const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
		const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
		const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

		out[c0*incy] += VA[k  ]*rhs[r0*incx];
		out[c1*incy] += VA[k+1]*rhs[r1*incx];
		out[c2*incy] += VA[k+2]*rhs[r2*incx];
		out[c3*incy] += VA[k+3]*rhs[r3*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while(k < nnz)
	{
		const rsb_coo_idx_t r = rows[k], c = cols[k];

		out[c*incy] += VA[k]*rhs[r*incx];
		++k;
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_coo_idx_t array) and the column index from bindx.
	 * Every value is applied as stored and then mirrored (row and column swapped);
	 * rhs is read with stride incx and out is updated with stride incy.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	if(roff == coff)
	{
		/* Equal offsets: mirrored updates share the rhs/out bases; an entry
		 * whose row and column indices coincide is applied only once. */
		while(RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r != c))
				out[c*incy] += VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through bases shifted by the offset difference. */
		const double * const xm = rhs + incx*(roff-coff);// S
		double * const ym = out + incy*(coff-roff);

		/* Four entries per trip; updates are issued strictly in storage order. */
		for(k = 0; k+3 < nnz; k += 4)
		{
			const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
			const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
			const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
			const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

			out[r0*incy] += VA[k  ]*rhs[c0*incx];
			ym[c0*incy] += VA[k  ]*xm[r0*incx];
			out[r1*incy] += VA[k+1]*rhs[c1*incx];
			ym[c1*incy] += VA[k+1]*xm[r1*incx];
			out[r2*incy] += VA[k+2]*rhs[c2*incx];
			ym[c2*incy] += VA[k+2]*xm[r2*incx];
			out[r3*incy] += VA[k+3]*rhs[c3*incx];
			ym[c3*incy] += VA[k+3]*xm[r3*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			ym[c*incy] += VA[k]*xm[r*incx];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_half_idx_t array) and the column index from bindx;
	 * both are widened to rsb_coo_idx_t before being multiplied by a stride.
	 * Every value is applied as stored and then mirrored (row and column swapped);
	 * rhs is read with stride incx and out is updated with stride incy.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	if(roff == coff)
	{
		/* Equal offsets: mirrored updates share the rhs/out bases; an entry
		 * whose row and column indices coincide is applied only once. */
		while(RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r != c))
				out[c*incy] += VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through bases shifted by the offset difference. */
		const double * const xm = rhs + incx*(roff-coff);// S
		double * const ym = out + incy*(coff-roff);

		/* Four entries per trip; updates are issued strictly in storage order. */
		for(k = 0; k+3 < nnz; k += 4)
		{
			const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
			const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
			const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
			const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

			out[r0*incy] += VA[k  ]*rhs[c0*incx];
			ym[c0*incy] += VA[k  ]*xm[r0*incx];
			out[r1*incy] += VA[k+1]*rhs[c1*incx];
			ym[c1*incy] += VA[k+1]*xm[r1*incx];
			out[r2*incy] += VA[k+2]*rhs[c2*incx];
			ym[c2*incy] += VA[k+2]*xm[r2*incx];
			out[r3*incy] += VA[k+3]*rhs[c3*incx];
			ym[c3*incy] += VA[k+3]*xm[r3*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			ym[c*incy] += VA[k]*xm[r*incx];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case of a symmetric operand reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case of a symmetric operand reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case of a symmetric operand reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case of a symmetric operand reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sS_dI_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_coo_idx_t array) and the column index from bindx.
	 * Every value is applied as stored and then mirrored (row and column swapped);
	 * rhs is read with stride incx and out is updated with stride incy.
	 * The values are real doubles and no conjugation is applied to them.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	if(roff == coff)
	{
		/* Equal offsets: mirrored updates share the rhs/out bases; an entry
		 * whose row and column indices coincide is applied only once. */
		while(RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r != c))
				out[c*incy] += VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through bases shifted by the offset difference. */
		const double * const xm = rhs + incx*(roff-coff);// H
		double * const ym = out + incy*(coff-roff);

		/* Four entries per trip; updates are issued strictly in storage order. */
		for(k = 0; k+3 < nnz; k += 4)
		{
			const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
			const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
			const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
			const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

			out[r0*incy] += VA[k  ]*rhs[c0*incx];
			ym[c0*incy] += VA[k  ]*xm[r0*incx];
			out[r1*incy] += VA[k+1]*rhs[c1*incx];
			ym[c1*incy] += VA[k+1]*xm[r1*incx];
			out[r2*incy] += VA[k+2]*rhs[c2*incx];
			ym[c2*incy] += VA[k+2]*xm[r2*incx];
			out[r3*incy] += VA[k+3]*rhs[c3*incx];
			ym[c3*incy] += VA[k+3]*xm[r3*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			ym[c*incy] += VA[k]*xm[r*incx];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * For each of the nnz stored values VA[k], the row index is read from bpntr
	 * (reinterpreted as an rsb_half_idx_t array) and the column index from bindx;
	 * both are widened to rsb_coo_idx_t before being multiplied by a stride.
	 * Every value is applied as stored and then mirrored (row and column swapped);
	 * rhs is read with stride incx and out is updated with stride incy.
	 * The values are real doubles and no conjugation is applied to them.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * This body always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	if(roff == coff)
	{
		/* Equal offsets: mirrored updates share the rhs/out bases; an entry
		 * whose row and column indices coincide is applied only once. */
		while(RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r != c))
				out[c*incy] += VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through bases shifted by the offset difference. */
		const double * const xm = rhs + incx*(roff-coff);// H
		double * const ym = out + incy*(coff-roff);

		/* Four entries per trip; updates are issued strictly in storage order. */
		for(k = 0; k+3 < nnz; k += 4)
		{
			const rsb_coo_idx_t r0 = rows[k  ], c0 = cols[k  ];
			const rsb_coo_idx_t r1 = rows[k+1], c1 = cols[k+1];
			const rsb_coo_idx_t r2 = rows[k+2], c2 = cols[k+2];
			const rsb_coo_idx_t r3 = rows[k+3], c3 = cols[k+3];

			out[r0*incy] += VA[k  ]*rhs[c0*incx];
			ym[c0*incy] += VA[k  ]*xm[r0*incx];
			out[r1*incy] += VA[k+1]*rhs[c1*incx];
			ym[c1*incy] += VA[k+1]*xm[r1*incx];
			out[r2*incy] += VA[k+2]*rhs[c2*incx];
			ym[c2*incy] += VA[k+2]*xm[r2*incx];
			out[r3*incy] += VA[k+3]*rhs[c3*incx];
			ym[c3*incy] += VA[k+3]*xm[r3*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k)
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];

			out[r*incy] += VA[k]*rhs[c*incx];
			ym[c*incy] += VA[k]*xm[r*incx];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * This entry point performs no computation of its own: it hands every
	 * argument, unchanged and in the same order, to the non-transposed (tN)
	 * kernel of the same family and returns that kernel's result.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case reverts to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dI_uG(
		VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_half_idx_t column indices.
	 * The work is delegated to the non-transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied here; that is dealt with at caller level. */
	/* The transposed product reverts to the non-transposed one: pass every argument through untouched. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_C__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_coo_idx_t column indices.
	 * The work is delegated to the non-transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied here; that is dealt with at caller level. */
	/* The conjugate-transposed product reverts to the non-transposed one: pass every argument through untouched. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_double_H__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_half_idx_t column indices.
	 * The work is delegated to the non-transposed (tN) kernel of the same family.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied here; that is dealt with at caller level. */
	/* The conjugate-transposed product reverts to the non-transposed one: pass every argument through untouched. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_double_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The solve is performed in place on \c out: rows are visited from Mdim-1 down to 0
	 * while the nonzeroes (row indices read through \c bpntr, column indices through \c bindx)
	 * are consumed from the last one backwards. \c rhs and the other parameters are not
	 * referenced by this kernel.
	 *
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if the nonzeroes are exhausted before a row's
	 * divisor is reached, or if that divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax=0;

		/* Sum the off-diagonal contributions of row ii, using entries of out solved so far. */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_coo_idx_t i=IA[n], j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*1];
		}

		/* The cursor runs backwards, so "out of nonzeroes" means it dropped below index 0
		 * (it can never reach nnz). Test that before reading VA[n], otherwise an element
		 * located before the start of VA would be accessed. */
		if(RSB_UNLIKELY(!(n+1>0)) || VA[n]==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*1]=(out[ii*1]-ax)/VA[n];
		--n;
	}
	/* Optional trace, printed when rsb__getenv_int_t reports a nonzero RSB_VERBOSE_KERNELS. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, explicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. For each row the leading run of entries with that row index and a
	 * different column index is accumulated; the entry where the run stops is the divisor.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA when the nonzeroes run out or the divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double acc=0;

		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t r=row_of[k], c=col_of[k];
			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc += VA[k]*out[c];
			++k;
		}

		/* no entry left to divide by, or a zero divisor */
		if(k==nnz || VA[k]==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[row]=(out[row]-acc)/VA[k];
		++k;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The solve is performed in place on \c out: rows are visited from Mdim-1 down to 0
	 * while the nonzeroes (row indices read through \c bpntr, column indices through \c bindx,
	 * both as rsb_half_idx_t) are consumed from the last one backwards. \c rhs and the other
	 * parameters are not referenced by this kernel.
	 *
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if the nonzeroes are exhausted before a row's
	 * divisor is reached, or if that divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax=0;

		/* Sum the off-diagonal contributions of row ii, using entries of out solved so far. */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_half_idx_t i=IA[n], j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*1];
		}

		/* The cursor runs backwards, so "out of nonzeroes" means it dropped below index 0
		 * (it can never reach nnz). Test that before reading VA[n], otherwise an element
		 * located before the start of VA would be accessed. */
		if(RSB_UNLIKELY(!(n+1>0)) || VA[n]==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*1]=(out[ii*1]-ax)/VA[n];
		--n;
	}
	/* Optional trace, printed when rsb__getenv_int_t reports a nonzero RSB_VERBOSE_KERNELS. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, explicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. For each row the leading run of entries with that row index and a
	 * different column index is accumulated; the entry where the run stops is the divisor.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA when the nonzeroes run out or the divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double acc=0;

		while(RSB_LIKELY(k<nnz))
		{
			const rsb_half_idx_t r=row_of[k], c=col_of[k];
			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc += VA[k]*out[c];
			++k;
		}

		/* no entry left to divide by, or a zero divisor */
		if(k==nnz || VA[k]==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[row]=(out[row]-acc)/VA[k];
		++k;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, explicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1. The next unread nonzero is taken
	 * as the row's divisor; the following entries carrying the same row index then update
	 * \c out at their column positions.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double diag, xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[row]/=diag;
		xi=out[row]; /* value propagated to the remaining entries of this row */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The solve is performed in place on \c out: rows are visited from Mdim-1 down to 0
	 * and the nonzeroes are consumed from the last one backwards. For each row the entry
	 * under the cursor is the divisor; the preceding entries with the same row index then
	 * update \c out at their column positions. \c rhs and the other parameters are not
	 * referenced by this kernel.
	 *
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row, or if the
	 * divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;

		/* The cursor runs backwards: exhaustion means it dropped below index 0, which a
		 * comparison against nnz cannot detect. Check before VA[n] is read. */
		if(RSB_UNLIKELY(!(n+1>0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*1]/=aa;
		ax=out[ii*1];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_coo_idx_t i=IA[n], j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	/* Optional trace, printed when rsb__getenv_int_t reports a nonzero RSB_VERBOSE_KERNELS. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, explicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1. The next unread nonzero is taken
	 * as the row's divisor; the following entries carrying the same row index then update
	 * \c out at their column positions.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double diag, xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[row]/=diag;
		xi=out[row]; /* value propagated to the remaining entries of this row */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The solve is performed in place on \c out: rows are visited from Mdim-1 down to 0
	 * and the nonzeroes are consumed from the last one backwards. For each row the entry
	 * under the cursor is the divisor; the preceding entries with the same row index then
	 * update \c out at their column positions. \c rhs and the other parameters are not
	 * referenced by this kernel.
	 *
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row, or if the
	 * divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;

		/* The cursor runs backwards: exhaustion means it dropped below index 0, which a
		 * comparison against nnz cannot detect. Check before VA[n] is read. */
		if(RSB_UNLIKELY(!(n+1>0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*1]/=aa;
		ax=out[ii*1];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_half_idx_t i=IA[n], j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	/* Optional trace, printed when rsb__getenv_int_t reports a nonzero RSB_VERBOSE_KERNELS. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, explicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1. The next unread nonzero is taken
	 * as the row's divisor; the following entries carrying the same row index then update
	 * \c out at their column positions.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double diag, xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[row]/=diag;
		xi=out[row]; /* value propagated to the remaining entries of this row */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * The solve is performed in place on \c out: rows are visited from Mdim-1 down to 0
	 * and the nonzeroes are consumed from the last one backwards. For each row the entry
	 * under the cursor is the divisor; the preceding entries with the same row index then
	 * update \c out at their column positions. \c rhs and the other parameters are not
	 * referenced by this kernel.
	 *
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row, or if the
	 * divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;

		/* The cursor runs backwards: exhaustion means it dropped below index 0, which a
		 * comparison against nnz cannot detect. Check before VA[n] is read. */
		if(RSB_UNLIKELY(!(n+1>0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*1]/=aa;
		ax=out[ii*1];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_coo_idx_t i=IA[n], j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	/* Optional trace, printed when rsb__getenv_int_t reports a nonzero RSB_VERBOSE_KERNELS. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, explicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1. The next unread nonzero is taken
	 * as the row's divisor; the following entries carrying the same row index then update
	 * \c out at their column positions.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double diag, xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[row]/=diag;
		xi=out[row]; /* value propagated to the remaining entries of this row */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * The solve is performed in place on \c out: rows are visited from Mdim-1 down to 0
	 * and the nonzeroes are consumed from the last one backwards. For each row the entry
	 * under the cursor is the divisor; the preceding entries with the same row index then
	 * update \c out at their column positions. \c rhs and the other parameters are not
	 * referenced by this kernel.
	 *
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row, or if the
	 * divisor is zero.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;

		/* The cursor runs backwards: exhaustion means it dropped below index 0, which a
		 * comparison against nnz cannot detect. Check before VA[n] is read. */
		if(RSB_UNLIKELY(!(n+1>0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*1]/=aa;
		ax=out[ii*1];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_half_idx_t i=IA[n], j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	/* Optional trace, printed when rsb__getenv_int_t reports a nonzero RSB_VERBOSE_KERNELS. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from Mdim-1 down to 0 and nonzeroes are consumed from
	 * the last one backwards. For each row, the run of entries with that row index and a
	 * different column index is accumulated and subtracted from \c out; no division is done.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row;

	for(row=Mdim-1;RSB_LIKELY(row+1>0);--row)
	{
		double acc=0;

		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t r=row_of[k], c=col_of[k];
			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc += VA[k]*out[c];
			--k;
		}

		out[row]=(out[row]-acc);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. For each row, the run of entries with that row index and a different
	 * column index is accumulated and subtracted from \c out; no division is done.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double acc=0;

		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t r=row_of[k], c=col_of[k];
			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc += VA[k]*out[c];
			++k;
		}

		out[row]=(out[row]-acc);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from Mdim-1 down to 0 and nonzeroes are consumed from
	 * the last one backwards. For each row, the run of entries with that row index and a
	 * different column index is accumulated and subtracted from \c out; no division is done.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row;

	for(row=Mdim-1;RSB_LIKELY(row+1>0);--row)
	{
		double acc=0;

		while(RSB_LIKELY(k+1>0))
		{
			const rsb_half_idx_t r=row_of[k], c=col_of[k];
			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc += VA[k]*out[c];
			--k;
		}

		out[row]=(out[row]-acc);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. For each row, the run of entries with that row index and a different
	 * column index is accumulated and subtracted from \c out; no division is done.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		double acc=0;

		while(RSB_LIKELY(k<nnz))
		{
			const rsb_half_idx_t r=row_of[k], c=col_of[k];
			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc += VA[k]*out[c];
			++k;
		}

		out[row]=(out[row]-acc);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. The value of \c out at the row is sampled once, then every entry with
	 * that row index subtracts its scaled contribution from \c out at the entry's column.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		const double xi=out[row]; /* sampled before the updates below */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from Mdim-1 down to 0 and nonzeroes are consumed from
	 * the last one backwards. The value of \c out at the row is sampled once, then every entry
	 * with that row index subtracts its scaled contribution from \c out at the entry's column.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row;

	for(row=Mdim-1;RSB_LIKELY(row+1>0);--row)
	{
		const double xi=out[row]; /* sampled before the updates below */

		while(RSB_LIKELY(k+1>0))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			--k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. The value of \c out at the row is sampled once, then every entry with
	 * that row index subtracts its scaled contribution from \c out at the entry's column.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		const double xi=out[row]; /* sampled before the updates below */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_half_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from Mdim-1 down to 0 and nonzeroes are consumed from
	 * the last one backwards. The value of \c out at the row is sampled once, then every entry
	 * with that row index subtracts its scaled contribution from \c out at the entry's column.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t *row_of=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_of=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row;

	for(row=Mdim-1;RSB_LIKELY(row+1>0);--row)
	{
		const double xi=out[row]; /* sampled before the updates below */

		while(RSB_LIKELY(k+1>0))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			--k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * Kernel for 1 x 1 blocks in BCOR format, implicit diagonal, type double, rsb_coo_idx_t column indices.
	 *
	 * In-place solve on \c out: rows go from 0 up to Mdim-1 and nonzeroes are consumed in
	 * storage order. The value of \c out at the row is sampled once, then every entry with
	 * that row index subtracts its scaled contribution from \c out at the entry's column.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t *row_of=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_of=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		const double xi=out[row]; /* sampled before the updates below */

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(row_of[k]!=row))
				break;
			out[col_of[k]]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Triangular solve kernel for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, with implicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_coo_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * The kernel works in place on out; rhs is not accessed here.
	 * Rows are swept from Mdim-1 down to 0 while the nonzeroes are consumed from
	 * nnz-1 downwards: as long as the current nonzero carries the current row index,
	 * VA[k] times that row's out value (sampled before the row is processed) is
	 * subtracted from out at the nonzero's column index.
	 * No division by diagonal values takes place.
	 * NOTE(review): this presumes nonzeroes with equal row index are stored contiguously, rows ascending -- confirm against the caller.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		/* sampled once: later updates of out[row] within this row do not affect it */
		const double xv=out[row];

		while(RSB_LIKELY(k+1>0))
		{
			if(RSB_UNLIKELY(rowidx[k]!=row))
				break; /* this nonzero belongs to another row: leave it for a later pass */
			out[colidx[k]]-=VA[k]*xv;
			--k;
		}
		--row;
	}
	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_C__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Triangular solve kernel for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, with implicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * The kernel works in place on out; rhs is not accessed here.
	 * Rows are swept from 0 up to Mdim-1 while the nonzeroes are consumed from
	 * 0 upwards: as long as the current nonzero carries the current row index,
	 * VA[k] times that row's out value (sampled before the row is processed) is
	 * subtracted from out at the nonzero's column index.
	 * No division by diagonal values takes place.
	 * NOTE(review): this presumes nonzeroes with equal row index are stored contiguously, rows ascending -- confirm against the caller.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		/* sampled once: later updates of out[row] within this row do not affect it */
		const double xv=out[row];

		while(RSB_LIKELY(k<nnz))
		{
			if(RSB_UNLIKELY(rowidx[k]!=row))
				break; /* this nonzero belongs to another row: leave it for a later pass */
			out[colidx[k]]-=VA[k]*xv;
			++k;
		}
		++row;
	}
	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * rhs, double * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Triangular solve kernel for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, with implicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * The kernel works in place on out; rhs is not accessed here.
	 * Rows are swept from Mdim-1 down to 0 while the nonzeroes are consumed from
	 * nnz-1 downwards: as long as the current nonzero carries the current row index,
	 * VA[k] times that row's out value (sampled before the row is processed) is
	 * subtracted from out at the nonzero's column index.
	 * No division by diagonal values takes place.
	 * NOTE(review): this presumes nonzeroes with equal row index are stored contiguously, rows ascending -- confirm against the caller.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		/* sampled once: later updates of out[row] within this row do not affect it */
		const double xv=out[row];

		while(RSB_LIKELY(k+1>0))
		{
			if(RSB_UNLIKELY(rowidx[k]!=row))
				break; /* this nonzero belongs to another row: leave it for a later pass */
			out[colidx[k]]-=VA[k]*xv;
			--k;
		}
		--row;
	}
	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_double_H__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}




rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_coo_idx_t row and column indices.
	 *
	 * Each of the nnz stored entries (i,j,v) adds (*alphap)*v*rhs[j*incx] to out[i*incy];
	 * out is only accumulated into, never scaled, by this kernel.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	const double scal=*alphap;
	rsb_nnz_idx_t k;

	/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
	for(k=0;k+3<nnz;k+=4)
	{
		const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
		const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
		const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
		const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

		out[r0*incy]+=scal*VA[k+0]*rhs[c0*incx];
		out[r1*incy]+=scal*VA[k+1]*rhs[c1*incx];
		out[r2*incy]+=scal*VA[k+2]*rhs[c2*incx];
		out[r3*incy]+=scal*VA[k+3]*rhs[c3*incx];
	}
	/* at most three nonzeroes are left over */
	for(;k<nnz;++k)
	{
		const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

		out[r*incy]+=scal*VA[k]*rhs[c*incx];
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 *
	 * Each of the nnz stored entries (i,j,v) adds (*alphap)*v*rhs[j*incx] to out[i*incy];
	 * out is only accumulated into, never scaled, by this kernel.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	const double scal=*alphap;
	rsb_nnz_idx_t k;

	/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
	for(k=0;k+3<nnz;k+=4)
	{
		/* half-width indices are widened to rsb_coo_idx_t before being multiplied by the strides */
		const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
		const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
		const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
		const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

		out[r0*incy]+=scal*VA[k+0]*rhs[c0*incx];
		out[r1*incy]+=scal*VA[k+1]*rhs[c1*incx];
		out[r2*incy]+=scal*VA[k+2]*rhs[c2*incx];
		out[r3*incy]+=scal*VA[k+3]*rhs[c3*incx];
	}
	/* at most three nonzeroes are left over */
	for(;k<nnz;++k)
	{
		const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

		out[r*incy]+=scal*VA[k]*rhs[c*incx];
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_coo_idx_t row and column indices.
	 *
	 * The vector pointers are rebased first: rhs is moved back by coff*incx and forward by roff*incx,
	 * out is moved back by roff*incy and forward by coff*incy.
	 * Then each of the nnz stored entries (i,j,v) adds (*alphap)*v*x[i*incx] to y[j*incy],
	 * x and y being the rebased rhs and out; y is only accumulated into, never scaled.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	const double scal=*alphap;
	/* transposed access: the roles of the row and column offsets are exchanged */
	const double * const x=(rhs-coff*(incx))+roff*(incx);
	double * const y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k;

	/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
	for(k=0;k+3<nnz;k+=4)
	{
		const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
		const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
		const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
		const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

		y[c0*incy]+=scal*VA[k+0]*x[r0*incx];
		y[c1*incy]+=scal*VA[k+1]*x[r1*incx];
		y[c2*incy]+=scal*VA[k+2]*x[r2*incx];
		y[c3*incy]+=scal*VA[k+3]*x[r3*incx];
	}
	/* at most three nonzeroes are left over */
	for(;k<nnz;++k)
	{
		const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

		y[c*incy]+=scal*VA[k]*x[r*incx];
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 *
	 * The vector pointers are rebased first: rhs is moved back by coff*incx and forward by roff*incx,
	 * out is moved back by roff*incy and forward by coff*incy.
	 * Then each of the nnz stored entries (i,j,v) adds (*alphap)*v*x[i*incx] to y[j*incy],
	 * x and y being the rebased rhs and out; y is only accumulated into, never scaled.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	const double scal=*alphap;
	/* transposed access: the roles of the row and column offsets are exchanged */
	const double * const x=(rhs-coff*(incx))+roff*(incx);
	double * const y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k;

	/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
	for(k=0;k+3<nnz;k+=4)
	{
		/* half-width indices are widened to rsb_coo_idx_t before being multiplied by the strides */
		const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
		const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
		const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
		const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

		y[c0*incy]+=scal*VA[k+0]*x[r0*incx];
		y[c1*incy]+=scal*VA[k+1]*x[r1*incx];
		y[c2*incy]+=scal*VA[k+2]*x[r2*incx];
		y[c3*incy]+=scal*VA[k+3]*x[r3*incx];
	}
	/* at most three nonzeroes are left over */
	for(;k<nnz;++k)
	{
		const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

		y[c*incy]+=scal*VA[k]*x[r*incx];
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_coo_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * The vector pointers are rebased first: rhs is moved back by coff*incx and forward by roff*incx,
	 * out is moved back by roff*incy and forward by coff*incy.
	 * Then each of the nnz stored entries (i,j,v) adds (*alphap)*v*x[i*incx] to y[j*incy],
	 * x and y being the rebased rhs and out; y is only accumulated into, never scaled.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	const double scal=*alphap;
	/* transposed access: the roles of the row and column offsets are exchanged */
	const double * const x=(rhs-coff*(incx))+roff*(incx);
	double * const y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k;

	/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
	for(k=0;k+3<nnz;k+=4)
	{
		const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
		const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
		const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
		const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

		y[c0*incy]+=scal*VA[k+0]*x[r0*incx];
		y[c1*incy]+=scal*VA[k+1]*x[r1*incx];
		y[c2*incy]+=scal*VA[k+2]*x[r2*incx];
		y[c3*incy]+=scal*VA[k+3]*x[r3*incx];
	}
	/* at most three nonzeroes are left over */
	for(;k<nnz;++k)
	{
		const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

		y[c*incy]+=scal*VA[k]*x[r*incx];
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sU_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * The vector pointers are rebased first: rhs is moved back by coff*incx and forward by roff*incx,
	 * out is moved back by roff*incy and forward by coff*incy.
	 * Then each of the nnz stored entries (i,j,v) adds (*alphap)*v*x[i*incx] to y[j*incy],
	 * x and y being the rebased rhs and out; y is only accumulated into, never scaled.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	const double scal=*alphap;
	/* transposed access: the roles of the row and column offsets are exchanged */
	const double * const x=(rhs-coff*(incx))+roff*(incx);
	double * const y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k;

	/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
	for(k=0;k+3<nnz;k+=4)
	{
		/* half-width indices are widened to rsb_coo_idx_t before being multiplied by the strides */
		const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
		const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
		const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
		const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

		y[c0*incy]+=scal*VA[k+0]*x[r0*incx];
		y[c1*incy]+=scal*VA[k+1]*x[r1*incx];
		y[c2*incy]+=scal*VA[k+2]*x[r2*incx];
		y[c3*incy]+=scal*VA[k+3]*x[r3*incx];
	}
	/* at most three nonzeroes are left over */
	for(;k<nnz;++k)
	{
		const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

		y[c*incy]+=scal*VA[k]*x[r*incx];
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_coo_idx_t row and column indices.
	 *
	 * Every stored entry (i,j,v) adds (*alphap)*v*rhs[j*incx] to out[i*incy] and also
	 * contributes at the mirrored position:
	 * - when roff==coff, (*alphap)*v*rhs[i*incx] is added to out[j*incy], unless i==j;
	 * - otherwise the mirrored update goes through rhs shifted by incx*(roff-coff) and
	 *   out shifted by incy*(coff-roff), with no i==j test.
	 * out is only accumulated into, never scaled, by this kernel.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	const double scal=*alphap;
	rsb_nnz_idx_t k;

	if(roff==coff)
	{
		/* row and column offsets coincide: mirror in the same vectors, counting i==j entries once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=scal*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* vectors as seen from the mirrored position of this submatrix */
		const double * const xt=rhs+incx*(roff-coff);
		double * const yt=out+incy*(coff-roff);

		/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
			const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
			const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
			const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

			out[r0*incy]+=scal*VA[k+0]*rhs[c0*incx];
			yt[c0*incy]+=scal*VA[k+0]*xt[r0*incx];
			out[r1*incy]+=scal*VA[k+1]*rhs[c1*incx];
			yt[c1*incy]+=scal*VA[k+1]*xt[r1*incx];
			out[r2*incy]+=scal*VA[k+2]*rhs[c2*incx];
			yt[c2*incy]+=scal*VA[k+2]*xt[r2*incx];
			out[r3*incy]+=scal*VA[k+3]*rhs[c3*incx];
			yt[c3*incy]+=scal*VA[k+3]*xt[r3*incx];
		}
		/* at most three nonzeroes are left over */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			yt[c*incy]+=scal*VA[k]*xt[r*incx];
		}
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 *
	 * Every stored entry (i,j,v) adds (*alphap)*v*rhs[j*incx] to out[i*incy] and also
	 * contributes at the mirrored position:
	 * - when roff==coff, (*alphap)*v*rhs[i*incx] is added to out[j*incy], unless i==j;
	 * - otherwise the mirrored update goes through rhs shifted by incx*(roff-coff) and
	 *   out shifted by incy*(coff-roff), with no i==j test.
	 * out is only accumulated into, never scaled, by this kernel.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	const double scal=*alphap;
	rsb_nnz_idx_t k;

	if(roff==coff)
	{
		/* row and column offsets coincide: mirror in the same vectors, counting i==j entries once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			/* half-width indices are widened to rsb_coo_idx_t before being multiplied by the strides */
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=scal*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* vectors as seen from the mirrored position of this submatrix */
		const double * const xt=rhs+incx*(roff-coff);
		double * const yt=out+incy*(coff-roff);

		/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
			const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
			const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
			const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

			out[r0*incy]+=scal*VA[k+0]*rhs[c0*incx];
			yt[c0*incy]+=scal*VA[k+0]*xt[r0*incx];
			out[r1*incy]+=scal*VA[k+1]*rhs[c1*incx];
			yt[c1*incy]+=scal*VA[k+1]*xt[r1*incx];
			out[r2*incy]+=scal*VA[k+2]*rhs[c2*incx];
			yt[c2*incy]+=scal*VA[k+2]*xt[r2*incx];
			out[r3*incy]+=scal*VA[k+3]*rhs[c3*incx];
			yt[c3*incy]+=scal*VA[k+3]*xt[r3*incx];
		}
		/* at most three nonzeroes are left over */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			yt[c*incy]+=scal*VA[k]*xt[r*incx];
		}
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$ for a symmetric operand (\f$A = A^T\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_coo_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A equal to its transpose, the untransposed symmetric kernel does this job. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$ for a symmetric operand (\f$A = A^T\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_half_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A equal to its transpose, the untransposed symmetric kernel does this job. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate transposed product \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$ for a symmetric operand (\f$A = A^T\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_coo_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Real symmetric operand: the untransposed symmetric kernel is used as is, no conjugation step is added. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sS_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate transposed product \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$ for a symmetric operand (\f$A = A^T\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_half_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Real symmetric operand: the untransposed symmetric kernel is used as is, no conjugation step is added. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_coo_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * Every stored entry (i,j,v) adds (*alphap)*v*rhs[j*incx] to out[i*incy] and also
	 * contributes at the mirrored position:
	 * - when roff==coff, (*alphap)*v*rhs[i*incx] is added to out[j*incy], unless i==j;
	 * - otherwise the mirrored update goes through rhs shifted by incx*(roff-coff) and
	 *   out shifted by incy*(coff-roff), with no i==j test.
	 * out is only accumulated into, never scaled, by this kernel.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx=(const rsb_coo_idx_t*)bindx;
	const double scal=*alphap;
	rsb_nnz_idx_t k;

	if(roff==coff)
	{
		/* row and column offsets coincide: mirror in the same vectors, counting i==j entries once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=scal*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* vectors as seen from the mirrored position of this submatrix */
		const double * const xt=rhs+incx*(roff-coff);
		double * const yt=out+incy*(coff-roff);

		/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
			const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
			const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
			const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

			out[r0*incy]+=scal*VA[k+0]*rhs[c0*incx];
			yt[c0*incy]+=scal*VA[k+0]*xt[r0*incx];
			out[r1*incy]+=scal*VA[k+1]*rhs[c1*incx];
			yt[c1*incy]+=scal*VA[k+1]*xt[r1*incx];
			out[r2*incy]+=scal*VA[k+2]*rhs[c2*incx];
			yt[c2*incy]+=scal*VA[k+2]*xt[r2*incx];
			out[r3*incy]+=scal*VA[k+3]*rhs[c3*incx];
			yt[c3*incy]+=scal*VA[k+3]*xt[r3*incx];
		}
		/* at most three nonzeroes are left over */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			yt[c*incy]+=scal*VA[k]*xt[r*incx];
		}
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$,
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double;
	 * bpntr and bindx are read as arrays of rsb_half_idx_t row and column indices.
	 * No conjugation appears in the code, the values being plain doubles.
	 *
	 * Every stored entry (i,j,v) adds (*alphap)*v*rhs[j*incx] to out[i*incy] and also
	 * contributes at the mirrored position:
	 * - when roff==coff, (*alphap)*v*rhs[i*incx] is added to out[j*incy], unless i==j;
	 * - otherwise the mirrored update goes through rhs shifted by incx*(roff-coff) and
	 *   out shifted by incy*(coff-roff), with no i==j test.
	 * out is only accumulated into, never scaled, by this kernel.
	 * In this kernel the returned value is always RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx=(const rsb_half_idx_t*)bindx;
	const double scal=*alphap;
	rsb_nnz_idx_t k;

	if(roff==coff)
	{
		/* row and column offsets coincide: mirror in the same vectors, counting i==j entries once */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			/* half-width indices are widened to rsb_coo_idx_t before being multiplied by the strides */
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=scal*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* vectors as seen from the mirrored position of this submatrix */
		const double * const xt=rhs+incx*(roff-coff);
		double * const yt=out+incy*(coff-roff);

		/* bulk of the nonzeroes, four per iteration, updates applied in storage order */
		for(k=0;k+3<nnz;k+=4)
		{
			const rsb_coo_idx_t r0=rowidx[k+0],c0=colidx[k+0];
			const rsb_coo_idx_t r1=rowidx[k+1],c1=colidx[k+1];
			const rsb_coo_idx_t r2=rowidx[k+2],c2=colidx[k+2];
			const rsb_coo_idx_t r3=rowidx[k+3],c3=colidx[k+3];

			out[r0*incy]+=scal*VA[k+0]*rhs[c0*incx];
			yt[c0*incy]+=scal*VA[k+0]*xt[r0*incx];
			out[r1*incy]+=scal*VA[k+1]*rhs[c1*incx];
			yt[c1*incy]+=scal*VA[k+1]*xt[r1*incx];
			out[r2*incy]+=scal*VA[k+2]*rhs[c2*incx];
			yt[c2*incy]+=scal*VA[k+2]*xt[r2*incx];
			out[r3*incy]+=scal*VA[k+3]*rhs[c3*incx];
			yt[c3*incy]+=scal*VA[k+3]*xt[r3*incx];
		}
		/* at most three nonzeroes are left over */
		for(;k<nnz;++k)
		{
			const rsb_coo_idx_t r=rowidx[k],c=colidx[k];

			out[r*incy]+=scal*VA[k]*rhs[c*incx];
			yt[c*incy]+=scal*VA[k]*xt[r*incx];
		}
	}

	/* trace message, emitted when rsb__getenv_int_t yields nonzero for RSB_VERBOSE_KERNELS */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$ for a Hermitian operand (\f$A = A^H\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_coo_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Values are real doubles here: the transposed case is handed to the untransposed kernel as is. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$ for a Hermitian operand (\f$A = A^H\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_half_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* Values are real doubles here: the transposed case is handed to the untransposed kernel as is. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate transposed product \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$ for a Hermitian operand (\f$A = A^H\f$),
	 * with incx and incy as x and y vector strides.
	 * A is blocked 1 x 1, stored in BCOR format, with explicit diagonal, of type double, with rsb_coo_idx_t column indices.
	 * All arguments are forwarded, unchanged and in the same order, to
	 * rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dE_uG, whose return value is passed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With A equal to its conjugate transpose, the untransposed Hermitian kernel does this job. */
	const rsb_err_t errval=rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sH_dE_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the corresponding non-transposed (tN) kernel.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* The conjugate-transposed product on this symmetry class is served by the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A} \cdot x, where A \neq A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * For each of the nnz stored values, alpha*VA[n]*rhs[col*incx] is added to out[row*incy],
	 * where row is read from bpntr and col from bindx, both accessed as rsb_coo_idx_t arrays.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	const double alpha=*alphap;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	while(k+3<nnz)
	{
		rsb_coo_idx_t r=0,c=0;

		r=row_idx[k+0]; c=col_idx[k+0];
		out[r*incy]+=(alpha)*VA[k+0]*rhs[c*incx];
		r=row_idx[k+1]; c=col_idx[k+1];
		out[r*incy]+=(alpha)*VA[k+1]*rhs[c*incx];
		r=row_idx[k+2]; c=col_idx[k+2];
		out[r*incy]+=(alpha)*VA[k+2]*rhs[c*incx];
		r=row_idx[k+3]; c=col_idx[k+3];
		out[r*incy]+=(alpha)*VA[k+3]*rhs[c*incx];
		k+=4;
	}

	/* Tail: the at most three nonzeroes left over. */
	while(k<nnz)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A} \cdot x, where A \neq A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * For each of the nnz stored values, alpha*VA[n]*rhs[col*incx] is added to out[row*incy],
	 * where row is read from bpntr and col from bindx, both accessed as rsb_half_idx_t arrays.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	const double alpha=*alphap;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	while(k+3<nnz)
	{
		rsb_coo_idx_t r=0,c=0;

		r=row_idx[k+0]; c=col_idx[k+0];
		out[r*incy]+=(alpha)*VA[k+0]*rhs[c*incx];
		r=row_idx[k+1]; c=col_idx[k+1];
		out[r*incy]+=(alpha)*VA[k+1]*rhs[c*incx];
		r=row_idx[k+2]; c=col_idx[k+2];
		out[r*incy]+=(alpha)*VA[k+2]*rhs[c*incx];
		r=row_idx[k+3]; c=col_idx[k+3];
		out[r*incy]+=(alpha)*VA[k+3]*rhs[c*incx];
		k+=4;
	}

	/* Tail: the at most three nonzeroes left over. */
	while(k<nnz)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^T} \cdot x, where A \neq A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A^T \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * The roles of the stored indices are swapped with respect to the non-transposed kernel:
	 * for each stored value, alpha*VA[n]*x[row*incx] is added to y[col*incy],
	 * where x and y are rhs and out re-based using roff and coff (see below).
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	const double alpha=*alphap;
	/* Re-base the vectors: the column offset is taken out and the row offset put in for x, and vice versa for y. */
	const double *x=(rhs-coff*(incx))+roff*(incx);
	double *y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	while(k+3<nnz)
	{
		rsb_coo_idx_t r=0,c=0;

		r=row_idx[k+0]; c=col_idx[k+0];
		y[c*incy]+=(alpha)*VA[k+0]*x[r*incx];
		r=row_idx[k+1]; c=col_idx[k+1];
		y[c*incy]+=(alpha)*VA[k+1]*x[r*incx];
		r=row_idx[k+2]; c=col_idx[k+2];
		y[c*incy]+=(alpha)*VA[k+2]*x[r*incx];
		r=row_idx[k+3]; c=col_idx[k+3];
		y[c*incy]+=(alpha)*VA[k+3]*x[r*incx];
		k+=4;
	}

	/* Tail: the at most three nonzeroes left over. */
	while(k<nnz)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		y[c*incy]+=(alpha)*VA[k]*x[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^T} \cdot x, where A \neq A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A^T \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * The roles of the stored indices are swapped with respect to the non-transposed kernel:
	 * for each stored value, alpha*VA[n]*x[row*incx] is added to y[col*incy],
	 * where x and y are rhs and out re-based using roff and coff (see below).
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	const double alpha=*alphap;
	/* Re-base the vectors: the column offset is taken out and the row offset put in for x, and vice versa for y. */
	const double *x=(rhs-coff*(incx))+roff*(incx);
	double *y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	while(k+3<nnz)
	{
		rsb_coo_idx_t r=0,c=0;

		r=row_idx[k+0]; c=col_idx[k+0];
		y[c*incy]+=(alpha)*VA[k+0]*x[r*incx];
		r=row_idx[k+1]; c=col_idx[k+1];
		y[c*incy]+=(alpha)*VA[k+1]*x[r*incx];
		r=row_idx[k+2]; c=col_idx[k+2];
		y[c*incy]+=(alpha)*VA[k+2]*x[r*incx];
		r=row_idx[k+3]; c=col_idx[k+3];
		y[c*incy]+=(alpha)*VA[k+3]*x[r*incx];
		k+=4;
	}

	/* Tail: the at most three nonzeroes left over. */
	while(k<nnz)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		y[c*incy]+=(alpha)*VA[k]*x[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A \neq A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A^H \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * The values are of type double and are used as stored (no conjugation step appears in this kernel).
	 * For each stored value, alpha*VA[n]*x[row*incx] is added to y[col*incy],
	 * where x and y are rhs and out re-based using roff and coff (see below).
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	const double alpha=*alphap;
	/* Re-base the vectors: the column offset is taken out and the row offset put in for x, and vice versa for y. */
	const double *x=(rhs-coff*(incx))+roff*(incx);
	double *y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	while(k+3<nnz)
	{
		rsb_coo_idx_t r=0,c=0;

		r=row_idx[k+0]; c=col_idx[k+0];
		y[c*incy]+=(alpha)*VA[k+0]*x[r*incx];
		r=row_idx[k+1]; c=col_idx[k+1];
		y[c*incy]+=(alpha)*VA[k+1]*x[r*incx];
		r=row_idx[k+2]; c=col_idx[k+2];
		y[c*incy]+=(alpha)*VA[k+2]*x[r*incx];
		r=row_idx[k+3]; c=col_idx[k+3];
		y[c*incy]+=(alpha)*VA[k+3]*x[r*incx];
		k+=4;
	}

	/* Tail: the at most three nonzeroes left over. */
	while(k<nnz)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		y[c*incy]+=(alpha)*VA[k]*x[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sU_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A \neq A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A^H \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * The values are of type double and are used as stored (no conjugation step appears in this kernel).
	 * For each stored value, alpha*VA[n]*x[row*incx] is added to y[col*incy],
	 * where x and y are rhs and out re-based using roff and coff (see below).
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	const double alpha=*alphap;
	/* Re-base the vectors: the column offset is taken out and the row offset put in for x, and vice versa for y. */
	const double *x=(rhs-coff*(incx))+roff*(incx);
	double *y=(out-roff*(incy))+coff*(incy);
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	while(k+3<nnz)
	{
		rsb_coo_idx_t r=0,c=0;

		r=row_idx[k+0]; c=col_idx[k+0];
		y[c*incy]+=(alpha)*VA[k+0]*x[r*incx];
		r=row_idx[k+1]; c=col_idx[k+1];
		y[c*incy]+=(alpha)*VA[k+1]*x[r*incx];
		r=row_idx[k+2]; c=col_idx[k+2];
		y[c*incy]+=(alpha)*VA[k+2]*x[r*incx];
		r=row_idx[k+3]; c=col_idx[k+3];
		y[c*incy]+=(alpha)*VA[k+3]*x[r*incx];
		k+=4;
	}

	/* Tail: the at most three nonzeroes left over. */
	while(k<nnz)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		y[c*incy]+=(alpha)*VA[k]*x[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A} \cdot x, where A == A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * Each stored value is applied twice: once at its stored (row,col) position and
	 * once at the mirrored (col,row) position. When roff==coff an entry with row==col
	 * is applied only once; when roff!=coff the mirrored update is unconditional and
	 * goes through rhs/out pointers shifted by the difference of the two offsets.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	const double alpha=*alphap;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Same row and column offset: skip the mirrored update for row==col entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=row_idx[k];
			const rsb_coo_idx_t c=col_idx[k];

			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=(alpha)*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* Different offsets: the mirrored contribution uses vectors shifted by (roff-coff) and (coff-roff). */
		const double *trhs = rhs+incx*(roff-coff);
		double *tout=out+incy*(coff-roff);
		rsb_coo_idx_t r=0,c=0;

		/* Main part: four nonzeroes per pass, processed in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=row_idx[k+0]; c=col_idx[k+0];
			out[r*incy]+=(alpha)*VA[k+0]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+0]*trhs[r*incx];

			r=row_idx[k+1]; c=col_idx[k+1];
			out[r*incy]+=(alpha)*VA[k+1]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+1]*trhs[r*incx];

			r=row_idx[k+2]; c=col_idx[k+2];
			out[r*incy]+=(alpha)*VA[k+2]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+2]*trhs[r*incx];

			r=row_idx[k+3]; c=col_idx[k+3];
			out[r*incy]+=(alpha)*VA[k+3]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+3]*trhs[r*incx];
		}

		/* Tail: the at most three nonzeroes left over. */
		for(;k<nnz;++k)
		{
			r=row_idx[k]; c=col_idx[k];
			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k]*trhs[r*incx];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A} \cdot x, where A == A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * Each stored value is applied twice: once at its stored (row,col) position and
	 * once at the mirrored (col,row) position. When roff==coff an entry with row==col
	 * is applied only once; when roff!=coff the mirrored update is unconditional and
	 * goes through rhs/out pointers shifted by the difference of the two offsets.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	const double alpha=*alphap;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Same row and column offset: skip the mirrored update for row==col entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=row_idx[k];
			const rsb_coo_idx_t c=col_idx[k];

			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=(alpha)*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* Different offsets: the mirrored contribution uses vectors shifted by (roff-coff) and (coff-roff). */
		const double *trhs = rhs+incx*(roff-coff);
		double *tout=out+incy*(coff-roff);
		rsb_coo_idx_t r=0,c=0;

		/* Main part: four nonzeroes per pass, processed in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=row_idx[k+0]; c=col_idx[k+0];
			out[r*incy]+=(alpha)*VA[k+0]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+0]*trhs[r*incx];

			r=row_idx[k+1]; c=col_idx[k+1];
			out[r*incy]+=(alpha)*VA[k+1]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+1]*trhs[r*incx];

			r=row_idx[k+2]; c=col_idx[k+2];
			out[r*incy]+=(alpha)*VA[k+2]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+2]*trhs[r*incx];

			r=row_idx[k+3]; c=col_idx[k+3];
			out[r*incy]+=(alpha)*VA[k+3]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+3]*trhs[r*incx];
		}

		/* Tail: the at most three nonzeroes left over. */
		for(;k<nnz;++k)
		{
			r=row_idx[k]; c=col_idx[k];
			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k]*trhs[r*incx];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^T} \cdot x, where A == A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) symmetric kernel.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* With A == A^T the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^T} \cdot x, where A == A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) symmetric kernel.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* With A == A^T the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A == A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) symmetric kernel.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* The conjugate-transposed product on this symmetric matrix is served by the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sS_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A == A^T. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) symmetric kernel.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* The conjugate-transposed product on this symmetric matrix is served by the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * Each stored value is applied twice, as stored (no conjugation step appears here):
	 * once at its stored (row,col) position and once at the mirrored (col,row) position.
	 * When roff==coff an entry with row==col is applied only once; when roff!=coff the
	 * mirrored update is unconditional and goes through rhs/out pointers shifted by the
	 * difference of the two offsets.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	const double alpha=*alphap;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Same row and column offset: skip the mirrored update for row==col entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=row_idx[k];
			const rsb_coo_idx_t c=col_idx[k];

			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=(alpha)*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* Different offsets: the mirrored contribution uses vectors shifted by (roff-coff) and (coff-roff). */
		const double *trhs = rhs+incx*(roff-coff);
		double *tout=out+incy*(coff-roff);
		rsb_coo_idx_t r=0,c=0;

		/* Main part: four nonzeroes per pass, processed in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=row_idx[k+0]; c=col_idx[k+0];
			out[r*incy]+=(alpha)*VA[k+0]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+0]*trhs[r*incx];

			r=row_idx[k+1]; c=col_idx[k+1];
			out[r*incy]+=(alpha)*VA[k+1]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+1]*trhs[r*incx];

			r=row_idx[k+2]; c=col_idx[k+2];
			out[r*incy]+=(alpha)*VA[k+2]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+2]*trhs[r*incx];

			r=row_idx[k+3]; c=col_idx[k+3];
			out[r*incy]+=(alpha)*VA[k+3]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+3]*trhs[r*incx];
		}

		/* Tail: the at most three nonzeroes left over. */
		for(;k<nnz;++k)
		{
			r=row_idx[k]; c=col_idx[k];
			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k]*trhs[r*incx];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * Only the \f$\alpha \cdot A \cdot x\f$ term is accumulated into out; no scaling of out is done here.
	 * Each stored value is applied twice, as stored (no conjugation step appears here):
	 * once at its stored (row,col) position and once at the mirrored (col,row) position.
	 * When roff==coff an entry with row==col is applied only once; when roff!=coff the
	 * mirrored update is unconditional and goes through rhs/out pointers shifted by the
	 * difference of the two offsets.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; see the caller level for that. */
	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	const double alpha=*alphap;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Same row and column offset: skip the mirrored update for row==col entries. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			const rsb_coo_idx_t r=row_idx[k];
			const rsb_coo_idx_t c=col_idx[k];

			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=(alpha)*VA[k]*rhs[r*incx];
		}
	}
	else
	{
		/* Different offsets: the mirrored contribution uses vectors shifted by (roff-coff) and (coff-roff). */
		const double *trhs = rhs+incx*(roff-coff);
		double *tout=out+incy*(coff-roff);
		rsb_coo_idx_t r=0,c=0;

		/* Main part: four nonzeroes per pass, processed in storage order. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=row_idx[k+0]; c=col_idx[k+0];
			out[r*incy]+=(alpha)*VA[k+0]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+0]*trhs[r*incx];

			r=row_idx[k+1]; c=col_idx[k+1];
			out[r*incy]+=(alpha)*VA[k+1]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+1]*trhs[r*incx];

			r=row_idx[k+2]; c=col_idx[k+2];
			out[r*incy]+=(alpha)*VA[k+2]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+2]*trhs[r*incx];

			r=row_idx[k+3]; c=col_idx[k+3];
			out[r*incy]+=(alpha)*VA[k+3]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k+3]*trhs[r*incx];
		}

		/* Tail: the at most three nonzeroes left over. */
		for(;k<nnz;++k)
		{
			r=row_idx[k]; c=col_idx[k];
			out[r*incy]+=(alpha)*VA[k]*rhs[c*incx];
			tout[c*incy]+=(alpha)*VA[k]*trhs[r*incx];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^T} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) kernel of the same symmetry class.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* The transposed product on this symmetry class is served by the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tT_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^T} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) kernel of the same symmetry class.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* The transposed product on this symmetry class is served by the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_C__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) kernel of the same symmetry class.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* With A == A^H the conjugate-transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_double_H__tC_r1_c1_uu_sH_dI_uG(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Entry point for \f$y \leftarrow \beta \cdot y + \alpha \cdot {A^H} \cdot x, where A == A^H. \f$
	 * incx and incy are the strides of the x and y vectors.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * No computation happens here: every argument is passed on, unchanged,
	 * to the non-transposed (tN) kernel of the same symmetry class.
	 * \return the value returned by the non-transposed kernel (\rsb_errval_inp_param_msg)
	 */

	/* NOTE: the implicit diagonal is not applied at this level; see the caller level for that. */
	/* With A == A^H the conjugate-transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_double_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Backward-scanning triangular solve kernel (the uU variant), operating in place on out.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * Rows are visited from Mdim-1 down to 0 and the nonzeroes from nnz-1 down to 0.
	 * For row ii, the consecutive entries with row index ii and column index different
	 * from ii are accumulated as VA[n]*out[j*incy]; the entry at which that scan stops
	 * is used as the pivot, and
	 *   out[ii*incy] = (alpha*out[ii*incy] - accumulated sum) / pivot.
	 * The pivot is not checked to actually be the (ii,ii) entry: the storage is
	 * assumed to provide one after the off-diagonal entries of each row (in scan order).
	 * rhs is not read by this kernel.
	 *
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the nonzeroes are exhausted before a
	 *         pivot is found for some row, or if a pivot is zero; RSB_ERR_NO_ERROR
	 *         otherwise (\rsb_errval_inp_param_msg)
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const double alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		ax=0;

		/* Consume the off-diagonal entries of row ii, scanning backward. */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*incy];
		}

		/*
		 * This scan runs downward, so the nonzeroes are exhausted when n has
		 * stepped below 0 (same test as the loop guard above), not when n==nnz:
		 * that can never hold here, and testing it let VA[n] be read out of bounds.
		 */
		if(RSB_UNLIKELY(!(n+1>0)) || VA[n]==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*incy]=((alpha)*out[ii*incy]-ax)/VA[n];
		--n;	/* step past the pivot */
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * Consecutive entries of the current row whose column differs from the row are summed
	 * (value times out at the column); the entry that stops the scan is used as divisor of
	 * (alpha*out[row*incy] - sum).
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the nonzeroes end before a divisor is found
	 * or the divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double acc=0;

		/* gather the leading non-diagonal entries of row r */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r || ec==er))
				break;
			acc += VA[k]*out[ec*incy];
			++k;
		}

		if(k==nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		if(VA[k]==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;

		out[r*incy]=((scale)*out[r*incy]-acc)/VA[k];
		++k;
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows ii go from Mdim-1 down to 0, nonzeroes n from nnz-1 down to 0.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays: per-nonzero row
	 * indices (IA) and per-nonzero column indices (JA) respectively.
	 * Consecutive entries lying in row ii with column != ii are accumulated into ax;
	 * the entry at which the scan stops supplies the divisor:
	 *   out[ii*incy] = (alpha*out[ii*incy] - ax) / VA[n].
	 * rhs, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags and incx are not read here.
	 *
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA when the nonzeroes are exhausted before a divisor
	 * is found or when the divisor equals zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const double alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		ax=0;

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*incy];
		}

		/* The scan runs downward, so exhaustion means n went below the first nonzero
		 * (it can never reach nnz): test that before VA[n] is touched. */
		if(!(n+1>0) || VA[n]==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*incy]=((alpha)*out[ii*incy]-ax)/VA[n];
		--n;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * Consecutive entries of the current row whose column differs from the row are summed
	 * (value times out at the column); the entry that stops the scan is used as divisor of
	 * (alpha*out[row*incy] - sum).
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the nonzeroes end before a divisor is found
	 * or the divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double acc=0;

		/* gather the leading non-diagonal entries of row r */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r || ec==er))
				break;
			acc += VA[k]*out[ec*incy];
			++k;
		}

		if(k==nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		if(VA[k]==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;

		out[r*incy]=((scale)*out[r*incy]-acc)/VA[k];
		++k;
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * At each row the current nonzero is taken as divisor of out[row*incy]; the quotient is
	 * then subtracted, times the entry value, from out at the column of every following
	 * entry of the same row. Finally out[row*incy] is multiplied by alpha.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row or the divisor
	 * is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double pivot;
		double xr;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		pivot=VA[k];
		if(pivot==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[r*incy]/=pivot;
		xr=out[r*incy];

		/* scatter the contribution of row r along its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows ii go from Mdim-1 down to 0, nonzeroes n from nnz-1 down to 0.
	 * bpntr is reinterpreted as per-nonzero row indices (IA), bindx as per-nonzero column indices (JA).
	 * At each row the current nonzero VA[n] is taken as divisor of out[ii*incy]; the quotient ax
	 * is then subtracted, times the entry value, from out[j*incy] for every preceding entry
	 * of the same row. Finally out[ii*incy] is multiplied by alpha.
	 * rhs, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags and incx are not read here.
	 *
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor
	 * equals zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const double alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;
		/* n walks downward: besides the upper bound, reject an index that already
		 * stepped below the first nonzero, before VA[n] is read. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * At each row the current nonzero is taken as divisor of out[row*incy]; the quotient is
	 * then subtracted, times the entry value, from out at the column of every following
	 * entry of the same row. Finally out[row*incy] is multiplied by alpha.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row or the divisor
	 * is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double pivot;
		double xr;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		pivot=VA[k];
		if(pivot==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[r*incy]/=pivot;
		xr=out[r*incy];

		/* scatter the contribution of row r along its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows ii go from Mdim-1 down to 0, nonzeroes n from nnz-1 down to 0.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays: per-nonzero row
	 * indices (IA) and per-nonzero column indices (JA) respectively.
	 * At each row the current nonzero VA[n] is taken as divisor of out[ii*incy]; the quotient ax
	 * is then subtracted, times the entry value, from out[j*incy] for every preceding entry
	 * of the same row. Finally out[ii*incy] is multiplied by alpha.
	 * rhs, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags and incx are not read here.
	 *
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor
	 * equals zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const double alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;
		/* n walks downward: besides the upper bound, reject an index that already
		 * stepped below the first nonzero, before VA[n] is read. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * At each row the current nonzero is taken as divisor of out[row*incy]; the quotient is
	 * then subtracted, times the entry value, from out at the column of every following
	 * entry of the same row. Finally out[row*incy] is multiplied by alpha.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row or the divisor
	 * is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double pivot;
		double xr;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		pivot=VA[k];
		if(pivot==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[r*incy]/=pivot;
		xr=out[r*incy];

		/* scatter the contribution of row r along its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows ii go from Mdim-1 down to 0, nonzeroes n from nnz-1 down to 0.
	 * bpntr is reinterpreted as per-nonzero row indices (IA), bindx as per-nonzero column indices (JA).
	 * At each row the current nonzero VA[n] is taken as divisor of out[ii*incy]; the quotient ax
	 * is then subtracted, times the entry value, from out[j*incy] for every preceding entry
	 * of the same row. Finally out[ii*incy] is multiplied by alpha.
	 * rhs, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags and incx are not read here.
	 *
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor
	 * equals zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const double alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;
		/* n walks downward: besides the upper bound, reject an index that already
		 * stepped below the first nonzero, before VA[n] is read. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dE_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * At each row the current nonzero is taken as divisor of out[row*incy]; the quotient is
	 * then subtracted, times the entry value, from out at the column of every following
	 * entry of the same row. Finally out[row*incy] is multiplied by alpha.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no nonzero is left for a row or the divisor
	 * is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double pivot;
		double xr;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		pivot=VA[k];
		if(pivot==((double)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		out[r*incy]/=pivot;
		xr=out[r*incy];

		/* scatter the contribution of row r along its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dE_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows ii go from Mdim-1 down to 0, nonzeroes n from nnz-1 down to 0.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays: per-nonzero row
	 * indices (IA) and per-nonzero column indices (JA) respectively.
	 * At each row the current nonzero VA[n] is taken as divisor of out[ii*incy]; the quotient ax
	 * is then subtracted, times the entry value, from out[j*incy] for every preceding entry
	 * of the same row. Finally out[ii*incy] is multiplied by alpha.
	 * rhs, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags and incx are not read here.
	 *
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA when no nonzero is left for a row or the divisor
	 * equals zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const double alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		double ax;
		double aa;
		/* n walks downward: besides the upper bound, reject an index that already
		 * stepped below the first nonzero, before VA[n] is read. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((double)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from Mdim-1 down to 0, nonzeroes from nnz-1 down to 0.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * Consecutive entries of the current row whose column differs from the row are summed
	 * (value times out at the column); then out[row*incy] = alpha*out[row*incy] - sum,
	 * with no division.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t r=Mdim-1;

	while(RSB_LIKELY(r+1>0))
	{
		double acc=0;

		/* gather the trailing non-diagonal entries of row r */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r || ec==er))
				break;
			acc += VA[k]*out[ec*incy];
			--k;
		}

		out[r*incy]=((scale)*out[r*incy]-acc);
		--r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * Consecutive entries of the current row whose column differs from the row are summed
	 * (value times out at the column); then out[row*incy] = alpha*out[row*incy] - sum,
	 * with no division.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double acc=0;

		/* gather the leading non-diagonal entries of row r */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r || ec==er))
				break;
			acc += VA[k]*out[ec*incy];
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]-acc);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from Mdim-1 down to 0, nonzeroes from nnz-1 down to 0.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * Consecutive entries of the current row whose column differs from the row are summed
	 * (value times out at the column); then out[row*incy] = alpha*out[row*incy] - sum,
	 * with no division.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t r=Mdim-1;

	while(RSB_LIKELY(r+1>0))
	{
		double acc=0;

		/* gather the trailing non-diagonal entries of row r */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r || ec==er))
				break;
			acc += VA[k]*out[ec*incy];
			--k;
		}

		out[r*incy]=((scale)*out[r*incy]-acc);
		--r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * Consecutive entries of the current row whose column differs from the row are summed
	 * (value times out at the column); then out[row*incy] = alpha*out[row*incy] - sum,
	 * with no division.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		double acc=0;

		/* gather the leading non-diagonal entries of row r */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r || ec==er))
				break;
			acc += VA[k]*out[ec*incy];
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]-acc);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from Mdim-1 down to 0, nonzeroes from nnz-1 down to 0.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t r=Mdim-1;

	while(RSB_LIKELY(r+1>0))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			--k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		--r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from Mdim-1 down to 0, nonzeroes from nnz-1 down to 0.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t r=Mdim-1;

	while(RSB_LIKELY(r+1>0))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			--k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		--r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from Mdim-1 down to 0, nonzeroes from nnz-1 down to 0.
	 * bpntr is reinterpreted as per-nonzero row indices, bindx as per-nonzero column indices.
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t r=Mdim-1;

	while(RSB_LIKELY(r+1>0))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			--k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		--r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_C__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dI_uU(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 *
	 * In-place sweep over out: rows go from 0 up to Mdim-1, nonzeroes from 0 up to nnz-1.
	 * Both bpntr and bindx are reinterpreted as rsb_half_idx_t arrays (row and column
	 * index of each nonzero, respectively).
	 * The value of out[row*incy] at row entry is subtracted, times the entry value, from out
	 * at the column of every consecutive entry of that row; afterwards out[row*incy] is
	 * multiplied by alpha.
	 * \return always RSB_ERR_NO_ERROR.
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const double scale=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t r=0;

	while(RSB_LIKELY(r<Mdim))
	{
		const double xr=out[r*incy];

		/* scatter the contribution of row r along its entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t er=rows[k];
			const rsb_coo_idx_t ec=cols[k];

			if(RSB_UNLIKELY(er!=r))
				break;
			out[ec*incy]-=VA[k]*xr;
			++k;
		}

		out[r*incy]=((scale)*out[r*incy]);
		++r;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dI_uL(const double * restrict VA, const double * restrict rhs, double * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const double * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep on \c out, walking the rows from Mdim-1 down to 0 and the nonzeroes from nnz-1 down to 0.
	 * For each row, the value x=out[row*incy] is read once on entry; then, for every consecutive nonzero
	 * whose row coordinate equals that row, out[col*incy] is decreased by VA[n]*x (col being the column
	 * coordinate of the nonzero). Finally out[row*incy] is multiplied by *alphap.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Neither \c rhs nor \c incx is referenced by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const double scale = *alphap;
	rsb_nnz_idx_t k = nnz-1;
	rsb_coo_idx_t row;

	for(row = Mdim-1; RSB_LIKELY(row+1>0); --row)
	{
		/* snapshot of this row's entry, taken before any update below */
		const double xr = out[row*incy];

		/* consume the run of nonzeroes belonging to this row; k is kept across rows */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t ei = row_idx[k];
			const rsb_coo_idx_t ej = col_idx[k];

			if(RSB_UNLIKELY(ei != row))
				break;
			out[ej*incy] -= VA[k]*xr;
			--k;
		}

		out[row*incy] = scale*out[row*incy];
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_double_H__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}




rsb_err_t rsb__BCOR_infty_norm_double_C__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A \neq A^T \f$: each of the nnz stored values adds
	 * its absolute value to the sum of its row, \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |A_{ij}| \f$.
	 * Row coordinates are read from \c bpntr, accessed as an array of rsb_coo_idx_t.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A \neq A^T \f$: each of the nnz stored values adds
	 * its absolute value to the sum of its row, \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |A_{ij}| \f$.
	 * Row coordinates are read from \c bpntr, accessed as an array of rsb_half_idx_t.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the transposed case, where \f$ A \neq A^T \f$: each of the nnz
	 * stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_coo_idx_t.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the transposed case, where \f$ A \neq A^T \f$: each of the nnz
	 * stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_half_idx_t.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the conjugate transposed case, where \f$ A \neq A^T \f$: each of
	 * the nnz stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_coo_idx_t.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the conjugate transposed case, where \f$ A \neq A^T \f$: each of
	 * the nnz stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_half_idx_t.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A = A^T \f$: each of the nnz stored values adds its
	 * absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to \f$ row\_sums_{coff+j} \f$,
	 * so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A = A^T \f$: each of the nnz stored values adds its
	 * absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to \f$ row\_sums_{coff+j} \f$,
	 * so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (transposed variant), where \f$ A = A^T \f$: each of the nnz stored
	 * values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (transposed variant), where \f$ A = A^T \f$: each of the nnz stored
	 * values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (conjugate transposed variant), where \f$ A = A^T \f$: each of the
	 * nnz stored values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (conjugate transposed variant), where \f$ A = A^T \f$: each of the
	 * nnz stored values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A = A^H \f$: each of the nnz stored values adds its
	 * absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to \f$ row\_sums_{coff+j} \f$,
	 * so that an off-diagonal stored entry is counted for both of its mirrored positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A = A^H \f$: each of the nnz stored values adds its
	 * absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to \f$ row\_sums_{coff+j} \f$,
	 * so that an off-diagonal stored entry is counted for both of its mirrored positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (transposed variant), where \f$ A = A^H \f$: each of the nnz stored
	 * values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its mirrored positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (transposed variant), where \f$ A = A^H \f$: each of the nnz stored
	 * values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its mirrored positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (conjugate transposed variant), where \f$ A = A^H \f$: each of the
	 * nnz stored values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its mirrored positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (conjugate transposed variant), where \f$ A = A^H \f$: each of the
	 * nnz stored values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its mirrored positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A \neq A^T \f$: each of the nnz stored values adds
	 * its absolute value to the sum of its row, \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |A_{ij}| \f$.
	 * Row coordinates are read from \c bpntr, accessed as an array of rsb_coo_idx_t.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A \neq A^T \f$: each of the nnz stored values adds
	 * its absolute value to the sum of its row, \f$ row\_sums_{roff+i} \leftarrow row\_sums_{roff+i} + |A_{ij}| \f$.
	 * Row coordinates are read from \c bpntr, accessed as an array of rsb_half_idx_t.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the transposed case, where \f$ A \neq A^T \f$: each of the nnz
	 * stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_coo_idx_t.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the transposed case, where \f$ A \neq A^T \f$: each of the nnz
	 * stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_half_idx_t.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the conjugate transposed case, where \f$ A \neq A^T \f$: each of
	 * the nnz stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_coo_idx_t.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ in the conjugate transposed case, where \f$ A \neq A^T \f$: each of
	 * the nnz stored values adds its absolute value to the entry selected by its column coordinate,
	 * \f$ row\_sums_{coff+j} \leftarrow row\_sums_{coff+j} + |A_{ij}| \f$.
	 * Column coordinates are read from \c bindx, accessed as an array of rsb_half_idx_t.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t c = col_idx[k];

		row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A = A^T \f$: each of the nnz stored values adds its
	 * absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to \f$ row\_sums_{coff+j} \f$,
	 * so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$, where \f$ A = A^T \f$: each of the nnz stored values adds its
	 * absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to \f$ row\_sums_{coff+j} \f$,
	 * so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (transposed variant), where \f$ A = A^T \f$: each of the nnz stored
	 * values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation kernel for \f$ \|A\|_{\infty} \f$ (transposed variant), where \f$ A = A^T \f$: each of the nnz stored
	 * values adds its absolute value to \f$ row\_sums_{roff+i} \f$ and, unless \f$ roff+i = coff+j \f$, also to
	 * \f$ row\_sums_{coff+j} \f$, so that an off-diagonal stored entry is counted for both of its symmetric positions.
	 * Row coordinates are read from \c bpntr and column coordinates from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Only stored values are visited: nothing is added here on behalf of the implicit diagonal.
	 * \c row_sums is only incremented, never reset; the kernel always returns RSB_ERR_NO_ERROR.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type double, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k<nnz))
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff+r] += fabs(VA[k]);
		/* mirrored contribution, skipped for entries on the global diagonal */
		if( roff+r != coff+c )
			row_sums[coff+c] += fabs(VA[k]);
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal implicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal implicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal implicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal implicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal implicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal implicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_C__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal implicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_double_H__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity norm accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal implicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds fabs(VA[n]) to
	 * row_sums[roff+IA[n]] and, if that is a different slot, also to
	 * row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double mag = fabs(VA[k]);

		row_sums[dst_row] += mag;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += mag;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal explicit,
	 * with rsb_coo_idx_t indices, untransposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]], where IA is bpntr accessed as an
	 * rsb_coo_idx_t array.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[roff + rows[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tN_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal explicit,
	 * with rsb_half_idx_t indices, untransposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]], where IA is bpntr accessed as an
	 * rsb_half_idx_t array.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[roff + rows[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal explicit,
	 * with rsb_coo_idx_t indices, transposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[coff+JA[n]], where JA is bindx accessed as an
	 * rsb_coo_idx_t array.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tT_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal explicit,
	 * with rsb_half_idx_t indices, transposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[coff+JA[n]], where JA is bindx accessed as an
	 * rsb_half_idx_t array.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal explicit,
	 * with rsb_coo_idx_t indices, conjugate-transposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[coff+JA[n]], where JA is bindx accessed as an
	 * rsb_coo_idx_t array.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tC_r1_c1_uu_sU_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal explicit,
	 * with rsb_half_idx_t indices, conjugate-transposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[coff+JA[n]], where JA is bindx accessed as an
	 * rsb_half_idx_t array.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal explicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tN_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal explicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal explicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tT_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal explicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal explicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tC_r1_c1_uu_sS_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, symmetric (A == A^T), diagonal explicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal explicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tN_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal explicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal explicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tT_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal explicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal explicit,
	 * with rsb_coo_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_coo_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tC_r1_c1_uu_sH_dE_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, hermitian (A == A^H), diagonal explicit,
	 * with rsb_half_idx_t indices.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]] and, if that is a different slot, also
	 * to row_sums[coff+JA[n]]. IA is bpntr and JA is bindx, both accessed as
	 * rsb_half_idx_t arrays.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t dst_row = roff + rows[k];
		const rsb_coo_idx_t dst_col = coff + cols[k];
		const double val = VA[k];

		row_sums[dst_row] += val;
		/* mirrored contribution, skipped when both destinations coincide */
		if( dst_row != dst_col )
			row_sums[dst_col] += val;
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal implicit,
	 * with rsb_coo_idx_t indices, untransposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]], where IA is bpntr accessed as an
	 * rsb_coo_idx_t array. Only stored entries are visited.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[roff + rows[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tN_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal implicit,
	 * with rsb_half_idx_t indices, untransposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[roff+IA[n]], where IA is bpntr accessed as an
	 * rsb_half_idx_t array. Only stored entries are visited.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[roff + rows[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row sums accumulation kernel; variant generated for a 1 x 1 blocked
	 * BCOR matrix of type double, unsymmetric, diagonal implicit,
	 * with rsb_coo_idx_t indices, transposed.
	 * For each of the nnz stored entries n, adds VA[n] (signed, no absolute
	 * value) to row_sums[coff+JA[n]], where JA is bindx accessed as an
	 * rsb_coo_idx_t array. Only stored entries are visited.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used here.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tT_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tT variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Every value \c VA[k] is added to \c row_sums[coff+bindx[k]],
	 * for \c k in \c [0,nnz).
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		row_sums[coff + cols[k]] += VA[k];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tC variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Walks the \c nnz stored values in order and adds \c VA[k] to
	 * \c row_sums[coff+bindx[k]]; the values are real, so nothing is
	 * conjugated.
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tC_r1_c1_uu_sU_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tC variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Walks the \c nnz stored values in order and adds \c VA[k] to
	 * \c row_sums[coff+bindx[k]]; the values are real, so nothing is
	 * conjugated.
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		row_sums[coff + cols[k]] += VA[k];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tN variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of rsb_coo_idx_t indices and
	 * paired with \c bindx. Each value \c VA[k] is added to
	 * \c row_sums[roff+r] and, unless \c roff+r equals \c coff+c, also to
	 * \c row_sums[coff+c], where \c r and \c c are the k-th indices.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		const rsb_coo_idx_t r = rows[k];
		const rsb_coo_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		/* add the mirrored contribution only when the targets differ */
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tN_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tN variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of rsb_half_idx_t indices and
	 * paired with \c bindx. Each value \c VA[k] is added to
	 * \c row_sums[roff+r] and, unless \c roff+r equals \c coff+c, also to
	 * \c row_sums[coff+c], where \c r and \c c are the k-th indices.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		const rsb_half_idx_t r = rows[k];
		const rsb_half_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		/* add the mirrored contribution only when the targets differ */
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tT variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * For each of the \c nnz entries, with \c r taken from \c bpntr
	 * (viewed as rsb_coo_idx_t) and \c c taken from \c bindx, the value
	 * is accumulated at \c row_sums[roff+r]; it is accumulated a second
	 * time at \c row_sums[coff+c] when that is a different position.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = rows[k];
		const rsb_coo_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tT_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tT variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * For each of the \c nnz entries, with \c r taken from \c bpntr
	 * (viewed as rsb_half_idx_t) and \c c taken from \c bindx, the value
	 * is accumulated at \c row_sums[roff+r]; it is accumulated a second
	 * time at \c row_sums[coff+c] when that is a different position.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = rows[k];
		const rsb_half_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tC variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Index pairs come from \c bpntr (viewed as rsb_coo_idx_t) and
	 * \c bindx. Every value is added to \c row_sums[roff+r]; when
	 * \c coff+c designates another element it is added there as well.
	 * Values are real, so no conjugation takes place.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		const rsb_coo_idx_t r = rows[k];
		const rsb_coo_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tC_r1_c1_uu_sS_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tC variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Index pairs come from \c bpntr (viewed as rsb_half_idx_t) and
	 * \c bindx. Every value is added to \c row_sums[roff+r]; when
	 * \c coff+c designates another element it is added there as well.
	 * Values are real, so no conjugation takes place.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		const rsb_half_idx_t r = rows[k];
		const rsb_half_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tN variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of rsb_coo_idx_t indices and
	 * paired with \c bindx. Each value \c VA[k] is added to
	 * \c row_sums[roff+r] and, unless \c roff+r equals \c coff+c, also to
	 * \c row_sums[coff+c]. The value is added unchanged in both places.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = rows[k];
		const rsb_coo_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		/* second accumulation is skipped when both targets coincide */
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tN_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tN variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of rsb_half_idx_t indices and
	 * paired with \c bindx. Each value \c VA[k] is added to
	 * \c row_sums[roff+r] and, unless \c roff+r equals \c coff+c, also to
	 * \c row_sums[coff+c]. The value is added unchanged in both places.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = rows[k];
		const rsb_half_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		/* second accumulation is skipped when both targets coincide */
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tT variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * For each of the \c nnz entries, with \c r taken from \c bpntr
	 * (viewed as rsb_coo_idx_t) and \c c taken from \c bindx, the value
	 * is accumulated at \c row_sums[roff+r]; it is accumulated a second
	 * time at \c row_sums[coff+c] when that is a different position.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		const rsb_coo_idx_t r = rows[k];
		const rsb_coo_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tT_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tT variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * For each of the \c nnz entries, with \c r taken from \c bpntr
	 * (viewed as rsb_half_idx_t) and \c c taken from \c bindx, the value
	 * is accumulated at \c row_sums[roff+r]; it is accumulated a second
	 * time at \c row_sums[coff+c] when that is a different position.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		const rsb_half_idx_t r = rows[k];
		const rsb_half_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_C__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tC variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Index pairs come from \c bpntr (viewed as rsb_coo_idx_t) and
	 * \c bindx. Every value is added to \c row_sums[roff+r]; when
	 * \c coff+c designates another element it is added there as well.
	 * Values are real, so no conjugation takes place.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = rows[k];
		const rsb_coo_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_double_H__tC_r1_c1_uu_sH_dI_uG(const double * VA, double * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel (tC variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal implicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Index pairs come from \c bpntr (viewed as rsb_half_idx_t) and
	 * \c bindx. Every value is added to \c row_sums[roff+r]; when
	 * \c coff+c designates another element it is added there as well.
	 * Values are real, so no conjugation takes place.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = rows[k];
		const rsb_half_idx_t c = cols[k];

		row_sums[roff + r] += VA[k];
		if(roff + r != coff + c)
		{
			row_sums[coff + c] += VA[k];
		}
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tN_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tN variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of \c nnz rsb_coo_idx_t
	 * indices; each \c VA[k] is multiplied by
	 * \c scale_factors[index[k]]. The index is used as read: neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[rows[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tN_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tN variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of \c nnz rsb_half_idx_t
	 * indices; each \c VA[k] is multiplied by
	 * \c scale_factors[index[k]]. The index is used as read: neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[rows[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tT_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tT variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Each \c VA[k], for \c k in \c [0,nnz), is multiplied by
	 * \c scale_factors[bindx[k]]. The index is used as read: neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[cols[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tT_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tT variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Each \c VA[k], for \c k in \c [0,nnz), is multiplied by
	 * \c scale_factors[bindx[k]]. The index is used as read: neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[cols[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tC_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tC variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Visits the \c nnz stored values in order and multiplies
	 * \c VA[k] by \c scale_factors[bindx[k]]. No offset
	 * (\c roff / \c coff) is added to the index, and no conjugation is
	 * performed on the real values.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[cols[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tC_r1_c1_uu_sU_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tC variant) for a general matrix
	 * (A \neq A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Visits the \c nnz stored values in order and multiplies
	 * \c VA[k] by \c scale_factors[bindx[k]]. No offset
	 * (\c roff / \c coff) is added to the index, and no conjugation is
	 * performed on the real values.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[cols[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tN_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tN variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of \c nnz rsb_coo_idx_t
	 * indices; each \c VA[k] is multiplied once by
	 * \c scale_factors[index[k]]. \c bindx is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[rows[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tN_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tN variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of \c nnz rsb_half_idx_t
	 * indices; each \c VA[k] is multiplied once by
	 * \c scale_factors[index[k]]. \c bindx is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[rows[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tT_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tT variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Each \c VA[k], for \c k in \c [0,nnz), is multiplied once by
	 * \c scale_factors[bindx[k]]. \c bpntr is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[cols[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tT_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tT variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Each \c VA[k], for \c k in \c [0,nnz), is multiplied once by
	 * \c scale_factors[bindx[k]]. \c bpntr is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[cols[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tC_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tC variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Visits the \c nnz stored values in order and multiplies
	 * \c VA[k] by \c scale_factors[bindx[k]]. No offset
	 * (\c roff / \c coff) is added to the index, and no conjugation is
	 * performed on the real values.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[cols[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tC_r1_c1_uu_sS_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tC variant) for a symmetric matrix
	 * (A == A^T): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Visits the \c nnz stored values in order and multiplies
	 * \c VA[k] by \c scale_factors[bindx[k]]. No offset
	 * (\c roff / \c coff) is added to the index, and no conjugation is
	 * performed on the real values.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[cols[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tN_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tN variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of \c nnz rsb_coo_idx_t
	 * indices; each \c VA[k] is multiplied once by
	 * \c scale_factors[index[k]]. \c bindx is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[rows[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tN_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tN variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * \c bpntr is reinterpreted as an array of \c nnz rsb_half_idx_t
	 * indices; each \c VA[k] is multiplied once by
	 * \c scale_factors[index[k]]. \c bindx is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for(k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		VA[k] *= scale_factors[rows[k]];
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tT_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tT variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_coo_idx_t indices.
	 *
	 * Each \c VA[k], for \c k in \c [0,nnz), is multiplied once by
	 * \c scale_factors[bindx[k]]. \c bpntr is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_coo_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[cols[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tT_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel (tT variant) for a Hermitian matrix
	 * (A == A^H): 1 x 1 blocks, BCOR format, diagonal explicit,
	 * double values, rsb_half_idx_t indices.
	 *
	 * Each \c VA[k], for \c k in \c [0,nnz), is multiplied once by
	 * \c scale_factors[bindx[k]]. \c bpntr is not consulted, and neither
	 * \c roff nor \c coff is applied in this kernel.
	 * \return RSB_ERR_NO_ERROR, always.
	 */
	const rsb_half_idx_t * const cols = bindx;
	rsb_nnz_idx_t k = 0;

	while(RSB_LIKELY(k < nnz))
	{
		VA[k] *= scale_factors[cols[k]];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tC_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal explicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tC_r1_c1_uu_sH_dE_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal explicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tN_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[r], r being the k-th entry of the row-index array passed
	 * through bpntr (read here as rsb_coo_idx_t).
	 * Variant generated for \f$A \neq A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bpntr, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[rows[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tN_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[r], r being the k-th entry of the row-index array passed
	 * through bpntr (read here as rsb_half_idx_t).
	 * Variant generated for \f$A \neq A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bpntr, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[rows[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tT_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A \neq A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tT_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A \neq A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tC_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A \neq A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tC_r1_c1_uu_sU_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A \neq A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tN_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[r], r being the k-th entry of the row-index array passed
	 * through bpntr (read here as rsb_coo_idx_t).
	 * Variant generated for \f$A == A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bpntr, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[rows[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tN_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[r], r being the k-th entry of the row-index array passed
	 * through bpntr (read here as rsb_half_idx_t).
	 * Variant generated for \f$A == A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bpntr, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[rows[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tT_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A == A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tT_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A == A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tC_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A == A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tC_r1_c1_uu_sS_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A == A^T\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tN_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[r], r being the k-th entry of the row-index array passed
	 * through bpntr (read here as rsb_coo_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bpntr, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[rows[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tN_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[r], r being the k-th entry of the row-index array passed
	 * through bpntr (read here as rsb_half_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bpntr, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[rows[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tT_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tT_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_C__tC_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_coo_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_double_H__tC_r1_c1_uu_sH_dI_uG(double * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const double *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for each k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[c], c being the k-th entry of the column-index array bindx
	 * (of type rsb_half_idx_t).
	 * Variant generated for \f$A == A^H\f$, A blocked 1 x 1, stored in BCOR format,
	 * diagonal implicit, of type double.
	 * Only VA, bindx, nnz and scale_factors are accessed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); k++)
		VA[k] *= scale_factors[cols[k]];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating sparse matrix-vector product, \f$y \leftarrow y + {A} \cdot x\f$,
	 * for \f$A \neq A^T\f$: each stored entry k adds VA[k]*rhs[col_k] to out[row_k],
	 * entries being visited in storage order, four per iteration plus a remainder loop.
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_coo_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* unrolled part: four entries per pass */
	while (k + 3 < nnz)
	{
		out[rows[k    ]] += VA[k    ] * rhs[cols[k    ]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}
	/* remainder: at most three trailing entries */
	while (k < nnz)
	{
		out[rows[k]] += VA[k] * rhs[cols[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating sparse matrix-vector product, \f$y \leftarrow y + {A} \cdot x\f$,
	 * for \f$A \neq A^T\f$: each stored entry k adds VA[k]*rhs[col_k] to out[row_k],
	 * entries being visited in storage order, four per iteration plus a remainder loop.
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_half_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* unrolled part: four entries per pass */
	while (k + 3 < nnz)
	{
		out[rows[k    ]] += VA[k    ] * rhs[cols[k    ]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
		k += 4;
	}
	/* remainder: at most three trailing entries */
	while (k < nnz)
	{
		out[rows[k]] += VA[k] * rhs[cols[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating transposed sparse matrix-vector product,
	 * \f$y \leftarrow y + {A^T} \cdot x\f$, for \f$A \neq A^T\f$:
	 * each stored entry k adds VA[k]*rhs[row_k] to out[col_k], in storage order,
	 * four entries per iteration plus a remainder loop.
	 * Before the loops, rhs is shifted by (roff - coff) elements and out by (coff - roff).
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_coo_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* rebase both vectors, exchanging the row and column offsets */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* unrolled part: four entries per pass */
	while (k + 3 < nnz)
	{
		out[cols[k    ]] += VA[k    ] * rhs[rows[k    ]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* remainder: at most three trailing entries */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating transposed sparse matrix-vector product,
	 * \f$y \leftarrow y + {A^T} \cdot x\f$, for \f$A \neq A^T\f$:
	 * each stored entry k adds VA[k]*rhs[row_k] to out[col_k], in storage order,
	 * four entries per iteration plus a remainder loop.
	 * Before the loops, rhs is shifted by (roff - coff) elements and out by (coff - roff).
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_half_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* rebase both vectors, exchanging the row and column offsets */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* unrolled part: four entries per pass */
	while (k + 3 < nnz)
	{
		out[cols[k    ]] += VA[k    ] * rhs[rows[k    ]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* remainder: at most three trailing entries */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating conjugate-transposed sparse matrix-vector product,
	 * \f$y \leftarrow y + {A^H} \cdot x\f$, for \f$A \neq A^T\f$:
	 * each stored entry k adds VA[k]*rhs[row_k] to out[col_k], in storage order,
	 * four entries per iteration plus a remainder loop. The float values are used
	 * as they are (no conjugation step appears in this code).
	 * Before the loops, rhs is shifted by (roff - coff) elements and out by (coff - roff).
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_coo_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* rebase both vectors, exchanging the row and column offsets */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* unrolled part: four entries per pass */
	while (k + 3 < nnz)
	{
		out[cols[k    ]] += VA[k    ] * rhs[rows[k    ]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* remainder: at most three trailing entries */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating conjugate-transposed sparse matrix-vector product,
	 * \f$y \leftarrow y + {A^H} \cdot x\f$, for \f$A \neq A^T\f$:
	 * each stored entry k adds VA[k]*rhs[row_k] to out[col_k], in storage order,
	 * four entries per iteration plus a remainder loop. The float values are used
	 * as they are (no conjugation step appears in this code).
	 * Before the loops, rhs is shifted by (roff - coff) elements and out by (coff - roff).
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_half_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	/* rebase both vectors, exchanging the row and column offsets */
	rhs = (rhs - coff) + roff;
	out = (out - roff) + coff;

	/* unrolled part: four entries per pass */
	while (k + 3 < nnz)
	{
		out[cols[k    ]] += VA[k    ] * rhs[rows[k    ]];
		out[cols[k + 1]] += VA[k + 1] * rhs[rows[k + 1]];
		out[cols[k + 2]] += VA[k + 2] * rhs[rows[k + 2]];
		out[cols[k + 3]] += VA[k + 3] * rhs[rows[k + 3]];
		k += 4;
	}
	/* remainder: at most three trailing entries */
	while (k < nnz)
	{
		out[cols[k]] += VA[k] * rhs[rows[k]];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating sparse matrix-vector product, \f$y \leftarrow y + {A} \cdot x\f$,
	 * for \f$A == A^T\f$: each stored entry contributes once directly
	 * (out[row] += v*rhs[col]) and once mirrored (row and column exchanged).
	 * - roff == coff: the mirrored update is applied on out/rhs themselves and is
	 *   skipped when row == col.
	 * - roff != coff: the mirrored update is applied unconditionally through copies of
	 *   the vectors shifted by (coff - roff) for out and (roff - coff) for rhs;
	 *   this path handles four entries per iteration plus a remainder loop.
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_coo_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	/* shifted views of the vectors, used for the mirrored contribution */
	const float * const rhs_t = rhs + (roff - coff);
	float * const out_t = out + (coff - roff);
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		for (k = 0; RSB_LIKELY(k < nnz); ++k)
		{
			const rsb_coo_idx_t r = rows[k];
			const rsb_coo_idx_t c = cols[k];

			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
		}
	}
	else
	{
		/* unrolled part: four entries per pass, direct then mirrored update */
		for (k = 0; k + 3 < nnz; k += 4)
		{
			out[rows[k    ]]   += VA[k    ] * rhs[cols[k    ]];
			out_t[cols[k    ]] += VA[k    ] * rhs_t[rows[k    ]];
			out[rows[k + 1]]   += VA[k + 1] * rhs[cols[k + 1]];
			out_t[cols[k + 1]] += VA[k + 1] * rhs_t[rows[k + 1]];
			out[rows[k + 2]]   += VA[k + 2] * rhs[cols[k + 2]];
			out_t[cols[k + 2]] += VA[k + 2] * rhs_t[rows[k + 2]];
			out[rows[k + 3]]   += VA[k + 3] * rhs[cols[k + 3]];
			out_t[cols[k + 3]] += VA[k + 3] * rhs_t[rows[k + 3]];
		}
		/* remainder: at most three trailing entries */
		for (; k < nnz; ++k)
		{
			out[rows[k]]   += VA[k] * rhs[cols[k]];
			out_t[cols[k]] += VA[k] * rhs_t[rows[k]];
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulating sparse matrix-vector product, \f$y \leftarrow y + {A} \cdot x\f$,
	 * for \f$A == A^T\f$: each stored entry contributes once directly
	 * (out[row] += v*rhs[col]) and once mirrored (row and column exchanged).
	 * - roff == coff: the mirrored update is applied on out/rhs themselves and is
	 *   skipped when row == col.
	 * - roff != coff: the mirrored update is applied unconditionally through copies of
	 *   the vectors shifted by (coff - roff) for out and (roff - coff) for rhs;
	 *   this path handles four entries per iteration plus a remainder loop.
	 * Row indices come from bpntr, column indices from bindx (both read as rsb_half_idx_t).
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float.
	 * If rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) is nonzero, the kernel name is printed.
	 * \return \rsb_errval_inp_param_msg (this code always returns RSB_ERR_NO_ERROR)
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	/* shifted views of the vectors, used for the mirrored contribution */
	const float * const rhs_t = rhs + (roff - coff);
	float * const out_t = out + (coff - roff);
	rsb_nnz_idx_t k = 0;

	if (roff == coff)
	{
		for (k = 0; RSB_LIKELY(k < nnz); ++k)
		{
			const rsb_half_idx_t r = rows[k];
			const rsb_half_idx_t c = cols[k];

			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
		}
	}
	else
	{
		/* unrolled part: four entries per pass, direct then mirrored update */
		for (k = 0; k + 3 < nnz; k += 4)
		{
			out[rows[k    ]]   += VA[k    ] * rhs[cols[k    ]];
			out_t[cols[k    ]] += VA[k    ] * rhs_t[rows[k    ]];
			out[rows[k + 1]]   += VA[k + 1] * rhs[cols[k + 1]];
			out_t[cols[k + 1]] += VA[k + 1] * rhs_t[rows[k + 1]];
			out[rows[k + 2]]   += VA[k + 2] * rhs[cols[k + 2]];
			out_t[cols[k + 2]] += VA[k + 2] * rhs_t[rows[k + 2]];
			out[rows[k + 3]]   += VA[k + 3] * rhs[cols[k + 3]];
			out_t[cols[k + 3]] += VA[k + 3] * rhs_t[rows[k + 3]];
		}
		/* remainder: at most three trailing entries */
		for (; k < nnz; ++k)
		{
			out[rows[k]]   += VA[k] * rhs[cols[k]];
			out_t[cols[k]] += VA[k] * rhs_t[rows[k]];
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Every stored nonzero (i,j) contributes twice: as is, and mirrored as (j,i).
	 * The values are plain float and no conjugation is applied to the mirrored term.
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;	/* column index of each nonzero */
	/* Shifted views of rhs and out, used by the mirrored contributions. */
	const float *mrhs=rhs+(roff-coff);
	float *mout=out+(coff-roff);
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: mirrored updates hit the same vectors; entries with r==c are applied once. */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through the shifted views; four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=rows[k  ]; c=cols[k  ];
			out[r]+=VA[k  ]*rhs[c];
			mout[c]+=VA[k  ]*mrhs[r];

			r=rows[k+1]; c=cols[k+1];
			out[r]+=VA[k+1]*rhs[c];
			mout[c]+=VA[k+1]*mrhs[r];

			r=rows[k+2]; c=cols[k+2];
			out[r]+=VA[k+2]*rhs[c];
			mout[c]+=VA[k+2]*mrhs[r];

			r=rows[k+3]; c=cols[k+3];
			out[r]+=VA[k+3]*rhs[c];
			mout[c]+=VA[k+3]*mrhs[r];
		}
		/* Remaining nonzeroes, one at a time. */
		for(;k<nnz;++k)
		{
			r=rows[k]; c=cols[k];
			out[r]+=VA[k]*rhs[c];
			mout[c]+=VA[k]*mrhs[r];
		}
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every stored nonzero (i,j) contributes twice: as is, and mirrored as (j,i).
	 * The values are plain float and no conjugation is applied to the mirrored term.
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;	/* column index of each nonzero */
	/* Shifted views of rhs and out, used by the mirrored contributions. */
	const float *mrhs=rhs+(roff-coff);
	float *mout=out+(coff-roff);
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: mirrored updates hit the same vectors; entries with r==c are applied once. */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through the shifted views; four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=rows[k  ]; c=cols[k  ];
			out[r]+=VA[k  ]*rhs[c];
			mout[c]+=VA[k  ]*mrhs[r];

			r=rows[k+1]; c=cols[k+1];
			out[r]+=VA[k+1]*rhs[c];
			mout[c]+=VA[k+1]*mrhs[r];

			r=rows[k+2]; c=cols[k+2];
			out[r]+=VA[k+2]*rhs[c];
			mout[c]+=VA[k+2]*mrhs[r];

			r=rows[k+3]; c=cols[k+3];
			out[r]+=VA[k+3]*rhs[c];
			mout[c]+=VA[k+3]*mrhs[r];
		}
		/* Remaining nonzeroes, one at a time. */
		for(;k<nnz;++k)
		{
			r=rows[k]; c=cols[k];
			out[r]+=VA[k]*rhs[c];
			mout[c]+=VA[k]*mrhs[r];
		}
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed hermitian SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed hermitian SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed hermitian SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed hermitian SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Unsymmetric SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * For each stored nonzero, out[row] is incremented by value * rhs[column].
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;	/* column index of each nonzero */
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	for(k=0;k+3<nnz;k+=4)
	{
		r=rows[k  ]; c=cols[k  ];
		out[r]+=VA[k  ]*rhs[c];

		r=rows[k+1]; c=cols[k+1];
		out[r]+=VA[k+1]*rhs[c];

		r=rows[k+2]; c=cols[k+2];
		out[r]+=VA[k+2]*rhs[c];

		r=rows[k+3]; c=cols[k+3];
		out[r]+=VA[k+3]*rhs[c];
	}
	/* Remaining nonzeroes, one at a time. */
	for(;k<nnz;++k)
	{
		r=rows[k]; c=cols[k];
		out[r]+=VA[k]*rhs[c];
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Unsymmetric SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * For each stored nonzero, out[row] is incremented by value * rhs[column].
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;	/* column index of each nonzero */
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	for(k=0;k+3<nnz;k+=4)
	{
		r=rows[k  ]; c=cols[k  ];
		out[r]+=VA[k  ]*rhs[c];

		r=rows[k+1]; c=cols[k+1];
		out[r]+=VA[k+1]*rhs[c];

		r=rows[k+2]; c=cols[k+2];
		out[r]+=VA[k+2]*rhs[c];

		r=rows[k+3]; c=cols[k+3];
		out[r]+=VA[k+3]*rhs[c];
	}
	/* Remaining nonzeroes, one at a time. */
	for(;k<nnz;++k)
	{
		r=rows[k]; c=cols[k];
		out[r]+=VA[k]*rhs[c];
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed unsymmetric SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * For each stored nonzero, the output at the column index is incremented by
	 * value * input at the row index, after re-basing both vectors (see below).
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;	/* column index of each nonzero */
	/* Re-based vectors: rhs moved back by coff and forward by roff, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out offset for the non-transposed case; confirm at caller level. */
	const float *src=rhs-coff+roff;
	float *dst=out-roff+coff;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	for(k=0;k+3<nnz;k+=4)
	{
		r=rows[k  ]; c=cols[k  ];
		dst[c]+=VA[k  ]*src[r];

		r=rows[k+1]; c=cols[k+1];
		dst[c]+=VA[k+1]*src[r];

		r=rows[k+2]; c=cols[k+2];
		dst[c]+=VA[k+2]*src[r];

		r=rows[k+3]; c=cols[k+3];
		dst[c]+=VA[k+3]*src[r];
	}
	/* Remaining nonzeroes, one at a time. */
	for(;k<nnz;++k)
	{
		r=rows[k]; c=cols[k];
		dst[c]+=VA[k]*src[r];
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed unsymmetric SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * For each stored nonzero, the output at the column index is incremented by
	 * value * input at the row index, after re-basing both vectors (see below).
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;	/* column index of each nonzero */
	/* Re-based vectors: rhs moved back by coff and forward by roff, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out offset for the non-transposed case; confirm at caller level. */
	const float *src=rhs-coff+roff;
	float *dst=out-roff+coff;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	for(k=0;k+3<nnz;k+=4)
	{
		r=rows[k  ]; c=cols[k  ];
		dst[c]+=VA[k  ]*src[r];

		r=rows[k+1]; c=cols[k+1];
		dst[c]+=VA[k+1]*src[r];

		r=rows[k+2]; c=cols[k+2];
		dst[c]+=VA[k+2]*src[r];

		r=rows[k+3]; c=cols[k+3];
		dst[c]+=VA[k+3]*src[r];
	}
	/* Remaining nonzeroes, one at a time. */
	for(;k<nnz;++k)
	{
		r=rows[k]; c=cols[k];
		dst[c]+=VA[k]*src[r];
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed unsymmetric SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * For each stored nonzero, the output at the column index is incremented by
	 * value * input at the row index, after re-basing both vectors (see below).
	 * The values are plain float and no conjugation is applied.
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;	/* column index of each nonzero */
	/* Re-based vectors: rhs moved back by coff and forward by roff, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out offset for the non-transposed case; confirm at caller level. */
	const float *src=rhs-coff+roff;
	float *dst=out-roff+coff;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	for(k=0;k+3<nnz;k+=4)
	{
		r=rows[k  ]; c=cols[k  ];
		dst[c]+=VA[k  ]*src[r];

		r=rows[k+1]; c=cols[k+1];
		dst[c]+=VA[k+1]*src[r];

		r=rows[k+2]; c=cols[k+2];
		dst[c]+=VA[k+2]*src[r];

		r=rows[k+3]; c=cols[k+3];
		dst[c]+=VA[k+3]*src[r];
	}
	/* Remaining nonzeroes, one at a time. */
	for(;k<nnz;++k)
	{
		r=rows[k]; c=cols[k];
		dst[c]+=VA[k]*src[r];
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed unsymmetric SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * For each stored nonzero, the output at the column index is incremented by
	 * value * input at the row index, after re-basing both vectors (see below).
	 * The values are plain float and no conjugation is applied.
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;	/* column index of each nonzero */
	/* Re-based vectors: rhs moved back by coff and forward by roff, out the other way round. */
	/* NOTE(review): presumably the caller passes rhs/out offset for the non-transposed case; confirm at caller level. */
	const float *src=rhs-coff+roff;
	float *dst=out-roff+coff;
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* Main part: four nonzeroes per pass, processed in storage order. */
	for(k=0;k+3<nnz;k+=4)
	{
		r=rows[k  ]; c=cols[k  ];
		dst[c]+=VA[k  ]*src[r];

		r=rows[k+1]; c=cols[k+1];
		dst[c]+=VA[k+1]*src[r];

		r=rows[k+2]; c=cols[k+2];
		dst[c]+=VA[k+2]*src[r];

		r=rows[k+3]; c=cols[k+3];
		dst[c]+=VA[k+3]*src[r];
	}
	/* Remaining nonzeroes, one at a time. */
	for(;k<nnz;++k)
	{
		r=rows[k]; c=cols[k];
		dst[c]+=VA[k]*src[r];
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Symmetric SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Every stored nonzero (i,j) contributes twice: as is, and mirrored as (j,i).
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;	/* column index of each nonzero */
	/* Shifted views of rhs and out, used by the mirrored contributions. */
	const float *mrhs=rhs+(roff-coff);
	float *mout=out+(coff-roff);
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: mirrored updates hit the same vectors; entries with r==c are applied once. */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through the shifted views; four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=rows[k  ]; c=cols[k  ];
			out[r]+=VA[k  ]*rhs[c];
			mout[c]+=VA[k  ]*mrhs[r];

			r=rows[k+1]; c=cols[k+1];
			out[r]+=VA[k+1]*rhs[c];
			mout[c]+=VA[k+1]*mrhs[r];

			r=rows[k+2]; c=cols[k+2];
			out[r]+=VA[k+2]*rhs[c];
			mout[c]+=VA[k+2]*mrhs[r];

			r=rows[k+3]; c=cols[k+3];
			out[r]+=VA[k+3]*rhs[c];
			mout[c]+=VA[k+3]*mrhs[r];
		}
		/* Remaining nonzeroes, one at a time. */
		for(;k<nnz;++k)
		{
			r=rows[k]; c=cols[k];
			out[r]+=VA[k]*rhs[c];
			mout[c]+=VA[k]*mrhs[r];
		}
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Symmetric SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every stored nonzero (i,j) contributes twice: as is, and mirrored as (j,i).
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;	/* column index of each nonzero */
	/* Shifted views of rhs and out, used by the mirrored contributions. */
	const float *mrhs=rhs+(roff-coff);
	float *mout=out+(coff-roff);
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: mirrored updates hit the same vectors; entries with r==c are applied once. */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through the shifted views; four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=rows[k  ]; c=cols[k  ];
			out[r]+=VA[k  ]*rhs[c];
			mout[c]+=VA[k  ]*mrhs[r];

			r=rows[k+1]; c=cols[k+1];
			out[r]+=VA[k+1]*rhs[c];
			mout[c]+=VA[k+1]*mrhs[r];

			r=rows[k+2]; c=cols[k+2];
			out[r]+=VA[k+2]*rhs[c];
			mout[c]+=VA[k+2]*mrhs[r];

			r=rows[k+3]; c=cols[k+3];
			out[r]+=VA[k+3]*rhs[c];
			mout[c]+=VA[k+3]*mrhs[r];
		}
		/* Remaining nonzeroes, one at a time. */
		for(;k<nnz;++k)
		{
			r=rows[k]; c=cols[k];
			out[r]+=VA[k]*rhs[c];
			mout[c]+=VA[k]*mrhs[r];
		}
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed symmetric SpMV kernel: \f$y \leftarrow y + {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin wrapper: symmetric transposed reverts to symmetric not transposed, so all
	 * arguments are forwarded unchanged to the tN kernel of the same kind.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (the value returned by the tN kernel)
	 */

	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Every stored nonzero (i,j) contributes twice: as is, and mirrored as (j,i).
	 * The values are plain float and no conjugation is applied to the mirrored term.
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;	/* column index of each nonzero */
	/* Shifted views of rhs and out, used by the mirrored contributions. */
	const float *mrhs=rhs+(roff-coff);
	float *mout=out+(coff-roff);
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: mirrored updates hit the same vectors; entries with r==c are applied once. */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through the shifted views; four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=rows[k  ]; c=cols[k  ];
			out[r]+=VA[k  ]*rhs[c];
			mout[c]+=VA[k  ]*mrhs[r];

			r=rows[k+1]; c=cols[k+1];
			out[r]+=VA[k+1]*rhs[c];
			mout[c]+=VA[k+1]*mrhs[r];

			r=rows[k+2]; c=cols[k+2];
			out[r]+=VA[k+2]*rhs[c];
			mout[c]+=VA[k+2]*mrhs[r];

			r=rows[k+3]; c=cols[k+3];
			out[r]+=VA[k+3]*rhs[c];
			mout[c]+=VA[k+3]*mrhs[r];
		}
		/* Remaining nonzeroes, one at a time. */
		for(;k<nnz;++k)
		{
			r=rows[k]; c=cols[k];
			out[r]+=VA[k]*rhs[c];
			mout[c]+=VA[k]*mrhs[r];
		}
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian SpMV kernel: \f$y \leftarrow y + {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every stored nonzero (i,j) contributes twice: as is, and mirrored as (j,i).
	 * The values are plain float and no conjugation is applied to the mirrored term.
	 * Row indices are read from bpntr and column indices from bindx, both as rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used here.
	 * NOTE: the implicit diagonal is not really handled here: look at caller level.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;	/* row index of each nonzero */
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;	/* column index of each nonzero */
	/* Shifted views of rhs and out, used by the mirrored contributions. */
	const float *mrhs=rhs+(roff-coff);
	float *mout=out+(coff-roff);
	rsb_half_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: mirrored updates hit the same vectors; entries with r==c are applied once. */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r]+=VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c]+=VA[k]*rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: mirrored updates go through the shifted views; four nonzeroes per pass. */
		for(k=0;k+3<nnz;k+=4)
		{
			r=rows[k  ]; c=cols[k  ];
			out[r]+=VA[k  ]*rhs[c];
			mout[c]+=VA[k  ]*mrhs[r];

			r=rows[k+1]; c=cols[k+1];
			out[r]+=VA[k+1]*rhs[c];
			mout[c]+=VA[k+1]*mrhs[r];

			r=rows[k+2]; c=cols[k+2];
			out[r]+=VA[k+2]*rhs[c];
			mout[c]+=VA[k+2]*mrhs[r];

			r=rows[k+3]; c=cols[k+3];
			out[r]+=VA[k+3]*rhs[c];
			mout[c]+=VA[k+3]*mrhs[r];
		}
		/* Remaining nonzeroes, one at a time. */
		for(;k<nnz;++k)
		{
			r=rows[k]; c=cols[k];
			out[r]+=VA[k]*rhs[c];
			mout[c]+=VA[k]*mrhs[r];
		}
	}

	/* Optional trace line, emitted when the RSB_VERBOSE_KERNELS lookup yields nonzero. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with implicit diagonal, float values and rsb_coo_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied in this kernel; see the caller. */
	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with implicit diagonal, float values and rsb_half_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied in this kernel; see the caller. */
	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_C__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with implicit diagonal, float values and rsb_coo_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied in this kernel; see the caller. */
	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_H__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with implicit diagonal, float values and rsb_half_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied in this kernel; see the caller. */
	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uaua_float_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, each stored entry k
	 * adds VA[k] times rhs at its column index to out at its row index.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;	/* row indices, read through bpntr */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;	/* column indices */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		out[rows[k]]     += VA[k]     * rhs[cols[k]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		out[rows[k]] += VA[k] * rhs[cols[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, each stored entry k
	 * adds VA[k] times rhs at its column index to out at its row index.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;	/* row indices: bpntr reinterpreted as half indices */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;	/* column indices */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		out[rows[k]]     += VA[k]     * rhs[cols[k]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		out[rows[k]] += VA[k] * rhs[cols[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out (mdim elements), each stored entry k
	 * adds VA[k] times the shifted rhs at its row index to the shifted out at its column index.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;	/* row indices, read through bpntr */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;	/* column indices */
	const float * const tx = (rhs - coff) + roff;	/* rhs shifted for row-indexed reads */
	float * const ty = (out - roff) + coff;	/* out shifted for column-indexed writes */
	rsb_nnz_idx_t k = 0;

	/* Scale mdim elements of the unshifted out by a NULL factor: presumably a zero fill (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		ty[cols[k]]     += VA[k]     * tx[rows[k]];
		ty[cols[k + 1]] += VA[k + 1] * tx[rows[k + 1]];
		ty[cols[k + 2]] += VA[k + 2] * tx[rows[k + 2]];
		ty[cols[k + 3]] += VA[k + 3] * tx[rows[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		ty[cols[k]] += VA[k] * tx[rows[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out (mdim elements), each stored entry k
	 * adds VA[k] times the shifted rhs at its row index to the shifted out at its column index.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;	/* row indices: bpntr reinterpreted as half indices */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;	/* column indices */
	const float * const tx = (rhs - coff) + roff;	/* rhs shifted for row-indexed reads */
	float * const ty = (out - roff) + coff;	/* out shifted for column-indexed writes */
	rsb_nnz_idx_t k = 0;

	/* Scale mdim elements of the unshifted out by a NULL factor: presumably a zero fill (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		ty[cols[k]]     += VA[k]     * tx[rows[k]];
		ty[cols[k + 1]] += VA[k + 1] * tx[rows[k + 1]];
		ty[cols[k + 2]] += VA[k + 2] * tx[rows[k + 2]];
		ty[cols[k + 3]] += VA[k + 3] * tx[rows[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		ty[cols[k]] += VA[k] * tx[rows[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out (mdim elements), each stored entry k
	 * adds VA[k] times the shifted rhs at its row index to the shifted out at its column index.
	 * No conjugation is applied in this float kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;	/* row indices, read through bpntr */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;	/* column indices */
	const float * const tx = (rhs - coff) + roff;	/* rhs shifted for row-indexed reads */
	float * const ty = (out - roff) + coff;	/* out shifted for column-indexed writes */
	rsb_nnz_idx_t k = 0;

	/* Scale mdim elements of the unshifted out by a NULL factor: presumably a zero fill (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		ty[cols[k]]     += VA[k]     * tx[rows[k]];
		ty[cols[k + 1]] += VA[k + 1] * tx[rows[k + 1]];
		ty[cols[k + 2]] += VA[k + 2] * tx[rows[k + 2]];
		ty[cols[k + 3]] += VA[k + 3] * tx[rows[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		ty[cols[k]] += VA[k] * tx[rows[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out (mdim elements), each stored entry k
	 * adds VA[k] times the shifted rhs at its row index to the shifted out at its column index.
	 * No conjugation is applied in this float kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;	/* row indices: bpntr reinterpreted as half indices */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;	/* column indices */
	const float * const tx = (rhs - coff) + roff;	/* rhs shifted for row-indexed reads */
	float * const ty = (out - roff) + coff;	/* out shifted for column-indexed writes */
	rsb_nnz_idx_t k = 0;

	/* Scale mdim elements of the unshifted out by a NULL factor: presumably a zero fill (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		ty[cols[k]]     += VA[k]     * tx[rows[k]];
		ty[cols[k + 1]] += VA[k + 1] * tx[rows[k + 1]];
		ty[cols[k + 2]] += VA[k + 2] * tx[rows[k + 2]];
		ty[cols[k + 3]] += VA[k + 3] * tx[rows[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		ty[cols[k]] += VA[k] * tx[rows[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A == A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, every stored entry (i,j) is applied
	 * to out at i and, mirrored, at j; when roff==coff the mirrored update is skipped for i==j.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;	/* row indices, read through bpntr */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;	/* column indices */
	const float * const mrhs = rhs + (roff - coff);	/* rhs as addressed by the mirrored update */
	float * const mout = out + (coff - roff);	/* out as addressed by the mirrored update */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: both updates address the very same vectors. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];
			const float a = VA[k];

			out[r] += a * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += a * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, then the remainder. */
		rsb_coo_idx_t r, c;

		for (k = 0; k + 3 < nnz; k += 4)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			mout[c] += VA[k + 1] * mrhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			mout[c] += VA[k + 2] * mrhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			mout[c] += VA[k + 3] * mrhs[r];
		}
		for (; k < nnz; ++k)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A == A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, every stored entry (i,j) is applied
	 * to out at i and, mirrored, at j; when roff==coff the mirrored update is skipped for i==j.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;	/* row indices: bpntr reinterpreted as half indices */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;	/* column indices */
	const float * const mrhs = rhs + (roff - coff);	/* rhs as addressed by the mirrored update */
	float * const mout = out + (coff - roff);	/* out as addressed by the mirrored update */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: both updates address the very same vectors. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_half_idx_t r = rows[k], c = cols[k];
			const float a = VA[k];

			out[r] += a * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += a * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, then the remainder. */
		rsb_half_idx_t r, c;

		for (k = 0; k + 3 < nnz; k += 4)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			mout[c] += VA[k + 1] * mrhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			mout[c] += VA[k + 2] * mrhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			mout[c] += VA[k + 3] * mrhs[r];
		}
		for (; k < nnz; ++k)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A == A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A == A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A == A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A == A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, every stored entry (i,j) is applied
	 * to out at i and, mirrored, at j; when roff==coff the mirrored update is skipped for i==j.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;	/* row indices, read through bpntr */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;	/* column indices */
	const float * const mrhs = rhs + (roff - coff);	/* rhs as addressed by the mirrored update */
	float * const mout = out + (coff - roff);	/* out as addressed by the mirrored update */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: both updates address the very same vectors. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_coo_idx_t r = rows[k], c = cols[k];
			const float a = VA[k];

			out[r] += a * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += a * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, then the remainder. */
		rsb_coo_idx_t r, c;

		for (k = 0; k + 3 < nnz; k += 4)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			mout[c] += VA[k + 1] * mrhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			mout[c] += VA[k + 2] * mrhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			mout[c] += VA[k + 3] * mrhs[r];
		}
		for (; k < nnz; ++k)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, every stored entry (i,j) is applied
	 * to out at i and, mirrored, at j; when roff==coff the mirrored update is skipped for i==j.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;	/* row indices: bpntr reinterpreted as half indices */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;	/* column indices */
	const float * const mrhs = rhs + (roff - coff);	/* rhs as addressed by the mirrored update */
	float * const mout = out + (coff - roff);	/* out as addressed by the mirrored update */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: both updates address the very same vectors. */
		while (RSB_LIKELY(k < nnz))
		{
			const rsb_half_idx_t r = rows[k], c = cols[k];
			const float a = VA[k];

			out[r] += a * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += a * rhs[r];
			++k;
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, then the remainder. */
		rsb_half_idx_t r, c;

		for (k = 0; k + 3 < nnz; k += 4)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			mout[c] += VA[k + 1] * mrhs[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			mout[c] += VA[k + 2] * mrhs[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			mout[c] += VA[k + 3] * mrhs[r];
		}
		for (; k < nnz; ++k)
		{
			r = rows[k]; c = cols[k];
			out[r] += VA[k] * rhs[c];
			mout[c] += VA[k] * mrhs[r];
		}
	}

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_coo_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x, where A == A^H. \f$
	 * A is 1 x 1 blocked, in BCOR format, with explicit diagonal, float values and rsb_half_idx_t column indices.
	 * Forwards all arguments, unchanged, to the non-transposed kernel.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* With a symmetric matrix, the transposed product is the same as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with implicit diagonal, float values and rsb_coo_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, each stored entry k
	 * adds VA[k] times rhs at its column index to out at its row index.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied in this kernel; see the caller. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;	/* row indices, read through bpntr */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;	/* column indices */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		out[rows[k]]     += VA[k]     * rhs[cols[k]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		out[rows[k]] += VA[k] * rhs[cols[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x, where A \neq A^T. \f$
	 * A is 1 x 1 blocked, in BCOR format, with implicit diagonal, float values and rsb_half_idx_t column indices.
	 * After the initial rsb__cblas_Xscal call on out, each stored entry k
	 * adds VA[k] times rhs at its column index to out at its row index.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not applied in this kernel; see the caller. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;	/* row indices: bpntr reinterpreted as half indices */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;	/* column indices */
	rsb_nnz_idx_t k = 0;

	/* Scale Mdim elements of out by a NULL factor: presumably a zero fill, matching y <- A x (confirm in rsb__cblas_Xscal). */
	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	/* Main loop, unrolled four times. */
	for (k = 0; k + 3 < nnz; k += 4)
	{
		out[rows[k]]     += VA[k]     * rhs[cols[k]];
		out[rows[k + 1]] += VA[k + 1] * rhs[cols[k + 1]];
		out[rows[k + 2]] += VA[k + 2] * rhs[cols[k + 2]];
		out[rows[k + 3]] += VA[k + 3] * rhs[cols[k + 3]];
	}
	/* Leftover entries (fewer than four). */
	for (; k < nnz; ++k)
		out[rows[k]] += VA[k] * rhs[cols[k]];

	/* Optional trace, emitted when the RSB_VERBOSE_KERNELS query returns nonzero. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays
	 * holding one entry per nonzero. The roles of the indices are swapped with respect to the
	 * non-transposed kernel: each value in VA is multiplied by x[row] and accumulated into y[column],
	 * where x is rhs displaced by (roff - coff) entries and y is out displaced by (coff - roff) entries.
	 * Beforehand, the undisplaced out is handed to rsb__cblas_Xscal over mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: Mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	const float *x = NULL;
	float *y = NULL;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Re-base both vectors for the transposed access pattern. */
	x = (rhs - coff) + roff;
	y = (out - roff) + coff;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays
	 * holding one entry per nonzero. The roles of the indices are swapped with respect to the
	 * non-transposed kernel: each value in VA is multiplied by x[row] and accumulated into y[column],
	 * where x is rhs displaced by (roff - coff) entries and y is out displaced by (coff - roff) entries.
	 * Beforehand, the undisplaced out is handed to rsb__cblas_Xscal over mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: Mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	const float *x = NULL;
	float *y = NULL;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Re-base both vectors for the transposed access pattern. */
	x = (rhs - coff) + roff;
	y = (out - roff) + coff;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The values are real (float), so no conjugation takes place and the arithmetic is that of the
	 * transposed product: each value in VA is multiplied by x[row] and accumulated into y[column],
	 * where x is rhs displaced by (roff - coff) entries and y is out displaced by (coff - roff) entries.
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Beforehand, the undisplaced out is handed to rsb__cblas_Xscal over mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: Mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	const float *x = NULL;
	float *y = NULL;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Re-base both vectors for the transposed access pattern. */
	x = (rhs - coff) + roff;
	y = (out - roff) + coff;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The values are real (float), so no conjugation takes place and the arithmetic is that of the
	 * transposed product: each value in VA is multiplied by x[row] and accumulated into y[column],
	 * where x is rhs displaced by (roff - coff) entries and y is out displaced by (coff - roff) entries.
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Beforehand, the undisplaced out is handed to rsb__cblas_Xscal over mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: Mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	const float *x = NULL;
	float *y = NULL;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, mdim, NULL, out, 1);

	/* Re-base both vectors for the transposed access pattern. */
	x = (rhs - coff) + roff;
	y = (out - roff) + coff;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sU_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x\f$, for symmetric A (\f$A = A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Every stored nonzero contributes twice: VA*rhs[column] into out[row], plus a mirrored term.
	 * - If roff == coff, the mirrored term VA*rhs[row] goes into out[column], and is skipped when row == column.
	 * - Otherwise the mirrored term is applied unconditionally, reading rhs displaced by (roff - coff)
	 *   entries and writing out displaced by (coff - roff) entries.
	 * Beforehand, out is handed to rsb__cblas_Xscal over Mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	/* Displaced views used for the mirrored contribution when the offsets differ. */
	const float * const x_mirror = rhs + (roff - coff);
	float * const y_mirror = out + (coff - roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: mirror within the same vectors, skipping diagonal entries. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Unrolled part: four nonzeroes per pass, direct then mirrored term. */
		while (k + 3 < nnz)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			y_mirror[c] += VA[k + 1] * x_mirror[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			y_mirror[c] += VA[k + 2] * x_mirror[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			y_mirror[c] += VA[k + 3] * x_mirror[r];

			k += 4;
		}
		/* Remainder: at most three nonzeroes. */
		while (k < nnz)
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x\f$, for symmetric A (\f$A = A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Every stored nonzero contributes twice: VA*rhs[column] into out[row], plus a mirrored term.
	 * - If roff == coff, the mirrored term VA*rhs[row] goes into out[column], and is skipped when row == column.
	 * - Otherwise the mirrored term is applied unconditionally, reading rhs displaced by (roff - coff)
	 *   entries and writing out displaced by (coff - roff) entries.
	 * Beforehand, out is handed to rsb__cblas_Xscal over Mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	/* Displaced views used for the mirrored contribution when the offsets differ. */
	const float * const x_mirror = rhs + (roff - coff);
	float * const y_mirror = out + (coff - roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: mirror within the same vectors, skipping diagonal entries. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Unrolled part: four nonzeroes per pass, direct then mirrored term. */
		while (k + 3 < nnz)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			y_mirror[c] += VA[k + 1] * x_mirror[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			y_mirror[c] += VA[k + 2] * x_mirror[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			y_mirror[c] += VA[k + 3] * x_mirror[r];

			k += 4;
		}
		/* Remainder: at most three nonzeroes. */
		while (k < nnz)
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x\f$, for symmetric A (\f$A = A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Since A equals its transpose, all arguments are forwarded unchanged to the
	 * non-transposed symmetric kernel.
	 * \return whatever the non-transposed symmetric kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x\f$, for symmetric A (\f$A = A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Since A equals its transpose, all arguments are forwarded unchanged to the
	 * non-transposed symmetric kernel.
	 * \return whatever the non-transposed symmetric kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x\f$, for symmetric A (\f$A = A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The values are real (float) and A equals its transpose, so all arguments are
	 * forwarded unchanged to the non-transposed symmetric kernel.
	 * \return whatever the non-transposed symmetric kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x\f$, for symmetric A (\f$A = A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The values are real (float) and A equals its transpose, so all arguments are
	 * forwarded unchanged to the non-transposed symmetric kernel.
	 * \return whatever the non-transposed symmetric kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x\f$, for hermitian A (\f$A = A^H\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The values are real (float), so no conjugation is performed on the mirrored term.
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * Every stored nonzero contributes twice: VA*rhs[column] into out[row], plus a mirrored term.
	 * - If roff == coff, the mirrored term VA*rhs[row] goes into out[column], and is skipped when row == column.
	 * - Otherwise the mirrored term is applied unconditionally, reading rhs displaced by (roff - coff)
	 *   entries and writing out displaced by (coff - roff) entries.
	 * Beforehand, out is handed to rsb__cblas_Xscal over Mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	/* Displaced views used for the mirrored contribution when the offsets differ. */
	const float * const x_mirror = rhs + (roff - coff);
	float * const y_mirror = out + (coff - roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: mirror within the same vectors, skipping diagonal entries. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Unrolled part: four nonzeroes per pass, direct then mirrored term. */
		while (k + 3 < nnz)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			y_mirror[c] += VA[k + 1] * x_mirror[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			y_mirror[c] += VA[k + 2] * x_mirror[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			y_mirror[c] += VA[k + 3] * x_mirror[r];

			k += 4;
		}
		/* Remainder: at most three nonzeroes. */
		while (k < nnz)
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A} \cdot x\f$, for hermitian A (\f$A = A^H\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The values are real (float), so no conjugation is performed on the mirrored term.
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * Every stored nonzero contributes twice: VA*rhs[column] into out[row], plus a mirrored term.
	 * - If roff == coff, the mirrored term VA*rhs[row] goes into out[column], and is skipped when row == column.
	 * - Otherwise the mirrored term is applied unconditionally, reading rhs displaced by (roff - coff)
	 *   entries and writing out displaced by (coff - roff) entries.
	 * Beforehand, out is handed to rsb__cblas_Xscal over Mdim entries with a NULL factor
	 * (presumably this zeroes it -- confirm against rsb__cblas_Xscal).
	 * Not used here: mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	/* Displaced views used for the mirrored contribution when the offsets differ. */
	const float * const x_mirror = rhs + (roff - coff);
	float * const y_mirror = out + (coff - roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	rsb__cblas_Xscal(RSB_NUMERICAL_TYPE_FLOAT, Mdim, NULL, out, 1);

	if (roff == coff)
	{
		/* Equal offsets: mirror within the same vectors, skipping diagonal entries. */
		while (RSB_LIKELY(k < nnz))
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			if (RSB_LIKELY(r != c))
				out[c] += VA[k] * rhs[r];
			++k;
		}
	}
	else
	{
		/* Unrolled part: four nonzeroes per pass, direct then mirrored term. */
		while (k + 3 < nnz)
		{
			r = rows[k];     c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];

			r = rows[k + 1]; c = cols[k + 1];
			out[r] += VA[k + 1] * rhs[c];
			y_mirror[c] += VA[k + 1] * x_mirror[r];

			r = rows[k + 2]; c = cols[k + 2];
			out[r] += VA[k + 2] * rhs[c];
			y_mirror[c] += VA[k + 2] * x_mirror[r];

			r = rows[k + 3]; c = cols[k + 3];
			out[r] += VA[k + 3] * rhs[c];
			y_mirror[c] += VA[k + 3] * x_mirror[r];

			k += 4;
		}
		/* Remainder: at most three nonzeroes. */
		while (k < nnz)
		{
			r = rows[k];
			c = cols[k];
			out[r] += VA[k] * rhs[c];
			y_mirror[c] += VA[k] * x_mirror[r];
			++k;
		}
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x\f$, for hermitian A (\f$A = A^H\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel
	 * (the values are real, so the transposed product is treated as the plain one).
	 * \return whatever the non-transposed hermitian kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T} \cdot x\f$, for hermitian A (\f$A = A^H\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * All arguments are forwarded unchanged to the non-transposed hermitian kernel
	 * (the values are real, so the transposed product is treated as the plain one).
	 * \return whatever the non-transposed hermitian kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_C__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x\f$, for hermitian A (\f$A = A^H\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Since A equals its conjugate transpose, all arguments are forwarded unchanged
	 * to the non-transposed hermitian kernel.
	 * \return whatever the non-transposed hermitian kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uauz_float_H__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H} \cdot x\f$, for hermitian A (\f$A = A^H\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Since A equals its conjugate transpose, all arguments are forwarded unchanged
	 * to the non-transposed hermitian kernel.
	 * \return whatever the non-transposed hermitian kernel returns.
	 */

	/* NOTE: the implicit diagonal is not applied by this kernel; that is left to the caller level. */
	/* Transposed symmetric product == non-transposed symmetric product. */
	const rsb_err_t errval = rsb__BCOR_spmv_uauz_float_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays
	 * holding one entry per nonzero. For each of the nnz values in VA, alpha*VA*rhs[column] is
	 * accumulated into out[row], in storage order; out is not reset first.
	 * alpha is read once from *alphap, which therefore must be a valid pointer.
	 * Not used here: Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	const float alpha = *alphap;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     out[r] += alpha * VA[k]     * rhs[c];
		r = rows[k + 1]; c = cols[k + 1]; out[r] += alpha * VA[k + 1] * rhs[c];
		r = rows[k + 2]; c = cols[k + 2]; out[r] += alpha * VA[k + 2] * rhs[c];
		r = rows[k + 3]; c = cols[k + 3]; out[r] += alpha * VA[k + 3] * rhs[c];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		out[r] += alpha * VA[k] * rhs[c];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays
	 * holding one entry per nonzero. For each of the nnz values in VA, alpha*VA*rhs[column] is
	 * accumulated into out[row], in storage order; out is not reset first.
	 * alpha is read once from *alphap, which therefore must be a valid pointer.
	 * Not used here: Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	const float alpha = *alphap;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     out[r] += alpha * VA[k]     * rhs[c];
		r = rows[k + 1]; c = cols[k + 1]; out[r] += alpha * VA[k + 1] * rhs[c];
		r = rows[k + 2]; c = cols[k + 2]; out[r] += alpha * VA[k + 2] * rhs[c];
		r = rows[k + 3]; c = cols[k + 3]; out[r] += alpha * VA[k + 3] * rhs[c];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		out[r] += alpha * VA[k] * rhs[c];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays
	 * holding one entry per nonzero. For each of the nnz values in VA, alpha*VA*x[row] is accumulated
	 * into y[column], where x is rhs displaced by (roff - coff) entries and y is out displaced by
	 * (coff - roff) entries; out is not reset first.
	 * alpha is read once from *alphap, which therefore must be a valid pointer.
	 * Not used here: Mdim, mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	const float alpha = *alphap;
	/* Re-base both vectors for the transposed access pattern. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += alpha * VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += alpha * VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += alpha * VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += alpha * VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += alpha * VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays
	 * holding one entry per nonzero. For each of the nnz values in VA, alpha*VA*x[row] is accumulated
	 * into y[column], where x is rhs displaced by (roff - coff) entries and y is out displaced by
	 * (coff - roff) entries; out is not reset first.
	 * alpha is read once from *alphap, which therefore must be a valid pointer.
	 * Not used here: Mdim, mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	const float alpha = *alphap;
	/* Re-base both vectors for the transposed access pattern. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += alpha * VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += alpha * VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += alpha * VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += alpha * VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += alpha * VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The values are real (float), so no conjugation takes place and the arithmetic is that of the
	 * transposed product: for each of the nnz values in VA, alpha*VA*x[row] is accumulated into
	 * y[column], where x is rhs displaced by (roff - coff) entries and y is out displaced by
	 * (coff - roff) entries; out is not reset first.
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_coo_idx_t arrays.
	 * alpha is read once from *alphap, which therefore must be a valid pointer.
	 * Not used here: Mdim, mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t *)bindx;
	const float alpha = *alphap;
	/* Re-base both vectors for the transposed access pattern. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += alpha * VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += alpha * VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += alpha * VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += alpha * VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += alpha * VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, for unsymmetric A (\f$A \neq A^T\f$).
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The values are real (float), so no conjugation takes place and the arithmetic is that of the
	 * transposed product: for each of the nnz values in VA, alpha*VA*x[row] is accumulated into
	 * y[column], where x is rhs displaced by (roff - coff) entries and y is out displaced by
	 * (coff - roff) entries; out is not reset first.
	 * Row indices come from bpntr and column indices from bindx, both viewed as rsb_half_idx_t arrays.
	 * alpha is read once from *alphap, which therefore must be a valid pointer.
	 * Not used here: Mdim, mdim, indptr, rpntr, cpntr, br, bc, flags.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t *)bindx;
	const float alpha = *alphap;
	/* Re-base both vectors for the transposed access pattern. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Unrolled part: four nonzeroes per pass. */
	while (k + 3 < nnz)
	{
		r = rows[k];     c = cols[k];     y[c] += alpha * VA[k]     * x[r];
		r = rows[k + 1]; c = cols[k + 1]; y[c] += alpha * VA[k + 1] * x[r];
		r = rows[k + 2]; c = cols[k + 2]; y[c] += alpha * VA[k + 2] * x[r];
		r = rows[k + 3]; c = cols[k + 3]; y[c] += alpha * VA[k + 3] * x[r];
		k += 4;
	}
	/* Remainder: at most three nonzeroes. */
	while (k < nnz)
	{
		r = rows[k];
		c = cols[k];
		y[c] += alpha * VA[k] * x[r];
		++k;
	}

	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0))
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Every entry adds alpha*VA[n]*rhs[col] to out[row], plus a mirrored contribution:
	 * - if roff == coff, alpha*VA[n]*rhs[row] goes to out[col], unless row == col;
	 * - otherwise it is always applied, reading \c rhs moved by (roff-coff) elements and
	 *   writing \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Shifted views used by the mirrored update when the two offsets differ. */
	const float * const mirror_rhs = rhs + (roff - coff);
	float * const mirror_out = out + (coff - roff);
	rsb_nnz_idx_t k = 0;
	rsb_coo_idx_t r = 0, c = 0;

	if( roff == coff )
	{
		/* Equal offsets: plain loop, skipping the mirrored update where row == col. */
		for( k = 0; RSB_LIKELY(k < nnz); ++k )
		{
			r = row_idx[k];
			c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			if( RSB_LIKELY(r != c) )
			{
				out[c] += scale * VA[k] * rhs[r];
			}
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, mirrored update unconditional. */
		while( k+3 < nnz )
		{
			r = row_idx[k+0]; c = col_idx[k+0];
			out[r] += scale * VA[k+0] * rhs[c];
			mirror_out[c] += scale * VA[k+0] * mirror_rhs[r];

			r = row_idx[k+1]; c = col_idx[k+1];
			out[r] += scale * VA[k+1] * rhs[c];
			mirror_out[c] += scale * VA[k+1] * mirror_rhs[r];

			r = row_idx[k+2]; c = col_idx[k+2];
			out[r] += scale * VA[k+2] * rhs[c];
			mirror_out[c] += scale * VA[k+2] * mirror_rhs[r];

			r = row_idx[k+3]; c = col_idx[k+3];
			out[r] += scale * VA[k+3] * rhs[c];
			mirror_out[c] += scale * VA[k+3] * mirror_rhs[r];

			k += 4;
		}
		/* Leftover entries (at most three). */
		while( k < nnz )
		{
			r = row_idx[k]; c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			mirror_out[c] += scale * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Every entry adds alpha*VA[n]*rhs[col] to out[row], plus a mirrored contribution:
	 * - if roff == coff, alpha*VA[n]*rhs[row] goes to out[col], unless row == col;
	 * - otherwise it is always applied, reading \c rhs moved by (roff-coff) elements and
	 *   writing \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Shifted views used by the mirrored update when the two offsets differ. */
	const float * const mirror_rhs = rhs + (roff - coff);
	float * const mirror_out = out + (coff - roff);
	rsb_nnz_idx_t k = 0;
	rsb_half_idx_t r = 0, c = 0;

	if( roff == coff )
	{
		/* Equal offsets: plain loop, skipping the mirrored update where row == col. */
		for( k = 0; RSB_LIKELY(k < nnz); ++k )
		{
			r = row_idx[k];
			c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			if( RSB_LIKELY(r != c) )
			{
				out[c] += scale * VA[k] * rhs[r];
			}
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, mirrored update unconditional. */
		while( k+3 < nnz )
		{
			r = row_idx[k+0]; c = col_idx[k+0];
			out[r] += scale * VA[k+0] * rhs[c];
			mirror_out[c] += scale * VA[k+0] * mirror_rhs[r];

			r = row_idx[k+1]; c = col_idx[k+1];
			out[r] += scale * VA[k+1] * rhs[c];
			mirror_out[c] += scale * VA[k+1] * mirror_rhs[r];

			r = row_idx[k+2]; c = col_idx[k+2];
			out[r] += scale * VA[k+2] * rhs[c];
			mirror_out[c] += scale * VA[k+2] * mirror_rhs[r];

			r = row_idx[k+3]; c = col_idx[k+3];
			out[r] += scale * VA[k+3] * rhs[c];
			mirror_out[c] += scale * VA[k+3] * mirror_rhs[r];

			k += 4;
		}
		/* Leftover entries (at most three). */
		while( k < nnz )
		{
			r = row_idx[k]; c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			mirror_out[c] += scale * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * symmetric kernel, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* For a symmetric matrix the transposed product coincides with the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * symmetric kernel, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* For a symmetric matrix the transposed product coincides with the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * symmetric kernel, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The symmetric transposed case is served by the symmetric untransposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * symmetric kernel, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The symmetric transposed case is served by the symmetric untransposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Every entry adds alpha*VA[n]*rhs[col] to out[row], plus a mirrored contribution:
	 * - if roff == coff, alpha*VA[n]*rhs[row] goes to out[col], unless row == col;
	 * - otherwise it is always applied, reading \c rhs moved by (roff-coff) elements and
	 *   writing \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Shifted views used by the mirrored update when the two offsets differ. */
	const float * const mirror_rhs = rhs + (roff - coff);
	float * const mirror_out = out + (coff - roff);
	rsb_nnz_idx_t k = 0;
	rsb_coo_idx_t r = 0, c = 0;

	if( roff == coff )
	{
		/* Equal offsets: plain loop, skipping the mirrored update where row == col. */
		for( k = 0; RSB_LIKELY(k < nnz); ++k )
		{
			r = row_idx[k];
			c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			if( RSB_LIKELY(r != c) )
			{
				out[c] += scale * VA[k] * rhs[r];
			}
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, mirrored update unconditional. */
		while( k+3 < nnz )
		{
			r = row_idx[k+0]; c = col_idx[k+0];
			out[r] += scale * VA[k+0] * rhs[c];
			mirror_out[c] += scale * VA[k+0] * mirror_rhs[r];

			r = row_idx[k+1]; c = col_idx[k+1];
			out[r] += scale * VA[k+1] * rhs[c];
			mirror_out[c] += scale * VA[k+1] * mirror_rhs[r];

			r = row_idx[k+2]; c = col_idx[k+2];
			out[r] += scale * VA[k+2] * rhs[c];
			mirror_out[c] += scale * VA[k+2] * mirror_rhs[r];

			r = row_idx[k+3]; c = col_idx[k+3];
			out[r] += scale * VA[k+3] * rhs[c];
			mirror_out[c] += scale * VA[k+3] * mirror_rhs[r];

			k += 4;
		}
		/* Leftover entries (at most three). */
		while( k < nnz )
		{
			r = row_idx[k]; c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			mirror_out[c] += scale * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Every entry adds alpha*VA[n]*rhs[col] to out[row], plus a mirrored contribution:
	 * - if roff == coff, alpha*VA[n]*rhs[row] goes to out[col], unless row == col;
	 * - otherwise it is always applied, reading \c rhs moved by (roff-coff) elements and
	 *   writing \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Shifted views used by the mirrored update when the two offsets differ. */
	const float * const mirror_rhs = rhs + (roff - coff);
	float * const mirror_out = out + (coff - roff);
	rsb_nnz_idx_t k = 0;
	rsb_half_idx_t r = 0, c = 0;

	if( roff == coff )
	{
		/* Equal offsets: plain loop, skipping the mirrored update where row == col. */
		for( k = 0; RSB_LIKELY(k < nnz); ++k )
		{
			r = row_idx[k];
			c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			if( RSB_LIKELY(r != c) )
			{
				out[c] += scale * VA[k] * rhs[r];
			}
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, mirrored update unconditional. */
		while( k+3 < nnz )
		{
			r = row_idx[k+0]; c = col_idx[k+0];
			out[r] += scale * VA[k+0] * rhs[c];
			mirror_out[c] += scale * VA[k+0] * mirror_rhs[r];

			r = row_idx[k+1]; c = col_idx[k+1];
			out[r] += scale * VA[k+1] * rhs[c];
			mirror_out[c] += scale * VA[k+1] * mirror_rhs[r];

			r = row_idx[k+2]; c = col_idx[k+2];
			out[r] += scale * VA[k+2] * rhs[c];
			mirror_out[c] += scale * VA[k+2] * mirror_rhs[r];

			r = row_idx[k+3]; c = col_idx[k+3];
			out[r] += scale * VA[k+3] * rhs[c];
			mirror_out[c] += scale * VA[k+3] * mirror_rhs[r];

			k += 4;
		}
		/* Leftover entries (at most three). */
		while( k < nnz )
		{
			r = row_idx[k]; c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			mirror_out[c] += scale * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * kernel for the same symmetry, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The symmetric transposed case is served by the symmetric untransposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * kernel for the same symmetry, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The symmetric transposed case is served by the symmetric untransposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * kernel for the same symmetry, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The symmetric transposed case is served by the symmetric untransposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * kernel for the same symmetry, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The symmetric transposed case is served by the symmetric untransposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Each of the \c nnz entries adds alpha*VA[n]*rhs[col] to out[row]; \c roff and \c coff are not used.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	rsb_nnz_idx_t k = 0;

	/* Bulk of the entries, four per iteration. */
	while( k+3 < nnz )
	{
		out[row_idx[k+0]] += scale * VA[k+0] * rhs[col_idx[k+0]];
		out[row_idx[k+1]] += scale * VA[k+1] * rhs[col_idx[k+1]];
		out[row_idx[k+2]] += scale * VA[k+2] * rhs[col_idx[k+2]];
		out[row_idx[k+3]] += scale * VA[k+3] * rhs[col_idx[k+3]];
		k += 4;
	}
	/* Leftover entries (at most three). */
	while( k < nnz )
	{
		out[row_idx[k]] += scale * VA[k] * rhs[col_idx[k]];
		++k;
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Each of the \c nnz entries adds alpha*VA[n]*rhs[col] to out[row]; \c roff and \c coff are not used.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	rsb_nnz_idx_t k = 0;

	/* Bulk of the entries, four per iteration. */
	while( k+3 < nnz )
	{
		out[row_idx[k+0]] += scale * VA[k+0] * rhs[col_idx[k+0]];
		out[row_idx[k+1]] += scale * VA[k+1] * rhs[col_idx[k+1]];
		out[row_idx[k+2]] += scale * VA[k+2] * rhs[col_idx[k+2]];
		out[row_idx[k+3]] += scale * VA[k+3] * rhs[col_idx[k+3]];
		k += 4;
	}
	/* Leftover entries (at most three). */
	while( k < nnz )
	{
		out[row_idx[k]] += scale * VA[k] * rhs[col_idx[k]];
		++k;
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Each of the \c nnz entries adds alpha*VA[n]*x[row] to y[col], where x is \c rhs moved by
	 * (roff-coff) elements and y is \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Operand views shifted by the difference between the row and column offsets. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Bulk of the entries, four per iteration. */
	while( k+3 < nnz )
	{
		y[col_idx[k+0]] += scale * VA[k+0] * x[row_idx[k+0]];
		y[col_idx[k+1]] += scale * VA[k+1] * x[row_idx[k+1]];
		y[col_idx[k+2]] += scale * VA[k+2] * x[row_idx[k+2]];
		y[col_idx[k+3]] += scale * VA[k+3] * x[row_idx[k+3]];
		k += 4;
	}
	/* Leftover entries (at most three). */
	while( k < nnz )
	{
		y[col_idx[k]] += scale * VA[k] * x[row_idx[k]];
		++k;
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Each of the \c nnz entries adds alpha*VA[n]*x[row] to y[col], where x is \c rhs moved by
	 * (roff-coff) elements and y is \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Operand views shifted by the difference between the row and column offsets. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Bulk of the entries, four per iteration. */
	while( k+3 < nnz )
	{
		y[col_idx[k+0]] += scale * VA[k+0] * x[row_idx[k+0]];
		y[col_idx[k+1]] += scale * VA[k+1] * x[row_idx[k+1]];
		y[col_idx[k+2]] += scale * VA[k+2] * x[row_idx[k+2]];
		y[col_idx[k+3]] += scale * VA[k+3] * x[row_idx[k+3]];
		k += 4;
	}
	/* Leftover entries (at most three). */
	while( k < nnz )
	{
		y[col_idx[k]] += scale * VA[k] * x[row_idx[k]];
		++k;
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Each of the \c nnz entries adds alpha*VA[n]*x[row] to y[col], where x is \c rhs moved by
	 * (roff-coff) elements and y is \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Operand views shifted by the difference between the row and column offsets. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Bulk of the entries, four per iteration. */
	while( k+3 < nnz )
	{
		y[col_idx[k+0]] += scale * VA[k+0] * x[row_idx[k+0]];
		y[col_idx[k+1]] += scale * VA[k+1] * x[row_idx[k+1]];
		y[col_idx[k+2]] += scale * VA[k+2] * x[row_idx[k+2]];
		y[col_idx[k+3]] += scale * VA[k+3] * x[row_idx[k+3]];
		k += 4;
	}
	/* Leftover entries (at most three). */
	while( k < nnz )
	{
		y[col_idx[k]] += scale * VA[k] * x[row_idx[k]];
		++k;
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Each of the \c nnz entries adds alpha*VA[n]*x[row] to y[col], where x is \c rhs moved by
	 * (roff-coff) elements and y is \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Operand views shifted by the difference between the row and column offsets. */
	const float * const x = (rhs - coff) + roff;
	float * const y = (out - roff) + coff;
	rsb_nnz_idx_t k = 0;

	/* Bulk of the entries, four per iteration. */
	while( k+3 < nnz )
	{
		y[col_idx[k+0]] += scale * VA[k+0] * x[row_idx[k+0]];
		y[col_idx[k+1]] += scale * VA[k+1] * x[row_idx[k+1]];
		y[col_idx[k+2]] += scale * VA[k+2] * x[row_idx[k+2]];
		y[col_idx[k+3]] += scale * VA[k+3] * x[row_idx[k+3]];
		k += 4;
	}
	/* Leftover entries (at most three). */
	while( k < nnz )
	{
		y[col_idx[k]] += scale * VA[k] * x[row_idx[k]];
		++k;
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_coo_idx_t arrays.
	 * Every entry adds alpha*VA[n]*rhs[col] to out[row], plus a mirrored contribution:
	 * - if roff == coff, alpha*VA[n]*rhs[row] goes to out[col], unless row == col;
	 * - otherwise it is always applied, reading \c rhs moved by (roff-coff) elements and
	 *   writing \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_coo_idx_t * const row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const col_idx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Shifted views used by the mirrored update when the two offsets differ. */
	const float * const mirror_rhs = rhs + (roff - coff);
	float * const mirror_out = out + (coff - roff);
	rsb_nnz_idx_t k = 0;
	rsb_coo_idx_t r = 0, c = 0;

	if( roff == coff )
	{
		/* Equal offsets: plain loop, skipping the mirrored update where row == col. */
		for( k = 0; RSB_LIKELY(k < nnz); ++k )
		{
			r = row_idx[k];
			c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			if( RSB_LIKELY(r != c) )
			{
				out[c] += scale * VA[k] * rhs[r];
			}
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, mirrored update unconditional. */
		while( k+3 < nnz )
		{
			r = row_idx[k+0]; c = col_idx[k+0];
			out[r] += scale * VA[k+0] * rhs[c];
			mirror_out[c] += scale * VA[k+0] * mirror_rhs[r];

			r = row_idx[k+1]; c = col_idx[k+1];
			out[r] += scale * VA[k+1] * rhs[c];
			mirror_out[c] += scale * VA[k+1] * mirror_rhs[r];

			r = row_idx[k+2]; c = col_idx[k+2];
			out[r] += scale * VA[k+2] * rhs[c];
			mirror_out[c] += scale * VA[k+2] * mirror_rhs[r];

			r = row_idx[k+3]; c = col_idx[k+3];
			out[r] += scale * VA[k+3] * rhs[c];
			mirror_out[c] += scale * VA[k+3] * mirror_rhs[r];

			k += 4;
		}
		/* Leftover entries (at most three). */
		while( k < nnz )
		{
			r = row_idx[k]; c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			mirror_out[c] += scale * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Row indices are read from \c bpntr and column indices from \c bindx, both accessed as rsb_half_idx_t arrays.
	 * Every entry adds alpha*VA[n]*rhs[col] to out[row], plus a mirrored contribution:
	 * - if roff == coff, alpha*VA[n]*rhs[row] goes to out[col], unless row == col;
	 * - otherwise it is always applied, reading \c rhs moved by (roff-coff) elements and
	 *   writing \c out moved by (coff-roff) elements.
	 * The body itself always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	const rsb_half_idx_t * const row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const col_idx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Shifted views used by the mirrored update when the two offsets differ. */
	const float * const mirror_rhs = rhs + (roff - coff);
	float * const mirror_out = out + (coff - roff);
	rsb_nnz_idx_t k = 0;
	rsb_half_idx_t r = 0, c = 0;

	if( roff == coff )
	{
		/* Equal offsets: plain loop, skipping the mirrored update where row == col. */
		for( k = 0; RSB_LIKELY(k < nnz); ++k )
		{
			r = row_idx[k];
			c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			if( RSB_LIKELY(r != c) )
			{
				out[c] += scale * VA[k] * rhs[r];
			}
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, mirrored update unconditional. */
		while( k+3 < nnz )
		{
			r = row_idx[k+0]; c = col_idx[k+0];
			out[r] += scale * VA[k+0] * rhs[c];
			mirror_out[c] += scale * VA[k+0] * mirror_rhs[r];

			r = row_idx[k+1]; c = col_idx[k+1];
			out[r] += scale * VA[k+1] * rhs[c];
			mirror_out[c] += scale * VA[k+1] * mirror_rhs[r];

			r = row_idx[k+2]; c = col_idx[k+2];
			out[r] += scale * VA[k+2] * rhs[c];
			mirror_out[c] += scale * VA[k+2] * mirror_rhs[r];

			r = row_idx[k+3]; c = col_idx[k+3];
			out[r] += scale * VA[k+3] * rhs[c];
			mirror_out[c] += scale * VA[k+3] * mirror_rhs[r];

			k += 4;
		}
		/* Leftover entries (at most three). */
		while( k < nnz )
		{
			r = row_idx[k]; c = col_idx[k];
			out[r] += scale * VA[k] * rhs[c];
			mirror_out[c] += scale * VA[k] * mirror_rhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel being run, enabled through the environment. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Performs \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No work is done here: all arguments are passed on unchanged to the untransposed
	 * symmetric kernel, whose return value is handed back.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: see the caller level. */
	/* For a symmetric matrix the transposed product coincides with the untransposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sS_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Every stored entry VA[k] (k in [0,nnz)) is accumulated at its own
	 * (row,column) position and at the mirrored (column,row) position.
	 * Row indices are read through bpntr (reinterpreted as rsb_coo_idx_t),
	 * column indices through bindx; the scalar is loaded once from alphap.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	const float alpha = *alphap;
	/* Views shifted by the row/column offset difference, used by the mirrored update. */
	const float * const rhs_t = rhs+1*(roff-coff);
	float * const out_t = out+1*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: mirror within the same vectors, once only when r==c. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (alpha)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (alpha)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Main loop, four entries per iteration. */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (alpha)*VA[k]*rhs[c];
			out_t[c] += (alpha)*VA[k]*rhs_t[r];

			r = rows[k+1];
			c = cols[k+1];
			out[r] += (alpha)*VA[k+1]*rhs[c];
			out_t[c] += (alpha)*VA[k+1]*rhs_t[r];

			r = rows[k+2];
			c = cols[k+2];
			out[r] += (alpha)*VA[k+2]*rhs[c];
			out_t[c] += (alpha)*VA[k+2]*rhs_t[r];

			r = rows[k+3];
			c = cols[k+3];
			out[r] += (alpha)*VA[k+3]*rhs[c];
			out_t[c] += (alpha)*VA[k+3]*rhs_t[r];
		}
		/* Remainder: fewer than four entries left. */
		for(; k<nnz; ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (alpha)*VA[k]*rhs[c];
			out_t[c] += (alpha)*VA[k]*rhs_t[r];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every stored entry VA[k] (k in [0,nnz)) is accumulated at its own
	 * (row,column) position and at the mirrored (column,row) position.
	 * Row indices are read through bpntr (reinterpreted as rsb_half_idx_t),
	 * column indices through bindx; the scalar is loaded once from alphap.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	const float alpha = *alphap;
	/* Views shifted by the row/column offset difference, used by the mirrored update. */
	const float * const rhs_t = rhs+1*(roff-coff);
	float * const out_t = out+1*(coff-roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: mirror within the same vectors, once only when r==c. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (alpha)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (alpha)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Main loop, four entries per iteration. */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (alpha)*VA[k]*rhs[c];
			out_t[c] += (alpha)*VA[k]*rhs_t[r];

			r = rows[k+1];
			c = cols[k+1];
			out[r] += (alpha)*VA[k+1]*rhs[c];
			out_t[c] += (alpha)*VA[k+1]*rhs_t[r];

			r = rows[k+2];
			c = cols[k+2];
			out[r] += (alpha)*VA[k+2]*rhs[c];
			out_t[c] += (alpha)*VA[k+2]*rhs_t[r];

			r = rows[k+3];
			c = cols[k+3];
			out[r] += (alpha)*VA[k+3]*rhs[c];
			out_t[c] += (alpha)*VA[k+3]*rhs_t[r];
		}
		/* Remainder: fewer than four entries left. */
		for(; k<nnz; ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (alpha)*VA[k]*rhs[c];
			out_t[c] += (alpha)*VA[k]*rhs_t[r];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed kernel of the same symmetry and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed kernel of the same symmetry and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_C__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed kernel of the same symmetry and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_C__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_uxua_float_H__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed kernel of the same symmetry and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: the implicit diagonal is not handled at this level; see the caller. */
	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_uxua_float_H__tN_r1_c1_uu_sH_dI_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, alphap);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * For each stored entry VA[k] (k in [0,nnz)) the product with rhs at the
	 * entry's column index, scaled by -1, is added to out at its row index.
	 * Row indices are read through bpntr (reinterpreted as rsb_coo_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main loop, four entries per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rows[k];
		c = cols[k];
		out[r] += (-1)*VA[k]*rhs[c];

		r = rows[k+1];
		c = cols[k+1];
		out[r] += (-1)*VA[k+1]*rhs[c];

		r = rows[k+2];
		c = cols[k+2];
		out[r] += (-1)*VA[k+2]*rhs[c];

		r = rows[k+3];
		c = cols[k+3];
		out[r] += (-1)*VA[k+3]*rhs[c];
	}
	/* Remainder: fewer than four entries left. */
	for(; k<nnz; ++k)
	{
		r = rows[k];
		c = cols[k];
		out[r] += (-1)*VA[k]*rhs[c];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * For each stored entry VA[k] (k in [0,nnz)) the product with rhs at the
	 * entry's column index, scaled by -1, is added to out at its row index.
	 * Row indices are read through bpntr (reinterpreted as rsb_half_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main loop, four entries per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rows[k];
		c = cols[k];
		out[r] += (-1)*VA[k]*rhs[c];

		r = rows[k+1];
		c = cols[k+1];
		out[r] += (-1)*VA[k+1]*rhs[c];

		r = rows[k+2];
		c = cols[k+2];
		out[r] += (-1)*VA[k+2]*rhs[c];

		r = rows[k+3];
		c = cols[k+3];
		out[r] += (-1)*VA[k+3]*rhs[c];
	}
	/* Remainder: fewer than four entries left. */
	for(; k<nnz; ++k)
	{
		r = rows[k];
		c = cols[k];
		out[r] += (-1)*VA[k]*rhs[c];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The roles of the indices are swapped with respect to the non-transposed
	 * kernel: for each stored entry VA[k] (k in [0,nnz)) the product with rhs
	 * at the entry's row index, scaled by -1, is added to out at its column
	 * index. Before the loop rhs is shifted by (roff - coff) and out by
	 * (coff - roff) elements.
	 * Row indices are read through bpntr (reinterpreted as rsb_coo_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Rebase both vectors for the swapped row/column addressing. */
	rhs = (rhs - coff*(1)) + roff*(1);
	out = (out - roff*(1)) + coff*(1);

	/* Main loop, four entries per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];

		r = rows[k+1];
		c = cols[k+1];
		out[c] += (-1)*VA[k+1]*rhs[r];

		r = rows[k+2];
		c = cols[k+2];
		out[c] += (-1)*VA[k+2]*rhs[r];

		r = rows[k+3];
		c = cols[k+3];
		out[c] += (-1)*VA[k+3]*rhs[r];
	}
	/* Remainder: fewer than four entries left. */
	for(; k<nnz; ++k)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The roles of the indices are swapped with respect to the non-transposed
	 * kernel: for each stored entry VA[k] (k in [0,nnz)) the product with rhs
	 * at the entry's row index, scaled by -1, is added to out at its column
	 * index. Before the loop rhs is shifted by (roff - coff) and out by
	 * (coff - roff) elements.
	 * Row indices are read through bpntr (reinterpreted as rsb_half_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Rebase both vectors for the swapped row/column addressing. */
	rhs = (rhs - coff*(1)) + roff*(1);
	out = (out - roff*(1)) + coff*(1);

	/* Main loop, four entries per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];

		r = rows[k+1];
		c = cols[k+1];
		out[c] += (-1)*VA[k+1]*rhs[r];

		r = rows[k+2];
		c = cols[k+2];
		out[c] += (-1)*VA[k+2]*rhs[r];

		r = rows[k+3];
		c = cols[k+3];
		out[c] += (-1)*VA[k+3]*rhs[r];
	}
	/* Remainder: fewer than four entries left. */
	for(; k<nnz; ++k)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * No conjugation is applied to the float values in this body. For each
	 * stored entry VA[k] (k in [0,nnz)) the product with rhs at the entry's
	 * row index, scaled by -1, is added to out at its column index. Before
	 * the loop rhs is shifted by (roff - coff) and out by (coff - roff) elements.
	 * Row indices are read through bpntr (reinterpreted as rsb_coo_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Rebase both vectors for the swapped row/column addressing. */
	rhs = (rhs - coff*(1)) + roff*(1);
	out = (out - roff*(1)) + coff*(1);

	/* Main loop, four entries per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];

		r = rows[k+1];
		c = cols[k+1];
		out[c] += (-1)*VA[k+1]*rhs[r];

		r = rows[k+2];
		c = cols[k+2];
		out[c] += (-1)*VA[k+2]*rhs[r];

		r = rows[k+3];
		c = cols[k+3];
		out[c] += (-1)*VA[k+3]*rhs[r];
	}
	/* Remainder: fewer than four entries left. */
	for(; k<nnz; ++k)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * No conjugation is applied to the float values in this body. For each
	 * stored entry VA[k] (k in [0,nnz)) the product with rhs at the entry's
	 * row index, scaled by -1, is added to out at its column index. Before
	 * the loop rhs is shifted by (roff - coff) and out by (coff - roff) elements.
	 * Row indices are read through bpntr (reinterpreted as rsb_half_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Rebase both vectors for the swapped row/column addressing. */
	rhs = (rhs - coff*(1)) + roff*(1);
	out = (out - roff*(1)) + coff*(1);

	/* Main loop, four entries per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];

		r = rows[k+1];
		c = cols[k+1];
		out[c] += (-1)*VA[k+1]*rhs[r];

		r = rows[k+2];
		c = cols[k+2];
		out[c] += (-1)*VA[k+2]*rhs[r];

		r = rows[k+3];
		c = cols[k+3];
		out[c] += (-1)*VA[k+3]*rhs[r];
	}
	/* Remainder: fewer than four entries left. */
	for(; k<nnz; ++k)
	{
		r = rows[k];
		c = cols[k];
		out[c] += (-1)*VA[k]*rhs[r];
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Every stored entry VA[k] (k in [0,nnz)) is accumulated, scaled by -1,
	 * at its own (row,column) position and at the mirrored (column,row) position.
	 * Row indices are read through bpntr (reinterpreted as rsb_coo_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	/* Views shifted by the row/column offset difference, used by the mirrored update. */
	const float * const rhs_t = rhs+1*(roff-coff);
	float * const out_t = out+1*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: mirror within the same vectors, once only when r==c. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Main loop, four entries per iteration. */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];

			r = rows[k+1];
			c = cols[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			out_t[c] += (-1)*VA[k+1]*rhs_t[r];

			r = rows[k+2];
			c = cols[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			out_t[c] += (-1)*VA[k+2]*rhs_t[r];

			r = rows[k+3];
			c = cols[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			out_t[c] += (-1)*VA[k+3]*rhs_t[r];
		}
		/* Remainder: fewer than four entries left. */
		for(; k<nnz; ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every stored entry VA[k] (k in [0,nnz)) is accumulated, scaled by -1,
	 * at its own (row,column) position and at the mirrored (column,row) position.
	 * Row indices are read through bpntr (reinterpreted as rsb_half_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	/* Views shifted by the row/column offset difference, used by the mirrored update. */
	const float * const rhs_t = rhs+1*(roff-coff);
	float * const out_t = out+1*(coff-roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: mirror within the same vectors, once only when r==c. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Main loop, four entries per iteration. */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];

			r = rows[k+1];
			c = cols[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			out_t[c] += (-1)*VA[k+1]*rhs_t[r];

			r = rows[k+2];
			c = cols[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			out_t[c] += (-1)*VA[k+2]*rhs_t[r];

			r = rows[k+3];
			c = cols[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			out_t[c] += (-1)*VA[k+3]*rhs_t[r];
		}
		/* Remainder: fewer than four entries left. */
		for(; k<nnz; ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^T} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A^H} \cdot x\f$, where \f$A = A^T\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Pure forwarder: all arguments are handed over unchanged to the
	 * non-transposed symmetric kernel and its result is returned as is.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* The transposed symmetric case is served by the non-transposed symmetric kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Every stored entry VA[k] (k in [0,nnz)) is accumulated, scaled by -1,
	 * at its own (row,column) position and at the mirrored (column,row) position.
	 * Row indices are read through bpntr (reinterpreted as rsb_coo_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	/* Views shifted by the row/column offset difference, used by the mirrored update. */
	const float * const rhs_t = rhs+1*(roff-coff);
	float * const out_t = out+1*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: mirror within the same vectors, once only when r==c. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Main loop, four entries per iteration. */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];

			r = rows[k+1];
			c = cols[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			out_t[c] += (-1)*VA[k+1]*rhs_t[r];

			r = rows[k+2];
			c = cols[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			out_t[c] += (-1)*VA[k+2]*rhs_t[r];

			r = rows[k+3];
			c = cols[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			out_t[c] += (-1)*VA[k+3]*rhs_t[r];
		}
		/* Remainder: fewer than four entries left. */
		for(; k<nnz; ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y - {A} \cdot x\f$, where \f$A = A^H\f$.
	 * A is blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every stored entry VA[k] (k in [0,nnz)) is accumulated, scaled by -1,
	 * at its own (row,column) position and at the mirrored (column,row) position.
	 * Row indices are read through bpntr (reinterpreted as rsb_half_idx_t),
	 * column indices through bindx.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not referenced.
	 * \return \rsb_errval_inp_param_msg (this body always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	/* Views shifted by the row/column offset difference, used by the mirrored update. */
	const float * const rhs_t = rhs+1*(roff-coff);
	float * const out_t = out+1*(coff-roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: mirror within the same vectors, once only when r==c. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Main loop, four entries per iteration. */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];

			r = rows[k+1];
			c = cols[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			out_t[c] += (-1)*VA[k+1]*rhs_t[r];

			r = rows[k+2];
			c = cols[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			out_t[c] += (-1)*VA[k+2]*rhs_t[r];

			r = rows[k+3];
			c = cols[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			out_t[c] += (-1)*VA[k+3]*rhs_t[r];
		}
		/* Remainder: fewer than four entries left. */
		for(; k<nnz; ++k)
		{
			r = rows[k];
			c = cols[k];
			out[r] += (-1)*VA[k]*rhs[c];
			out_t[c] += (-1)*VA[k]*rhs_t[r];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, explicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, explicit diagonal, float values, rsb_half_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, explicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The conjugate-transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, explicit diagonal, float values, rsb_half_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The conjugate-transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		out[r] += (-1)*VA[k+0]*rhs[c];

		r = rowidx[k+1]; c = colidx[k+1];
		out[r] += (-1)*VA[k+1]*rhs[c];

		r = rowidx[k+2]; c = colidx[k+2];
		out[r] += (-1)*VA[k+2]*rhs[c];

		r = rowidx[k+3]; c = colidx[k+3];
		out[r] += (-1)*VA[k+3]*rhs[c];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		out[r] += (-1)*VA[k]*rhs[c];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		out[r] += (-1)*VA[k+0]*rhs[c];

		r = rowidx[k+1]; c = colidx[k+1];
		out[r] += (-1)*VA[k+1]*rhs[c];

		r = rowidx[k+2]; c = colidx[k+2];
		out[r] += (-1)*VA[k+2]*rhs[c];

		r = rowidx[k+3]; c = colidx[k+3];
		out[r] += (-1)*VA[k+3]*rhs[c];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		out[r] += (-1)*VA[k]*rhs[c];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * The roles of the indices are swapped: rhs is read at the row index, out is updated at the column index.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	/* Re-based operand vectors: the offsets of rhs and out are exchanged for the transposed product. */
	const float * const x = (rhs-coff)+roff;
	float * const y = (out-roff)+coff;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		y[c] += (-1)*VA[k+0]*x[r];

		r = rowidx[k+1]; c = colidx[k+1];
		y[c] += (-1)*VA[k+1]*x[r];

		r = rowidx[k+2]; c = colidx[k+2];
		y[c] += (-1)*VA[k+2]*x[r];

		r = rowidx[k+3]; c = colidx[k+3];
		y[c] += (-1)*VA[k+3]*x[r];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		y[c] += (-1)*VA[k]*x[r];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * The roles of the indices are swapped: rhs is read at the row index, out is updated at the column index.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	/* Re-based operand vectors: the offsets of rhs and out are exchanged for the transposed product. */
	const float * const x = (rhs-coff)+roff;
	float * const y = (out-roff)+coff;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		y[c] += (-1)*VA[k+0]*x[r];

		r = rowidx[k+1]; c = colidx[k+1];
		y[c] += (-1)*VA[k+1]*x[r];

		r = rowidx[k+2]; c = colidx[k+2];
		y[c] += (-1)*VA[k+2]*x[r];

		r = rowidx[k+3]; c = colidx[k+3];
		y[c] += (-1)*VA[k+3]*x[r];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		y[c] += (-1)*VA[k]*x[r];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * The values are float and no conjugation is performed: rhs is read at the row index,
	 * out is updated at the column index.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	/* Re-based operand vectors: the offsets of rhs and out are exchanged for the transposed product. */
	const float * const x = (rhs-coff)+roff;
	float * const y = (out-roff)+coff;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		y[c] += (-1)*VA[k+0]*x[r];

		r = rowidx[k+1]; c = colidx[k+1];
		y[c] += (-1)*VA[k+1]*x[r];

		r = rowidx[k+2]; c = colidx[k+2];
		y[c] += (-1)*VA[k+2]*x[r];

		r = rowidx[k+3]; c = colidx[k+3];
		y[c] += (-1)*VA[k+3]*x[r];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		y[c] += (-1)*VA[k]*x[r];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * The values are float and no conjugation is performed: rhs is read at the row index,
	 * out is updated at the column index.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	/* Re-based operand vectors: the offsets of rhs and out are exchanged for the transposed product. */
	const float * const x = (rhs-coff)+roff;
	float * const y = (out-roff)+coff;
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		y[c] += (-1)*VA[k+0]*x[r];

		r = rowidx[k+1]; c = colidx[k+1];
		y[c] += (-1)*VA[k+1]*x[r];

		r = rowidx[k+2]; c = colidx[k+2];
		y[c] += (-1)*VA[k+2]*x[r];

		r = rowidx[k+3]; c = colidx[k+3];
		y[c] += (-1)*VA[k+3]*x[r];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		y[c] += (-1)*VA[k]*x[r];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A \cdot x\f$, with \f$A = A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * Every stored nonzero is applied as entry (i,j) and, mirrored, as entry (j,i);
	 * when roff equals coff the mirrored update is skipped for i == j.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	/* Shifted views of rhs/out, used by the mirrored update when the offsets differ. */
	const float * const mrhs = rhs + (roff-coff);
	float * const mout = out + (coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: one nonzero at a time, guarding against doubling the diagonal. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rowidx[k];
			c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: four nonzeroes per iteration ... */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rowidx[k+0]; c = colidx[k+0];
			out[r] += (-1)*VA[k+0]*rhs[c];
			mout[c] += (-1)*VA[k+0]*mrhs[r];

			r = rowidx[k+1]; c = colidx[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			mout[c] += (-1)*VA[k+1]*mrhs[r];

			r = rowidx[k+2]; c = colidx[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			mout[c] += (-1)*VA[k+2]*mrhs[r];

			r = rowidx[k+3]; c = colidx[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			mout[c] += (-1)*VA[k+3]*mrhs[r];
		}
		/* ... then the remaining (at most three) ones. */
		while(k<nnz)
		{
			r = rowidx[k]; c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			mout[c] += (-1)*VA[k]*mrhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A \cdot x\f$, with \f$A = A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * Every stored nonzero is applied as entry (i,j) and, mirrored, as entry (j,i);
	 * when roff equals coff the mirrored update is skipped for i == j.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	/* Shifted views of rhs/out, used by the mirrored update when the offsets differ. */
	const float * const mrhs = rhs + (roff-coff);
	float * const mout = out + (coff-roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: one nonzero at a time, guarding against doubling the diagonal. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rowidx[k];
			c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: four nonzeroes per iteration ... */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rowidx[k+0]; c = colidx[k+0];
			out[r] += (-1)*VA[k+0]*rhs[c];
			mout[c] += (-1)*VA[k+0]*mrhs[r];

			r = rowidx[k+1]; c = colidx[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			mout[c] += (-1)*VA[k+1]*mrhs[r];

			r = rowidx[k+2]; c = colidx[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			mout[c] += (-1)*VA[k+2]*mrhs[r];

			r = rowidx[k+3]; c = colidx[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			mout[c] += (-1)*VA[k+3]*mrhs[r];
		}
		/* ... then the remaining (at most three) ones. */
		while(k<nnz)
		{
			r = rowidx[k]; c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			mout[c] += (-1)*VA[k]*mrhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A = A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A = A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A = A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The conjugate-transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A = A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The conjugate-transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * Every stored nonzero is applied as entry (i,j) and, mirrored, as entry (j,i);
	 * when roff equals coff the mirrored update is skipped for i == j.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	/* Shifted views of rhs/out, used by the mirrored update when the offsets differ. */
	const float * const mrhs = rhs + (roff-coff);
	float * const mout = out + (coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: one nonzero at a time, guarding against doubling the diagonal. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rowidx[k];
			c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: four nonzeroes per iteration ... */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rowidx[k+0]; c = colidx[k+0];
			out[r] += (-1)*VA[k+0]*rhs[c];
			mout[c] += (-1)*VA[k+0]*mrhs[r];

			r = rowidx[k+1]; c = colidx[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			mout[c] += (-1)*VA[k+1]*mrhs[r];

			r = rowidx[k+2]; c = colidx[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			mout[c] += (-1)*VA[k+2]*mrhs[r];

			r = rowidx[k+3]; c = colidx[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			mout[c] += (-1)*VA[k+3]*mrhs[r];
		}
		/* ... then the remaining (at most three) ones. */
		while(k<nnz)
		{
			r = rowidx[k]; c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			mout[c] += (-1)*VA[k]*mrhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * Every stored nonzero is applied as entry (i,j) and, mirrored, as entry (j,i);
	 * when roff equals coff the mirrored update is skipped for i == j.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* The implicit diagonal is not applied by this kernel: that is dealt with at caller level. */
	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	/* Shifted views of rhs/out, used by the mirrored update when the offsets differ. */
	const float * const mrhs = rhs + (roff-coff);
	float * const mout = out + (coff-roff);
	rsb_half_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if(roff==coff)
	{
		/* Equal offsets: one nonzero at a time, guarding against doubling the diagonal. */
		for(k=0; RSB_LIKELY(k<nnz); ++k)
		{
			r = rowidx[k];
			c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			if(RSB_LIKELY(r!=c))
				out[c] += (-1)*VA[k]*rhs[r];
		}
	}
	else
	{
		/* Different offsets: four nonzeroes per iteration ... */
		for(k=0; k+3<nnz; k+=4)
		{
			r = rowidx[k+0]; c = colidx[k+0];
			out[r] += (-1)*VA[k+0]*rhs[c];
			mout[c] += (-1)*VA[k+0]*mrhs[r];

			r = rowidx[k+1]; c = colidx[k+1];
			out[r] += (-1)*VA[k+1]*rhs[c];
			mout[c] += (-1)*VA[k+1]*mrhs[r];

			r = rowidx[k+2]; c = colidx[k+2];
			out[r] += (-1)*VA[k+2]*rhs[c];
			mout[c] += (-1)*VA[k+2]*mrhs[r];

			r = rowidx[k+3]; c = colidx[k+3];
			out[r] += (-1)*VA[k+3]*rhs[c];
			mout[c] += (-1)*VA[k+3]*mrhs[r];
		}
		/* ... then the remaining (at most three) ones. */
		while(k<nnz)
		{
			r = rowidx[k]; c = colidx[k];
			out[r] += (-1)*VA[k]*rhs[c];
			mout[c] += (-1)*VA[k]*mrhs[r];
			++k;
		}
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^T \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_C__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_coo_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The conjugate-transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_unua_float_H__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y - A^H \cdot x\f$, with \f$A = A^H\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, implicit diagonal, float values, rsb_half_idx_t indices.
	 * \return the value returned by the non-transposed kernel this forwards to.
	 */

	/* The implicit diagonal is not applied at this level: that is dealt with at caller level. */
	/* The conjugate-transposed symmetric product is delegated, arguments untouched, to the non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_unua_float_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel for \f$y \leftarrow y + A \cdot x\f$, with \f$A \neq A^T\f$.
	 * Matrix layout: 1 x 1 blocks, BCOR format, explicit diagonal, float values, rsb_coo_idx_t indices.
	 * rhs is accessed with stride incx and out with stride incy.
	 * \return RSB_ERR_NO_ERROR in all cases.
	 */

	/* bpntr carries the row indices, bindx the column indices. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main pass: four nonzeroes per iteration. */
	for(k=0; k+3<nnz; k+=4)
	{
		r = rowidx[k+0]; c = colidx[k+0];
		out[r*incy] += VA[k+0]*rhs[c*incx];

		r = rowidx[k+1]; c = colidx[k+1];
		out[r*incy] += VA[k+1]*rhs[c*incx];

		r = rowidx[k+2]; c = colidx[k+2];
		out[r*incy] += VA[k+2]*rhs[c*incx];

		r = rowidx[k+3]; c = colidx[k+3];
		out[r*incy] += VA[k+3]*rhs[c*incx];
	}
	/* Remainder: at most three nonzeroes. */
	while(k<nnz)
	{
		r = rowidx[k]; c = colidx[k];
		out[r*incy] += VA[k]*rhs[c*incx];
		++k;
	}

	/* Optional trace of the kernel name, enabled via the environment. */
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * Row indices are read through bpntr and column indices through bindx, both as
	 * rsb_half_idx_t. Each of the nnz entries k adds VA[k]*rhs[col*incx] to out[row*incy].
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		out[r * incy] += VA[k] * rhs[c * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		out[r * incy] += VA[k + 1] * rhs[c * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		out[r * incy] += VA[k + 2] * rhs[c * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		out[r * incy] += VA[k + 3] * rhs[c * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		out[r * incy] += VA[k] * rhs[c * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy], i.e. row and column roles are swapped.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy], i.e. row and column roles are swapped.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy]; the float values are used as stored.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy]; the float values are used as stored.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sU_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a symmetric matrix (\f$A == A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * Every stored entry contributes its direct term VA[k]*rhs[col*incx] to
	 * out[row*incy] plus a mirrored term with row and column exchanged:
	 *  - roff == coff: the mirrored term uses out/rhs directly and is skipped when row == col;
	 *  - roff != coff: the mirrored term uses pointers shifted by the offset difference
	 *    and is applied to every entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff) {
		/* Equal offsets: plain loop, mirrored update only for row != col. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			if (RSB_LIKELY(r != c)) {
				out[c * incy] += VA[k] * rhs[r * incx];
			}
		}
	} else {
		/* Shifted views of the vectors, used for the mirrored contribution. */
		const float *xt = rhs + incx * (roff - coff); // S
		float *yt = out + incy * (coff - roff);

		/* Four entries per trip, each applied as direct term then mirrored term. */
		while (k + 3 < nnz) {
			r = ridx[k];     c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			r = ridx[k + 1]; c = cidx[k + 1];
			out[r * incy] += VA[k + 1] * rhs[c * incx];
			yt[c * incy] += VA[k + 1] * xt[r * incx];
			r = ridx[k + 2]; c = cidx[k + 2];
			out[r * incy] += VA[k + 2] * rhs[c * incx];
			yt[c * incy] += VA[k + 2] * xt[r * incx];
			r = ridx[k + 3]; c = cidx[k + 3];
			out[r * incy] += VA[k + 3] * rhs[c * incx];
			yt[c * incy] += VA[k + 3] * xt[r * incx];
			k += 4;
		}

		/* Leftover entries (fewer than four). */
		while (k < nnz) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a symmetric matrix (\f$A == A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * Every stored entry contributes its direct term VA[k]*rhs[col*incx] to
	 * out[row*incy] plus a mirrored term with row and column exchanged:
	 *  - roff == coff: the mirrored term uses out/rhs directly and is skipped when row == col;
	 *  - roff != coff: the mirrored term uses pointers shifted by the offset difference
	 *    and is applied to every entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff) {
		/* Equal offsets: plain loop, mirrored update only for row != col. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			if (RSB_LIKELY(r != c)) {
				out[c * incy] += VA[k] * rhs[r * incx];
			}
		}
	} else {
		/* Shifted views of the vectors, used for the mirrored contribution. */
		const float *xt = rhs + incx * (roff - coff); // S
		float *yt = out + incy * (coff - roff);

		/* Four entries per trip, each applied as direct term then mirrored term. */
		while (k + 3 < nnz) {
			r = ridx[k];     c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			r = ridx[k + 1]; c = cidx[k + 1];
			out[r * incy] += VA[k + 1] * rhs[c * incx];
			yt[c * incy] += VA[k + 1] * xt[r * incx];
			r = ridx[k + 2]; c = cidx[k + 2];
			out[r * incy] += VA[k + 2] * rhs[c * incx];
			yt[c * incy] += VA[k + 2] * xt[r * incx];
			r = ridx[k + 3]; c = cidx[k + 3];
			out[r * incy] += VA[k + 3] * rhs[c * incx];
			yt[c * incy] += VA[k + 3] * xt[r * incx];
			k += 4;
		}

		/* Leftover entries (fewer than four). */
		while (k < nnz) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^T} \cdot x\f$ with \f$A == A^T\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) symmetric kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^T} \cdot x\f$ with \f$A == A^T\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) symmetric kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^H} \cdot x\f$ with \f$A == A^T\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) symmetric kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^H} \cdot x\f$ with \f$A == A^T\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) symmetric kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a hermitian matrix (\f$A == A^H\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * Every stored entry contributes its direct term VA[k]*rhs[col*incx] to
	 * out[row*incy] plus a mirrored term with row and column exchanged; the float
	 * value is used as stored in both terms:
	 *  - roff == coff: the mirrored term uses out/rhs directly and is skipped when row == col;
	 *  - roff != coff: the mirrored term uses pointers shifted by the offset difference
	 *    and is applied to every entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff) {
		/* Equal offsets: plain loop, mirrored update only for row != col. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			if (RSB_LIKELY(r != c)) {
				out[c * incy] += VA[k] * rhs[r * incx];
			}
		}
	} else {
		/* Shifted views of the vectors, used for the mirrored contribution. */
		const float *xt = rhs + incx * (roff - coff); // H
		float *yt = out + incy * (coff - roff);

		/* Four entries per trip, each applied as direct term then mirrored term. */
		while (k + 3 < nnz) {
			r = ridx[k];     c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			r = ridx[k + 1]; c = cidx[k + 1];
			out[r * incy] += VA[k + 1] * rhs[c * incx];
			yt[c * incy] += VA[k + 1] * xt[r * incx];
			r = ridx[k + 2]; c = cidx[k + 2];
			out[r * incy] += VA[k + 2] * rhs[c * incx];
			yt[c * incy] += VA[k + 2] * xt[r * incx];
			r = ridx[k + 3]; c = cidx[k + 3];
			out[r * incy] += VA[k + 3] * rhs[c * incx];
			yt[c * incy] += VA[k + 3] * xt[r * incx];
			k += 4;
		}

		/* Leftover entries (fewer than four). */
		while (k < nnz) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a hermitian matrix (\f$A == A^H\f$).
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * Every stored entry contributes its direct term VA[k]*rhs[col*incx] to
	 * out[row*incy] plus a mirrored term with row and column exchanged; the float
	 * value is used as stored in both terms:
	 *  - roff == coff: the mirrored term uses out/rhs directly and is skipped when row == col;
	 *  - roff != coff: the mirrored term uses pointers shifted by the offset difference
	 *    and is applied to every entry.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if (roff == coff) {
		/* Equal offsets: plain loop, mirrored update only for row != col. */
		for (k = 0; RSB_LIKELY(k < nnz); ++k) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			if (RSB_LIKELY(r != c)) {
				out[c * incy] += VA[k] * rhs[r * incx];
			}
		}
	} else {
		/* Shifted views of the vectors, used for the mirrored contribution. */
		const float *xt = rhs + incx * (roff - coff); // H
		float *yt = out + incy * (coff - roff);

		/* Four entries per trip, each applied as direct term then mirrored term. */
		while (k + 3 < nnz) {
			r = ridx[k];     c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			r = ridx[k + 1]; c = cidx[k + 1];
			out[r * incy] += VA[k + 1] * rhs[c * incx];
			yt[c * incy] += VA[k + 1] * xt[r * incx];
			r = ridx[k + 2]; c = cidx[k + 2];
			out[r * incy] += VA[k + 2] * rhs[c * incx];
			yt[c * incy] += VA[k + 2] * xt[r * incx];
			r = ridx[k + 3]; c = cidx[k + 3];
			out[r * incy] += VA[k + 3] * rhs[c * incx];
			yt[c * incy] += VA[k + 3] * xt[r * incx];
			k += 4;
		}

		/* Leftover entries (fewer than four). */
		while (k < nnz) {
			r = ridx[k];
			c = cidx[k];
			out[r * incy] += VA[k] * rhs[c * incx];
			yt[c * incy] += VA[k] * xt[r * incx];
			++k;
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^T} \cdot x\f$ with \f$A == A^H\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) hermitian kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^T} \cdot x\f$ with \f$A == A^H\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) hermitian kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^H} \cdot x\f$ with \f$A == A^H\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) hermitian kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Kernel entry for strided \f$y \leftarrow y + {A^H} \cdot x\f$ with \f$A == A^H\f$.
	 * A has 1 x 1 blocks in BCOR format, explicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * No arithmetic happens here: every argument is handed over unchanged to the
	 * untransposed (tN) hermitian kernel, and its result is returned.
	 * \return the error value produced by the tN kernel.
	 */

	/* Symmetric transposed reverts to symmetric not transposed. */
	const rsb_err_t errval = rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dE_uG(
		VA, rhs, out, Mdim, mdim, bindx, bpntr, indptr, rpntr, cpntr,
		br, bc, roff, coff, flags, nnz, incx, incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, implicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * Row indices are read through bpntr and column indices through bindx, both as
	 * rsb_coo_idx_t. Each of the nnz entries k adds VA[k]*rhs[col*incx] to out[row*incy].
	 * Only the stored entries are applied; nothing is added for the implicit diagonal.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not treated at this level; see the caller. */
	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		out[r * incy] += VA[k] * rhs[c * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		out[r * incy] += VA[k + 1] * rhs[c * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		out[r * incy] += VA[k + 2] * rhs[c * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		out[r * incy] += VA[k + 3] * rhs[c * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		out[r * incy] += VA[k] * rhs[c * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, implicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * Row indices are read through bpntr and column indices through bindx, both as
	 * rsb_half_idx_t. Each of the nnz entries k adds VA[k]*rhs[col*incx] to out[row*incy].
	 * Only the stored entries are applied; nothing is added for the implicit diagonal.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc, roff, coff and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not treated at this level; see the caller. */
	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		out[r * incy] += VA[k] * rhs[c * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		out[r * incy] += VA[k + 1] * rhs[c * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		out[r * incy] += VA[k + 2] * rhs[c * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		out[r * incy] += VA[k + 3] * rhs[c * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		out[r * incy] += VA[k] * rhs[c * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, implicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy], i.e. row and column roles are swapped.
	 * Only the stored entries are applied; nothing is added for the implicit diagonal.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not treated at this level; see the caller. */
	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^T} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, implicit diagonal, float values and rsb_half_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy], i.e. row and column roles are swapped.
	 * Only the stored entries are applied; nothing is added for the implicit diagonal.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not treated at this level; see the caller. */
	const rsb_half_idx_t *ridx = (const rsb_half_idx_t *)bpntr;
	const rsb_half_idx_t *cidx = (const rsb_half_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Strided accumulation \f$y \leftarrow y + {A^H} \cdot x\f$ for a general matrix (\f$A \neq A^T\f$).
	 * A has 1 x 1 blocks in BCOR format, implicit diagonal, float values and rsb_coo_idx_t column indices.
	 *
	 * The vector pointers are first rebased: rhs by (roff-coff)*incx and out by
	 * (coff-roff)*incy elements. Each of the nnz entries k then adds
	 * VA[k]*x[row*incx] to y[col*incy]; the float values are used as stored.
	 * Only the stored entries are applied; nothing is added for the implicit diagonal.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not read here.
	 * \return \rsb_errval_inp_param_msg (this kernel always returns RSB_ERR_NO_ERROR)
	 */

	/* NOTE: the implicit diagonal is not treated at this level; see the caller. */
	const rsb_coo_idx_t *ridx = (const rsb_coo_idx_t *)bpntr;
	const rsb_coo_idx_t *cidx = (const rsb_coo_idx_t *)bindx;
	const float *x = (rhs - coff * (incx)) + roff * (incx);
	float *y = (out - roff * (incy)) + coff * (incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Four entries per trip; entries are still applied strictly in storage order. */
	while (k + 3 < nnz) {
		r = ridx[k];     c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		r = ridx[k + 1]; c = cidx[k + 1];
		y[c * incy] += VA[k + 1] * x[r * incx];
		r = ridx[k + 2]; c = cidx[k + 2];
		y[c * incy] += VA[k + 2] * x[r * incx];
		r = ridx[k + 3]; c = cidx[k + 3];
		y[c * incy] += VA[k + 3] * x[r * incx];
		k += 4;
	}

	/* Leftover entries (fewer than four). */
	while (k < nnz) {
		r = ridx[k];
		c = cidx[k];
		y[c * incy] += VA[k] * x[r * incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if (rsb__getenv_int_t("RSB_VERBOSE_KERNELS", 0)) {
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Every one of the nnz stored entries k adds VA[k]*rhs[row*incx] to out[col*incy],
	 * with row read from bpntr and col read from bindx (both viewed as rsb_half_idx_t arrays).
	 * rhs and out are first shifted according to roff, coff and the strides incx, incy.
	 * Always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	rsb_coo_idx_t row=0,col=0;
	rsb_nnz_idx_t k=0;

	/* Rebase the vectors for the transposed access pattern. */
	rhs=rhs-coff*incx+roff*incx;
	out=out-roff*incy+coff*incy;

	/* Main sweep: four stored entries per iteration. */
	while(k+3<nnz)
	{
		row=rows[k  ]; col=cols[k  ];
		out[col*incy]+=VA[k  ]*rhs[row*incx];
		row=rows[k+1]; col=cols[k+1];
		out[col*incy]+=VA[k+1]*rhs[row*incx];
		row=rows[k+2]; col=cols[k+2];
		out[col*incy]+=VA[k+2]*rhs[row*incx];
		row=rows[k+3]; col=cols[k+3];
		out[col*incy]+=VA[k+3]*rhs[row*incx];
		k+=4;
	}
	/* Tail: the remaining (at most three) entries. */
	while(k<nnz)
	{
		row=rows[k]; col=cols[k];
		out[col*incy]+=VA[k]*rhs[row*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Each stored entry (row,col) updates out at row with rhs at col, plus a mirrored
	 * update of position col with the value at position row.
	 * When roff==coff the mirrored update goes to out/rhs and is skipped for row==col;
	 * otherwise it goes through shifted views of out and rhs and is always applied.
	 * Always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	/* Views of rhs/out shifted by the row/column offset difference, for the mirrored update. */
	const float * const trhs=rhs+incx*(roff-coff);
	float * const tout=out+incy*(coff-roff);
	rsb_coo_idx_t row=0,col=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: one entry at a time, mirroring only entries with row!=col. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			row=rows[k];
			col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			if(RSB_LIKELY(row!=col))
				out[col*incy]+=VA[k]*rhs[row*incx];
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, each one mirrored. */
		while(k+3<nnz)
		{
			row=rows[k  ]; col=cols[k  ];
			out[row*incy]+=VA[k  ]*rhs[col*incx];
			tout[col*incy]+=VA[k  ]*trhs[row*incx];
			row=rows[k+1]; col=cols[k+1];
			out[row*incy]+=VA[k+1]*rhs[col*incx];
			tout[col*incy]+=VA[k+1]*trhs[row*incx];
			row=rows[k+2]; col=cols[k+2];
			out[row*incy]+=VA[k+2]*rhs[col*incx];
			tout[col*incy]+=VA[k+2]*trhs[row*incx];
			row=rows[k+3]; col=cols[k+3];
			out[row*incy]+=VA[k+3]*rhs[col*incx];
			tout[col*incy]+=VA[k+3]*trhs[row*incx];
			k+=4;
		}
		/* Tail: the remaining (at most three) entries. */
		while(k<nnz)
		{
			row=rows[k]; col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			tout[col*incy]+=VA[k]*trhs[row*incx];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Each stored entry (row,col) updates out at row with rhs at col, plus a mirrored
	 * update of position col with the value at position row.
	 * When roff==coff the mirrored update goes to out/rhs and is skipped for row==col;
	 * otherwise it goes through shifted views of out and rhs and is always applied.
	 * Always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	/* Views of rhs/out shifted by the row/column offset difference, for the mirrored update. */
	const float * const trhs=rhs+incx*(roff-coff);
	float * const tout=out+incy*(coff-roff);
	rsb_coo_idx_t row=0,col=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: one entry at a time, mirroring only entries with row!=col. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			row=rows[k];
			col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			if(RSB_LIKELY(row!=col))
				out[col*incy]+=VA[k]*rhs[row*incx];
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, each one mirrored. */
		while(k+3<nnz)
		{
			row=rows[k  ]; col=cols[k  ];
			out[row*incy]+=VA[k  ]*rhs[col*incx];
			tout[col*incy]+=VA[k  ]*trhs[row*incx];
			row=rows[k+1]; col=cols[k+1];
			out[row*incy]+=VA[k+1]*rhs[col*incx];
			tout[col*incy]+=VA[k+1]*trhs[row*incx];
			row=rows[k+2]; col=cols[k+2];
			out[row*incy]+=VA[k+2]*rhs[col*incx];
			tout[col*incy]+=VA[k+2]*trhs[row*incx];
			row=rows[k+3]; col=cols[k+3];
			out[row*incy]+=VA[k+3]*rhs[col*incx];
			tout[col*incy]+=VA[k+3]*trhs[row*incx];
			k+=4;
		}
		/* Tail: the remaining (at most three) entries. */
		while(k<nnz)
		{
			row=rows[k]; col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			tout[col*incy]+=VA[k]*trhs[row*incx];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * symmetric kernel and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * symmetric kernel and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * symmetric kernel and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * symmetric kernel and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Each stored entry (row,col) updates out at row with rhs at col, plus a mirrored
	 * update of position col with the value at position row; the mirrored update uses
	 * VA[k] as stored (no conjugation is applied in this float kernel).
	 * When roff==coff the mirrored update goes to out/rhs and is skipped for row==col;
	 * otherwise it goes through shifted views of out and rhs and is always applied.
	 * Always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	/* Views of rhs/out shifted by the row/column offset difference, for the mirrored update. */
	const float * const trhs=rhs+incx*(roff-coff);
	float * const tout=out+incy*(coff-roff);
	rsb_coo_idx_t row=0,col=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: one entry at a time, mirroring only entries with row!=col. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			row=rows[k];
			col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			if(RSB_LIKELY(row!=col))
				out[col*incy]+=VA[k]*rhs[row*incx];
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, each one mirrored. */
		while(k+3<nnz)
		{
			row=rows[k  ]; col=cols[k  ];
			out[row*incy]+=VA[k  ]*rhs[col*incx];
			tout[col*incy]+=VA[k  ]*trhs[row*incx];
			row=rows[k+1]; col=cols[k+1];
			out[row*incy]+=VA[k+1]*rhs[col*incx];
			tout[col*incy]+=VA[k+1]*trhs[row*incx];
			row=rows[k+2]; col=cols[k+2];
			out[row*incy]+=VA[k+2]*rhs[col*incx];
			tout[col*incy]+=VA[k+2]*trhs[row*incx];
			row=rows[k+3]; col=cols[k+3];
			out[row*incy]+=VA[k+3]*rhs[col*incx];
			tout[col*incy]+=VA[k+3]*trhs[row*incx];
			k+=4;
		}
		/* Tail: the remaining (at most three) entries. */
		while(k<nnz)
		{
			row=rows[k]; col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			tout[col*incy]+=VA[k]*trhs[row*incx];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Each stored entry (row,col) updates out at row with rhs at col, plus a mirrored
	 * update of position col with the value at position row; the mirrored update uses
	 * VA[k] as stored (no conjugation is applied in this float kernel).
	 * When roff==coff the mirrored update goes to out/rhs and is skipped for row==col;
	 * otherwise it goes through shifted views of out and rhs and is always applied.
	 * Always returns RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	/* Views of rhs/out shifted by the row/column offset difference, for the mirrored update. */
	const float * const trhs=rhs+incx*(roff-coff);
	float * const tout=out+incy*(coff-roff);
	rsb_coo_idx_t row=0,col=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* Equal offsets: one entry at a time, mirroring only entries with row!=col. */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			row=rows[k];
			col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			if(RSB_LIKELY(row!=col))
				out[col*incy]+=VA[k]*rhs[row*incx];
		}
	}
	else
	{
		/* Different offsets: four entries per iteration, each one mirrored. */
		while(k+3<nnz)
		{
			row=rows[k  ]; col=cols[k  ];
			out[row*incy]+=VA[k  ]*rhs[col*incx];
			tout[col*incy]+=VA[k  ]*trhs[row*incx];
			row=rows[k+1]; col=cols[k+1];
			out[row*incy]+=VA[k+1]*rhs[col*incx];
			tout[col*incy]+=VA[k+1]*trhs[row*incx];
			row=rows[k+2]; col=cols[k+2];
			out[row*incy]+=VA[k+2]*rhs[col*incx];
			tout[col*incy]+=VA[k+2]*trhs[row*incx];
			row=rows[k+3]; col=cols[k+3];
			out[row*incy]+=VA[k+3]*rhs[col*incx];
			tout[col*incy]+=VA[k+3]*trhs[row*incx];
			k+=4;
		}
		/* Tail: the remaining (at most three) entries. */
		while(k<nnz)
		{
			row=rows[k]; col=cols[k];
			out[row*incy]+=VA[k]*rhs[col*incx];
			tout[col*incy]+=VA[k]*trhs[row*incx];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * kernel of the same symmetry and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^T} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * kernel of the same symmetry and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_C__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * kernel of the same symmetry and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sasa_float_H__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A^H} \cdot x, where A == A^H. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * Thin forwarder: passes every argument unchanged to the non-transposed
	 * kernel of the same symmetry and hands back whatever that kernel returns.
	 * \return \rsb_errval_inp_param_msg
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* Symmetric transposed reverts to symmetric not transposed */
	const rsb_err_t errval=rsb__BCOR_spmv_sasa_float_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,incx,incy);

	return errval;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from Mdim-1 down to 0 while the entry cursor n walks from nnz-1 downward.
	 * For each row, off-diagonal entries of that row are accumulated; the entry at which
	 * the walk stops is used as the divisor.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if the entries are exhausted before a divisor
	 * is found, or if the divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		ax=0;

		/* Gather the contributions of row ii's off-diagonal entries. */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*1];
		}

		/*
		 * n runs downward here, so running out of entries shows up as the n+1>0 test
		 * failing (the same test the loop above uses), not as n==nnz.
		 * Check it before VA[n] is touched to avoid reading before the array start.
		 */
		if(!(n+1>0) || n==nnz || VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*1]=(out[ii*1]-ax)/VA[n];
		--n;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from 0 up to Mdim-1 while the entry cursor walks from 0 upward.
	 * For each row, off-diagonal entries of that row are accumulated; the entry at which
	 * the walk stops is used as the divisor.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if the entries are exhausted before a divisor
	 * is found, or if the divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t row=0,col=0;
	rsb_coo_idx_t cur;
	rsb_nnz_idx_t k=0;

	for(cur=0;RSB_LIKELY(cur<Mdim);++cur)
	{
		float acc=0;

		/* Gather the contributions of the current row's off-diagonal entries. */
		while(RSB_LIKELY(k<nnz))
		{
			row=rows[k];
			col=cols[k];
			if(RSB_UNLIKELY(row!=cur || col==row))
				break;
			acc+=VA[k]*out[col];
			++k;
		}

		/* No entry left, or a zero divisor: the row cannot be solved. */
		if(k==nnz || VA[k]==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[cur]=(out[cur]-acc)/VA[k];
		++k;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dE_uL\n");
	}
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from Mdim-1 down to 0 while the entry cursor n walks from nnz-1 downward.
	 * For each row, off-diagonal entries of that row are accumulated; the entry at which
	 * the walk stops is used as the divisor.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if the entries are exhausted before a divisor
	 * is found, or if the divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	register rsb_half_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		ax=0;

		/* Gather the contributions of row ii's off-diagonal entries. */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*1];
		}

		/*
		 * n runs downward here, so running out of entries shows up as the n+1>0 test
		 * failing (the same test the loop above uses), not as n==nnz.
		 * Check it before VA[n] is touched to avoid reading before the array start.
		 */
		if(!(n+1>0) || n==nnz || VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*1]=(out[ii*1]-ax)/VA[n];
		--n;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from 0 up to Mdim-1 while the entry cursor walks from 0 upward.
	 * For each row, off-diagonal entries of that row are accumulated; the entry at which
	 * the walk stops is used as the divisor.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if the entries are exhausted before a divisor
	 * is found, or if the divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	rsb_half_idx_t row=0,col=0;
	rsb_coo_idx_t cur;
	rsb_nnz_idx_t k=0;

	for(cur=0;RSB_LIKELY(cur<Mdim);++cur)
	{
		float acc=0;

		/* Gather the contributions of the current row's off-diagonal entries. */
		while(RSB_LIKELY(k<nnz))
		{
			row=rows[k];
			col=cols[k];
			if(RSB_UNLIKELY(row!=cur || col==row))
				break;
			acc+=VA[k]*out[col];
			++k;
		}

		/* No entry left, or a zero divisor: the row cannot be solved. */
		if(k==nnz || VA[k]==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[cur]=(out[cur]-acc)/VA[k];
		++k;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dE_uL\n");
	}
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from 0 up to Mdim-1 while the entry cursor walks from 0 upward.
	 * For each row, the entry under the cursor is used as the divisor (its indices are
	 * not inspected); the following entries of the same row then subtract their
	 * contribution from out at their column position.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for a row, or if the
	 * divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t row=0,col=0;
	rsb_coo_idx_t cur;
	rsb_nnz_idx_t k=0;

	for(cur=0;RSB_LIKELY(cur<Mdim);++cur)
	{
		float diag;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		/* Finalize this unknown, then scatter it to the rest of its row. */
		out[cur]/=diag;
		xi=out[cur];
		while(RSB_LIKELY(k<nnz))
		{
			row=rows[k];
			col=cols[k];
			if(RSB_UNLIKELY(row!=cur))
				break;
			out[col]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dE_uU\n");
	}
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from Mdim-1 down to 0 while the entry cursor n walks from nnz-1 downward.
	 * For each row, the entry under the cursor is used as the divisor (its indices are
	 * not inspected); the preceding entries of the same row then subtract their
	 * contribution from out at their column position.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for a row, or if the
	 * divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		float aa;

		/*
		 * n runs downward here, so running out of entries shows up as the n+1>0 test
		 * failing (the same test the inner loop uses); n>=nnz alone cannot detect it.
		 * Check it before VA[n] is touched to avoid reading before the array start.
		 */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		/* Finalize this unknown, then scatter it to the rest of its row. */
		out[ii*1]/=aa;
		ax=out[ii*1];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from 0 up to Mdim-1 while the entry cursor walks from 0 upward.
	 * For each row, the entry under the cursor is used as the divisor (its indices are
	 * not inspected); the following entries of the same row then subtract their
	 * contribution from out at their column position.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for a row, or if the
	 * divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_half_idx_t * const rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const cols=(const rsb_half_idx_t*)bindx;
	rsb_half_idx_t row=0,col=0;
	rsb_coo_idx_t cur;
	rsb_nnz_idx_t k=0;

	for(cur=0;RSB_LIKELY(cur<Mdim);++cur)
	{
		float diag;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		/* Finalize this unknown, then scatter it to the rest of its row. */
		out[cur]/=diag;
		xi=out[cur];
		while(RSB_LIKELY(k<nnz))
		{
			row=rows[k];
			col=cols[k];
			if(RSB_UNLIKELY(row!=cur))
				break;
			out[col]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dE_uU\n");
	}
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^T}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from Mdim-1 down to 0 while the entry cursor n walks from nnz-1 downward.
	 * For each row, the entry under the cursor is used as the divisor (its indices are
	 * not inspected); the preceding entries of the same row then subtract their
	 * contribution from out at their column position.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for a row, or if the
	 * divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	register rsb_half_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		float aa;

		/*
		 * n runs downward here, so running out of entries shows up as the n+1>0 test
		 * failing (the same test the inner loop uses); n>=nnz alone cannot detect it.
		 * Check it before VA[n] is touched to avoid reading before the array start.
		 */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		/* Finalize this unknown, then scatter it to the rest of its row. */
		out[ii*1]/=aa;
		ax=out[ii*1];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x, where A \neq A^T. \f$
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The solve is done in place on out; rhs is not read by this kernel.
	 * Rows are visited from 0 up to Mdim-1 while the entry cursor walks from 0 upward.
	 * For each row, the entry under the cursor is used as the divisor (its indices are
	 * not inspected); the following entries of the same row then subtract their
	 * contribution from out at their column position.
	 * Returns RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for a row, or if the
	 * divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 * \return \rsb_errval_inp_param_msg
	 */

	const rsb_coo_idx_t * const rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const cols=(const rsb_coo_idx_t*)bindx;
	rsb_coo_idx_t row=0,col=0;
	rsb_coo_idx_t cur;
	rsb_nnz_idx_t k=0;

	for(cur=0;RSB_LIKELY(cur<Mdim);++cur)
	{
		float diag;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		diag=VA[k];
		if(diag==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;

		/* Finalize this unknown, then scatter it to the rest of its row. */
		out[cur]/=diag;
		xi=out[cur];
		while(RSB_LIKELY(k<nnz))
		{
			row=rows[k];
			col=cols[k];
			if(RSB_UNLIKELY(row!=cur))
				break;
			out[col]-=VA[k]*xi;
			++k;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
	{
		RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dE_uU\n");
	}
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 *
	 * The solve happens in place on \c out; \c rhs is not read here.
	 * Rows are swept from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0.
	 * For each row the nonzero under the cursor is taken as pivot (it is assumed,
	 * not checked, to be the diagonal entry of that row): out[ii] is divided by
	 * it, then VA[n]*out[ii] is subtracted from out[j] for each further nonzero
	 * whose row index equals ii.
	 *
	 * RSB_ERR_INVALID_NUMERICAL_DATA is returned if the nonzeroes are exhausted
	 * while rows are still pending, or if a pivot equals zero.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0);--ii)
	{
		float aa;
		float ax;

		/* n only moves downwards, so running out of nonzeroes shows up as n
		 * stepping below zero; n>=nnz alone would miss that for a signed
		 * rsb_nnz_idx_t and VA[n] would be read out of bounds. */
		if(!(n+1>0) || n>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;

		--n;
		out[ii*1]/=aa;
		ax=out[ii*1];

		/* scatter the solved entry along the remaining nonzeroes of row ii */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_coo_idx_t i=IA[n];
			const rsb_coo_idx_t j=JA[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows are swept upwards from 0. The nonzero under the cursor serves as the
	 * pivot of the row (it is not verified to lie on the diagonal): out[row] is
	 * divided by it, then the following nonzeroes of that row are scattered
	 * into out[column].
	 *
	 * Gives RSB_ERR_INVALID_NUMERICAL_DATA on a zero pivot, or when the
	 * nonzeroes end before the rows do.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		float pivot;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		pivot=VA[k];
		if(pivot==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;
		out[row]/=pivot;
		xi=out[row];

		/* push the solved entry along what is left of this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			++k;
		}
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow {A^H}^{-1} \cdot x\f$, where \f$A \neq A^T\f$.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 *
	 * The solve happens in place on \c out; \c rhs is not read here.
	 * Rows are swept from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0.
	 * For each row the nonzero under the cursor is taken as pivot (it is assumed,
	 * not checked, to be the diagonal entry of that row): out[ii] is divided by
	 * it, then VA[n]*out[ii] is subtracted from out[j] for each further nonzero
	 * whose row index equals ii.
	 *
	 * RSB_ERR_INVALID_NUMERICAL_DATA is returned if the nonzeroes are exhausted
	 * while rows are still pending, or if a pivot equals zero.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii;

	for(ii=Mdim-1;RSB_LIKELY(ii+1>0);--ii)
	{
		float aa;
		float ax;

		/* n only moves downwards, so running out of nonzeroes shows up as n
		 * stepping below zero; n>=nnz alone would miss that for a signed
		 * rsb_nnz_idx_t and VA[n] would be read out of bounds. */
		if(!(n+1>0) || n>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(aa==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;

		--n;
		out[ii*1]/=aa;
		ax=out[ii*1];

		/* scatter the solved entry along the remaining nonzeroes of row ii */
		for(;RSB_LIKELY(n+1>0);--n)
		{
			const rsb_half_idx_t i=IA[n];
			const rsb_half_idx_t j=JA[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*1]-=VA[n]*ax;
		}
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0. For each
	 * row the products VA*out[column] of its off-diagonal nonzeroes are summed
	 * and the sum is taken away from out[row]; no division is performed.
	 * A nonzero found on the diagonal ends the scan without being consumed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		float acc=0;

		/* gather the contributions of the already solved entries */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t r=rows[k];
			const rsb_coo_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc+=VA[k]*out[c];
			--k;
		}
		out[row]-=acc;
		--row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from 0 up to Mdim-1 and nonzeroes from 0 up to nnz-1. For each row
	 * the products VA*out[column] of its off-diagonal nonzeroes are summed and
	 * the sum is taken away from out[row]; no division is performed.
	 * A nonzero found on the diagonal ends the scan without being consumed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		float acc=0;

		/* gather the contributions of the already solved entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t r=rows[k];
			const rsb_coo_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc+=VA[k]*out[c];
			++k;
		}
		out[row]-=acc;
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0. For each
	 * row the products VA*out[column] of its off-diagonal nonzeroes are summed
	 * and the sum is taken away from out[row]; no division is performed.
	 * A nonzero found on the diagonal ends the scan without being consumed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		float acc=0;

		/* gather the contributions of the already solved entries */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc+=VA[k]*out[c];
			--k;
		}
		out[row]-=acc;
		--row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from 0 up to Mdim-1 and nonzeroes from 0 up to nnz-1. For each row
	 * the products VA*out[column] of its off-diagonal nonzeroes are summed and
	 * the sum is taken away from out[row]; no division is performed.
	 * A nonzero found on the diagonal ends the scan without being consumed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		float acc=0;

		/* gather the contributions of the already solved entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row || c==r))
				break;
			acc+=VA[k]*out[c];
			++k;
		}
		out[row]-=acc;
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^T}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from 0 up to Mdim-1 and nonzeroes from 0 up to nnz-1. The value
	 * out[row] is latched on entering the row, then for each nonzero of that row
	 * VA times the latched value is taken away from out[column]; no division is
	 * performed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t r=rows[k];
			const rsb_coo_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			++k;
		}
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^T}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0. The
	 * value out[row] is latched on entering the row, then for each nonzero of
	 * that row VA times the latched value is taken away from out[column]; no
	 * division is performed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t r=rows[k];
			const rsb_coo_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			--k;
		}
		--row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^T}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from 0 up to Mdim-1 and nonzeroes from 0 up to nnz-1. The value
	 * out[row] is latched on entering the row, then for each nonzero of that row
	 * VA times the latched value is taken away from out[column]; no division is
	 * performed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			++k;
		}
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^T}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0. The
	 * value out[row] is latched on entering the row, then for each nonzero of
	 * that row VA times the latched value is taken away from out[column]; no
	 * division is performed.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			--k;
		}
		--row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from 0 up to Mdim-1 and nonzeroes from 0 up to nnz-1. The value
	 * out[row] is latched on entering the row, then for each nonzero of that row
	 * VA times the latched value is taken away from out[column]; no division is
	 * performed, and the values are used as stored (no conjugation step).
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t r=rows[k];
			const rsb_coo_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			++k;
		}
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0. The
	 * value out[row] is latched on entering the row, then for each nonzero of
	 * that row VA times the latched value is taken away from out[column]; no
	 * division is performed, and the values are used as stored (no conjugation
	 * step).
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_coo_idx_t r=rows[k];
			const rsb_coo_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			--k;
		}
		--row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_C__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from 0 up to Mdim-1 and nonzeroes from 0 up to nnz-1. The value
	 * out[row] is latched on entering the row, then for each nonzero of that row
	 * VA times the latched value is taken away from out[column]; no division is
	 * performed, and the values are used as stored (no conjugation step).
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row=0;

	while(RSB_LIKELY(row<Mdim))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			++k;
		}
		++row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * rhs, float * out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place solve for \f$y \leftarrow {A^H}^{-1} \cdot x\f$, with \f$A \neq A^T\f$.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with implicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Only \c out is read and written; \c rhs is not accessed.
	 * Rows run from Mdim-1 down to 0 and nonzeroes from nnz-1 down to 0. The
	 * value out[row] is latched on entering the row, then for each nonzero of
	 * that row VA times the latched value is taken away from out[column]; no
	 * division is performed, and the values are used as stored (no conjugation
	 * step).
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k=nnz-1;
	rsb_coo_idx_t row=Mdim-1;

	while(RSB_LIKELY(row+1>0))
	{
		const float xi=out[row];

		/* scatter the latched entry along the nonzeroes of this row */
		while(RSB_LIKELY(k+1>0))
		{
			const rsb_half_idx_t r=rows[k];
			const rsb_half_idx_t c=cols[k];

			if(RSB_UNLIKELY(r!=row))
				break;
			out[c]-=VA[k]*xi;
			--k;
		}
		--row;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_uxua_float_H__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}




rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A} \cdot x\f$ into y, with \f$A \neq A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Per nonzero: out[row*incy] += alpha * VA * rhs[column*incx]. \c out is
	 * only added to; it is not scaled inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* bulk part: four nonzeroes per trip */
	while(k+3<nnz)
	{
		r=rows[k+0]; c=cols[k+0];
		out[r*incy]+=alpha*VA[k+0]*rhs[c*incx];
		r=rows[k+1]; c=cols[k+1];
		out[r*incy]+=alpha*VA[k+1]*rhs[c*incx];
		r=rows[k+2]; c=cols[k+2];
		out[r*incy]+=alpha*VA[k+2]*rhs[c*incx];
		r=rows[k+3]; c=cols[k+3];
		out[r*incy]+=alpha*VA[k+3]*rhs[c*incx];
		k+=4;
	}
	/* leftover nonzeroes, one at a time */
	while(k<nnz)
	{
		r=rows[k]; c=cols[k];
		out[r*incy]+=alpha*VA[k]*rhs[c*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A} \cdot x\f$ into y, with \f$A \neq A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * Per nonzero: out[row*incy] += alpha * VA * rhs[column*incx]. \c out is
	 * only added to; it is not scaled inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* bulk part: four nonzeroes per trip */
	while(k+3<nnz)
	{
		r=rows[k+0]; c=cols[k+0];
		out[r*incy]+=alpha*VA[k+0]*rhs[c*incx];
		r=rows[k+1]; c=cols[k+1];
		out[r*incy]+=alpha*VA[k+1]*rhs[c*incx];
		r=rows[k+2]; c=cols[k+2];
		out[r*incy]+=alpha*VA[k+2]*rhs[c*incx];
		r=rows[k+3]; c=cols[k+3];
		out[r*incy]+=alpha*VA[k+3]*rhs[c*incx];
		k+=4;
	}
	/* leftover nonzeroes, one at a time */
	while(k<nnz)
	{
		r=rows[k]; c=cols[k];
		out[r*incy]+=alpha*VA[k]*rhs[c*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A^T} \cdot x\f$ into y, with \f$A \neq A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * \c rhs and \c out are first re-based by the difference between roff and
	 * coff (scaled by the respective stride); then, per nonzero,
	 * out[column*incy] += alpha * VA * rhs[row*incx]. \c out is only added to;
	 * it is not scaled inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* swap the roles of the row and column offsets for the transposed product */
	rhs=(rhs-coff*(incx))+roff*(incx);
	out=(out-roff*(incy))+coff*(incy);

	/* bulk part: four nonzeroes per trip */
	while(k+3<nnz)
	{
		r=rows[k+0]; c=cols[k+0];
		out[c*incy]+=alpha*VA[k+0]*rhs[r*incx];
		r=rows[k+1]; c=cols[k+1];
		out[c*incy]+=alpha*VA[k+1]*rhs[r*incx];
		r=rows[k+2]; c=cols[k+2];
		out[c*incy]+=alpha*VA[k+2]*rhs[r*incx];
		r=rows[k+3]; c=cols[k+3];
		out[c*incy]+=alpha*VA[k+3]*rhs[r*incx];
		k+=4;
	}
	/* leftover nonzeroes, one at a time */
	while(k<nnz)
	{
		r=rows[k]; c=cols[k];
		out[c*incy]+=alpha*VA[k]*rhs[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A^T} \cdot x\f$ into y, with \f$A \neq A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * \c rhs and \c out are first re-based by the difference between roff and
	 * coff (scaled by the respective stride); then, per nonzero,
	 * out[column*incy] += alpha * VA * rhs[row*incx]. \c out is only added to;
	 * it is not scaled inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* swap the roles of the row and column offsets for the transposed product */
	rhs=(rhs-coff*(incx))+roff*(incx);
	out=(out-roff*(incy))+coff*(incy);

	/* bulk part: four nonzeroes per trip */
	while(k+3<nnz)
	{
		r=rows[k+0]; c=cols[k+0];
		out[c*incy]+=alpha*VA[k+0]*rhs[r*incx];
		r=rows[k+1]; c=cols[k+1];
		out[c*incy]+=alpha*VA[k+1]*rhs[r*incx];
		r=rows[k+2]; c=cols[k+2];
		out[c*incy]+=alpha*VA[k+2]*rhs[r*incx];
		r=rows[k+3]; c=cols[k+3];
		out[c*incy]+=alpha*VA[k+3]*rhs[r*incx];
		k+=4;
	}
	/* leftover nonzeroes, one at a time */
	while(k<nnz)
	{
		r=rows[k]; c=cols[k];
		out[c*incy]+=alpha*VA[k]*rhs[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A^H} \cdot x\f$ into y, with \f$A \neq A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * \c rhs and \c out are first re-based by the difference between roff and
	 * coff (scaled by the respective stride); then, per nonzero,
	 * out[column*incy] += alpha * VA * rhs[row*incx], with the values used as
	 * stored (no conjugation step). \c out is only added to; it is not scaled
	 * inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* swap the roles of the row and column offsets for the transposed product */
	rhs=(rhs-coff*(incx))+roff*(incx);
	out=(out-roff*(incy))+coff*(incy);

	/* bulk part: four nonzeroes per trip */
	while(k+3<nnz)
	{
		r=rows[k+0]; c=cols[k+0];
		out[c*incy]+=alpha*VA[k+0]*rhs[r*incx];
		r=rows[k+1]; c=cols[k+1];
		out[c*incy]+=alpha*VA[k+1]*rhs[r*incx];
		r=rows[k+2]; c=cols[k+2];
		out[c*incy]+=alpha*VA[k+2]*rhs[r*incx];
		r=rows[k+3]; c=cols[k+3];
		out[c*incy]+=alpha*VA[k+3]*rhs[r*incx];
		k+=4;
	}
	/* leftover nonzeroes, one at a time */
	while(k<nnz)
	{
		r=rows[k]; c=cols[k];
		out[c*incy]+=alpha*VA[k]*rhs[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sU_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A^H} \cdot x\f$ into y, with \f$A \neq A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_half_idx_t column indices.
	 *
	 * \c rhs and \c out are first re-based by the difference between roff and
	 * coff (scaled by the respective stride); then, per nonzero,
	 * out[column*incy] += alpha * VA * rhs[row*incx], with the values used as
	 * stored (no conjugation step). \c out is only added to; it is not scaled
	 * inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t *rows=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *cols=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	/* swap the roles of the row and column offsets for the transposed product */
	rhs=(rhs-coff*(incx))+roff*(incx);
	out=(out-roff*(incy))+coff*(incy);

	/* bulk part: four nonzeroes per trip */
	while(k+3<nnz)
	{
		r=rows[k+0]; c=cols[k+0];
		out[c*incy]+=alpha*VA[k+0]*rhs[r*incx];
		r=rows[k+1]; c=cols[k+1];
		out[c*incy]+=alpha*VA[k+1]*rhs[r*incx];
		r=rows[k+2]; c=cols[k+2];
		out[c*incy]+=alpha*VA[k+2]*rhs[r*incx];
		r=rows[k+3]; c=cols[k+3];
		out[c*incy]+=alpha*VA[k+3]*rhs[r*incx];
		k+=4;
	}
	/* leftover nonzeroes, one at a time */
	while(k<nnz)
	{
		r=rows[k]; c=cols[k];
		out[c*incy]+=alpha*VA[k]*rhs[r*incx];
		++k;
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sU_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$\alpha \cdot {A} \cdot x\f$ into y, with \f$A = A^T\f$,
	 * using incx and incy as x and y vector strides.
	 * A is 1 x 1 blocked, BCOR stored, float valued, with explicit diagonal and
	 * rsb_coo_idx_t column indices.
	 *
	 * Each stored nonzero contributes twice, once as (row,column) and once
	 * mirrored as (column,row). When roff equals coff, the mirrored update is
	 * skipped for nonzeroes with row equal to column. Otherwise the mirrored
	 * update is always applied, through copies of \c rhs and \c out shifted by
	 * the difference between roff and coff.
	 * \c out is only added to; it is not scaled inside this routine.
	 * Always ends with RSB_ERR_NO_ERROR.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t *rows=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *cols=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_coo_idx_t r=0,c=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* row and column offsets coincide: diagonal nonzeroes count once */
		while(RSB_LIKELY(k<nnz))
		{
			r=rows[k];
			c=cols[k];
			out[r*incy]+=alpha*VA[k]*rhs[c*incx];
			if(RSB_LIKELY(r!=c))
				out[c*incy]+=alpha*VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* views of the vectors as seen from the mirrored position */
		const float *trhs=rhs+incx*(roff-coff);
		float *tout=out+incy*(coff-roff);

		/* bulk part: four nonzeroes per trip */
		while(k+3<nnz)
		{
			r=rows[k+0]; c=cols[k+0];
			out[r*incy]+=alpha*VA[k+0]*rhs[c*incx];
			tout[c*incy]+=alpha*VA[k+0]*trhs[r*incx];
			r=rows[k+1]; c=cols[k+1];
			out[r*incy]+=alpha*VA[k+1]*rhs[c*incx];
			tout[c*incy]+=alpha*VA[k+1]*trhs[r*incx];
			r=rows[k+2]; c=cols[k+2];
			out[r*incy]+=alpha*VA[k+2]*rhs[c*incx];
			tout[c*incy]+=alpha*VA[k+2]*trhs[r*incx];
			r=rows[k+3]; c=cols[k+3];
			out[r*incy]+=alpha*VA[k+3]*rhs[c*incx];
			tout[c*incy]+=alpha*VA[k+3]*trhs[r*incx];
			k+=4;
		}
		/* leftover nonzeroes, one at a time */
		while(k<nnz)
		{
			r=rows[k]; c=cols[k];
			out[r*incy]+=alpha*VA[k]*rhs[c*incx];
			tout[c*incy]+=alpha*VA[k]*trhs[r*incx];
			++k;
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dE_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * Each stored entry (i,j) is applied at (i,j) and, mirrored, at (j,i);
	 * when roff equals coff, an entry with i == j is applied only once.
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t indices
	 * (both bpntr, for rows, and bindx, for columns, are read as rsb_half_idx_t arrays).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* rhs and out shifted by the row/column offset difference: used by the mirrored updates */
	const float * const mrhs = rhs+incx*(roff-coff);
	float * const mout = out+incy*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if( roff == coff )
	{
		/* Same row and column offset: skip the mirrored update when r == c. */
		while( RSB_LIKELY( k < nnz ) )
		{
			r = rowidx[k];
			c = colidx[k];
			out[r*incy] += scale*VA[k]*rhs[c*incx];
			if( RSB_LIKELY( r != c ) )
				out[c*incy] += scale*VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: direct and mirrored update for each entry, four entries per pass. */
		for( k = 0; k+3 < nnz; k += 4 )
		{
			r = rowidx[k  ]; c = colidx[k  ];
			out [r*incy] += scale*VA[k  ]*rhs [c*incx];
			mout[c*incy] += scale*VA[k  ]*mrhs[r*incx];
			r = rowidx[k+1]; c = colidx[k+1];
			out [r*incy] += scale*VA[k+1]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+1]*mrhs[r*incx];
			r = rowidx[k+2]; c = colidx[k+2];
			out [r*incy] += scale*VA[k+2]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+2]*mrhs[r*incx];
			r = rowidx[k+3]; c = colidx[k+3];
			out [r*incy] += scale*VA[k+3]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+3]*mrhs[r*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k )
		{
			r = rowidx[k]; c = colidx[k];
			out [r*incy] += scale*VA[k]*rhs [c*incx];
			mout[c*incy] += scale*VA[k]*mrhs[r*incx];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f${A^T} \cdot x\f$ entry point for \f$A = A^T\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f${A^T} \cdot x\f$ entry point for \f$A = A^T\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed product \f${A^H} \cdot x\f$ entry point for \f$A = A^T\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sS_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed product \f${A^H} \cdot x\f$ entry point for \f$A = A^T\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$.
	 * Each stored entry (i,j) is applied at (i,j) and, mirrored, at (j,i), using the same float value for both
	 * (no conjugation is performed); when roff equals coff, an entry with i == j is applied only once.
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t indices
	 * (row indices are read via bpntr, column indices via bindx).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* rhs and out shifted by the row/column offset difference: used by the mirrored updates */
	const float * const mrhs = rhs+incx*(roff-coff);
	float * const mout = out+incy*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if( roff == coff )
	{
		/* Same row and column offset: skip the mirrored update when r == c. */
		while( RSB_LIKELY( k < nnz ) )
		{
			r = rowidx[k];
			c = colidx[k];
			out[r*incy] += scale*VA[k]*rhs[c*incx];
			if( RSB_LIKELY( r != c ) )
				out[c*incy] += scale*VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: direct and mirrored update for each entry, four entries per pass. */
		for( k = 0; k+3 < nnz; k += 4 )
		{
			r = rowidx[k  ]; c = colidx[k  ];
			out [r*incy] += scale*VA[k  ]*rhs [c*incx];
			mout[c*incy] += scale*VA[k  ]*mrhs[r*incx];
			r = rowidx[k+1]; c = colidx[k+1];
			out [r*incy] += scale*VA[k+1]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+1]*mrhs[r*incx];
			r = rowidx[k+2]; c = colidx[k+2];
			out [r*incy] += scale*VA[k+2]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+2]*mrhs[r*incx];
			r = rowidx[k+3]; c = colidx[k+3];
			out [r*incy] += scale*VA[k+3]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+3]*mrhs[r*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k )
		{
			r = rowidx[k]; c = colidx[k];
			out [r*incy] += scale*VA[k]*rhs [c*incx];
			mout[c*incy] += scale*VA[k]*mrhs[r*incx];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^H\f$.
	 * Each stored entry (i,j) is applied at (i,j) and, mirrored, at (j,i), using the same float value for both
	 * (no conjugation is performed); when roff equals coff, an entry with i == j is applied only once.
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t indices
	 * (both bpntr, for rows, and bindx, for columns, are read as rsb_half_idx_t arrays).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* rhs and out shifted by the row/column offset difference: used by the mirrored updates */
	const float * const mrhs = rhs+incx*(roff-coff);
	float * const mout = out+incy*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if( roff == coff )
	{
		/* Same row and column offset: skip the mirrored update when r == c. */
		while( RSB_LIKELY( k < nnz ) )
		{
			r = rowidx[k];
			c = colidx[k];
			out[r*incy] += scale*VA[k]*rhs[c*incx];
			if( RSB_LIKELY( r != c ) )
				out[c*incy] += scale*VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: direct and mirrored update for each entry, four entries per pass. */
		for( k = 0; k+3 < nnz; k += 4 )
		{
			r = rowidx[k  ]; c = colidx[k  ];
			out [r*incy] += scale*VA[k  ]*rhs [c*incx];
			mout[c*incy] += scale*VA[k  ]*mrhs[r*incx];
			r = rowidx[k+1]; c = colidx[k+1];
			out [r*incy] += scale*VA[k+1]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+1]*mrhs[r*incx];
			r = rowidx[k+2]; c = colidx[k+2];
			out [r*incy] += scale*VA[k+2]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+2]*mrhs[r*incx];
			r = rowidx[k+3]; c = colidx[k+3];
			out [r*incy] += scale*VA[k+3]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+3]*mrhs[r*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k )
		{
			r = rowidx[k]; c = colidx[k];
			out [r*incy] += scale*VA[k]*rhs [c*incx];
			mout[c*incy] += scale*VA[k]*mrhs[r*incx];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dE_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f${A^T} \cdot x\f$ entry point for \f$A = A^H\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed product \f${A^T} \cdot x\f$ entry point for \f$A = A^H\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed product \f${A^H} \cdot x\f$ entry point for \f$A = A^H\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sH_dE_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed product \f${A^H} \cdot x\f$ entry point for \f$A = A^H\f$, with incx and incy as x and y vector strides.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * No computation happens here: all arguments are handed over unchanged to the non-transposed kernel.
	 * \return the value returned by rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dE_uG.
	 */

	/* For a symmetric operand the transposed case reverts to the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dE_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * Each stored entry (i,j) adds alpha * value * x[j*incx] to y[i*incy];
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t indices
	 * (row indices are read via bpntr, column indices via bindx).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, in storage order. */
	while( k+3 < nnz )
	{
		r = rowidx[k  ]; c = colidx[k  ];
		out[r*incy] += scale*VA[k  ]*rhs[c*incx];
		r = rowidx[k+1]; c = colidx[k+1];
		out[r*incy] += scale*VA[k+1]*rhs[c*incx];
		r = rowidx[k+2]; c = colidx[k+2];
		out[r*incy] += scale*VA[k+2]*rhs[c*incx];
		r = rowidx[k+3]; c = colidx[k+3];
		out[r*incy] += scale*VA[k+3]*rhs[c*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while( k < nnz )
	{
		r = rowidx[k]; c = colidx[k];
		out[r*incy] += scale*VA[k]*rhs[c*incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A \neq A^T\f$.
	 * Each stored entry (i,j) adds alpha * value * x[j*incx] to y[i*incy];
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t indices
	 * (both bpntr, for rows, and bindx, for columns, are read as rsb_half_idx_t arrays).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, in storage order. */
	while( k+3 < nnz )
	{
		r = rowidx[k  ]; c = colidx[k  ];
		out[r*incy] += scale*VA[k  ]*rhs[c*incx];
		r = rowidx[k+1]; c = colidx[k+1];
		out[r*incy] += scale*VA[k+1]*rhs[c*incx];
		r = rowidx[k+2]; c = colidx[k+2];
		out[r*incy] += scale*VA[k+2]*rhs[c*incx];
		r = rowidx[k+3]; c = colidx[k+3];
		out[r*incy] += scale*VA[k+3]*rhs[c*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while( k < nnz )
	{
		r = rowidx[k]; c = colidx[k];
		out[r*incy] += scale*VA[k]*rhs[c*incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * Each stored entry (i,j) adds alpha * value * x[i*incx] to y[j*incy], after rhs and out
	 * have been shifted by the difference between roff and coff;
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t indices
	 * (row indices are read via bpntr, column indices via bindx).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Transposition swaps the roles of the offsets: x moves from coff to roff, y from roff to coff. */
	const float * const x = (rhs-coff*(incx))+roff*(incx);
	float * const y = (out-roff*(incy))+coff*(incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, in storage order. */
	while( k+3 < nnz )
	{
		r = rowidx[k  ]; c = colidx[k  ];
		y[c*incy] += scale*VA[k  ]*x[r*incx];
		r = rowidx[k+1]; c = colidx[k+1];
		y[c*incy] += scale*VA[k+1]*x[r*incx];
		r = rowidx[k+2]; c = colidx[k+2];
		y[c*incy] += scale*VA[k+2]*x[r*incx];
		r = rowidx[k+3]; c = colidx[k+3];
		y[c*incy] += scale*VA[k+3]*x[r*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while( k < nnz )
	{
		r = rowidx[k]; c = colidx[k];
		y[c*incy] += scale*VA[k]*x[r*incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^T} \cdot x\f$, with \f$A \neq A^T\f$.
	 * Each stored entry (i,j) adds alpha * value * x[i*incx] to y[j*incy], after rhs and out
	 * have been shifted by the difference between roff and coff;
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t indices
	 * (both bpntr, for rows, and bindx, for columns, are read as rsb_half_idx_t arrays).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Transposition swaps the roles of the offsets: x moves from coff to roff, y from roff to coff. */
	const float * const x = (rhs-coff*(incx))+roff*(incx);
	float * const y = (out-roff*(incy))+coff*(incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, in storage order. */
	while( k+3 < nnz )
	{
		r = rowidx[k  ]; c = colidx[k  ];
		y[c*incy] += scale*VA[k  ]*x[r*incx];
		r = rowidx[k+1]; c = colidx[k+1];
		y[c*incy] += scale*VA[k+1]*x[r*incx];
		r = rowidx[k+2]; c = colidx[k+2];
		y[c*incy] += scale*VA[k+2]*x[r*incx];
		r = rowidx[k+3]; c = colidx[k+3];
		y[c*incy] += scale*VA[k+3]*x[r*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while( k < nnz )
	{
		r = rowidx[k]; c = colidx[k];
		y[c*incy] += scale*VA[k]*x[r*incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * Each stored entry (i,j) adds alpha * value * x[i*incx] to y[j*incy] (the float value is used as is,
	 * no conjugation is performed), after rhs and out have been shifted by the difference between roff and coff;
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t indices
	 * (row indices are read via bpntr, column indices via bindx).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* Transposition swaps the roles of the offsets: x moves from coff to roff, y from roff to coff. */
	const float * const x = (rhs-coff*(incx))+roff*(incx);
	float * const y = (out-roff*(incy))+coff*(incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, in storage order. */
	while( k+3 < nnz )
	{
		r = rowidx[k  ]; c = colidx[k  ];
		y[c*incy] += scale*VA[k  ]*x[r*incx];
		r = rowidx[k+1]; c = colidx[k+1];
		y[c*incy] += scale*VA[k+1]*x[r*incx];
		r = rowidx[k+2]; c = colidx[k+2];
		y[c*incy] += scale*VA[k+2]*x[r*incx];
		r = rowidx[k+3]; c = colidx[k+3];
		y[c*incy] += scale*VA[k+3]*x[r*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while( k < nnz )
	{
		r = rowidx[k]; c = colidx[k];
		y[c*incy] += scale*VA[k]*x[r*incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sU_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A^H} \cdot x\f$, with \f$A \neq A^T\f$.
	 * Each stored entry (i,j) adds alpha * value * x[i*incx] to y[j*incy] (the float value is used as is,
	 * no conjugation is performed), after rhs and out have been shifted by the difference between roff and coff;
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t indices
	 * (both bpntr, for rows, and bindx, for columns, are read as rsb_half_idx_t arrays).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* Transposition swaps the roles of the offsets: x moves from coff to roff, y from roff to coff. */
	const float * const x = (rhs-coff*(incx))+roff*(incx);
	float * const y = (out-roff*(incy))+coff*(incy);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	/* Main part: four entries per pass, in storage order. */
	while( k+3 < nnz )
	{
		r = rowidx[k  ]; c = colidx[k  ];
		y[c*incy] += scale*VA[k  ]*x[r*incx];
		r = rowidx[k+1]; c = colidx[k+1];
		y[c*incy] += scale*VA[k+1]*x[r*incx];
		r = rowidx[k+2]; c = colidx[k+2];
		y[c*incy] += scale*VA[k+2]*x[r*incx];
		r = rowidx[k+3]; c = colidx[k+3];
		y[c*incy] += scale*VA[k+3]*x[r*incx];
		k += 4;
	}
	/* Leftover entries (fewer than four). */
	while( k < nnz )
	{
		r = rowidx[k]; c = colidx[k];
		y[c*incy] += scale*VA[k]*x[r*incx];
		++k;
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sU_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * Each stored entry (i,j) is applied at (i,j) and, mirrored, at (j,i);
	 * when roff equals coff, an entry with i == j is applied only once.
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t indices
	 * (row indices are read via bpntr, column indices via bindx).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_coo_idx_t * const rowidx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t * const colidx = (const rsb_coo_idx_t*)bindx;
	const float scale = *alphap;
	/* rhs and out shifted by the row/column offset difference: used by the mirrored updates */
	const float * const mrhs = rhs+incx*(roff-coff);
	float * const mout = out+incy*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if( roff == coff )
	{
		/* Same row and column offset: skip the mirrored update when r == c. */
		while( RSB_LIKELY( k < nnz ) )
		{
			r = rowidx[k];
			c = colidx[k];
			out[r*incy] += scale*VA[k]*rhs[c*incx];
			if( RSB_LIKELY( r != c ) )
				out[c*incy] += scale*VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: direct and mirrored update for each entry, four entries per pass. */
		for( k = 0; k+3 < nnz; k += 4 )
		{
			r = rowidx[k  ]; c = colidx[k  ];
			out [r*incy] += scale*VA[k  ]*rhs [c*incx];
			mout[c*incy] += scale*VA[k  ]*mrhs[r*incx];
			r = rowidx[k+1]; c = colidx[k+1];
			out [r*incy] += scale*VA[k+1]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+1]*mrhs[r*incx];
			r = rowidx[k+2]; c = colidx[k+2];
			out [r*incy] += scale*VA[k+2]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+2]*mrhs[r*incx];
			r = rowidx[k+3]; c = colidx[k+3];
			out [r*incy] += scale*VA[k+3]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+3]*mrhs[r*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k )
		{
			r = rowidx[k]; c = colidx[k];
			out [r*incy] += scale*VA[k]*rhs [c*incx];
			mout[c*incy] += scale*VA[k]*mrhs[r*incx];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulates \f$y \leftarrow y + \alpha \cdot {A} \cdot x\f$, with \f$A = A^T\f$.
	 * Each stored entry (i,j) is applied at (i,j) and, mirrored, at (j,i);
	 * when roff equals coff, an entry with i == j is applied only once.
	 * incx and incy are the strides of the x (rhs) and y (out) vectors.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t indices
	 * (both bpntr, for rows, and bindx, for columns, are read as rsb_half_idx_t arrays).
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	/* NOTE: the implicit diagonal is not dealt with in this kernel: that is left to the caller level. */
	const rsb_half_idx_t * const rowidx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t * const colidx = (const rsb_half_idx_t*)bindx;
	const float scale = *alphap;
	/* rhs and out shifted by the row/column offset difference: used by the mirrored updates */
	const float * const mrhs = rhs+incx*(roff-coff);
	float * const mout = out+incy*(coff-roff);
	rsb_coo_idx_t r = 0, c = 0;
	rsb_nnz_idx_t k = 0;

	if( roff == coff )
	{
		/* Same row and column offset: skip the mirrored update when r == c. */
		while( RSB_LIKELY( k < nnz ) )
		{
			r = rowidx[k];
			c = colidx[k];
			out[r*incy] += scale*VA[k]*rhs[c*incx];
			if( RSB_LIKELY( r != c ) )
				out[c*incy] += scale*VA[k]*rhs[r*incx];
			++k;
		}
	}
	else
	{
		/* Different offsets: direct and mirrored update for each entry, four entries per pass. */
		for( k = 0; k+3 < nnz; k += 4 )
		{
			r = rowidx[k  ]; c = colidx[k  ];
			out [r*incy] += scale*VA[k  ]*rhs [c*incx];
			mout[c*incy] += scale*VA[k  ]*mrhs[r*incx];
			r = rowidx[k+1]; c = colidx[k+1];
			out [r*incy] += scale*VA[k+1]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+1]*mrhs[r*incx];
			r = rowidx[k+2]; c = colidx[k+2];
			out [r*incy] += scale*VA[k+2]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+2]*mrhs[r*incx];
			r = rowidx[k+3]; c = colidx[k+3];
			out [r*incy] += scale*VA[k+3]*rhs [c*incx];
			mout[c*incy] += scale*VA[k+3]*mrhs[r*incx];
		}
		/* Leftover entries (fewer than four). */
		for( ; k < nnz; ++k )
		{
			r = rowidx[k]; c = colidx[k];
			out [r*incy] += scale*VA[k]*rhs [c*incx];
			mout[c*incy] += scale*VA[k]*mrhs[r*incx];
		}
	}

	/* Optional trace, enabled through the RSB_VERBOSE_KERNELS environment variable. */
	if( rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0) )
	{
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dI_uG\n");
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (A^T) product entry point for a symmetric matrix (A == A^T).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* With A == A^T the transposed product is the same operation as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (A^T) product entry point for a symmetric matrix (A == A^T).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* With A == A^T the transposed product is the same operation as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed (A^H) product entry point for a symmetric matrix (A == A^T).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The symmetric transposed case is served by the symmetric non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sS_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed (A^H) product entry point for a symmetric matrix (A == A^T).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The symmetric transposed case is served by the symmetric non-transposed kernel. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sS_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian (A == A^H) multiply-accumulate into y (out) from x (rhs).
	 * Every stored entry (i,j,v) adds alpha*v*x[j] to y[i] and, mirrored, alpha*v*x[i] to y[j];
	 * the values are real floats and the code applies no conjugation.
	 * incx and incy are the strides of x and y.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read from bpntr and column indices from bindx, both accessed as
	 * rsb_coo_idx_t arrays of nnz elements.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *JA=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	/* x shifted by (roff-coff) and y shifted by (coff-roff) strided elements:
	 * these address the mirrored term when row and column offsets differ */
	const float *trhs = rhs+incx*(roff-coff);
	float *tout=out+incy*(coff-roff);
	rsb_coo_idx_t ri=0,ci=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* same row and column offset: the mirrored term is skipped for entries with i==j */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			ri=IA[k];
			ci=JA[k];
			out[ri*incy]+=(alpha)*VA[k]*rhs[ci*incx];
			if(RSB_LIKELY(ri!=ci))
				out[ci*incy]+=(alpha)*VA[k]*rhs[ri*incx];
		}
	}
	else
	{
		/* different offsets: main loop handles four entries per iteration, in storage order */
		for(k=0;k+3<nnz;k+=4)
		{
			ri=IA[k];
			ci=JA[k];
			out[ri*incy]+=(alpha)*VA[k]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k]*trhs[ri*incx];

			ri=IA[k+1];
			ci=JA[k+1];
			out[ri*incy]+=(alpha)*VA[k+1]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k+1]*trhs[ri*incx];

			ri=IA[k+2];
			ci=JA[k+2];
			out[ri*incy]+=(alpha)*VA[k+2]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k+2]*trhs[ri*incx];

			ri=IA[k+3];
			ci=JA[k+3];
			out[ri*incy]+=(alpha)*VA[k+3]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k+3]*trhs[ri*incx];
		}
		/* up to three leftover entries */
		for(;k<nnz;++k)
		{
			ri=IA[k];
			ci=JA[k];
			out[ri*incy]+=(alpha)*VA[k]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k]*trhs[ri*incx];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Hermitian (A == A^H) multiply-accumulate into y (out) from x (rhs).
	 * Every stored entry (i,j,v) adds alpha*v*x[j] to y[i] and, mirrored, alpha*v*x[i] to y[j];
	 * the values are real floats and the code applies no conjugation.
	 * incx and incy are the strides of x and y.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read from bpntr and column indices from bindx, both accessed as
	 * rsb_half_idx_t arrays of nnz elements.
	 * \return RSB_ERR_NO_ERROR, unconditionally
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *JA=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	/* x shifted by (roff-coff) and y shifted by (coff-roff) strided elements:
	 * these address the mirrored term when row and column offsets differ */
	const float *trhs = rhs+incx*(roff-coff);
	float *tout=out+incy*(coff-roff);
	rsb_coo_idx_t ri=0,ci=0;
	rsb_nnz_idx_t k=0;

	if(roff==coff)
	{
		/* same row and column offset: the mirrored term is skipped for entries with i==j */
		for(k=0;RSB_LIKELY(k<nnz);++k)
		{
			ri=IA[k];
			ci=JA[k];
			out[ri*incy]+=(alpha)*VA[k]*rhs[ci*incx];
			if(RSB_LIKELY(ri!=ci))
				out[ci*incy]+=(alpha)*VA[k]*rhs[ri*incx];
		}
	}
	else
	{
		/* different offsets: main loop handles four entries per iteration, in storage order */
		for(k=0;k+3<nnz;k+=4)
		{
			ri=IA[k];
			ci=JA[k];
			out[ri*incy]+=(alpha)*VA[k]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k]*trhs[ri*incx];

			ri=IA[k+1];
			ci=JA[k+1];
			out[ri*incy]+=(alpha)*VA[k+1]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k+1]*trhs[ri*incx];

			ri=IA[k+2];
			ci=JA[k+2];
			out[ri*incy]+=(alpha)*VA[k+2]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k+2]*trhs[ri*incx];

			ri=IA[k+3];
			ci=JA[k+3];
			out[ri*incy]+=(alpha)*VA[k+3]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k+3]*trhs[ri*incx];
		}
		/* up to three leftover entries */
		for(;k<nnz;++k)
		{
			ri=IA[k];
			ci=JA[k];
			out[ri*incy]+=(alpha)*VA[k]*rhs[ci*incx];
			tout[ci*incy]+=(alpha)*VA[k]*trhs[ri*incx];
		}
	}

	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dI_uG\n");
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (A^T) product entry point for a Hermitian matrix (A == A^H).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case is served by the non-transposed kernel of the same symmetry. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tT_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Transposed (A^T) product entry point for a Hermitian matrix (A == A^H).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* The transposed case is served by the non-transposed kernel of the same symmetry. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_C__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed (A^H) product entry point for a Hermitian matrix (A == A^H).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* With A == A^H the conjugate-transposed product is the same operation as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_C__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}



rsb_err_t rsb__BCOR_spmv_sxsa_float_H__tC_r1_c1_uu_sH_dI_uG(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Conjugate-transposed (A^H) product entry point for a Hermitian matrix (A == A^H).
	 * Does no work of its own: all arguments are passed on unchanged to the
	 * non-transposed kernel rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dI_uG.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return the value returned by the non-transposed kernel
	 */

	/* NOTE: Diagonal implicit is not really handled here: look at caller level. */
	/* With A == A^H the conjugate-transposed product is the same operation as the non-transposed one. */
	const rsb_err_t errval = rsb__BCOR_spmv_sxsa_float_H__tN_r1_c1_uu_sH_dI_uG(VA,rhs,out,Mdim,mdim,bindx,bpntr,indptr,rpntr,cpntr,br,bc,roff,coff,flags,nnz,alphap,incx,incy);

	return errval;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place backward sweep on out (stride incy): rows are visited from Mdim-1 down to 0
	 * and the nnz entries are consumed from the last one to the first one.
	 * For each row ii, the consecutive entries with row index ii and a column index j != ii
	 * are accumulated as ax += VA[n]*out[j*incy]; the entry at which that scan stops is used
	 * as the divisor: out[ii*incy] = (alpha*out[ii*incy]-ax)/VA[n].
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the entries run out before a divisor is
	 *         found for some row, or if that divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const float alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		ax=0;

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*incy];
		}

		/* n walks downwards here, so exhausting the entries leaves it below the first
		 * valid index (never at nnz): reject any n outside [0,nnz) before reading VA[n]. */
		if(!(n+1>0) || n>=nnz || VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*incy]=((alpha)*out[ii*incy]-ax)/VA[n];
		--n;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place forward sweep on out (stride incy): rows are visited from 0 up to Mdim-1
	 * and the nnz entries are consumed from the first one to the last one.
	 * For each row, the consecutive entries with that row index and a column index
	 * different from it are accumulated as acc += VA[k]*out[column*incy]; the entry at which
	 * that scan stops is used as the divisor: out[row*incy] = (alpha*out[row*incy]-acc)/VA[k].
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the entries run out before a divisor is
	 *         found for some row, or if that divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *JA=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		float acc=0;

		/* gather the off-diagonal entries stored for this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t ri=IA[k];
			const rsb_coo_idx_t ci=JA[k];

			if(RSB_UNLIKELY(ri!=row || ci==ri))
				break;
			acc += VA[k]*out[ci*incy];
			++k;
		}

		/* k now designates the divisor entry: it must exist and be nonzero */
		if(k==nnz || VA[k]==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[row*incy]=((alpha)*out[row*incy]-acc)/VA[k];
		++k;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place backward sweep on out (stride incy): rows are visited from Mdim-1 down to 0
	 * and the nnz entries are consumed from the last one to the first one.
	 * For each row ii, the consecutive entries with row index ii and a column index j != ii
	 * are accumulated as ax += VA[n]*out[j*incy]; the entry at which that scan stops is used
	 * as the divisor: out[ii*incy] = (alpha*out[ii*incy]-ax)/VA[n].
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the entries run out before a divisor is
	 *         found for some row, or if that divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const float alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		ax=0;

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii && j!=i)))
				break;
			ax += VA[n]*out[j*incy];
		}

		/* n walks downwards here, so exhausting the entries leaves it below the first
		 * valid index (never at nnz): reject any n outside [0,nnz) before reading VA[n]. */
		if(!(n+1>0) || n>=nnz || VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[ii*incy]=((alpha)*out[ii*incy]-ax)/VA[n];
		--n;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place forward sweep on out (stride incy): rows are visited from 0 up to Mdim-1
	 * and the nnz entries are consumed from the first one to the last one.
	 * For each row, the consecutive entries with that row index and a column index
	 * different from it are accumulated as acc += VA[k]*out[column*incy]; the entry at which
	 * that scan stops is used as the divisor: out[row*incy] = (alpha*out[row*incy]-acc)/VA[k].
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if the entries run out before a divisor is
	 *         found for some row, or if that divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *JA=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		float acc=0;

		/* gather the off-diagonal entries stored for this row */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t ri=IA[k];
			const rsb_coo_idx_t ci=JA[k];

			if(RSB_UNLIKELY(ri!=row || ci==ri))
				break;
			acc += VA[k]*out[ci*incy];
			++k;
		}

		/* k now designates the divisor entry: it must exist and be nonzero */
		if(k==nnz || VA[k]==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		out[row*incy]=((alpha)*out[row*incy]-acc)/VA[k];
		++k;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place forward sweep on out (stride incy) for the transposed operation:
	 * rows are visited from 0 up to Mdim-1 and the nnz entries are consumed first to last.
	 * For each row, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[row*incy] is divided by it; every following entry carrying
	 * that row index then updates out[column*incy] -= VA[k]*out[row*incy]; finally
	 * out[row*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *JA=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		float divisor;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		divisor=VA[k];
		if(divisor==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;
		out[row*incy]/=divisor;
		xi=out[row*incy];

		/* propagate this row's value to the columns of its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t ri=IA[k];
			const rsb_coo_idx_t ci=JA[k];

			if(RSB_UNLIKELY(ri!=row))
				break;
			out[ci*incy]-=VA[k]*xi;
			++k;
		}

		out[row*incy]=((alpha)*out[row*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place backward sweep on out (stride incy) for the transposed operation:
	 * rows are visited from Mdim-1 down to 0 and the nnz entries are consumed last to first.
	 * For each row ii, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[ii*incy] is divided by it; every preceding entry carrying
	 * row index ii then updates out[j*incy] -= VA[n]*out[ii*incy]; finally
	 * out[ii*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const float alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		float aa;
		/* n walks downwards here, so exhausting the entries leaves it below the first
		 * valid index: reject any n outside [0,nnz) before reading VA[n]. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place forward sweep on out (stride incy) for the transposed operation:
	 * rows are visited from 0 up to Mdim-1 and the nnz entries are consumed first to last.
	 * For each row, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[row*incy] is divided by it; every following entry carrying
	 * that row index then updates out[column*incy] -= VA[k]*out[row*incy]; finally
	 * out[row*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *JA=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		float divisor;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		divisor=VA[k];
		if(divisor==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;
		out[row*incy]/=divisor;
		xi=out[row*incy];

		/* propagate this row's value to the columns of its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t ri=IA[k];
			const rsb_coo_idx_t ci=JA[k];

			if(RSB_UNLIKELY(ri!=row))
				break;
			out[ci*incy]-=VA[k]*xi;
			++k;
		}

		out[row*incy]=((alpha)*out[row*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place backward sweep on out (stride incy) for the transposed operation:
	 * rows are visited from Mdim-1 down to 0 and the nnz entries are consumed last to first.
	 * For each row ii, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[ii*incy] is divided by it; every preceding entry carrying
	 * row index ii then updates out[j*incy] -= VA[n]*out[ii*incy]; finally
	 * out[ii*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const float alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		float aa;
		/* n walks downwards here, so exhausting the entries leaves it below the first
		 * valid index: reject any n outside [0,nnz) before reading VA[n]. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place forward sweep on out (stride incy) for the conjugate-transposed operation
	 * (the values are real floats and the code applies no conjugation):
	 * rows are visited from 0 up to Mdim-1 and the nnz entries are consumed first to last.
	 * For each row, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[row*incy] is divided by it; every following entry carrying
	 * that row index then updates out[column*incy] -= VA[k]*out[row*incy]; finally
	 * out[row*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *JA=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		float divisor;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		divisor=VA[k];
		if(divisor==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;
		out[row*incy]/=divisor;
		xi=out[row*incy];

		/* propagate this row's value to the columns of its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t ri=IA[k];
			const rsb_coo_idx_t ci=JA[k];

			if(RSB_UNLIKELY(ri!=row))
				break;
			out[ci*incy]-=VA[k]*xi;
			++k;
		}

		out[row*incy]=((alpha)*out[row*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place backward sweep on out (stride incy) for the conjugate-transposed operation
	 * (the values are real floats and the code applies no conjugation):
	 * rows are visited from Mdim-1 down to 0 and the nnz entries are consumed last to first.
	 * For each row ii, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[ii*incy] is divided by it; every preceding entry carrying
	 * row index ii then updates out[j*incy] -= VA[n]*out[ii*incy]; finally
	 * out[ii*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_coo_idx_t *IA=(const rsb_coo_idx_t*)bpntr, *JA=(const rsb_coo_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const float alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		float aa;
		/* n walks downwards here, so exhausting the entries leaves it below the first
		 * valid index: reject any n outside [0,nnz) before reading VA[n]. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dE_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place forward sweep on out (stride incy) for the conjugate-transposed operation
	 * (the values are real floats and the code applies no conjugation):
	 * rows are visited from 0 up to Mdim-1 and the nnz entries are consumed first to last.
	 * For each row, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[row*incy] is divided by it; every following entry carrying
	 * that row index then updates out[column*incy] -= VA[k]*out[row*incy]; finally
	 * out[row*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *JA=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t k=0;
	rsb_coo_idx_t row;

	for(row=0;RSB_LIKELY(row<Mdim);++row)
	{
		float divisor;
		float xi;

		if(k>=nnz)
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		divisor=VA[k];
		if(divisor==((float)(0)))
			return RSB_ERR_INVALID_NUMERICAL_DATA;
		++k;
		out[row*incy]/=divisor;
		xi=out[row*incy];

		/* propagate this row's value to the columns of its remaining entries */
		while(RSB_LIKELY(k<nnz))
		{
			const rsb_coo_idx_t ri=IA[k];
			const rsb_coo_idx_t ci=JA[k];

			if(RSB_UNLIKELY(ri!=row))
				break;
			out[ci*incy]-=VA[k]*xi;
			++k;
		}

		out[row*incy]=((alpha)*out[row*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dE_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dE_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place backward sweep on out (stride incy) for the conjugate-transposed operation
	 * (the values are real floats and the code applies no conjugation):
	 * rows are visited from Mdim-1 down to 0 and the nnz entries are consumed last to first.
	 * For each row ii, the next unread entry is taken as the divisor (its indices are not
	 * inspected) and out[ii*incy] is divided by it; every preceding entry carrying
	 * row index ii then updates out[j*incy] -= VA[n]*out[ii*incy]; finally
	 * out[ii*incy] is multiplied by alpha.
	 * rhs is not read by this kernel.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return RSB_ERR_INVALID_NUMERICAL_DATA if no entry is left for some row, or if the
	 *         divisor is zero; RSB_ERR_NO_ERROR otherwise.
	 */

	register rsb_coo_idx_t i=0,j=0;
	const rsb_half_idx_t *IA=(const rsb_half_idx_t*)bpntr, *JA=(const rsb_half_idx_t*)bindx;
	register rsb_nnz_idx_t n=0;
	const float alpha=*alphap;
	rsb_coo_idx_t ii;

	for(n=nnz-1,ii=Mdim-1;RSB_LIKELY(ii+1>0) ;--ii)
	{
		float ax;
		float aa;
		/* n walks downwards here, so exhausting the entries leaves it below the first
		 * valid index: reject any n outside [0,nnz) before reading VA[n]. */
		if(!(n+1>0) || n>=nnz)return RSB_ERR_INVALID_NUMERICAL_DATA;
		aa=VA[n];
		if(VA[n]==((float)(0)))return RSB_ERR_INVALID_NUMERICAL_DATA;

		n--;
		out[ii*incy]/=aa;
		ax=out[ii*incy];

		for(;RSB_LIKELY(n+1>0);--n)
		{
			i=IA[n];
			j=JA[n];
			if(RSB_UNLIKELY(!(i==ii )))
				break;
			out[j*incy]-=VA[n]*ax;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dE_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_coo_idx_t.
	 * Rows are visited in descending order while the entries are scanned from the last one backwards.
	 * For each row the products \c VA[n]*out[col*incy] of the consecutive entries carrying this row index are summed;
	 * the scan stops at the first entry with another row index or with equal row and column index.
	 * The row result is \c (*alphap)*out[row*incy] minus that sum; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii=Mdim-1;

	while(RSB_LIKELY(ii+1>0))
	{
		float ax=0;

		while(RSB_LIKELY(n+1>0))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii || j==i))
				break;
			ax += VA[n]*out[j*incy];
			--n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]-ax);
		--ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_coo_idx_t.
	 * Rows are visited in ascending order while the entries are scanned from the first one forwards.
	 * For each row the products \c VA[n]*out[col*incy] of the consecutive entries carrying this row index are summed;
	 * the scan stops at the first entry with another row index or with equal row and column index.
	 * The row result is \c (*alphap)*out[row*incy] minus that sum; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii=0;

	while(RSB_LIKELY(ii<Mdim))
	{
		float ax=0;

		while(RSB_LIKELY(n<nnz))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii || j==i))
				break;
			ax += VA[n]*out[j*incy];
			++n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]-ax);
		++ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_half_idx_t.
	 * Rows are visited in descending order while the entries are scanned from the last one backwards.
	 * For each row the products \c VA[n]*out[col*incy] of the consecutive entries carrying this row index are summed;
	 * the scan stops at the first entry with another row index or with equal row and column index.
	 * The row result is \c (*alphap)*out[row*incy] minus that sum; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii=Mdim-1;

	while(RSB_LIKELY(ii+1>0))
	{
		float ax=0;

		while(RSB_LIKELY(n+1>0))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii || j==i))
				break;
			ax += VA[n]*out[j*incy];
			--n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]-ax);
		--ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_half_idx_t.
	 * Rows are visited in ascending order while the entries are scanned from the first one forwards.
	 * For each row the products \c VA[n]*out[col*incy] of the consecutive entries carrying this row index are summed;
	 * the scan stops at the first entry with another row index or with equal row and column index.
	 * The row result is \c (*alphap)*out[row*incy] minus that sum; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii=0;

	while(RSB_LIKELY(ii<Mdim))
	{
		float ax=0;

		while(RSB_LIKELY(n<nnz))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii || j==i))
				break;
			ax += VA[n]*out[j*incy];
			++n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]-ax);
		++ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tN_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_coo_idx_t.
	 * Rows are visited in ascending order while the entries are scanned from the first one forwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii=0;

	while(RSB_LIKELY(ii<Mdim))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n<nnz))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			++n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		++ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_coo_idx_t.
	 * Rows are visited in descending order while the entries are scanned from the last one backwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii=Mdim-1;

	while(RSB_LIKELY(ii+1>0))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n+1>0))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			--n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		--ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_half_idx_t.
	 * Rows are visited in ascending order while the entries are scanned from the first one forwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii=0;

	while(RSB_LIKELY(ii<Mdim))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n<nnz))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			++n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		++ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_half_idx_t.
	 * Rows are visited in descending order while the entries are scanned from the last one backwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii=Mdim-1;

	while(RSB_LIKELY(ii+1>0))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n+1>0))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			--n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		--ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tT_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_coo_idx_t.
	 * Rows are visited in ascending order while the entries are scanned from the first one forwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii=0;

	while(RSB_LIKELY(ii<Mdim))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n<nnz))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			++n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		++ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_coo_idx_t.
	 * Rows are visited in descending order while the entries are scanned from the last one backwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii=Mdim-1;

	while(RSB_LIKELY(ii+1>0))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n+1>0))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			--n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		--ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_C__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}


rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dI_uU(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_half_idx_t.
	 * Rows are visited in ascending order while the entries are scanned from the first one forwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=0;
	rsb_coo_idx_t ii=0;

	while(RSB_LIKELY(ii<Mdim))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n<nnz))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			++n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		++ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dI_uU\n");
	return RSB_ERR_NO_ERROR;
}

rsb_err_t rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dI_uL(const float * restrict VA, const float * restrict rhs, float * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz,const float * restrict alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place sweep over \c out for a blocked 1 x 1 matrix, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * Row indices are read through \c bpntr and column indices through \c bindx, both as rsb_half_idx_t.
	 * Rows are visited in descending order while the entries are scanned from the last one backwards.
	 * For each row \c out[row*incy] is read once into a local; for each consecutive entry carrying this row index,
	 * that local times the entry value is subtracted from \c out[col*incy]. The scan stops at the first entry with another row index.
	 * Finally \c out[row*incy] is multiplied by \c *alphap; no stored diagonal value is used.
	 * Only \c VA, \c out, \c Mdim, \c bindx, \c bpntr, \c nnz, \c alphap and \c incy are used here.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	const float alpha=*alphap;
	rsb_nnz_idx_t n=nnz-1;
	rsb_coo_idx_t ii=Mdim-1;

	while(RSB_LIKELY(ii+1>0))
	{
		const float ax=out[ii*incy];

		while(RSB_LIKELY(n+1>0))
		{
			const rsb_coo_idx_t i=rowidx[n];
			const rsb_coo_idx_t j=colidx[n];

			if(RSB_UNLIKELY(i!=ii))
				break;
			out[j*incy]-=VA[n]*ax;
			--n;
		}

		out[ii*incy]=((alpha)*out[ii*incy]);
		--ii;
	}
	if(rsb__getenv_int_t("RSB_VERBOSE_KERNELS",0))
		RSB_STDOUT("in rsb__BCOR_spsv_sxsx_float_H__tC_r1_c1_uu_sU_dI_uL\n");
	return RSB_ERR_NO_ERROR;
}




rsb_err_t rsb__BCOR_infty_norm_float_C__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ : for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i], where \c i is the entry's row index read (as rsb_coo_idx_t) through \c bpntr.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bpntr, \c roff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t i=rowidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ : for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i], where \c i is the entry's row index read (as rsb_half_idx_t) through \c bpntr.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bpntr, \c roff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t i=rowidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , indexed by column: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[coff+j], where \c j is the entry's column index read (as rsb_coo_idx_t) through \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t j=colidx[n];

		row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , indexed by column: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[coff+j], where \c j is the entry's column index read (as rsb_half_idx_t) through \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t j=colidx[n];

		row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , indexed by column: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[coff+j], where \c j is the entry's column index read (as rsb_coo_idx_t) through \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t j=colidx[n];

		row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , indexed by column: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[coff+j], where \c j is the entry's column index read (as rsb_half_idx_t) through \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t j=colidx[n];

		row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^T: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_coo_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t i=rowidx[n];
		const rsb_coo_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^T: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_half_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t i=rowidx[n];
		const rsb_half_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^T: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_coo_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t i=rowidx[n];
		const rsb_coo_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^T: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_half_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t i=rowidx[n];
		const rsb_half_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^T: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_coo_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t i=rowidx[n];
		const rsb_coo_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^T: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_half_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t i=rowidx[n];
		const rsb_half_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^H: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_coo_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_coo_idx_t *rowidx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *colidx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_coo_idx_t i=rowidx[n];
		const rsb_coo_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Accumulation step for \f$ \|A\|_{\infty} \f$ , where A == A^H: for each of the \c nnz stored entries, \c fabsf(VA[n])
	 * is added to \c row_sums[roff+i] and, unless \c roff+i equals \c coff+j, also to \c row_sums[coff+j].
	 * Here \c i and \c j are the entry's row and column indices, read (as rsb_half_idx_t) through \c bpntr and \c bindx.
	 * \c row_sums is only added to, never reset. Only \c VA, \c row_sums, \c bindx, \c bpntr, \c roff, \c coff and \c nnz are used here.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 * (this kernel always returns RSB_ERR_NO_ERROR.)
	 */
	const rsb_half_idx_t *rowidx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *colidx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t n=0;

	while(RSB_LIKELY(n<nnz))
	{
		const rsb_half_idx_t i=rowidx[n];
		const rsb_half_idx_t j=colidx[n];

		row_sums[roff+i]+=fabsf(VA[n]);
		/* the mirrored slot is skipped when it is the same slot */
		if( roff+i != coff+j )
			row_sums[coff+j]+=fabsf(VA[n]);
		++n;
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* diagonal entry: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* diagonal entry: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* diagonal entry: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* diagonal entry: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * Adds |VA[n]| to row_sums[roff+i] for each of the nnz stored values,
	 * where i is the n-th index read from bpntr accessed as an array of
	 * rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[roff + row_idx[k]] += fabsf(VA[k]);

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * Adds |VA[n]| to row_sums[roff+i] for each of the nnz stored values,
	 * where i is the n-th index read from bpntr accessed as an array of
	 * rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[roff + row_idx[k]] += fabsf(VA[k]);

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * Adds |VA[n]| to row_sums[coff+j] for each of the nnz stored values,
	 * where j is the n-th index read from bindx accessed as an array of
	 * rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += fabsf(VA[k]);

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * Adds |VA[n]| to row_sums[coff+j] for each of the nnz stored values,
	 * where j is the n-th index read from bindx accessed as an array of
	 * rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += fabsf(VA[k]);

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * Adds |VA[n]| to row_sums[coff+j] for each of the nnz stored values,
	 * where j is the n-th index read from bindx accessed as an array of
	 * rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += fabsf(VA[k]);

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * Adds |VA[n]| to row_sums[coff+j] for each of the nnz stored values,
	 * where j is the n-th index read from bindx accessed as an array of
	 * rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += fabsf(VA[k]);

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_C__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_infty_norm_float_H__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Infinity-norm accumulation kernel for a float matrix with A == A^H,
	 * blocked 1 x 1, stored in BCOR format, diagonal implicit, with
	 * rsb_half_idx_t indices.
	 *
	 * For each of the nnz stored values, |VA[n]| is added to
	 * row_sums[roff+i]; unless roff+i equals coff+j, it is also added to
	 * row_sums[coff+j], the slot of the transposed position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_half_idx_t. Only the stored values are visited.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_half_idx_t r = row_idx[k];
		const rsb_half_idx_t c = col_idx[k];

		row_sums[roff + r] += fabsf(VA[k]);
		if (roff + r == coff + c)
			continue; /* same slot: counted once */
		row_sums[coff + c] += fabsf(VA[k]);
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * Adds VA[n] (signed, no absolute value) to row_sums[roff+i] for each
	 * of the nnz stored values, where i is the n-th index read from bpntr
	 * accessed as an array of rsb_coo_idx_t.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[roff + row_idx[k]] += VA[k];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tN_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_half_idx_t indices.
	 *
	 * Adds VA[n] (signed, no absolute value) to row_sums[roff+i] for each
	 * of the nnz stored values, where i is the n-th index read from bpntr
	 * accessed as an array of rsb_half_idx_t.
	 * Mdim, mdim, bindx, indptr, rpntr, cpntr, br, bc, coff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[roff + row_idx[k]] += VA[k];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * Adds VA[n] (signed, no absolute value) to row_sums[coff+j] for each
	 * of the nnz stored values, where j is the n-th index read from bindx
	 * accessed as an array of rsb_coo_idx_t.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += VA[k];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tT_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_half_idx_t indices.
	 *
	 * Adds VA[n] (signed, no absolute value) to row_sums[coff+j] for each
	 * of the nnz stored values, where j is the n-th index read from bindx
	 * accessed as an array of rsb_half_idx_t.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += VA[k];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * Adds VA[n] (signed, no absolute value) to row_sums[coff+j] for each
	 * of the nnz stored values, where j is the n-th index read from bindx
	 * accessed as an array of rsb_coo_idx_t.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += VA[k];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tC_r1_c1_uu_sU_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A != A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_half_idx_t indices.
	 *
	 * Adds VA[n] (signed, no absolute value) to row_sums[coff+j] for each
	 * of the nnz stored values, where j is the n-th index read from bindx
	 * accessed as an array of rsb_half_idx_t.
	 * Mdim, mdim, bpntr, indptr, rpntr, cpntr, br, bc, roff and flags are
	 * not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *col_idx = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
		row_sums[coff + col_idx[k]] += VA[k];

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Row-sums accumulation kernel for a float matrix with A == A^T,
	 * blocked 1 x 1, stored in BCOR format, diagonal explicit, with
	 * rsb_coo_idx_t indices.
	 *
	 * For each of the nnz stored values, VA[n] (signed, no absolute
	 * value) is added to row_sums[roff+i]; unless roff+i equals coff+j,
	 * it is also added to row_sums[coff+j], the slot of the transposed
	 * position.
	 * Here i is read from bpntr and j from bindx, both accessed as arrays
	 * of rsb_coo_idx_t.
	 * Mdim, mdim, indptr, rpntr, cpntr, br, bc and flags are not used.
	 * \return RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx = (const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for (k = 0; RSB_LIKELY(k < nnz); ++k)
	{
		const rsb_coo_idx_t r = row_idx[k];
		const rsb_coo_idx_t c = col_idx[k];

		row_sums[roff + r] += VA[k];
		if (roff + r == coff + c)
			continue; /* diagonal entry: counted once */
		row_sums[coff + c] += VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tN_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tT_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tC_r1_c1_uu_sS_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tN_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tT_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tC_r1_c1_uu_sH_dE_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal explicit, type float.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, unsymmetric case (A \neq A^T).
	 * For each of the \c nnz stored entries, adds \c VA[k] to \c row_sums[roff+IA[k]],
	 * with \c IA being \c bpntr read as rsb_coo_idx_t row indices.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * Only \c VA, \c row_sums, \c bpntr, \c roff and \c nnz are used here.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];

		row_sums[roff+r]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tN_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, unsymmetric case (A \neq A^T).
	 * For each of the \c nnz stored entries, adds \c VA[k] to \c row_sums[roff+IA[k]],
	 * with \c IA being \c bpntr read as rsb_half_idx_t row indices.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * Only \c VA, \c row_sums, \c bpntr, \c roff and \c nnz are used here.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];

		row_sums[roff+r]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, unsymmetric case (A \neq A^T).
	 * For each of the \c nnz stored entries, adds \c VA[k] to \c row_sums[coff+JA[k]],
	 * with \c JA being \c bindx read as rsb_coo_idx_t column indices.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tT_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, unsymmetric case (A \neq A^T).
	 * For each of the \c nnz stored entries, adds \c VA[k] to \c row_sums[coff+JA[k]],
	 * with \c JA being \c bindx read as rsb_half_idx_t column indices.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t c=col_idx[k];

		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, unsymmetric case (A \neq A^T).
	 * For each of the \c nnz stored entries, adds \c VA[k] to \c row_sums[coff+JA[k]],
	 * with \c JA being \c bindx read as rsb_coo_idx_t column indices.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tC_r1_c1_uu_sU_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, unsymmetric case (A \neq A^T).
	 * For each of the \c nnz stored entries, adds \c VA[k] to \c row_sums[coff+JA[k]],
	 * with \c JA being \c bindx read as rsb_half_idx_t column indices.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * Only \c VA, \c row_sums, \c bindx, \c coff and \c nnz are used here.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t c=col_idx[k];

		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tN_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tT_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tC_r1_c1_uu_sS_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, symmetric case (A == A^T).
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tN_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tT_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_C__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_coo_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_coo_idx_t *row_idx=(const rsb_coo_idx_t*)bpntr;
	const rsb_coo_idx_t *col_idx=(const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_coo_idx_t r=row_idx[k];
		const rsb_coo_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_rowssums_float_H__tC_r1_c1_uu_sH_dI_uG(const float * VA, float * row_sums, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Sums accumulation kernel, hermitian case (A == A^H); values are used as stored.
	 * Each of the \c nnz stored entries \c VA[k] is added to \c row_sums[roff+IA[k]]
	 * and, unless roff+IA[k] equals coff+JA[k], also to \c row_sums[coff+JA[k]].
	 * \c IA and \c JA are \c bpntr and \c bindx read as rsb_half_idx_t arrays.
	 * Variant for 1 x 1 blocking, BCOR format, diagonal implicit, type float;
	 * only the stored entries are accumulated by this kernel.
	 * \return #RSB_ERR_NO_ERROR, unconditionally.
	 */

	const rsb_half_idx_t *row_idx=(const rsb_half_idx_t*)bpntr;
	const rsb_half_idx_t *col_idx=(const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k;

	for(k=0;RSB_LIKELY(k<nnz);++k)
	{
		const rsb_half_idx_t r=row_idx[k];
		const rsb_half_idx_t c=col_idx[k];

		row_sums[roff+r]+=VA[k];
		if( roff+r == coff+c )
			continue;	/* same global index: entry is counted only once */
		row_sums[coff+c]+=VA[k];
	}
	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tN_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_coo_idx_t entries.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tN_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_half_idx_t entries.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tT_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tT_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tC_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tC_r1_c1_uu_sU_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tN_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_coo_idx_t entries.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tN_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_half_idx_t entries.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tT_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tT_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tC_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tC_r1_c1_uu_sS_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tN_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_coo_idx_t entries.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tN_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_half_idx_t entries.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tT_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tT_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tC_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tC_r1_c1_uu_sH_dE_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tN_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_coo_idx_t entries.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tN_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_half_idx_t entries.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tT_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tT_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tC_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tC_r1_c1_uu_sU_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A \neq A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tN_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_coo_idx_t entries.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tN_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_half_idx_t entries.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tT_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tT_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tC_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tC_r1_c1_uu_sS_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_half_idx_t column indices.
	 * Kernel generated for the case \f$A == A^T\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const cols = (const rsb_half_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tN_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_coo_idx_t entries.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const rows = (const rsb_coo_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tN_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[IA[n]] for each n in [0,nnz),
	 * with IA being the bpntr array reinterpreted as rsb_half_idx_t entries.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_half_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_half_idx_t * const rows = (const rsb_half_idx_t*)bpntr;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_half_idx_t r = rows[k];

		VA[k] = VA[k] * scale_factors[r];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tT_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Scales the nonzeroes in place: VA[n] is multiplied by scale_factors[JA[n]] for each n in [0,nnz),
	 * with JA being the bindx array of rsb_coo_idx_t column indices.
	 * Kernel generated for the case \f$A == A^H\f$; the body consults neither symmetry nor the diagonal.
	 * A blocked 1 x 1, stored in BCOR format, diagonal implicit, of type float, with rsb_coo_idx_t column indices.
	 * \return \rsb_errval_inp_param_msg
	 */
	const rsb_coo_idx_t * const cols = (const rsb_coo_idx_t*)bindx;
	rsb_nnz_idx_t k = 0;

	while( RSB_LIKELY(k < nnz) )
	{
		const rsb_coo_idx_t c = cols[k];

		VA[k] = VA[k] * scale_factors[c];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tT_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for every k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[idx[k]], where idx is the bindx array (rsb_half_idx_t entries).
	 * Variant: 1 x 1 blocking, BCOR format, float values, rsb_half_idx_t indices.
	 * Only VA, bindx, nnz and scale_factors are read; the other arguments are unused here.
	 * \return RSB_ERR_NO_ERROR (this kernel produces no other value).
	 */
	const rsb_half_idx_t * const idx = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	while (RSB_LIKELY(k < nnz))
	{
		/* pick the factor selected by the k-th (half-width) index entry */
		const rsb_half_idx_t sel = idx[k];

		VA[k] *= scale_factors[sel];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_C__tC_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for every k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[idx[k]], where idx is the bindx array (rsb_coo_idx_t entries).
	 * Variant: 1 x 1 blocking, BCOR format, float values, rsb_coo_idx_t indices.
	 * Only VA, bindx, nnz and scale_factors are read; the other arguments are unused here.
	 * \return RSB_ERR_NO_ERROR (this kernel produces no other value).
	 */
	const rsb_coo_idx_t * const idx = (const rsb_coo_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	while (RSB_LIKELY(k < nnz))
	{
		/* pick the factor selected by the k-th index entry */
		const rsb_coo_idx_t sel = idx[k];

		VA[k] *= scale_factors[sel];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_scale_float_H__tC_r1_c1_uu_sH_dI_uG(float * VA, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_half_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz, const float *scale_factors)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * In-place scaling kernel: for every k in [0,nnz), VA[k] is multiplied by
	 * scale_factors[idx[k]], where idx is the bindx array (rsb_half_idx_t entries).
	 * Variant: 1 x 1 blocking, BCOR format, float values, rsb_half_idx_t indices.
	 * Only VA, bindx, nnz and scale_factors are read; the other arguments are unused here.
	 * \return RSB_ERR_NO_ERROR (this kernel produces no other value).
	 */
	const rsb_half_idx_t * const idx = (const rsb_half_idx_t *)bindx;
	rsb_nnz_idx_t k = 0;

	while (RSB_LIKELY(k < nnz))
	{
		/* pick the factor selected by the k-th (half-width) index entry */
		const rsb_half_idx_t sel = idx[k];

		VA[k] *= scale_factors[sel];
		++k;
	}

	return RSB_ERR_NO_ERROR;
}



rsb_err_t rsb__BCOR_spmv_uaua_float_complex_C__tN_r1_c1_uu_sU_dE_uG(const float complex * restrict VA, const float complex * restrict rhs, float complex * restrict out, const rsb_coo_idx_t  Mdim,const rsb_coo_idx_t  mdim,const rsb_coo_idx_t * restrict bindx,const rsb_nnz_idx_t * restrict bpntr,const rsb_nnz_idx_t *restrict indptr,const rsb_coo_idx_t * restrict rpntr,const rsb_coo_idx_t * restrict cpntr,const rsb_coo_idx_t br,const rsb_coo_idx_t bc,const rsb_coo_idx_t roff,const rsb_coo_idx_t coff,const rsb_flags_t flags,const rsb_nnz_idx_t nnz)
{
	/**
	 * \ingroup rsb_doc_kernels
	 * Computes \f$y \leftarrow y + {A} \cdot x, where A \neq A^T. \f$
         * A blocked 1 x 1, stored in BCOR format, diagonal explicit, of type float complex, with rsb_coo_idx_t column indices.
	 * \return \rsb