/*
 * spGPU - Sparse matrices on GPU library.
 *
 * Copyright (C) 2010 - 2015
 *     Davide Barbieri - University of Rome Tor Vergata
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 3 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

/*
 * NOTE(review): this copy of the file is damaged. The text breaks off in the
 * middle of a `for` loop header inside the kernel below and resumes at the
 * tail of a kernel-launch statement. The rest of the kernel body, any
 * matching #endif, and the head of the host-side launch function are not
 * present here. Restore the file from the upstream repository before
 * changing any code.
 */

/* Defined with no value; not referenced in the visible text.
 * Presumably consumed by a header or includer not shown here - TODO confirm. */
#define IDX2
/* Inner dimension of the shared `temp` array declared in the kernel below.
 * Presumably also the launch block size - confirm against the launch site. */
#define THREAD_BLOCK 128
/* Outer dimension of the shared `temp` array declared in the kernel below. */
#define MMBSZ 8

/*
 * Kernel whose name is generated by
 * CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn); those macros, VALUE_TYPE and
 * the zero_<VALUE_TYPE>() helper are defined outside this text.
 *
 * The definition sits under `#if 0`; no matching #endif/#else is visible, so
 * how far the disabled region extends cannot be determined from here.
 *
 * What the visible text shows about the parameters:
 *   rows         - upper bound of the per-thread row loop (`while (i < rows)`).
 *   hackSize     - divisor used to split a row index i into a hack index
 *                  (i / hackSize) and a position inside it (i % hackSize).
 *   hackOffsets  - indexed by the hack index; read only by lane 0 of a warp.
 *   rP, cM       - advanced by (broadcast hackOffset + position in hack) to
 *                  form the per-row pointers rrP and rcM.
 *   rS           - rS[i] is read as the row size of row i.
 * The use of count, z, zPitch, y, yPitch, alpha, x, xPitch, beta and
 * baseIndex lies in the missing part of the body and is not documented here.
 */
#if 0
__global__ void
CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn)
(int count, VALUE_TYPE *z, int zPitch, const VALUE_TYPE *y, int yPitch,
 VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP,
 int hackSize, const int* hackOffsets, const int* rS, int rows,
 const VALUE_TYPE *x, int xPitch, VALUE_TYPE beta, int baseIndex)
{
  VALUE_TYPE *pz,*px,*py;
  VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)();
  VALUE_TYPE yVal;
  // Block-shared scratch of MMBSZ x THREAD_BLOCK elements; its use is in the
  // part of the body that is missing from this text.
  __shared__ VALUE_TYPE temp[MMBSZ][THREAD_BLOCK];
  int *rrP;
  VALUE_TYPE *rcM;

  // Starting row for this thread, derived from its block and thread indices.
  unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;
  // Total number of threads along x in the grid; where it is applied is not
  // visible (presumably the loop increment - confirm once the file is restored).
  unsigned int gridSize = gridDim.x * blockDim.x;

  while (i < rows)
  {
    int j;
    int hackId = i / hackSize;      // which hack (group of hackSize rows) row i falls in
    int hackLaneId = i % hackSize;  // position of row i inside that hack
    int hackOffset;
    unsigned int laneId = threadIdx.x % 32;  // lane within a 32-thread warp
    // Only lane 0 loads the hack offset; in every other lane hackOffset is
    // still uninitialized at this point.
    if (laneId == 0)
      hackOffset = hackOffsets[hackId];
    // NOTE(review): in this single-line form of the file, the `//` on the next
    // line lexically comments out everything after it. The text there reads:
    // a full-mask __shfl_sync broadcast of lane 0's hackOffset plus hackLaneId,
    // the rrP / rcM pointer setup, `rowSize = rS[i]`, and then a `for` header
    // that is cut off mid-condition and runs straight into the tail of a
    // `<<< ... currentStream >>>` kernel launch from a host function whose
    // beginning is missing.
    //__syncthreads(); hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; rrP = (int *) rP + hackOffset; rcM = (VALUE_TYPE *) cM + hackOffset; int rowSize = rS[i]; for (int k=0; kcurrentStream >>> (count, z, zPitch,y, yPitch, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, xPitch, beta, baseIndex); }