Program Listing for File jaccard_weights_cuda.cu

Return to documentation for file (src/sparsebase/feature/jaccard_weights_cuda.cu)

#include "sparsebase/feature/jaccard_weights_cuda.cuh"
#include "sparsebase/format/cuda_array_cuda.cuh"
#include "sparsebase/format/cuda_csr_cuda.cuh"

namespace sparsebase::feature {

template <typename IDType, typename NNZType, typename FeatureType>
__global__ void jac_binning_gpu_u_per_grid_bst_kernel(const NNZType *xadj,
                                                      const IDType *adj,
                                                      NNZType n,
                                                      FeatureType *emetrics,
                                                      IDType SM_FAC);
template <typename IDType, typename NNZType, typename ValueType,
          typename FeatureType>
format::CUDAArray<FeatureType> *RunJaccardKernel(
    format::CUDACSR<IDType, NNZType, ValueType> *cuda_csr) {
  context::CUDAContext *gpu_context =
      static_cast<context::CUDAContext *>(cuda_csr->get_context());
  cudaSetDevice(gpu_context->device_id);

  FeatureType *jaccard_weights;
  cudaMalloc(&jaccard_weights, cuda_csr->get_num_nnz() * sizeof(FeatureType));

#define MAX_GRID_DIM 65535
#define WARP_SIZE 32
  dim3 block(1, 1, 1);
  int g = 64;
  int a = 8;
  block.x = g;
  // block.y = max((size_t)1,(size_t)THREADS_PER_BLOCK/block.x);
  if (g < WARP_SIZE) {
    block.y = WARP_SIZE / g;
  }

  dim3 grid(1, 1, 1);
  // grid.x = max((size_t)1,(size_t)a/block.y);
  grid.x = a;
  grid.y = max((int)min((int)cuda_csr->get_dimensions()[0], (int)MAX_GRID_DIM),
               (int)1);
  grid.z =
      (int)min((int)max((int)cuda_csr->get_dimensions()[0] / grid.y, (int)1),
               (int)MAX_GRID_DIM);

  jac_binning_gpu_u_per_grid_bst_kernel<IDType, NNZType, FeatureType>
      <<<grid, block, 0>>>(cuda_csr->get_row_ptr(), cuda_csr->get_col(),
                           (NNZType)cuda_csr->get_dimensions()[0],
                           jaccard_weights, 0);
  cudaDeviceSynchronize();
  auto array = new format::CUDAArray<FeatureType>(
      cuda_csr->get_num_nnz(), jaccard_weights, *gpu_context);
  return array;
}
#define FULL_MASK 0xffffffff
__inline__ __device__ unsigned calculateMask(char length,
                                             unsigned long long thread_id) {
  if (length >= 32) return FULL_MASK;
  // unsigned mask = 0x80000000;
  unsigned mask = 1;
  for (char i = 1; i < length; i++) {
    mask = (mask << 1) | 1;
    //  mask = (mask>>1)|0x80000000;
  }
  int group_in_warp = (thread_id % WARP_SIZE) / length;
  mask = mask << (group_in_warp * length);
  // mask = mask >> (group_in_warp*length);
  return mask;
}
template <typename NNZType, typename IDType>
__device__ inline NNZType bst(const NNZType *__restrict__ xadj,
                              const IDType *__restrict__ adj, IDType neighbor,
                              IDType target) {
  NNZType match = (NNZType)(-1);
  NNZType left = xadj[neighbor] + 1;
  NNZType right = xadj[neighbor + 1];
  IDType curr;
  NNZType middle;
  while (left <= right) {
    middle = ((unsigned long long)left + (unsigned long long)right) >> 1;
    curr = adj[middle - 1];
    if (curr > target) {
      right = middle - 1;
    } else if (curr < target) {
      left = middle + 1;
    } else {
      match = middle - 1;
      break;
    }
  }
  return match;
}
__inline__ __device__ int warpReduce(int val, unsigned int length,
                                     unsigned mask) {
  for (int delta = length / 2; delta > 0; delta /= 2) {
    val += __shfl_down_sync(mask, val, delta, length);
  }
  return val;
}
template <typename IDType, typename NNZType, typename FeatureType>
__global__ void jac_binning_gpu_u_per_grid_bst_kernel(const NNZType *xadj,
                                                      const IDType *adj,
                                                      NNZType n,
                                                      FeatureType *emetrics,
                                                      IDType SM_FAC) {
  const bool directed = false;
  // int no_threads = blockDim.z * blockDim.y * blockDim.x * gridDim.x;
  unsigned int block_local_id = blockDim.x * blockDim.y * threadIdx.z +
                                blockDim.x * threadIdx.y + threadIdx.x;
  unsigned int grid_id =
      gridDim.x * gridDim.y * blockIdx.z + gridDim.x * blockIdx.y + blockIdx.x;
  unsigned long long tid =
      (long long)blockDim.z * blockDim.x * blockDim.y * grid_id +
      (unsigned long long)block_local_id;
  unsigned mask =
      calculateMask(blockDim.x, tid);  // threadIdx.y*blockDim.x+threadIdx.x);

  for (NNZType ptr = blockIdx.y + blockIdx.z * gridDim.y; ptr < n;
       ptr += gridDim.y * gridDim.z) {
    IDType u = ptr;
    NNZType degu = xadj[u + 1] - xadj[u];

    for (NNZType neigh_ptr = threadIdx.y + blockDim.y * threadIdx.z +
                             blockIdx.x * blockDim.y * blockDim.z;
         neigh_ptr < degu; neigh_ptr += blockDim.y * blockDim.z * gridDim.x) {
      IDType v = adj[neigh_ptr + xadj[u]];
      bool skippable = (xadj[v + 1] - xadj[v] < degu ||
                        (xadj[v + 1] - xadj[v] == degu && v > u));
      if (!directed && skippable) continue;
      NNZType other_ptr = bst(xadj, adj, v, u);
      if (directed && other_ptr != (NNZType)-1 && skippable) continue;
      NNZType intersection_size = 0;

      for (NNZType t_ptr = threadIdx.x; t_ptr < degu; t_ptr += blockDim.x) {
        NNZType loc = bst(xadj, adj, v, adj[xadj[u] + t_ptr]);
        intersection_size += (loc != (NNZType)-1);
      }

      intersection_size = warpReduce(intersection_size, blockDim.x, mask);
      // intersection_size = warpReduce(intersection_size, blockDim.x, tid%32,
      // threadIdx.x==0);
      if (threadIdx.x == 0) {
        FeatureType J =
            float(intersection_size) /
            float(degu + (xadj[v + 1] - xadj[v]) - intersection_size);
        emetrics[(xadj[u] + neigh_ptr)] = J;
        if (other_ptr != (NNZType)-1) emetrics[other_ptr] = J;
      }
    }
    //    __syncthreads();
  }
}

#if !defined(_HEADER_ONLY)
#include "init/cuda/jaccard_weights_cuda.inc"
#endif
}  // namespace sparsebase::feature