Basic sum scan algorithm

etienne87 · April 5, 2023, 9:02am

import numpy as np
import math
import numba
from numba import cuda


# basic blelloch
@cuda.jit
def sum_reduction_kernel(array, niter):
    k = cuda.grid(1) 
    n = array.shape[0]
    if k < n:
        for d in range(niter):
            l = k + 2**d # neighbor offset is 1,2,4,8,16,...
            if (k % 2**(d+1))== 0: # sum only if k itself is modulo 2,4,8,16,...
                if l < n: # neighbor might not exist
                    array[k] = array[k] + array[l]
            else:
                break
        cuda.syncthreads()

def sum_reduction(array):
    n = len(array)
    xcu = cuda.to_device(array)

    threads_per_block = 128
    blocks_per_grid = (n + (threads_per_block - 1)) // threads_per_block
    
    niter = n.bit_length()
    sum_reduction_kernel[blocks_per_grid, threads_per_block](xcu, niter)
    xcpu = xcu.copy_to_host()
    print(2**niter)
    print(xcpu[::2**niter])
    return xcu[0]

cpu_sum = lambda x:np.sum(x)


verbose = False
n = 100000
print(n.bit_length(), int(math.ceil(math.log(n,2))))
nchecks = 1
for i in range(nchecks):
#     x = np.random.randn(n)
    x = np.ones((n), dtype=np.float32)
    y = sum_reduction(x)
    y_cpu = cpu_sum(x)
    diff = abs(y_cpu-y)
    if diff > 1e-6:
        print(f'{i}/100 ERROR: ', y, ' ', y_cpu)
    elif verbose:
        print(f'{i}/100 SUCCESS: ', y, ' ', y_cpu)
print('done')

my code works for small arrays (<20000) but not bigger… i do not understand why… i am trying to reproduce the basic sum parallel algorithm

Topic		Replies	Views
Question about Numba CUDA Matrix Multiply example Community Support	1	42	April 12, 2024
Usage of CUDA Python, Linear Algebra on GPU and Computational Code Community Support	7	2854	December 31, 2021
Shared Memory Reduction example assuming 1 block Community Support	2	508	February 24, 2023
CUDA Python - Multiple Threads Operating on Same Array Location Support: How do I do ...?	6	1308	January 21, 2022
Understanding concurrency of streams in numba cuda correctly Numba	2	299	March 17, 2023

Basic sum scan algorithm

Related Topics