import gmpy2
from numba import cuda, types
import numpy as np
@cuda.jit(device=True)
def powmod(a, b, m):
    """Compute ``a ** b mod m`` by binary (square-and-multiply) exponentiation.

    Args:
        a: Integer base.
        b: Integer exponent. Any ``b <= 0`` skips the loop, so the result is
            ``1 % m``.
        m: Modulus; must be non-zero.

    Returns:
        ``(a ** b) % m``.

    Note:
        Products are formed before they are reduced, so with 64-bit integer
        operands the result is only exact while ``(m - 1) ** 2`` fits in a
        signed 64-bit integer (``m`` up to roughly 3.03e9).
    """
    # Reduce the base up front so the very first squaring cannot overflow
    # when ``a`` is large but ``m`` is small.
    a %= m
    # ``1 % m`` rather than a bare 1 so that m == 1 yields 0 even for b == 0.
    res = 1 % m
    while b > 0:
        if b & 1:
            res = (res * a) % m
        a = (a * a) % m
        b >>= 1
    return res
@cuda.jit(device=True)
def int_to_str(n):
    """Return the base-10 text of integer ``n``.

    Negative values get a leading ``"-"``; zero is returned as ``"0"``.

    NOTE(review): this builds Python ``str`` objects by concatenation;
    confirm that the Numba CUDA target can compile string operations inside
    device code -- TODO verify against the Numba documentation.
    """
    if n == 0:
        return "0"
    is_negative = n < 0
    magnitude = abs(n)
    alphabet = "0123456789"
    text = ""
    # Peel off the least-significant digit each round and prepend it.
    while magnitude > 0:
        last_digit = magnitude % 10
        magnitude = magnitude // 10
        text = alphabet[last_digit] + text
    return "-" + text if is_negative else text
@cuda.jit(device=True)
def _write_int_ascii(buf, start, value):
    """Store the decimal digits of ``value`` in ``buf`` as character codes.

    One ASCII code is written per element starting at ``buf[start]`` (with a
    leading ``-`` for negative values), followed by a 0 terminator. Only
    integer arithmetic is used, so no string objects are created in device
    code.
    """
    pos = start
    if value < 0:
        buf[pos] = 45  # ord("-")
        pos += 1
        value = -value
    # Count the digits first so they can be stored most-significant first.
    n_digits = 1
    rest = value // 10
    while rest > 0:
        n_digits += 1
        rest //= 10
    buf[pos + n_digits] = 0
    last = pos + n_digits - 1
    for k in range(n_digits):
        buf[last - k] = 48 + value % 10  # 48 == ord("0")
        value //= 10


class GpuFunctions:
    """Namespace holding the CUDA kernel of this module."""

    @staticmethod
    @cuda.jit((types.int64, types.int64, types.int64, types.int64, types.int64,
               types.int64[::1], types.int64))
    def kernel_function(C, min_a, max_a, min_b, max_b, results, batch_size):
        """Evaluate ``a ** b mod C`` for a batch of ``(a, b)`` pairs.

        Work item ``i`` maps to ``a = min_a + i // n_b`` and
        ``b = min_b + i % n_b`` with ``n_b = max_b - min_b + 1``.

        Each work item owns 500 consecutive slots of ``results``, split into
        five fields of 100 slots. Every field holds the decimal digits of one
        value as ASCII codes followed by a 0 terminator, in this order:
        ``a``, ``b``, ``a ** b mod C``, ``C - (a ** b mod C)``, and
        ``int((1 - |remainder| / C) * 1e8)``. Slots after a terminator are
        left untouched.

        Args:
            C: Modulus.
            min_a: Smallest base.
            max_a: Largest base.
            min_b: Smallest exponent.
            max_b: Largest exponent.
            results: Contiguous int64 output buffer, at least
                ``batch_size * 500`` elements long.
            batch_size: Number of work items to process.

        NOTE(review): ``max_a`` is accepted but never read, so any work item
        with ``i >= (max_a - min_a + 1) * n_b`` uses a base above ``max_a``
        -- confirm whether that is intended.
        """
        # Loop-invariant width of the exponent range.
        n_b = max_b - min_b + 1
        idx = cuda.grid(1)
        stride = cuda.gridsize(1)
        # Grid-stride loop: each thread handles items idx, idx + stride, ...
        for i in range(idx, batch_size, stride):
            a = min_a + i // n_b
            b = min_b + i % n_b
            a_pow_b = powmod(a, b, C)
            remainder = C - a_pow_b
            closeness = 1 - abs(remainder) / C
            # Digits are written straight into the buffer instead of going
            # through intermediate strings.
            offset = i * 500
            _write_int_ascii(results, offset, a)
            _write_int_ascii(results, offset + 100, b)
            _write_int_ascii(results, offset + 200, a_pow_b)
            _write_int_ascii(results, offset + 300, remainder)
            _write_int_ascii(results, offset + 400, int(closeness * 1e8))
def execute_kernel(C, batch_size, min_a=2, max_a=100, min_b=1, max_b=50):
    """Launch ``GpuFunctions.kernel_function`` and return its output.

    Args:
        C: Modulus handed to the kernel; converted with ``np.int64``.
        batch_size: Number of work items; the output buffer holds
            ``batch_size * 500`` int64 slots.
        min_a: Smallest base (default 2).
        max_a: Largest base (default 100).
        min_b: Smallest exponent (default 1).
        max_b: Largest exponent (default 50).

    Returns:
        numpy.ndarray: Host copy of the int64 result buffer. An empty array
        is returned when ``batch_size`` is not positive.
    """
    # A launch with zero blocks is not a valid configuration, so bail out.
    if batch_size <= 0:
        return np.empty(0, dtype=np.int64)

    results = cuda.device_array(batch_size * 500, dtype=np.int64)

    threads_per_block = 256
    # Ceiling division so every work item is covered by at least one thread.
    blocks_per_grid = (batch_size + threads_per_block - 1) // threads_per_block

    GpuFunctions.kernel_function[blocks_per_grid, threads_per_block](
        np.int64(C),
        np.int64(min_a),
        np.int64(max_a),
        np.int64(min_b),
        np.int64(max_b),
        results,
        np.int64(batch_size),
    )
    # Hand the data back to the caller instead of discarding it.
    return results.copy_to_host()
if __name__ == '__main__':
    # Sample modulus and batch size, passed straight to execute_kernel.
    C, batch_size = 123456789, 1024
    execute_kernel(C=C, batch_size=batch_size)
I am encountering the error `TypeError: Signature mismatch: 2 argument types given, but function takes 3 arguments` when trying to run a Numba CUDA kernel. The kernel is intended to perform calculations based on multiple parameters.
- Numba version: 0.55.1
- CUDA version: 11.2
- Python version: 3.8.10