CUDA Kernel Signature Mismatch Error

```python
import gmpy2
from numba import cuda, types
import numpy as np

@cuda.jit(device=True)
def powmod(a, b, m):
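    # Square-and-multiply modular exponentiation: computes (a ** b) % m
    # in O(log b) multiplications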
    res = 1
    while b > 0:
        if b & 1:
            res = (res * a) % m
        a = (a * a) % m
        b >>= 1
    return res

@cuda.jit(device=True)
def int_to_str(n):
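    # Builds the decimal representation one character at a time via
    # repeated string concatenation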
    if n == 0:
        return "0"
    negative = n < 0
    n = abs(n)
    digits = "0123456789"
    result = ""
    while n > 0:
        digit = n % 10
        result = digits[digit] + result
        n //= 10
    if negative:
        result = "-" + result
    return result

class GpuFunctions:
    @staticmethod
    @cuda.jit((types.int64, types.int64, types.int64, types.int64, types.int64, types.int64[::1], types.int64))
    def kernel_function(C, min_a, max_a, min_b, max_b, results, batch_size):
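        # Global thread index and total grid size, for a grid-stride loop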
        idx = cuda.grid(1)
        stride = cuda.gridsize(1)

        for i in range(idx, batch_size, stride):
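            # Decode the flat index i into an (a, b) pair within the search ranges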
            a_idx = i // (max_b - min_b + 1)
            b_idx = i % (max_b - min_b + 1)
            a = min_a + a_idx
            b = min_b + b_idx

            a_pow_b = powmod(a, b, C)
            remainder = C - a_pow_b
            closeness = 1 - abs(remainder) / C

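            # Each work item owns a 500-slot region: five 100-slot fields,
            # each holding an ASCII string terminated by a 0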
            offset = i * 500
            a_str = int_to_str(a)
            b_str = int_to_str(b)
            a_pow_b_str = int_to_str(a_pow_b)
            remainder_str = int_to_str(remainder)
            closeness_str = int_to_str(int(closeness * 1e8))

            for j, char in enumerate(a_str):
                results[offset + j] = ord(char)
            results[offset + len(a_str)] = 0
            offset += 100
            for j, char in enumerate(b_str):
                results[offset + j] = ord(char)
            results[offset + len(b_str)] = 0
            offset += 100
            for j, char in enumerate(a_pow_b_str):
                results[offset + j] = ord(char)
            results[offset + len(a_pow_b_str)] = 0
            offset += 100
            for j, char in enumerate(remainder_str):
                results[offset + j] = ord(char)
            results[offset + len(remainder_str)] = 0
            offset += 100
            for j, char in enumerate(closeness_str):
                results[offset + j] = ord(char)
            results[offset + len(closeness_str)] = 0

def execute_kernel(C, batch_size):
    results = cuda.device_array(batch_size * 500, dtype=np.int64)
    C_int = np.int64(C)
    min_a_int = np.int64(2)
    max_a_int = np.int64(100)
    min_b_int = np.int64(1)
    max_b_int = np.int64(50)
    batch_size_int = np.int64(batch_size)

    threads_per_block = 256
    blocks_per_grid = (batch_size + threads_per_block - 1) // threads_per_block

    GpuFunctions.kernel_function[blocks_per_grid, threads_per_block](
        C_int, min_a_int, max_a_int, min_b_int, max_b_int, results, batch_size_int)

if __name__ == '__main__':
    C = 123456789
    batch_size = 1024
    execute_kernel(C, batch_size)
```

I am encountering a `TypeError: Signature mismatch: 2 argument types given, but function takes 3 arguments` error when trying to run a Numba CUDA kernel. The kernel is intended to perform calculations based on multiple parameters.

  • Numba version: 0.55.1
  • CUDA version: 11.2
  • Python version: 3.8.10

Note - I’ve edited your original post to put the code in triple backticks (```) so that it formats correctly.

The error message is not very helpful here, but the root cause is that you’re attempting to do string operations that would require a memory allocation in the kernel, and memory allocations inside kernels are not presently possible in Numba. If they were, you could simply write `str(a)` instead of using your `int_to_str()` function to get the number as a string.

I can’t really tell what you’re trying to do, but I feel like there should be some way to do it that avoids using strings. Can you describe what you’re aiming to implement here?
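
In the meantime, here’s one possible direction, assuming that what you ultimately want from the kernel is the five values (a, b, a^b mod C, the remainder, and the closeness score) for each pair: have the kernel write the raw numbers into a 2-D array and do all the string formatting on the host after copying the results back. This is only a sketch under that assumption, not a drop-in replacement: I’ve dropped the unused `max_a` parameter, let Numba infer the kernel signature at the first launch rather than declaring it up front, and guessed at the host-side printing you might want.

```python
import numpy as np
from numba import cuda

@cuda.jit(device=True)
def powmod(a, b, m):
    # Square-and-multiply modular exponentiation, same as in your post
    res = 1
    while b > 0:
        if b & 1:
            res = (res * a) % m
        a = (a * a) % m
        b >>= 1
    return res

@cuda.jit
def kernel_function(C, min_a, min_b, max_b, results, batch_size):
    idx = cuda.grid(1)
    stride = cuda.gridsize(1)
    n_b = max_b - min_b + 1
    for i in range(idx, batch_size, stride):
        a = min_a + i // n_b
        b = min_b + i % n_b
        a_pow_b = powmod(a, b, C)
        remainder = C - a_pow_b
        # Write raw numbers only: no strings, so no device-side allocation
        results[i, 0] = a
        results[i, 1] = b
        results[i, 2] = a_pow_b
        results[i, 3] = remainder
        # closeness scaled to an integer, as in your original code
        results[i, 4] = np.int64((1.0 - abs(remainder) / C) * 1e8)

def execute_kernel(C, batch_size):
    # One row of five int64 values per (a, b) pair instead of 500 char slots
    results = cuda.device_array((batch_size, 5), dtype=np.int64)
    threads_per_block = 256
    blocks_per_grid = (batch_size + threads_per_block - 1) // threads_per_block
    kernel_function[blocks_per_grid, threads_per_block](
        np.int64(C), np.int64(2), np.int64(1), np.int64(50),
        results, np.int64(batch_size))
    host = results.copy_to_host()
    # str() / f-strings are fine here because we're back on the CPU
    for a, b, a_pow_b, remainder, closeness in host[:5]:
        print(f"a={a} b={b} a^b mod C={a_pow_b} "
              f"remainder={remainder} closeness={closeness / 1e8}")

if __name__ == '__main__':
    execute_kernel(123456789, 1024)
```

Doing the formatting on the host sidesteps the allocation restriction entirely, and it also shrinks the output buffer from 500 int64 slots per work item to 5.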