TypeError running numba code

Hi,

Could someone help me fix this bug im getting running numba code. I have shared the code in reply.

The error is TypeError: Signature mismatch: 2 argument types given, but function takes 3 arguments

But the function definitely has more than 3 arguments (actual is 6).

$ python3 pyrebel_main_abs_to_con2.py --input deep2.png 
len(bound_data)= 31492
count= 17 567
abstraction complete.
Traceback (most recent call last):
  File "/home/ps_nithin/temp2/pyrebel/demo/bug_test/pyrebel_main_abs_to_con2.py", line 90, in <module>
    abs.abstract_to_concrete(1)
  File "/home/ps_nithin/temp2/pyrebel/demo/bug_test/pyrebel/abstract.py", line 536, in abstract_to_concrete
    get_max_dist3[math.ceil(len(ba_size_d)/32),32](ba_size_cum_d,ba_size_d,new_bound_data_d,dist_data_d,max_dist_i_d,max_dist_d)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 539, in __call__
    return self.dispatcher.call(args, self.griddim, self.blockdim,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 681, in call
    kernel = _dispatcher.Dispatcher._cuda_call(self, *args)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 689, in _compile_for_args
    return self.compile(tuple(argtypes))
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 932, in compile
    kernel = _Kernel(self.py_func, argtypes, **self.targetoptions)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 83, in __init__
    cres = compile_cuda(self.py_func, types.void, self.argtypes,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/compiler.py", line 196, in compile_cuda
    cres = compiler.compile_extra(typingctx=typingctx,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 739, in compile_extra
    return pipeline.compile_extra(func)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 439, in compile_extra
    return self._compile_bytecode()
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 505, in _compile_bytecode
    return self._compile_core()
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 481, in _compile_core
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 473, in _compile_core
    pm.run(self.state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 363, in run
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 356, in run
    self._runPass(idx, pass_inst, state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 311, in _runPass
    mutated |= check(pss.run_pass, internal_state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 272, in check
    mangled = func(compiler_state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typed_passes.py", line 114, in run_pass
    typemap, return_type, calltypes, errs = type_inference_stage(
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typed_passes.py", line 95, in type_inference_stage
    errs = infer.propagate(raise_errors=raise_errors)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 1075, in propagate
    errors = self.constraints.propagate(self)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 160, in propagate
    constraint(typeinfer)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 572, in __call__
    self.resolve(typeinfer, typevars, fnty)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 595, in resolve
    sig = typeinfer.resolve_call(fnty, pos_args, kw_args)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 1569, in resolve_call
    return self.context.resolve_function_type(fnty, pos_args, kw_args)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/context.py", line 279, in resolve_function_type
    res = self._resolve_user_function_type(func, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/context.py", line 335, in _resolve_user_function_type
    return func.get_call_type(self, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 314, in get_call_type
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 311, in get_call_type
    sig = temp.apply(nolitargs, nolitkws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 358, in apply
    sig = generic(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 621, in generic
    disp, new_args = self._get_impl(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 720, in _get_impl
    impl, args = self._build_impl(cache_key, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 824, in _build_impl
    disp_type.get_call_type(self.context, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 541, in get_call_type
    self.dispatcher.get_call_template(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 849, in get_call_template
    self.compile_device(tuple(args))
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 883, in compile_device
    cres = compile_cuda(self.py_func, return_type, args,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/compiler.py", line 196, in compile_cuda
    cres = compiler.compile_extra(typingctx=typingctx,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 739, in compile_extra
    return pipeline.compile_extra(func)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 439, in compile_extra
    return self._compile_bytecode()
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 505, in _compile_bytecode
    return self._compile_core()
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 481, in _compile_core
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 473, in _compile_core
    pm.run(self.state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 363, in run
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 356, in run
    self._runPass(idx, pass_inst, state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 311, in _runPass
    mutated |= check(pss.run_pass, internal_state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 272, in check
    mangled = func(compiler_state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typed_passes.py", line 114, in run_pass
    typemap, return_type, calltypes, errs = type_inference_stage(
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typed_passes.py", line 95, in type_inference_stage
    errs = infer.propagate(raise_errors=raise_errors)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 1075, in propagate
    errors = self.constraints.propagate(self)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 160, in propagate
    constraint(typeinfer)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 572, in __call__
    self.resolve(typeinfer, typevars, fnty)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 595, in resolve
    sig = typeinfer.resolve_call(fnty, pos_args, kw_args)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typeinfer.py", line 1569, in resolve_call
    return self.context.resolve_function_type(fnty, pos_args, kw_args)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/context.py", line 279, in resolve_function_type
    res = self._resolve_user_function_type(func, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/context.py", line 335, in _resolve_user_function_type
    return func.get_call_type(self, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 404, in get_call_type
    raise exc
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 401, in get_call_type
    out = template.apply(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 358, in apply
    sig = generic(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 1140, in generic
    sig = self._get_signature(self.context, fnty, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 1085, in _get_signature
    sig = fnty.get_call_type(typingctx, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 314, in get_call_type
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 311, in get_call_type
    sig = temp.apply(nolitargs, nolitkws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 358, in apply
    sig = generic(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 621, in generic
    disp, new_args = self._get_impl(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 720, in _get_impl
    impl, args = self._build_impl(cache_key, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/typing/templates.py", line 824, in _build_impl
    disp_type.get_call_type(self.context, args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/types/functions.py", line 541, in get_call_type
    self.dispatcher.get_call_template(args, kws)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 849, in get_call_template
    self.compile_device(tuple(args))
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/dispatcher.py", line 883, in compile_device
    cres = compile_cuda(self.py_func, return_type, args,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/cuda/compiler.py", line 196, in compile_cuda
    cres = compiler.compile_extra(typingctx=typingctx,
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 739, in compile_extra
    return pipeline.compile_extra(func)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 439, in compile_extra
    return self._compile_bytecode()
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 505, in _compile_bytecode
    return self._compile_core()
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 481, in _compile_core
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler.py", line 473, in _compile_core
    pm.run(self.state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 363, in run
    raise e
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 356, in run
    self._runPass(idx, pass_inst, state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_lock.py", line 35, in _acquire_compile_lock
    return func(*args, **kwargs)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 311, in _runPass
    mutated |= check(pss.run_pass, internal_state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/compiler_machinery.py", line 272, in check
    mangled = func(compiler_state)
  File "/home/ps_nithin/.local/lib/python3.10/site-packages/numba/core/untyped_passes.py", line 105, in run_pass
    raise TypeError("Signature mismatch: %d argument types given, "
TypeError: Signature mismatch: 2 argument types given, but function takes 3 arguments

Thanks,

The error appears to be in this code in numba kernel get_max_dist3(nz_s_cum_d,nz_s_d,bound_data_d,dist_data_d,max_dist_i_d,max_dist_d)

# Copyright (C) 2024-2025 Nithin PS.
# This file is part of Pyrebel.
#
# Pyrebel is free software: you can redistribute it and/or modify it under the terms of 
# the GNU General Public License as published by the Free Software Foundation, either 
# version 3 of the License, or (at your option) any later version.
#
# Pyrebel is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
# PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with Pyrebel.
# If not, see <https://www.gnu.org/licenses/>.
#

from pyrebel.getnonzeros import *
import numpy as np
from numba import cuda
import cmath,math
from math import sqrt

@cuda.jit
def get_new_bound_data_order(nz_s_cum_d,nz_s_d,bound_data_d,max_dist_i_d,max_dist_d):
    ci=cuda.grid(1)
    if ci<len(nz_ba_size_d):
        n=nz_ba_size_cum_d[ci]
        s=1
        index=nz_a_max_dist[ci][0]
        index2=nz_a_max_dist[ci][1]
        y=int(index/tmp_img.shape[1])
        x=index%tmp_img.shape[1]
        y2=int(index2/tmp_img.shape[1])
        x2=index2%tmp_img.shape[1]
        
        while 1:
            
            
            if s==nz_ba_size_d[ci]-1:
                break
            if n==nz_ba_size_cum_d[ci]+nz_ba_size_d[ci]-2:
                n=nz_ba_size_cum_d[ci]
            else:
                n+=1
            s+=1


@cuda.jit
def get_max_dist3(nz_s_cum_d,nz_s_d,bound_data_d,dist_data_d,max_dist_i_d,max_dist_d):
    ci=cuda.grid(1)
    if ci<len(nz_s_d):
        n=nz_s_cum_d[ci]
        s=0
        d_max=dist_data_d[n]
        d_max_i=n
        while 1:
            s+=1
            if dist_data_d[n]>d_max:
                d_max=dist_data_d[n]
                d_max_i=n
            if s==nz_s_d[ci]:
                break
            n+=1
        n=nz_s_cum_d[ci]
        s=0
        while 1:
            s+=1
            if dist_data_d[n]==d_max and n!=d_max_i:
                d_max2=dist_data_d[n]
                d_max_i2=n
            if s==nz_s_d[ci]:
                break
            n+=1

        max_dist_i_d[ci][0]=bound_data_d[d_max_i]
        max_dist_i_d[ci][1]=bound_data_d[d_max_i2]
        max_dist_d[ci]=dist_data_d[d_max_i]

@cuda.jit
def get_dist_data_init2(nz_s_d,nz_s_cum_d,bound_data_d,bound_mark_init_d,shape_d,dist_data_d):
    ci=cuda.grid(1)
    if ci<len(bound_data_d):
        index=bound_data_d[ci]
        y=int(index/shape_d[1])
        x=index%shape_d[1]
        n=nz_s_cum_d[bound_mark_init_d[ci]]
        s=0
        d_max=0.0
        while 1:
            s+=1
            index2=bound_data_d[n]
            r=int(index2/shape_d[1])
            c=index2%shape_d[1]
            d_cur=sqrt(float(pow(r-y,2)+pow(c-x,2)))
            if d_cur>d_max:
                d_max=d_cur
            if s==nz_s_d[bound_mark_init_d[ci]]:
                break
            n+=1
        dist_data_d[ci]=d_max

@cuda.jit
def find_ba_max_pd(nz_ba_d,nz_ba_size_d,bound_data_ordered_d,ba_max_pd_d,scaled_shape):
    """Finds the maximum perpendicular distance for each abstract segment."""
    
    ci=cuda.grid(1)
    if ci<len(nz_ba_d)-1:
        if nz_ba_d[ci]+1==nz_ba_d[ci+1]:
            return
        a=bound_data_ordered_d[nz_ba_d[ci]-1]
        b=bound_data_ordered_d[nz_ba_d[ci+1]-1]
        a0=int(a/scaled_shape[1])
        a1=a%scaled_shape[1]
        b0=int(b/scaled_shape[1])
        b1=b%scaled_shape[1]
        #threshold=bound_threshold_d[nz_ba_d[ci]]
        #threshold=cmath.sqrt(float(pow(b0-a0,2)+pow(b1-a1,2))).real/8
        n=nz_ba_d[ci]
        i=0
        pd_max=0.0
        pd_max_i=n
        while 1:
            if n==nz_ba_d[ci+1]:
                break
            c=bound_data_ordered_d[n-1]
            c0=int(c/scaled_shape[1])
            c1=c%scaled_shape[1]
            pd=abs((a1-b1)*(a0-c0)-(a0-b0)*(a1-c1))/cmath.sqrt(pow(a1-b1,2)+pow(a0-b0,2)).real

            if pd>pd_max:
                pd_max=pd
                pd_max_i=n
            n+=1
        ba_max_pd_d[ci][0]=pd_max
        ba_max_pd_d[ci][1]=pd_max_i
        """
        if pd_max>threshold:
            bound_abstract_d[pd_max_i]=pd_max_i
            seed_=bound_mark_d[nz_ba_d[ci]-1]
            #ba_size_d[seed_]+=1
            cuda.atomic.add(ba_size_d,seed_,1)
        """

@cuda.jit
def find_next_ba(ba_max_pd_d,nz_ba_size_d,nz_ba_size_cum_d,bound_abstract_d,ba_threshold,pd):
    """Finds one abstract pixel per boundary / blob."""
    
    ci=cuda.grid(1)
    if ci<len(nz_ba_size_d):
        n=nz_ba_size_cum_d[ci]
        s=1
        d_max=0.0
        d_max_i=n
        while 1:
            if ba_max_pd_d[n][0]>d_max:
                d_max=ba_max_pd_d[n][0]
                d_max_i=int(ba_max_pd_d[n][1])
            if s==nz_ba_size_d[ci]-1:
                break
            s+=1
            n+=1
        cuda.syncthreads()
        if d_max>ba_threshold:
            bound_abstract_d[d_max_i-1]=d_max_i
            nz_ba_size_d[ci]+=1
            pd[0]=d_max

@cuda.jit
def find_next_ba_all(ba_max_pd_d,nz_ba_size_d,nz_ba_size_cum_d,bound_abstract_d,ba_threshold):
    """Finds one abstract pixel for each abstract segment in a boundary / blob."""
    
    ci=cuda.grid(1)
    if ci<len(nz_ba_size_d):
        n=nz_ba_size_cum_d[ci]
        s=1
        ba_added=0
        #d_max=0.0
        #d_max_i=n
        while 1:
            if ba_max_pd_d[n][0]>ba_threshold:
                #d_max=ba_max_pd_d[n][0]
                d_max_i=int(ba_max_pd_d[n][1])
                bound_abstract_d[d_max_i-1]=d_max_i
                ba_added+=1
            if s==nz_ba_size_d[ci]-1:
                break
            s+=1
            n+=1
        cuda.syncthreads()
        cuda.atomic.add(nz_ba_size_d,ci,ba_added)
        #if d_max>ba_threshold:
        #    bound_abstract_d[d_max_i]=d_max_i
        #    nz_ba_size_d[ci]+=1

@cuda.jit
def remove_abstract(nz_ba_size_d,nz_ba_size_cum_d,nz_ba_d,ba_sign_d,new_bound_mark_init_d,pos_neg):
    ci=cuda.grid(1)
    if ci<len(nz_ba_size_d):
        n=nz_ba_size_cum_d[ci]
        s=1
        pos=0
        neg=0
        found_first=False
        first_pos=0
        while 1:
            if ba_sign_d[n]==1:
                pos+=1
            elif ba_sign_d[n]==-1:
                neg+=1
            if not found_first and ba_sign_d[n]!=pos_neg:
                found_first=True
                first_pos=n
                
            if s==nz_ba_size_d[ci]-1:
                break
            s+=1
            n+=1
        
        if pos_neg==1:
            if neg<3:
                return
        elif pos_neg==-1:
            if pos<3:
                return
        n=nz_ba_size_cum_d[ci]
        s=1
            
        ba_removed=0
        #d_max=0.0
        #d_max_i=n
        while 1:
            if ba_sign_d[n]==pos_neg:
                #d_max=ba_max_pd_d[n][0]                
                nz_ba_d[n]=0
                ba_removed+=1
            else:
                new_bound_mark_init_d[n]=ci
                
            if s==nz_ba_size_d[ci]-1:
                nz_ba_d[n+1]=nz_ba_d[first_pos]
                new_bound_mark_init_d[n+1]=new_bound_mark_init_d[first_pos]
                break
            s+=1
            n+=1
        cuda.syncthreads()
        cuda.atomic.add(nz_ba_size_d,ci,-ba_removed)

@cuda.jit
def find_change(nz_ba_size_d,nz_ba_size_cum_d,nz_ba_d,bound_data_ordered_d,scaled_shape,ba_change_d,ba_sign_d):
    """Finds signatures for the current layer of abstraction."""
    
    ci=cuda.grid(1)
    if ci<len(nz_ba_size_d):
        n=nz_ba_size_cum_d[ci]
        s=nz_ba_size_d[ci]-2
        a=bound_data_ordered_d[nz_ba_d[n+s]-1]
        b=bound_data_ordered_d[nz_ba_d[n]-1]
        c=bound_data_ordered_d[nz_ba_d[n+1]-1]
        a0=int(a/scaled_shape[1])
        a1=a%scaled_shape[1]
        b0=int(b/scaled_shape[1])
        b1=b%scaled_shape[1]
        c0=int(c/scaled_shape[1])
        c1=c%scaled_shape[1]
            
        angle_pre=math.atan2(np.float64(a1-b1),np.float64(a0-b0))*180/math.pi
        angle_cur=math.atan2(np.float64(b1-c1),np.float64(b0-c0))*180/math.pi
        diff=angle_diff(angle_pre,angle_cur)
        ba_change_d[n]=diff
        if diff<0:
            ba_sign_d[n]=-1
        elif diff>0:
            ba_sign_d[n]=1
        n=nz_ba_size_cum_d[ci]+1
        s=0
        while 1:
            if s==nz_ba_size_d[ci]-2:
                break
            a=bound_data_ordered_d[nz_ba_d[n+s-1]-1]
            b=bound_data_ordered_d[nz_ba_d[n+s]-1]
            c=bound_data_ordered_d[nz_ba_d[n+s+1]-1]
            a0=int(a/scaled_shape[1])
            a1=a%scaled_shape[1]
            b0=int(b/scaled_shape[1])
            b1=b%scaled_shape[1]
            c0=int(c/scaled_shape[1])
            c1=c%scaled_shape[1]
            
            angle_pre=math.atan2(np.float64(a1-b1),np.float64(a0-b0))*180/math.pi
            angle_cur=math.atan2(np.float64(b1-c1),np.float64(b0-c0))*180/math.pi
            diff=angle_diff(angle_pre,angle_cur)
            ba_change_d[n+s]=diff
            if diff<0:
                ba_sign_d[n+s]=-1
            elif diff>0:
                ba_sign_d[n+s]=1
            s+=1

@cuda.jit
def get_pixels_from_indices(nz_ba_d,bound_data_ordered_d,new_bound_data_d):
    ci=cuda.grid(1)
    if ci<len(nz_ba_d):
        new_bound_data_d[ci]=bound_data_ordered_d[nz_ba_d[ci]-1]

@cuda.jit(device=True)
def angle_diff(a,b):
    """Finds the change in direction between angles 'a' and 'b'."""
    
    diff=b-a
    if diff>180:
        diff=diff-360
    elif diff<-180:
        diff=diff+360
    return diff


class Abstract:
    def __init__(self,bound_data_ordered_h,n_bounds,bound_abstract_h,shape_h,is_closed):
        # Inputs
        self.bound_data_ordered_h=bound_data_ordered_h
        self.n_bounds=n_bounds
        self.init_bound_abstract_h=bound_abstract_h
        self.bound_abstract_h=bound_abstract_h
        self.shape_h=shape_h
        self.is_closed=is_closed
        self.pd=np.full(1,np.inf,dtype=np.float32)
        self.pd_change=0
        if is_closed:
            self.ba_size=np.full(n_bounds,3,dtype=np.int32)
            self.layer_count=3
        else:
            self.ba_size=np.full(n_bounds,2,dtype=np.int32)
            self.layer_count=2
 
        self.nz_ba=get_non_zeros(bound_abstract_h)
        self.ba_sign=[]
        
        #self.ba_size_travel=self.ba_size
        #self.nz_ba_travel=self.nz_ba
        #self.ba_sign_travel=self.ba_sign
        self.new_bound_data=[]
        self.new_nz_s=[]
        
    def do_abstract_all(self,ba_threshold):
        """Finds all layers of abstraction."""
        
        bound_data_ordered_d=cuda.to_device(self.bound_data_ordered_h)
        bound_abstract_d=cuda.to_device(self.bound_abstract_h)
        shape_d=cuda.to_device(self.shape_h)
        nz_ba=get_non_zeros(self.bound_abstract_h)
        nz_ba_d=cuda.to_device(nz_ba)

            
        ba_size_d=cuda.to_device(self.ba_size)
        ba_size_cum_=np.cumsum(self.ba_size)
        ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
        ba_size_cum_d=cuda.to_device(ba_size_cum)
        
        ba_max_pd=np.zeros([len(nz_ba),2],np.float64)
        ba_max_pd_d=cuda.to_device(ba_max_pd)
        ba_size_cum_old=ba_size_cum_[-1]

        while 1:
            find_ba_max_pd[math.ceil(len(nz_ba)/32),32](nz_ba_d,ba_size_d,bound_data_ordered_d,ba_max_pd_d,shape_d)
            cuda.synchronize()
            find_next_ba_all[math.ceil(len(self.ba_size)/32),32](ba_max_pd_d,ba_size_d,ba_size_cum_d,bound_abstract_d,ba_threshold)
            cuda.synchronize()


            bound_abstract_h=bound_abstract_d.copy_to_host()
            nz_ba=get_non_zeros(bound_abstract_h)
            nz_ba_d=cuda.to_device(nz_ba)
        
            ba_max_pd=np.zeros([len(nz_ba),2],np.float64)
            ba_max_pd_d=cuda.to_device(ba_max_pd)

            ba_size=ba_size_d.copy_to_host()
            ba_size_cum_=np.cumsum(ba_size)
            ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
            ba_size_cum_d=cuda.to_device(ba_size_cum)
            
            if ba_size_cum_[-1]==ba_size_cum_old:
                ba_change=np.zeros([len(nz_ba)],dtype=np.float64)
                ba_change_d=cuda.to_device(ba_change)
                ba_sign=np.zeros([len(nz_ba)],dtype=np.int32)
                ba_sign_d=cuda.to_device(ba_sign)
                find_change[math.ceil(len(nz_ba)/32),32](ba_size_d,ba_size_cum_d,nz_ba_d,bound_data_ordered_d,shape_d,ba_change_d,ba_sign_d)
                cuda.synchronize()
                ba_change_h=ba_change_d.copy_to_host()
                ba_sign_h=ba_sign_d.copy_to_host()
                print("count=",self.layer_count,ba_size_cum_[-1])
                print("abstraction complete.")
                break
            else:
                ba_size_cum_old=ba_size_cum_[-1]
                self.layer_count+=1
        self.ba_size=ba_size
        self.nz_ba=nz_ba
        self.ba_sign_h=ba_sign_h
        self.bound_abstract_h=bound_abstract_h
        
        self.ba_size_travel=ba_size
        self.nz_ba_travel=nz_ba
        self.ba_sign_travel=ba_sign_h
        
    def do_abstract_one(self,ba_threshold):
        """Finds one layer of abstraction."""
        
        is_final=False
        bound_data_ordered_d=cuda.to_device(self.bound_data_ordered_h)
        bound_abstract_d=cuda.to_device(self.bound_abstract_h)
        shape_d=cuda.to_device(self.shape_h)
        nz_ba=get_non_zeros(self.bound_abstract_h)
        nz_ba_d=cuda.to_device(nz_ba)
            
        ba_size_d=cuda.to_device(self.ba_size)
        ba_size_cum_=np.cumsum(self.ba_size)
        ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
        ba_size_cum_d=cuda.to_device(ba_size_cum)
        
        ba_max_pd=np.zeros([len(nz_ba),2],np.float64)
        ba_max_pd_d=cuda.to_device(ba_max_pd)
        ba_size_cum_old=ba_size_cum_[-1]
        pd=np.zeros(1,dtype=np.float32)
        pd_d=cuda.to_device(pd)
        while 1:
            find_ba_max_pd[math.ceil(len(nz_ba)/32),32](nz_ba_d,ba_size_d,bound_data_ordered_d,ba_max_pd_d,shape_d)
            cuda.synchronize()
            find_next_ba[math.ceil(len(self.ba_size)/32),32](ba_max_pd_d,ba_size_d,ba_size_cum_d,bound_abstract_d,ba_threshold,pd_d)
            cuda.synchronize()

            bound_abstract_h=bound_abstract_d.copy_to_host()
            nz_ba=get_non_zeros(bound_abstract_h)
            nz_ba_d=cuda.to_device(nz_ba)
        
            ba_max_pd=np.zeros([len(nz_ba),2],np.float64)
            ba_max_pd_d=cuda.to_device(ba_max_pd)

            ba_size=ba_size_d.copy_to_host()
            ba_size_cum_=np.cumsum(ba_size)
            ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
            ba_size_cum_d=cuda.to_device(ba_size_cum)
            
            if ba_size_cum_[-1]==ba_size_cum_old:
                is_final=True
                print("abstraction complete.")
            else:
                self.layer_count+=1
            ba_change=np.zeros([len(nz_ba)],dtype=np.float64)
            ba_change_d=cuda.to_device(ba_change)
            ba_sign=np.zeros([len(nz_ba)],dtype=np.int32)
            ba_sign_d=cuda.to_device(ba_sign)
            find_change[math.ceil(len(nz_ba)/32),32](ba_size_d,ba_size_cum_d,nz_ba_d,bound_data_ordered_d,shape_d,ba_change_d,ba_sign_d)
            cuda.synchronize()
            ba_change_h=ba_change_d.copy_to_host()
            ba_sign_h=ba_sign_d.copy_to_host()
            #print("count=",self.layer_count,ba_size_cum_[-1])
        
            ba_size_cum_old=ba_size_cum_[-1]
            break
        self.ba_size=ba_size
        self.nz_ba=nz_ba
        self.ba_sign_h=ba_sign_h
        self.bound_abstract_h=bound_abstract_h
        
        self.ba_size_travel=ba_size
        self.nz_ba_travel=nz_ba
        self.ba_sign_travel=ba_sign_h
        
        return is_final
    
    def travel_abstract(self,pos_neg):
        bound_data_ordered_d=cuda.to_device(self.bound_data_ordered_h)
        shape_d=cuda.to_device(self.shape_h)
        
        ba_size_d=cuda.to_device(self.ba_size_travel)
        ba_size_cum_=np.cumsum(self.ba_size_travel)
        ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
        ba_size_cum_d=cuda.to_device(ba_size_cum)
        
        nz_ba_d=cuda.to_device(self.nz_ba_travel)
        ba_sign_d=cuda.to_device(self.ba_sign_travel)
        new_bound_mark_init_d=cuda.to_device(np.full(len(self.nz_ba_travel),-1,dtype=np.int32))
        remove_abstract[math.ceil(len(self.nz_ba_travel)/32),32](ba_size_d,ba_size_cum_d,nz_ba_d,ba_sign_d,new_bound_mark_init_d,pos_neg)
        cuda.synchronize()
        nz_ba_h=get_non_zeros(nz_ba_d.copy_to_host())
        nz_ba_d=cuda.to_device(nz_ba_h)
        
        ba_size_h=ba_size_d.copy_to_host()
        ba_size_cum_=np.cumsum(ba_size_h)
        ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
        ba_size_cum_d=cuda.to_device(ba_size_cum)
        
        ba_change=np.zeros([len(nz_ba_h)],dtype=np.float64)
        ba_change_d=cuda.to_device(ba_change)
        ba_sign=np.zeros([len(nz_ba_h)],dtype=np.int32)
        ba_sign_d=cuda.to_device(ba_sign)
        find_change[math.ceil(len(nz_ba_h)/32),32](ba_size_d,ba_size_cum_d,nz_ba_d,bound_data_ordered_d,shape_d,ba_change_d,ba_sign_d)
        cuda.synchronize()
        print(nz_ba_h)
        self.ba_size_travel=ba_size_h
        self.nz_ba_travel=nz_ba_h
        self.ba_sign_travel=ba_sign_d.copy_to_host()
    
    def abstract_to_concrete(self,pos_neg):
        bound_data_ordered_d=cuda.to_device(self.bound_data_ordered_h)
        shape_d=cuda.to_device(self.shape_h)
        
        ba_size_d=cuda.to_device(self.ba_size_travel)
        ba_size_cum_=np.cumsum(self.ba_size_travel)
        ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
        ba_size_cum_d=cuda.to_device(ba_size_cum)
        
        nz_ba_d=cuda.to_device(self.nz_ba_travel)
        new_bound_mark_init_d=cuda.to_device(np.full(len(self.nz_ba_travel),-1,dtype=np.int32))
        ba_sign_d=cuda.to_device(self.ba_sign_travel)
        remove_abstract[math.ceil(len(self.nz_ba_travel)/32),32](ba_size_d,ba_size_cum_d,nz_ba_d,ba_sign_d,new_bound_mark_init_d,pos_neg)
        cuda.synchronize()
        nz_ba_h=get_non_zeros(nz_ba_d.copy_to_host())
        nz_ba_d=cuda.to_device(nz_ba_h)
        ba_size_h=ba_size_d.copy_to_host()
        ba_size_cum_=np.cumsum(ba_size_h)
        ba_size_cum=np.delete(np.insert(ba_size_cum_,0,0),-1)
        ba_size_cum_d=cuda.to_device(ba_size_cum)
        new_bound_data=np.zeros(len(nz_ba_h),dtype=np.int32)
        new_bound_data_d=cuda.to_device(new_bound_data)
        get_pixels_from_indices[math.ceil(len(new_bound_data)/32),32](nz_ba_d,bound_data_ordered_d,new_bound_data_d)
        cuda.synchronize()
        #print(ba_size_h)
        #print(nz_ba_h)
        dist_data_d=cuda.device_array([ba_size_cum_[-1],1],dtype=np.float64)
        get_dist_data_init2[math.ceil(ba_size_cum_[-1]/32),32](ba_size_d,ba_size_cum_d,new_bound_data_d,new_bound_mark_init_d,shape_d,dist_data_d)
        cuda.synchronize()
        
        max_dist_i_d=cuda.device_array([len(ba_size_h),2],dtype=np.int32)
        max_dist_d=cuda.device_array([len(ba_size_h),1],dtype=np.float64)
        get_max_dist3[math.ceil(len(ba_size_d)/32),32](ba_size_cum_d,ba_size_d,new_bound_data_d,dist_data_d,max_dist_i_d,max_dist_d)
        cuda.synchronize()
        
        self.new_bound_mark_init_h=get_non_zeros(new_bound_mark_init_d.copy_to_host(),-1)
        self.new_bound_data=new_bound_data_d.copy_to_host()
        self.new_nz_s=ba_size_h
        #print(self.new_bound_mark_init_h)
        
    
    def get_travel_abstract(self):
        return self.nz_ba_travel
        
    def get_travel_size(self):
        return self.ba_size_travel
        
    def get_travel_sign(self):
        return self.ba_sign_travel
    
    def get_sign(self):
        """Returns signatures for the current layer of abstraction."""
        return self.ba_sign_h
    
    def reset_abstract(self):
        """Resets abstraction."""
        if self.is_closed:
            self.ba_size=np.full(self.n_bounds,3,dtype=np.int32)
            self.layer_count=3
        else:
            self.ba_size=np.full(self.n_bounds,2,dtype=np.int32)
            self.layer_count=2
        self.bound_abstract_h=self.init_bound_abstract_h
        self.nz_ba=get_non_zeros(self.init_bound_abstract_h)
        self.ba_sign_h=[]
        
    def get_abstract(self):
        """Returns the current layer of abstraction."""
        return self.nz_ba
    
    def get_pd_change(self):
        """Returns change in perpendicular distance for the current layer of abstraction."""
        return self.pd_change
        
    def get_pd(self):
        """Returns perpendicular distance for the current layer of abstraction."""
        return self.pd
        
    def get_abstract_size(self):
        """Returns the number of abstract pixels for each boundary."""
        return self.ba_size

The main script pyrebel_main_abs_to_con2.py being

# Copyright (C) 2024-2025 Nithin PS.
# This file is part of Pyrebel.
#
# Pyrebel is free software: you can redistribute it and/or modify it under the terms of 
# the GNU General Public License as published by the Free Software Foundation, either 
# version 3 of the License, or (at your option) any later version.
#
# Pyrebel is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
# PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with Pyrebel.
# If not, see <https://www.gnu.org/licenses/>.
#

import numpy as np
from PIL import Image
import math,argparse,time
from pyrebel.preprocess import Preprocess
from pyrebel.abstract import Abstract
from pyrebel.utils import *

# This is a demo of abstraction of boundaries of blobs in the image.
# When you run this program the output is written to 'output.png'.
# The boundaries of blobs is in grey color and the abstract points are in white.
# The level of abstraction can be changed by giving '--threshold' argument. 
# The default value of threshold is 5.

parser=argparse.ArgumentParser()
parser.add_argument("-i","--input",help="Input file name.")
parser.add_argument("-t","--abs_threshold",help="Threshold of abstraction.")
args=parser.parse_args()
if args.abs_threshold:
    abs_threshold=int(args.abs_threshold)
else:
    abs_threshold=5
        
while 1:
    start_time=time.time()    
    if args.input:
        img_array=np.array(Image.open(args.input).convert('L'))
    else:
        print("No input file.")
    
    # Initialize the preprocessing class.
    pre=Preprocess(img_array)
    
    # Set the minimum and maximum size of boundaries of blobs in the image. Defaults to a minimum of 64.
    pre.set_bound_size(32)
    
    # Perform the preprocessing to get 1D array containing boundaries of blobs in the image.
    pre.preprocess_image()
    
    # Get the 1D array.
    bound_data=pre.get_bound_data()
    bound_data_d=cuda.to_device(bound_data)
    bound_mark=pre.get_bound_mark()
    bound_mark_d=cuda.to_device(bound_mark)
    
    # Initialize the abstract boundary.
    init_bound_abstract=pre.get_init_abstract()
    
    # Get 1D array containing size of boundaries of blobs in the array.
    bound_size=pre.get_bound_size()
    print("len(bound_data)=",len(bound_data))
    
    shape_d=cuda.to_device(img_array.shape)
    scaled_shape=[img_array.shape[0]*3,img_array.shape[1]*3]
    scaled_shape_d=cuda.to_device(scaled_shape)
    
    bound_data_orig=np.zeros(len(bound_data),dtype=np.int32)
    bound_data_orig_d=cuda.to_device(bound_data_orig)
    
    scale_down_pixels[len(bound_data),1](bound_data_d,bound_data_orig_d,scaled_shape_d,shape_d,3)
    cuda.synchronize()
    
    # Initialize the abstraction class
    abs=Abstract(bound_data,len(bound_size),init_bound_abstract,scaled_shape,True)
    
    # Get the abstract points
    abs.do_abstract_all(abs_threshold)
    abs_points=abs.get_abstract()
    abs_size=abs.get_abstract_size()
    abs_size_d=cuda.to_device(abs_size)
    #print(abs_size)
    abs_draw=decrement_by_one_cuda(abs_points)
    abs_draw_d=cuda.to_device(abs_draw)
    abs_sign=abs.get_sign()
    #print(abs_sign)
    abs.abstract_to_concrete(1)
    new_bound_data=abs.new_bound_data
    new_nz_s=abs.new_nz_s
    new_bound_data_d=cuda.to_device(new_bound_data)
    
    out_image=np.zeros(scaled_shape,dtype=np.uint8)
    out_image_d=cuda.to_device(out_image)

    # Draw the boundaries to the output image.
    draw_pixels_cuda(new_bound_data_d,200,out_image_d)
    out_image_h=out_image_d.copy_to_host()
    
    # Save the output to disk.
    Image.fromarray(out_image_h).convert('RGB').save("output.png")
    print("Finished in total of",time.time()-start_time,"seconds at",float(1/(time.time()-start_time)),"fps.")
    break

Thanks,

Issue is now resolved.