@Oyibo
I solved it in the following way.
1. Create a C DLL which exposes the functions that should be called from Numba
//FileName Wrapper_test.c
// Compile using clang -shared -O3 -IC:\Python\include -LC:\Python\libs Wrapper_test.c -o Wrapper_test.dll
#include "Python.h"
#ifdef _WIN32
# define API __declspec(dllexport)
#else
# define API
#endif
/* Pair of plain ints. Not referenced by any other code in this listing. */
struct bint {
int n;
int derivative;
};
/* Complex number stored as two doubles, real part first. It is the by-value
   argument and return type of the cy_wofz function pointer. */
struct complex128 {
double real;
double imag;
};
/*
 * Look up a C-level function exported by a Cython module.
 *
 * Imports `module_name`, reads its `__pyx_capi__` attribute, fetches the
 * entry stored under `function_name` and, if that entry is a named capsule,
 * returns the pointer held by the capsule.
 *
 * Returns NULL when the import fails, when `__pyx_capi__` is missing, when
 * the entry does not exist (a ValueError is set in that case), or when the
 * capsule name cannot be obtained.
 * Every temporary Python reference acquired here is released before return.
 */
static void *import_cython_function(const char *module_name, const char *function_name)
{
    PyObject *mod = PyImport_ImportModule(module_name);
    if (!mod) {
        return NULL;
    }

    PyObject *capi_obj = PyObject_GetAttrString(mod, "__pyx_capi__");
    Py_DECREF(mod); /* the module object itself is no longer needed */
    if (!capi_obj) {
        return NULL;
    }

    PyObject *capsule = PyMapping_GetItemString(capi_obj, (char *)function_name);
    Py_DECREF(capi_obj);
    if (!capsule) {
        /* Replace the lookup error with a message naming both identifiers. */
        PyErr_Clear();
        PyErr_Format(PyExc_ValueError,
                     "No function '%s' found in __pyx_capi__ of '%s'",
                     function_name, module_name);
        return NULL;
    }

    /* 2.7+ => Cython exports a PyCapsule; its own name is needed to unwrap it. */
    const char *name = PyCapsule_GetName(capsule);
    void *address = name ? PyCapsule_GetPointer(capsule, name) : NULL;

    Py_DECREF(capsule);
    return address;
}
///////////End of standard declarations////////
///////////Define function pointers////////////
/* Targets assigned by init(); both are null until init() has run. */
double (*cy_voigt_profile)(double, double, double, int) = NULL;
struct complex128 (*cy_wofz)(struct complex128, int) = NULL;
//////////Init has to be called before any other function is called//////
/*
 * Resolve "voigt_profile" and "wofz" from scipy.special.cython_special and
 * store their addresses in cy_voigt_profile and cy_wofz.
 *
 * The lookup results are not checked here: if a lookup returns NULL the
 * corresponding pointer is simply set to NULL.
 */
API void init(void)
{
    const char *const provider = "scipy.special.cython_special";

    cy_voigt_profile =
        (double (*)(double, double, double, int))import_cython_function(provider, "voigt_profile");
    cy_wofz =
        (struct complex128 (*)(struct complex128, int))import_cython_function(provider, "wofz");
}
//////////Numba compatible function//////////
/*
 * Scalar-only entry point: forwards the three doubles to the function behind
 * cy_voigt_profile and returns its result unchanged.
 * The pointer must already have been set by init().
 */
API double nb_voigt_profile(double in1, double in2, double in3)
{
    /* Trailing int is fixed to 1 -- presumably Cython's skip-dispatch flag; confirm. */
    const double result = cy_voigt_profile(in1, in2, in3, 1);
    return result;
}
API void nb_wofz(double in1_real,double in1_imag,double *out1_real,double *out1_imag)
{
struct complex128 c_in1;
c_in1.real = in1_real;
c_in1.imag = in1_imag;
struct complex128 cout1 = cy_wofz(c_in1,1);
out1_real[0] = cout1.real;
out1_imag[0] = cout1.imag;
}
2. Wrap the functions in Numba
import numba as nb
from numba.core import types, typing
from llvmlite import binding
from numba import types
from numba.extending import intrinsic
from numba.core import cgutils
@intrinsic
def val_to_ptr(typingctx, data):
    """Intrinsic: store a value via ``cgutils.alloca_once_value`` and return a pointer to it.

    The result type is ``CPointer`` of the type derived from ``data`` through
    ``nb.typeof(data).instance_type``.
    NOTE(review): the storage presumably lives only as long as the calling
    compiled function -- confirm before letting the pointer escape.
    """
    value_type = nb.typeof(data).instance_type

    def codegen(context, builder, signature, args):
        (value,) = args
        return cgutils.alloca_once_value(builder, value)

    return types.CPointer(value_type)(value_type), codegen
@intrinsic
def ptr_to_val(typingctx, data):
    """Intrinsic: load and return the value a pointer refers to.

    ``data`` is the pointer's type; the result type is ``data.dtype``.
    """
    pointee = data.dtype

    def codegen(context, builder, signature, args):
        (pointer,) = args
        return builder.load(pointer)

    return pointee(types.CPointer(pointee)), codegen
#####################################################################
# Load the wrapper DLL so that its exported symbols can be resolved by name
# from JIT-compiled code.
binding.load_library_permanently('Wrapper_test.dll')

# void init(void)
c_sig = types.void()
nb_init = types.ExternalFunction('init', c_sig)

# double nb_voigt_profile(double, double, double)
c_sig = types.double(types.double, types.double, types.double)
nb_voigt_profile = types.ExternalFunction('nb_voigt_profile', c_sig)

# void nb_wofz(double, double, double *, double *)
c_sig = types.void(types.double, types.double, types.CPointer(types.double), types.CPointer(types.double))
nb_wofz = types.ExternalFunction('nb_wofz', c_sig)

# void nb_wofz_c(double *, double *, double *, double *)
# NOTE(review): confirm that the DLL actually exports nb_wofz_c before using
# this declaration; it is not called anywhere in this snippet.
c_sig = types.void(types.CPointer(types.double), types.CPointer(types.double), types.CPointer(types.double), types.CPointer(types.double))
nb_wofz_c = types.ExternalFunction('nb_wofz_c', c_sig)

########Has to be called first#####
# An ExternalFunction is a Numba *type*. Calling it from interpreted Python
# with no arguments only builds a signature object (exactly like types.void()
# above) and never runs the C function, so the DLL's function pointers would
# stay unset. The call therefore has to happen inside compiled code.
@nb.njit
def _run_init():
    nb_init()


_run_init()
###################################
@nb.njit(parallel=False)
def numba_wofz(in1):
    """Call the external ``nb_wofz`` for one complex input and return a complex result.

    Two double slots are created for the outputs, ``nb_wofz`` fills them from
    the real and imaginary parts of ``in1``, and the two values are combined
    as ``real + 1j * imag``.
    """
    real_slot = val_to_ptr(nb.double(0.))
    imag_slot = val_to_ptr(nb.double(0.))

    nb_wofz(in1.real, in1.imag, real_slot, imag_slot)

    real_part = ptr_to_val(real_slot)
    imag_part = ptr_to_val(imag_slot)
    return real_part + 1j * imag_part
With this method, functions that use complex numbers can be wrapped, and caching is possible. Of course, don’t forget to call the init method first.
It would be very interesting to arrive at a solution where global variables (in this case, function pointer addresses) can be set with some simple Python code, so that these functions can be wrapped for Numba in a more direct way.
Returning structures is platform-dependent, although an implementation for one major platform (x86) would be very interesting.
For example, the bitcode from Clang looks like this:
source_filename = "Wrapper_test.c"
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc19.35.32216"
%struct.complex128 = type { double, double }
$"??_C@_04IIOAPFIG@wofz?$AA@" = comdat any
@cy_wofz = dso_local local_unnamed_addr global void (%struct.complex128*, %struct.complex128*, i32)* null, align 8
@"??_C@_04IIOAPFIG@wofz?$AA@" = linkonce_odr dso_local unnamed_addr constant [5 x i8] c"wofz\00", comdat, align 1
%152 = tail call fastcc i8* @import_cython_function(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @"??_C@_04IIOAPFIG@wofz?$AA@", i64 0, i64 0))
store i8* %152, i8** bitcast (void (%struct.complex128*, %struct.complex128*, i32)** @cy_wofz to i8**), align 8, !tbaa !5
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: nounwind uwtable
; nb_wofz_c as lowered for this target: the struct result is written through
; the hidden sret pointer %0, and the struct argument arrives as pointer %1.
define dso_local dllexport void @nb_wofz_c(%struct.complex128* noalias sret(%struct.complex128) align 8 %0, %struct.complex128* nocapture noundef readonly %1) local_unnamed_addr #0 {
; Local 16-byte temporary that will hold a copy of the argument.
%3 = alloca %struct.complex128, align 8
; Fetch the current target of the global function pointer @cy_wofz.
%4 = load void (%struct.complex128*, %struct.complex128*, i32)*, void (%struct.complex128*, %struct.complex128*, i32)** @cy_wofz, align 8, !tbaa !5
%5 = bitcast %struct.complex128* %3 to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %5) #4
%6 = bitcast %struct.complex128* %1 to i8*
; Copy the caller's struct (%1) into the local temporary (%3).
call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(16) %5, i8* noundef nonnull align 8 dereferenceable(16) %6, i64 16, i1 false), !tbaa.struct !12
; Indirect call: result goes straight into %0, the argument is the copy in %3,
; and the trailing i32 is the constant 1.
call void %4(%struct.complex128* sret(%struct.complex128) align 8 %0, %struct.complex128* noundef nonnull %3, i32 noundef 1) #4
call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %5) #4
ret void
}