@milton yeah sure
So here is the function I am trying to run in a pure C++ runtime:
@nb.cfunc(
    nb.types.void(nb.types.CPointer(nb.types.intc), nb.types.intc),
    nopython=True,
)
def my_cfunc(array_ptr, m):
    """Add 1, in place, to each of the ``m`` C ints starting at ``array_ptr``."""
    # Wrap the raw pointer as a 1-D array of length m; the in-place add below
    # then writes through to the caller's buffer.
    elements = nb.carray(array_ptr, (m,))
    elements += 1
So my question is: Is the C function pointed to by my_cfunc.address completely free from Python? Or does it still need to do things like converting Python objects to pure C objects?
Full generated LLVM IR:
; ModuleID = 'my_cfunc'
source_filename = "<string>"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-grtev4-linux-gnu"
; NumbaEnv-prefixed global for my_cfunc, initialised to null here
; (presumably the Numba environment pointer -- confirm against Numba docs).
; Not referenced by any function body in this module.
@_ZN08NumbaEnv8__main__8my_cfuncB2v1B52c8tJTIeFIjxB2IKSgI4CrvQClUYkACQB1EiFSRSVgFmaAA_3d_3dE8int32_2ai = common local_unnamed_addr global ptr null
; Python exception objects defined outside this module; they are referenced
; only from the error path of the cfunc wrapper below.
@PyExc_StopIteration = external global i8
@PyExc_SystemError = external global i8
; Message passed to PyErr_SetString for an unrecognised status code.
@".const.unknown error when calling native function" = internal constant [43 x i8] c"unknown error when calling native function\00"
; String handed to PyUnicode_FromString / PyErr_WriteUnraisable on the error path.
@".const.<numba.core.cpu.CPUContext object at 0x1266fc441610>" = internal constant [53 x i8] c"<numba.core.cpu.CPUContext object at 0x1266fc441610>\00"
; NumbaEnv-prefixed global for the carray implementation; null-initialised and
; not referenced by any function body in this module.
@_ZN08NumbaEnv5numba2np8arrayobj11impl_carray12_3clocals_3e4implB2v2B40c8tJTIeFIjxB2IKSgI4CrvQClcaMQ5hEUQmYpQkAE8int32_2a8UniTupleIiLi1EE27omitted_28default_3dNone_29 = common local_unnamed_addr global ptr null
; Native body of my_cfunc.
;   %retptr        - slot that receives the return value (null is stored on exit)
;   %excinfo       - exception-info pointer; marked readnone, never touched here
;   %arg.array_ptr - base of the i32 buffer to update in place
;   %arg.m         - element count (signed 32-bit)
; Adds 1 to each of the first m i32 elements and returns status 0 (the only
; `ret` in this function). Nothing is done to the buffer when m <= 0.
; The only calls made are NRT_incref/NRT_decref with a null argument, which
; return immediately on their null check; no Python C-API symbol is referenced.
define noundef range(i32 -1, -2) i32 @_ZN8__main__8my_cfuncB2v1B52c8tJTIeFIjxB2IKSgI4CrvQClUYkACQB1EiFSRSVgFmaAA_3d_3dE8int32_2ai(ptr noalias writeonly captures(none) %retptr, ptr noalias readnone captures(none) %excinfo, ptr captures(none) %arg.array_ptr, i32 %arg.m) local_unnamed_addr {
B0.endif:
%arg.m.fr = freeze i32 %arg.m
; %.12.i is m sign-extended to 64 bits; it is the trip count for every loop below.
%.12.i = sext i32 %arg.m.fr to i64
tail call void @NRT_incref(ptr null)
; Skip all work when m <= 0.
%.8429 = icmp sgt i32 %arg.m.fr, 0
br i1 %.8429, label %for.body.lr.ph, label %common.ret
for.body.lr.ph: ; preds = %B0.endif
; m == 1 is special-cased: a single scalar update in for.end.loopexit.
%0 = trunc i64 %.12.i to i32
%.86.not = icmp eq i32 %0, 1
br i1 %.86.not, label %for.end.loopexit, label %iter.check
iter.check: ; preds = %for.body.lr.ph
; Fewer than 4 elements: go straight to the scalar loop.
%1 = trunc i64 %.12.i to i32
%min.iters.check = icmp ult i32 %1, 4
br i1 %min.iters.check, label %for.body.us.preheader, label %vector.main.loop.iter.check
vector.main.loop.iter.check: ; preds = %iter.check
; Fewer than 32 elements: skip the 32-wide main loop and use the 4-wide loop.
%2 = trunc i64 %.12.i to i32
%min.iters.check3 = icmp ult i32 %2, 32
br i1 %min.iters.check3, label %vec.epilog.ph, label %vector.ph
vector.ph: ; preds = %vector.main.loop.iter.check
; %n.vec = m rounded down to a multiple of 32 (elements handled by the main loop).
%n.vec = and i64 %.12.i, 2147483616
; %6 = (m / 32) * 128 = number of bytes the main loop will cover.
%3 = lshr i64 %.12.i, 5
%4 = trunc i64 %3 to i26
%5 = zext i26 %4 to i64
%6 = shl nuw nsw i64 %5, 7
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
; Main loop: %lsr.iv is a byte offset advancing by 128 per iteration; each
; iteration loads four <8 x i32> vectors (offsets 0, 32, 64, 96), adds 1 to
; every lane, and stores them back to the same addresses.
%lsr.iv = phi i64 [ %lsr.iv.next, %vector.body ], [ 0, %vector.ph ]
%sunkaddr = getelementptr i8, ptr %arg.array_ptr, i64 %lsr.iv
%wide.load = load <8 x i32>, ptr %sunkaddr, align 4
%sunkaddr22 = getelementptr i8, ptr %arg.array_ptr, i64 %lsr.iv
%sunkaddr23 = getelementptr i8, ptr %sunkaddr22, i64 32
%wide.load4 = load <8 x i32>, ptr %sunkaddr23, align 4
%sunkaddr24 = getelementptr i8, ptr %arg.array_ptr, i64 %lsr.iv
%sunkaddr25 = getelementptr i8, ptr %sunkaddr24, i64 64
%wide.load5 = load <8 x i32>, ptr %sunkaddr25, align 4
%sunkaddr26 = getelementptr i8, ptr %arg.array_ptr, i64 %lsr.iv
%sunkaddr27 = getelementptr i8, ptr %sunkaddr26, i64 96
%wide.load6 = load <8 x i32>, ptr %sunkaddr27, align 4
%7 = add <8 x i32> %wide.load, splat (i32 1)
%8 = add <8 x i32> %wide.load4, splat (i32 1)
%9 = add <8 x i32> %wide.load5, splat (i32 1)
%10 = add <8 x i32> %wide.load6, splat (i32 1)
store <8 x i32> %7, ptr %sunkaddr, align 4
store <8 x i32> %8, ptr %sunkaddr23, align 4
store <8 x i32> %9, ptr %sunkaddr25, align 4
store <8 x i32> %10, ptr %sunkaddr27, align 4
%lsr.iv.next = add nuw nsw i64 %lsr.iv, 128
%11 = icmp eq i64 %6, %lsr.iv.next
br i1 %11, label %middle.block, label %vector.body, !llvm.loop !0
middle.block: ; preds = %vector.body
; Done if m was an exact multiple of 32; otherwise handle the remainder.
%cmp.n = icmp eq i64 %n.vec, %.12.i
br i1 %cmp.n, label %common.ret, label %vec.epilog.iter.check
vec.epilog.iter.check: ; preds = %middle.block
; (m & 28) == 0 means fewer than 4 elements remain: use the scalar loop.
%12 = trunc i64 %.12.i to i32
%13 = and i32 %12, 28
%min.epilog.iters.check = icmp eq i32 %13, 0
br i1 %min.epilog.iters.check, label %for.body.us.preheader, label %vec.epilog.ph
vec.epilog.ph: ; preds = %vec.epilog.iter.check, %vector.main.loop.iter.check
; Resume at element %n.vec after the main loop, or at 0 if it was skipped.
%vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ]
; %n.vec8 = m rounded down to a multiple of 4 (end of the 4-wide loop).
%n.vec8 = and i64 %.12.i, 2147483644
br label %vec.epilog.vector.body
vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph
; Epilogue loop: %index9 is an element index; 4 elements (<4 x i32>) per iteration.
%index9 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next11, %vec.epilog.vector.body ]
%14 = shl i64 %index9, 2
%scevgep13 = getelementptr i8, ptr %arg.array_ptr, i64 %14
%wide.load10 = load <4 x i32>, ptr %scevgep13, align 4
%15 = add <4 x i32> %wide.load10, splat (i32 1)
store <4 x i32> %15, ptr %scevgep13, align 4
%index.next11 = add nuw i64 %index9, 4
%16 = icmp eq i64 %n.vec8, %index.next11
br i1 %16, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !3
vec.epilog.middle.block: ; preds = %vec.epilog.vector.body
; Done if m was an exact multiple of 4; otherwise finish with the scalar loop.
%cmp.n12 = icmp eq i64 %n.vec8, %.12.i
br i1 %cmp.n12, label %common.ret, label %for.body.us.preheader
for.body.us.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check
; Starting element index for the scalar loop, depending on which path led here.
%loop.index31.us.ph = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %iter.check ], [ %n.vec8, %vec.epilog.middle.block ]
br label %for.body.us
for.body.us: ; preds = %for.body.us.preheader, %for.body.us
; Scalar loop: one i32 element per iteration until the index reaches m.
%loop.index31.us = phi i64 [ %.102.us, %for.body.us ], [ %loop.index31.us.ph, %for.body.us.preheader ]
%17 = shl i64 %loop.index31.us, 2
%scevgep = getelementptr i8, ptr %arg.array_ptr, i64 %17
%.94.us = load i32, ptr %scevgep, align 4
%.96.us = add i32 %.94.us, 1
store i32 %.96.us, ptr %scevgep, align 4
%.102.us = add nuw nsw i64 %loop.index31.us, 1
%exitcond.not = icmp eq i64 %.12.i, %.102.us
br i1 %exitcond.not, label %common.ret, label %for.body.us, !llvm.loop !4
common.ret: ; preds = %for.body.us, %middle.block, %vec.epilog.middle.block, %B0.endif, %for.end.loopexit
; Single exit: two decrefs of null, a null return value, and status 0.
tail call void @NRT_decref(ptr null)
tail call void @NRT_decref(ptr null)
store ptr null, ptr %retptr, align 8
ret i32 0
for.end.loopexit: ; preds = %for.body.lr.ph
; m == 1: update the single element at the base pointer.
%.94.pre = load i32, ptr %arg.array_ptr, align 4
%.96 = add i32 %.94.pre, 1
store i32 %.96, ptr %arg.array_ptr, align 4
br label %common.ret
}
; `cfunc.`-prefixed wrapper taking (ptr, i32); presumably the symbol that
; my_cfunc.address points at -- confirm against the Numba cfunc docs.
; It calls the native function above and branches on the returned status:
;   status == 0 : returns immediately; no Python C-API call is made on this path.
;   status != 0 : calls numba_gil_ensure, sets/reports a Python error via the
;                 PyErr_* functions, then calls numba_gil_release.
; The native function shown above only ever returns 0.
define ptr @cfunc._ZN8__main__8my_cfuncB2v1B52c8tJTIeFIjxB2IKSgI4CrvQClUYkACQB1EiFSRSVgFmaAA_3d_3dE8int32_2ai(ptr captures(none) %.1, i32 %.2) local_unnamed_addr {
entry:
; %.4 is the stack slot passed as %retptr; it is pre-initialised to null.
%.4 = alloca ptr, align 8
store ptr null, ptr %.4, align 8
; The exception-info argument is passed as poison; the callee declares it readnone.
%.8 = call i32 @_ZN8__main__8my_cfuncB2v1B52c8tJTIeFIjxB2IKSgI4CrvQClUYkACQB1EiFSRSVgFmaAA_3d_3dE8int32_2ai(ptr nonnull %.4, ptr nonnull poison, ptr %.1, i32 %.2) #1
; %.18 is the value the wrapper returns on every path.
%.18 = load ptr, ptr %.4, align 8
; %.20 is a 4-byte slot shared by numba_gil_ensure and numba_gil_release
; (presumably the saved GIL state -- TODO confirm).
%.20 = alloca i32, align 4
store i32 0, ptr %.20, align 4
%cond = icmp eq i32 %.8, 0
br i1 %cond, label %common.ret, label %entry.if
entry.if: ; preds = %entry
; Non-zero status: error handling starts here.
%.16 = icmp sgt i32 %.8, 0
call void @numba_gil_ensure(ptr nonnull %.20)
br i1 %.16, label %entry.if.if, label %entry.if.endif
common.ret: ; preds = %entry, %.23
ret ptr %.18
.23: ; preds = %entry.if.endif.endif.endif, %entry.if.endif.if, %entry.if.endif
; Report the pending error as unraisable, using the CPUContext repr string as
; the object argument, then drop that string and call numba_gil_release.
%.71 = call ptr @PyUnicode_FromString(ptr nonnull @".const.<numba.core.cpu.CPUContext object at 0x1266fc441610>")
call void @PyErr_WriteUnraisable(ptr %.71)
call void @Py_DecRef(ptr %.71)
call void @numba_gil_release(ptr nonnull %.20)
br label %common.ret
entry.if.if: ; preds = %entry.if
; Positive status: the block ends in `unreachable` after PyErr_Clear.
call void @PyErr_Clear()
unreachable
entry.if.endif: ; preds = %entry.if
; Negative status: -3 sets StopIteration, -1 goes straight to reporting
; (no exception is set in this function), anything else sets SystemError.
switch i32 %.8, label %entry.if.endif.endif.endif [
i32 -3, label %entry.if.endif.if
i32 -1, label %.23
]
entry.if.endif.if: ; preds = %entry.if.endif
call void @PyErr_SetNone(ptr nonnull @PyExc_StopIteration)
br label %.23
entry.if.endif.endif.endif: ; preds = %entry.if.endif
call void @PyErr_SetString(ptr nonnull @PyExc_SystemError, ptr nonnull @".const.unknown error when calling native function")
br label %.23
}
; External symbols that must be resolved at link/load time. In this module
; they are called only from the non-zero-status path of the cfunc wrapper.
declare void @numba_gil_ensure(ptr) local_unnamed_addr
declare ptr @PyUnicode_FromString(ptr) local_unnamed_addr
declare void @PyErr_WriteUnraisable(ptr) local_unnamed_addr
declare void @Py_DecRef(ptr) local_unnamed_addr
declare void @numba_gil_release(ptr) local_unnamed_addr
declare void @PyErr_Clear() local_unnamed_addr
declare void @PyErr_SetString(ptr, ptr) local_unnamed_addr
declare void @PyErr_SetNone(ptr) local_unnamed_addr
; Function Attrs: mustprogress nofree noinline norecurse nounwind willreturn memory(argmem: readwrite)
; NRT_incref: returns immediately for a null pointer; otherwise atomically
; adds 1 (monotonic ordering) to the i64 stored at %.1.
define linkonce_odr void @NRT_incref(ptr %.1) local_unnamed_addr #0 {
.3:
%.4 = icmp eq ptr %.1, null
br i1 %.4, label %common.ret, label %.3.endif, !prof !5
common.ret: ; preds = %.3.endif, %.3
ret void
.3.endif: ; preds = %.3
; The previous value returned by atomicrmw (%.4.i) is not used.
%.4.i = atomicrmw add ptr %.1, i64 1 monotonic, align 8
br label %common.ret
}
; Function Attrs: noinline
; NRT_decref: returns immediately for a null pointer; otherwise performs an
; atomic subtract of 1 on the i64 at %.1 and, when the intrinsic's result is
; true, calls NRT_MemInfo_call_dtor on the same pointer.
define linkonce_odr void @NRT_decref(ptr %.1) local_unnamed_addr #1 {
.3:
%.4 = icmp eq ptr %.1, null
br i1 %.4, label %common.ret1, label %.3.endif, !prof !5
common.ret1: ; preds = %.3, %.3.endif
ret void
.3.endif: ; preds = %.3
; Release fence before the decrement, acquire fence before the destructor call.
fence release
; The intrinsic returns a condition flag for the subtraction (presumably
; "result is zero", selected by the immediate 4 -- TODO confirm).
%0 = tail call i8 @llvm.x86.atomic.sub.cc.i64(ptr nonnull %.1, i64 1, i32 4)
%1 = trunc i8 %0 to i1
br i1 %1, label %.3.endif.if, label %common.ret1, !prof !5
.3.endif.if: ; preds = %.3.endif
fence acquire
tail call void @NRT_MemInfo_call_dtor(ptr nonnull %.1)
ret void
}
; Function Attrs: nounwind
; x86 atomic-subtract intrinsic used by NRT_decref.
declare i8 @llvm.x86.atomic.sub.cc.i64(ptr, i64, i32 immarg) #2
; Called by NRT_decref; defined outside this module.
declare void @NRT_MemInfo_call_dtor(ptr) local_unnamed_addr
; Attribute groups: #0 is used by NRT_incref, #1 by NRT_decref and by the
; wrapper's call to the native function, #2 by the atomic-sub intrinsic.
attributes #0 = { mustprogress nofree noinline norecurse nounwind willreturn memory(argmem: readwrite) }
attributes #1 = { noinline }
attributes #2 = { nounwind }
; Loop metadata: !0 tags the 32-wide main loop, !3 the 4-wide epilogue loop,
; !4 the scalar remainder loop. All three carry the "isvectorized" and
; "unroll.runtime.disable" hints.
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = !{!"llvm.loop.unroll.runtime.disable"}
!3 = distinct !{!3, !1, !2}
!4 = distinct !{!4, !2, !1}
; Branch weights attached to the null checks in NRT_incref / NRT_decref and to
; the destructor branch in NRT_decref.
!5 = !{!"branch_weights", i32 1, i32 99}