Here’s a portion of the code for kernel 2:
@cuda.jit
def kernel_2(s_matrix, cl_matrix, changed):
    """Propagate the smaller label between overlapping spans of adjacent lines.

    Each thread handles one ``(row, image_slice)`` pair and walks the spans
    stored for it from left to right.  For every span it scans the spans
    stored at ``image_slice - 1`` (called the "previous row" throughout) and,
    whenever two spans overlap but carry different labels, writes the smaller
    label into both with atomic minimums.

    Args:
        s_matrix: Span matrix indexed ``[row, 2 * k, slice]`` (span start)
            and ``[row, 2 * k + 1, slice]`` (span end).  A start of ``-1`` or
            less ends the neighbour scan.
        cl_matrix: Label matrix indexed ``[row, k, slice]``; ``-1`` is the
            null label and marks the end of the valid spans.
        changed: Counter array; ``changed[0]`` is atomically incremented once
            per merge so the caller can tell whether any label was updated.

    NOTE(review): this assumes ``s_matrix.shape[1] == 2 * cl_matrix.shape[1]``
    -- confirm against the code that allocates both matrices.
    """
    row, image_slice = cuda.grid(2)  # thread coordinates; every cursor starts at column 0
    pos_s, pos_l = 0, 0  # cursor of the current span in the spans / label matrices
    pre_s, pre_l = 0, 0  # cursor into the previous row
    post_s, post_l = 0, 0  # cursor into the following row
    down_s, down_l = 0, 0  # cursor into the downwards row
    up_s, up_l = 0, 0  # cursor into the upwards row
    if row < s_matrix.shape[0] and image_slice < s_matrix.shape[2]:  # guard for rows and slices
        span_slots = s_matrix.shape[1]
        label_slots = cl_matrix.shape[1]
        for column in range(label_slots):
            current_span_label = cl_matrix[row, pos_l, image_slice]
            if current_span_label == -1:  # null label: no more spans on this line
                break
            if pos_s + 1 >= span_slots:  # never read a start/end pair past the array
                break
            current_span_start = s_matrix[row, pos_s, image_slice]
            current_span_end = s_matrix[row, pos_s + 1, image_slice]

            # previous row
            if image_slice > 0:  # the first slice has no previous neighbour
                # The cursor is NOT reset between spans: it only moves forward.
                # The explicit bounds checks stop the scan at the end of the
                # arrays when the neighbouring line is full and therefore has
                # no -1 sentinel to terminate on.
                while (
                    pre_s + 1 < span_slots
                    and pre_l < label_slots
                    and -1 < s_matrix[row, pre_s, image_slice - 1] < current_span_end
                ):
                    if (
                        current_span_start <= s_matrix[row, pre_s + 1, image_slice - 1]
                        and s_matrix[row, pre_s, image_slice - 1] <= current_span_end
                    ):
                        interval_label = cl_matrix[row, pre_l, image_slice - 1]
                        if interval_label == current_span_label:
                            # Already merged: just step to the next neighbour span.
                            pre_s = pre_s + 2
                            pre_l = pre_l + 1
                            continue
                        if interval_label == -1:
                            break
                        min_label = min(current_span_label, interval_label)
                        # Atomics because other threads may be relabelling the
                        # same two entries concurrently.
                        cuda.atomic.min(cl_matrix, (row, pos_l, image_slice), min_label)
                        cuda.atomic.min(cl_matrix, (row, pre_l, image_slice - 1), min_label)
                        cuda.atomic.add(changed, 0, 1)
                    pre_s = pre_s + 2
                    pre_l = pre_l + 1

            # ...omitted code for other rows

            pos_l = pos_l + 1
            pos_s = pos_s + 2