hi, I found a curious example of long compilation times. Admittedly, I brought it on myself by using code generation that resulted in a function which is 1500 lines long. What curious is that with a small change, the compilation goes from 30 seconds to not being done after 10 minutes.
The following are two reproducers with 200 lines each. One compiles in 3 seconds and the other one in 20 seconds, and the only difference seems the latter using a 2d array, and the former the flattened 1d version of the 2d array. Ie, arr[i * size + j] vs arr[i, j] type of thing.
In case the background matters, this is an unrolling of a sparse array multiplication, with some extra bits that mean it cannot be done as a usual sparse vector-matrix multiplication.
I am wondering if there’s a bug behind this, or something I should accept a move on with the 1d version (since the 2d version takes too long to compile).
Thanks,
Luk
@njit
def sp_dot(t, pol_data, state_vec, dec_data, out):
out[:246] = state_vec[:]
out[247] = dec_data[0][fc_0(t, pol_data)]*state_vec[0]
out[1] += out[247]
out[0] -= out[247]
out[248] = dec_data[1][fc_1(t, pol_data)]*state_vec[0]
out[2] += out[248]
out[0] -= out[248]
out[249] = dec_data[2][fc_2(t, pol_data)]*state_vec[0]
out[3] += out[249]
out[0] -= out[249]
out[250] = dec_data[3][fc_3(t, pol_data)]*state_vec[0]
out[4] += out[250]
out[0] -= out[250]
out[371] = dec_data[4][fc_4(t, pol_data)]*state_vec[0]
out[125] += out[371]
out[0] -= out[371]
out[1230] = dec_data[5][fc_5(t, pol_data)]*state_vec[4]
out[0] += out[1230]
out[4] -= out[1230]
out[1232] = dec_data[6][fc_6(t, pol_data)]*state_vec[4]
out[2] += out[1232]
out[4] -= out[1232]
out[1476] = dec_data[7][fc_7(t, pol_data)]*state_vec[5]
out[0] += out[1476]
out[5] -= out[1476]
out[1478] = dec_data[8][fc_8(t, pol_data)]*state_vec[5]
out[2] += out[1478]
out[5] -= out[1478]
out[1722] = dec_data[9][fc_9(t, pol_data)]*state_vec[6]
out[0] += out[1722]
out[6] -= out[1722]
out[1724] = dec_data[10][fc_10(t, pol_data)]*state_vec[6]
out[2] += out[1724]
out[6] -= out[1724]
out[1968] = dec_data[11][fc_11(t, pol_data)]*state_vec[7]
out[0] += out[1968]
out[7] -= out[1968]
out[1970] = dec_data[12][fc_12(t, pol_data)]*state_vec[7]
out[2] += out[1970]
out[7] -= out[1970]
out[2214] = dec_data[13][fc_13(t, pol_data)]*state_vec[8]
out[0] += out[2214]
out[8] -= out[2214]
out[2216] = dec_data[14][fc_14(t, pol_data)]*state_vec[8]
out[2] += out[2216]
out[8] -= out[2216]
out[2460] = dec_data[15][fc_15(t, pol_data)]*state_vec[9]
out[0] += out[2460]
out[9] -= out[2460]
out[2462] = dec_data[16][fc_16(t, pol_data)]*state_vec[9]
out[2] += out[2462]
out[9] -= out[2462]
out[2706] = dec_data[17][fc_17(t, pol_data)]*state_vec[10]
out[0] += out[2706]
out[10] -= out[2706]
out[2708] = dec_data[18][fc_18(t, pol_data)]*state_vec[10]
out[2] += out[2708]
out[10] -= out[2708]
out[2952] = dec_data[19][fc_19(t, pol_data)]*state_vec[11]
out[0] += out[2952]
out[11] -= out[2952]
out[2954] = dec_data[20][fc_20(t, pol_data)]*state_vec[11]
out[2] += out[2954]
out[11] -= out[2954]
out[3198] = dec_data[21][fc_21(t, pol_data)]*state_vec[12]
out[0] += out[3198]
out[12] -= out[3198]
out[3200] = dec_data[22][fc_22(t, pol_data)]*state_vec[12]
out[2] += out[3200]
out[12] -= out[3200]
out[3444] = dec_data[23][fc_23(t, pol_data)]*state_vec[13]
out[0] += out[3444]
out[13] -= out[3444]
out[3446] = dec_data[24][fc_24(t, pol_data)]*state_vec[13]
out[2] += out[3446]
out[13] -= out[3446]
out[3690] = dec_data[25][fc_25(t, pol_data)]*state_vec[14]
out[0] += out[3690]
out[14] -= out[3690]
out[3692] = dec_data[26][fc_26(t, pol_data)]*state_vec[14]
out[2] += out[3692]
out[14] -= out[3692]
out[3936] = dec_data[27][fc_27(t, pol_data)]*state_vec[15]
out[0] += out[3936]
out[15] -= out[3936]
out[3938] = dec_data[28][fc_28(t, pol_data)]*state_vec[15]
out[2] += out[3938]
out[15] -= out[3938]
out[4182] = dec_data[29][fc_29(t, pol_data)]*state_vec[16]
out[0] += out[4182]
out[16] -= out[4182]
out[4184] = dec_data[30][fc_30(t, pol_data)]*state_vec[16]
out[2] += out[4184]
out[16] -= out[4184]
out[4428] = dec_data[31][fc_31(t, pol_data)]*state_vec[17]
out[0] += out[4428]
out[17] -= out[4428]
out[4430] = dec_data[32][fc_32(t, pol_data)]*state_vec[17]
out[2] += out[4430]
out[17] -= out[4430]
out[4674] = dec_data[33][fc_33(t, pol_data)]*state_vec[18]
out[0] += out[4674]
out[18] -= out[4674]
out[4676] = dec_data[34][fc_34(t, pol_data)]*state_vec[18]
out[2] += out[4676]
out[18] -= out[4676]
out[4920] = dec_data[35][fc_35(t, pol_data)]*state_vec[19]
out[0] += out[4920]
out[19] -= out[4920]
out[4922] = dec_data[36][fc_36(t, pol_data)]*state_vec[19]
out[2] += out[4922]
out[19] -= out[4922]
out[5166] = dec_data[37][fc_37(t, pol_data)]*state_vec[20]
out[0] += out[5166]
out[20] -= out[5166]
out[5168] = dec_data[38][fc_38(t, pol_data)]*state_vec[20]
out[2] += out[5168]
out[20] -= out[5168]
out[5412] = dec_data[39][fc_39(t, pol_data)]*state_vec[21]
out[0] += out[5412]
out[21] -= out[5412]
out[5414] = dec_data[40][fc_40(t, pol_data)]*state_vec[21]
out[2] += out[5414]
out[21] -= out[5414]
out[5658] = dec_data[41][fc_41(t, pol_data)]*state_vec[22]
out[0] += out[5658]
out[22] -= out[5658]
out[5660] = dec_data[42][fc_42(t, pol_data)]*state_vec[22]
out[2] += out[5660]
out[22] -= out[5660]
out[5904] = dec_data[43][fc_43(t, pol_data)]*state_vec[23]
out[0] += out[5904]
out[23] -= out[5904]
out[5906] = dec_data[44][fc_44(t, pol_data)]*state_vec[23]
out[2] += out[5906]
out[23] -= out[5906]
out[6150] = dec_data[45][fc_45(t, pol_data)]*state_vec[24]
out[0] += out[6150]
out[24] -= out[6150]
out[6152] = dec_data[46][fc_46(t, pol_data)]*state_vec[24]
out[2] += out[6152]
out[24] -= out[6152]
out[6396] = dec_data[47][fc_47(t, pol_data)]*state_vec[25]
out[0] += out[6396]
out[25] -= out[6396]
out[6398] = dec_data[48][fc_48(t, pol_data)]*state_vec[25]
out[2] += out[6398]
out[25] -= out[6398]
out[6642] = dec_data[49][fc_49(t, pol_data)]*state_vec[26]
out[0] += out[6642]
out[26] -= out[6642]
out[6644] = dec_data[50][fc_50(t, pol_data)]*state_vec[26]
out[2] += out[6644]
out[26] -= out[6644]
out[6888] = dec_data[51][fc_51(t, pol_data)]*state_vec[27]
out[0] += out[6888]
out[27] -= out[6888]
out[6890] = dec_data[52][fc_52(t, pol_data)]*state_vec[27]
out[2] += out[6890]
out[27] -= out[6890]
out[7134] = dec_data[53][fc_53(t, pol_data)]*state_vec[28]
out[0] += out[7134]
out[28] -= out[7134]
out[7136] = dec_data[54][fc_54(t, pol_data)]*state_vec[28]
out[2] += out[7136]
out[28] -= out[7136]
out[7380] = dec_data[55][fc_55(t, pol_data)]*state_vec[29]
out[0] += out[7380]
out[29] -= out[7380]
out[7382] = dec_data[56][fc_56(t, pol_data)]*state_vec[29]
out[2] += out[7382]
out[29] -= out[7382]
out[7626] = dec_data[57][fc_57(t, pol_data)]*state_vec[30]
out[0] += out[7626]
out[30] -= out[7626]
out[7628] = dec_data[58][fc_58(t, pol_data)]*state_vec[30]
out[2] += out[7628]
out[30] -= out[7628]
out[7872] = dec_data[59][fc_59(t, pol_data)]*state_vec[31]
out[0] += out[7872]
out[31] -= out[7872]
out[7874] = dec_data[60][fc_60(t, pol_data)]*state_vec[31]
out[2] += out[7874]
out[31] -= out[7874]
out[8118] = dec_data[61][fc_61(t, pol_data)]*state_vec[32]
out[0] += out[8118]
out[32] -= out[8118]
out[8120] = dec_data[62][fc_62(t, pol_data)]*state_vec[32]
out[2] += out[8120]
out[32] -= out[8120]
out[8364] = dec_data[63][fc_63(t, pol_data)]*state_vec[33]
out[0] += out[8364]
out[33] -= out[8364]
out[8366] = dec_data[64][fc_64(t, pol_data)]*state_vec[33]
out[2] += out[8366]
out[33] -= out[8366]
out[8610] = dec_data[65][fc_65(t, pol_data)]*state_vec[34]
return out
%time sp_dot.compile((types.int64, types.int64, types.Array(types.float32, 1, 'C'), types.ListType(types.Array(types.float64, 1, 'C')) , types.Array(types.float64, 1, 'C')))
@njit
def sp_dot(t, pol_data, state_vec, dec_data, out):
out[:246] = state_vec[:]
out[0, 1] = dec_data[0][fc_0(t, pol_data)]*state_vec[0]
out[1] += out[0, 1]
out[0] -= out[0, 1]
out[0, 2] = dec_data[1][fc_1(t, pol_data)]*state_vec[0]
out[2] += out[0, 2]
out[0] -= out[0, 2]
out[0, 3] = dec_data[2][fc_2(t, pol_data)]*state_vec[0]
out[3] += out[0, 3]
out[0] -= out[0, 3]
out[0, 4] = dec_data[3][fc_3(t, pol_data)]*state_vec[0]
out[4] += out[0, 4]
out[0] -= out[0, 4]
out[0, 125] = dec_data[4][fc_4(t, pol_data)]*state_vec[0]
out[125] += out[0, 125]
out[0] -= out[0, 125]
out[4, 0] = dec_data[5][fc_5(t, pol_data)]*state_vec[4]
out[0] += out[4, 0]
out[4] -= out[4, 0]
out[4, 2] = dec_data[6][fc_6(t, pol_data)]*state_vec[4]
out[2] += out[4, 2]
out[4] -= out[4, 2]
out[5, 0] = dec_data[7][fc_7(t, pol_data)]*state_vec[5]
out[0] += out[5, 0]
out[5] -= out[5, 0]
out[5, 2] = dec_data[8][fc_8(t, pol_data)]*state_vec[5]
out[2] += out[5, 2]
out[5] -= out[5, 2]
out[6, 0] = dec_data[9][fc_9(t, pol_data)]*state_vec[6]
out[0] += out[6, 0]
out[6] -= out[6, 0]
out[6, 2] = dec_data[10][fc_10(t, pol_data)]*state_vec[6]
out[2] += out[6, 2]
out[6] -= out[6, 2]
out[7, 0] = dec_data[11][fc_11(t, pol_data)]*state_vec[7]
out[0] += out[7, 0]
out[7] -= out[7, 0]
out[7, 2] = dec_data[12][fc_12(t, pol_data)]*state_vec[7]
out[2] += out[7, 2]
out[7] -= out[7, 2]
out[8, 0] = dec_data[13][fc_13(t, pol_data)]*state_vec[8]
out[0] += out[8, 0]
out[8] -= out[8, 0]
out[8, 2] = dec_data[14][fc_14(t, pol_data)]*state_vec[8]
out[2] += out[8, 2]
out[8] -= out[8, 2]
out[9, 0] = dec_data[15][fc_15(t, pol_data)]*state_vec[9]
out[0] += out[9, 0]
out[9] -= out[9, 0]
out[9, 2] = dec_data[16][fc_16(t, pol_data)]*state_vec[9]
out[2] += out[9, 2]
out[9] -= out[9, 2]
out[10, 0] = dec_data[17][fc_17(t, pol_data)]*state_vec[10]
out[0] += out[10, 0]
out[10] -= out[10, 0]
out[10, 2] = dec_data[18][fc_18(t, pol_data)]*state_vec[10]
out[2] += out[10, 2]
out[10] -= out[10, 2]
out[11, 0] = dec_data[19][fc_19(t, pol_data)]*state_vec[11]
out[0] += out[11, 0]
out[11] -= out[11, 0]
out[11, 2] = dec_data[20][fc_20(t, pol_data)]*state_vec[11]
out[2] += out[11, 2]
out[11] -= out[11, 2]
out[12, 0] = dec_data[21][fc_21(t, pol_data)]*state_vec[12]
out[0] += out[12, 0]
out[12] -= out[12, 0]
out[12, 2] = dec_data[22][fc_22(t, pol_data)]*state_vec[12]
out[2] += out[12, 2]
out[12] -= out[12, 2]
out[13, 0] = dec_data[23][fc_23(t, pol_data)]*state_vec[13]
out[0] += out[13, 0]
out[13] -= out[13, 0]
out[13, 2] = dec_data[24][fc_24(t, pol_data)]*state_vec[13]
out[2] += out[13, 2]
out[13] -= out[13, 2]
out[14, 0] = dec_data[25][fc_25(t, pol_data)]*state_vec[14]
out[0] += out[14, 0]
out[14] -= out[14, 0]
out[14, 2] = dec_data[26][fc_26(t, pol_data)]*state_vec[14]
out[2] += out[14, 2]
out[14] -= out[14, 2]
out[15, 0] = dec_data[27][fc_27(t, pol_data)]*state_vec[15]
out[0] += out[15, 0]
out[15] -= out[15, 0]
out[15, 2] = dec_data[28][fc_28(t, pol_data)]*state_vec[15]
out[2] += out[15, 2]
out[15] -= out[15, 2]
out[16, 0] = dec_data[29][fc_29(t, pol_data)]*state_vec[16]
out[0] += out[16, 0]
out[16] -= out[16, 0]
out[16, 2] = dec_data[30][fc_30(t, pol_data)]*state_vec[16]
out[2] += out[16, 2]
out[16] -= out[16, 2]
out[17, 0] = dec_data[31][fc_31(t, pol_data)]*state_vec[17]
out[0] += out[17, 0]
out[17] -= out[17, 0]
out[17, 2] = dec_data[32][fc_32(t, pol_data)]*state_vec[17]
out[2] += out[17, 2]
out[17] -= out[17, 2]
out[18, 0] = dec_data[33][fc_33(t, pol_data)]*state_vec[18]
out[0] += out[18, 0]
out[18] -= out[18, 0]
out[18, 2] = dec_data[34][fc_34(t, pol_data)]*state_vec[18]
out[2] += out[18, 2]
out[18] -= out[18, 2]
out[19, 0] = dec_data[35][fc_35(t, pol_data)]*state_vec[19]
out[0] += out[19, 0]
out[19] -= out[19, 0]
out[19, 2] = dec_data[36][fc_36(t, pol_data)]*state_vec[19]
out[2] += out[19, 2]
out[19] -= out[19, 2]
out[20, 0] = dec_data[37][fc_37(t, pol_data)]*state_vec[20]
out[0] += out[20, 0]
out[20] -= out[20, 0]
out[20, 2] = dec_data[38][fc_38(t, pol_data)]*state_vec[20]
out[2] += out[20, 2]
out[20] -= out[20, 2]
out[21, 0] = dec_data[39][fc_39(t, pol_data)]*state_vec[21]
out[0] += out[21, 0]
out[21] -= out[21, 0]
out[21, 2] = dec_data[40][fc_40(t, pol_data)]*state_vec[21]
out[2] += out[21, 2]
out[21] -= out[21, 2]
out[22, 0] = dec_data[41][fc_41(t, pol_data)]*state_vec[22]
out[0] += out[22, 0]
out[22] -= out[22, 0]
out[22, 2] = dec_data[42][fc_42(t, pol_data)]*state_vec[22]
out[2] += out[22, 2]
out[22] -= out[22, 2]
out[23, 0] = dec_data[43][fc_43(t, pol_data)]*state_vec[23]
out[0] += out[23, 0]
out[23] -= out[23, 0]
out[23, 2] = dec_data[44][fc_44(t, pol_data)]*state_vec[23]
out[2] += out[23, 2]
out[23] -= out[23, 2]
out[24, 0] = dec_data[45][fc_45(t, pol_data)]*state_vec[24]
out[0] += out[24, 0]
out[24] -= out[24, 0]
out[24, 2] = dec_data[46][fc_46(t, pol_data)]*state_vec[24]
out[2] += out[24, 2]
out[24] -= out[24, 2]
out[25, 0] = dec_data[47][fc_47(t, pol_data)]*state_vec[25]
out[0] += out[25, 0]
out[25] -= out[25, 0]
out[25, 2] = dec_data[48][fc_48(t, pol_data)]*state_vec[25]
out[2] += out[25, 2]
out[25] -= out[25, 2]
out[26, 0] = dec_data[49][fc_49(t, pol_data)]*state_vec[26]
out[0] += out[26, 0]
out[26] -= out[26, 0]
out[26, 2] = dec_data[50][fc_50(t, pol_data)]*state_vec[26]
out[2] += out[26, 2]
out[26] -= out[26, 2]
out[27, 0] = dec_data[51][fc_51(t, pol_data)]*state_vec[27]
out[0] += out[27, 0]
out[27] -= out[27, 0]
out[27, 2] = dec_data[52][fc_52(t, pol_data)]*state_vec[27]
out[2] += out[27, 2]
out[27] -= out[27, 2]
out[28, 0] = dec_data[53][fc_53(t, pol_data)]*state_vec[28]
out[0] += out[28, 0]
out[28] -= out[28, 0]
out[28, 2] = dec_data[54][fc_54(t, pol_data)]*state_vec[28]
out[2] += out[28, 2]
out[28] -= out[28, 2]
out[29, 0] = dec_data[55][fc_55(t, pol_data)]*state_vec[29]
out[0] += out[29, 0]
out[29] -= out[29, 0]
out[29, 2] = dec_data[56][fc_56(t, pol_data)]*state_vec[29]
out[2] += out[29, 2]
out[29] -= out[29, 2]
out[30, 0] = dec_data[57][fc_57(t, pol_data)]*state_vec[30]
out[0] += out[30, 0]
out[30] -= out[30, 0]
out[30, 2] = dec_data[58][fc_58(t, pol_data)]*state_vec[30]
out[2] += out[30, 2]
out[30] -= out[30, 2]
out[31, 0] = dec_data[59][fc_59(t, pol_data)]*state_vec[31]
out[0] += out[31, 0]
out[31] -= out[31, 0]
out[31, 2] = dec_data[60][fc_60(t, pol_data)]*state_vec[31]
out[2] += out[31, 2]
out[31] -= out[31, 2]
out[32, 0] = dec_data[61][fc_61(t, pol_data)]*state_vec[32]
out[0] += out[32, 0]
out[32] -= out[32, 0]
out[32, 2] = dec_data[62][fc_62(t, pol_data)]*state_vec[32]
out[2] += out[32, 2]
out[32] -= out[32, 2]
out[33, 0] = dec_data[63][fc_63(t, pol_data)]*state_vec[33]
out[0] += out[33, 0]
out[33] -= out[33, 0]
out[33, 2] = dec_data[64][fc_64(t, pol_data)]*state_vec[33]
out[2] += out[33, 2]
out[33] -= out[33, 2]
out[34, 0] = dec_data[65][fc_65(t, pol_data)]*state_vec[34]
return out
%time sp_dot.compile((types.int64, types.int64, types.Array(types.float32, 1, 'C'), types.ListType(types.Array(types.float64, 1, 'C')) , types.Array(types.float64, 2, 'C')))