I'm hoping someone has an idea of what can be done, even if it's not a numba-specific issue.
I am looking for advice on the error message `ctypes objects containing pointers cannot be pickled`, with the goal of running njitted `scipy.special.cython_special` functions on remote workers.
- I collect the Cython function addresses using `numba.extending`, as in this documentation. I am using this approach. Running the resulting function locally works great.
(This is using `scipy.special.cython_special.huber` just as an example)
from numba.extending import get_cython_function_address
from numba import njit
import ctypes
import scipy
# Look up the raw address of the function that the
# 'scipy.special.cython_special' module exports under the name 'huber'.
addr = get_cython_function_address('scipy.special.cython_special', 'huber')
# ctypes prototype: the first argument to CFUNCTYPE is the return type, the
# remaining ones are the argument types, i.e. double f(double, double).
functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double, ctypes.c_double)
# Wrap the address in a callable ctypes function pointer. This object holds a
# raw pointer, which is what ctypes refuses to pickle.
chuber = functype(addr)
@njit
def nchuber(delta, r):
    """Call the ctypes-wrapped ``huber`` function from nopython-compiled code.

    Both arguments are forwarded unchanged to the module-level ``chuber``
    function pointer, whose prototype takes two C doubles and returns a
    C double.

    Args:
        delta: First argument passed to ``chuber`` (a double).
        r: Second argument passed to ``chuber`` (a double).

    Returns:
        The double returned by ``chuber(delta, r)``.

    Note:
        The compiled function refers to the global ``chuber`` ctypes object.
        Presumably this is why serializing the dispatcher fails with
        "ctypes objects containing pointers cannot be pickled" -- to be
        confirmed.
    """
    return chuber(delta, r)
# Call the njitted function in the local process; this is the case that is
# reported to work.
nchuber(1.0, 4.0)
I have not yet found a way to run the function on a remote worker.
What would be a way around this?
(I am using distributed’s LocalCluster here just to make it easier to create an MRE, but the goal is to run on a GatewayCluster.)
MRE
from distributed import Client, LocalCluster
# Start a cluster on the local machine and connect a client to it.
cluster = LocalCluster()
client = Client(cluster)

# Hand the njitted function to the cluster together with its two arguments.
# This is the statement that is reported to end in the ValueError shown below.
submitted = client.submit(nchuber, 1.0, 4.0)
Which leads to:
ValueError: ctypes objects containing pointers cannot be pickled;
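Full traceback when running on a remote worker (note: this traceback is from my actual code, which passes a different njitted function to `client.run`, but the error is the same):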
2023-11-18 20:05:51,525 - distributed.protocol.pickle - INFO - Failed to serialize CPUDispatcher(<function Matern_SpaceTimeCovariance_Nblocks_latlon_GenExp at 0x7fb8f21a9cf0>). Exception: ctypes objects containing pointers cannot be pickled
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/protocol/pickle.py:46, in dumps(x, buffer_callback, protocol)
45 buffers.clear()
---> 46 result = pickle.dumps(x, **dump_kwargs)
47 if b"__main__" in result or (
48 CLOUDPICKLE_GTE_20
49 and getattr(inspect.getmodule(x), "__name__", None)
50 in cloudpickle.list_registry_pickle_by_value()
51 ):
File /srv/conda/envs/notebook/lib/python3.10/site-packages/numba/core/serialize.py:106, in _pickle__CustomPickled(cp)
102 """standard pickling for `_CustomPickled`.
103
104 Uses `NumbaPickler` to dump.
105 """
--> 106 serialized = dumps((cp.ctor, cp.states))
107 return _unpickle__CustomPickled, (serialized,)
File /srv/conda/envs/notebook/lib/python3.10/site-packages/numba/core/serialize.py:57, in dumps(obj)
56 p = pickler(buf, protocol=4)
---> 57 p.dump(obj)
58 pickled = buf.getvalue()
File /srv/conda/envs/notebook/lib/python3.10/site-packages/numba/cloudpickle/cloudpickle_fast.py:568, in CloudPickler.dump(self, obj)
567 try:
--> 568 return Pickler.dump(self, obj)
569 except RuntimeError as e:
ValueError: ctypes objects containing pointers cannot be pickled
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[27], line 1
----> 1 Ktheta = client.run(covf.Matern_SpaceTimeCovariance_Nblocks_latlon_GenExp, n, lats, lons, times, const, const, const, const, const, const )
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/client.py:2901, in Client.run(self, function, workers, wait, nanny, on_error, *args, **kwargs)
2818 def run(
2819 self,
2820 function,
(...)
2826 **kwargs,
2827 ):
2828 """
2829 Run a function on all workers outside of task scheduling system
2830
(...)
2899 >>> c.run(print_state, wait=False) # doctest: +SKIP
2900 """
-> 2901 return self.sync(
2902 self._run,
2903 function,
2904 *args,
2905 workers=workers,
2906 wait=wait,
2907 nanny=nanny,
2908 on_error=on_error,
2909 **kwargs,
2910 )
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/utils.py:339, in SyncMethodMixin.sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
337 return future
338 else:
--> 339 return sync(
340 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
341 )
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/utils.py:406, in sync(loop, func, callback_timeout, *args, **kwargs)
404 if error:
405 typ, exc, tb = error
--> 406 raise exc.with_traceback(tb)
407 else:
408 return result
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/utils.py:379, in sync.<locals>.f()
377 future = asyncio.wait_for(future, callback_timeout)
378 future = asyncio.ensure_future(future)
--> 379 result = yield future
380 except Exception:
381 error = sys.exc_info()
File /srv/conda/envs/notebook/lib/python3.10/site-packages/tornado/gen.py:769, in Runner.run(self)
766 exc_info = None
768 try:
--> 769 value = future.result()
770 except Exception:
771 exc_info = sys.exc_info()
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/client.py:2781, in Client._run(self, function, nanny, workers, wait, on_error, *args, **kwargs)
2768 async def _run(
2769 self,
2770 function,
(...)
2776 **kwargs,
2777 ):
2778 responses = await self.scheduler.broadcast(
2779 msg=dict(
2780 op="run",
-> 2781 function=dumps(function),
2782 args=dumps(args),
2783 wait=wait,
2784 kwargs=dumps(kwargs),
2785 ),
2786 workers=workers,
2787 nanny=nanny,
2788 on_error="return_pickle",
2789 )
2790 results = {}
2791 for key, resp in responses.items():
File /srv/conda/envs/notebook/lib/python3.10/site-packages/distributed/protocol/pickle.py:58, in dumps(x, buffer_callback, protocol)
56 try:
57 buffers.clear()
---> 58 result = cloudpickle.dumps(x, **dump_kwargs)
59 except Exception as e:
60 logger.info("Failed to serialize %s. Exception: %s", x, e)
File /srv/conda/envs/notebook/lib/python3.10/site-packages/cloudpickle/cloudpickle_fast.py:73, in dumps(obj, protocol, buffer_callback)
69 with io.BytesIO() as file:
70 cp = CloudPickler(
71 file, protocol=protocol, buffer_callback=buffer_callback
72 )
---> 73 cp.dump(obj)
74 return file.getvalue()
File /srv/conda/envs/notebook/lib/python3.10/site-packages/cloudpickle/cloudpickle_fast.py:632, in CloudPickler.dump(self, obj)
630 def dump(self, obj):
631 try:
--> 632 return Pickler.dump(self, obj)
633 except RuntimeError as e:
634 if "recursion" in e.args[0]:
File /srv/conda/envs/notebook/lib/python3.10/site-packages/numba/core/serialize.py:106, in _pickle__CustomPickled(cp)
101 def _pickle__CustomPickled(cp):
102 """standard pickling for `_CustomPickled`.
103
104 Uses `NumbaPickler` to dump.
105 """
--> 106 serialized = dumps((cp.ctor, cp.states))
107 return _unpickle__CustomPickled, (serialized,)
File /srv/conda/envs/notebook/lib/python3.10/site-packages/numba/core/serialize.py:57, in dumps(obj)
55 with io.BytesIO() as buf:
56 p = pickler(buf, protocol=4)
---> 57 p.dump(obj)
58 pickled = buf.getvalue()
60 return pickled
File /srv/conda/envs/notebook/lib/python3.10/site-packages/numba/cloudpickle/cloudpickle_fast.py:568, in CloudPickler.dump(self, obj)
566 def dump(self, obj):
567 try:
--> 568 return Pickler.dump(self, obj)
569 except RuntimeError as e:
570 if "recursion" in e.args[0]:
ValueError: ctypes objects containing pointers cannot be pickled