Comments (13)
What is the reason for pip-installing urllib3? Was this a response to the original error?
You may do well to explicitly check the versions not just of the dask components (client.get_versions()) but also of all the implicated packages, e.g.:
def ver():
import gcsfs
return gcsfs.__version__
client.run(ver)
from dask-kubernetes.
No specific reason for the pip install; I took it out of the config.yaml and redid everything from scratch.
gcloud container clusters create dask --num-nodes=5 --machine-type=n1-standard-4 --zone=europe-west3-a
gcloud container clusters get-credentials dask --zone europe-west3-a
curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get | bash
kubectl --namespace kube-system create sa tiller
kubectl create clusterrolebinding tiller --clusterrole cluster-admin --serviceaccount=kube-system:tiller
helm init --service-account tiller
helm repo update
helm install stable/dask
helm upgrade <NAME> stable/dask -f config.yaml
I start a local Jupyter Notebook with Dask 0.18.1
def ver():
import gcsfs
return gcsfs.__version__
client.run(ver)
gives error:
distributed.protocol.core - CRITICAL - Failed to deserialize
Traceback (most recent call last):
File "C:\Users\Andreas.Hopfgartner\Anaconda3\lib\site-packages\distributed\protocol\core.py", line 122, in loads
value = _deserialize(head, fs, deserializers=deserializers)
File "C:\Users\Andreas.Hopfgartner\Anaconda3\lib\site-packages\distributed\protocol\serialize.py", line 235, in deserialize
dumps, loads = families[name]
KeyError: None
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-4-b8d0e10d9fd0> in <module>()
2 import gcsfs
3 return gcsfs.__version__
----> 4 client.run(ver)
~\Anaconda3\lib\site-packages\distributed\client.py in run(self, function, *args, **kwargs)
1999 '192.168.0.101:9000': 'running}
2000 """
-> 2001 return self.sync(self._run, function, *args, **kwargs)
2002
2003 @gen.coroutine
~\Anaconda3\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs)
650 return future
651 else:
--> 652 return sync(self.loop, func, *args, **kwargs)
653
654 def __repr__(self):
~\Anaconda3\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs)
273 e.wait(10)
274 if error[0]:
--> 275 six.reraise(*error[0])
276 else:
277 return result[0]
~\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
685 raise value.with_traceback(tb)
--> 686 raise value
687
688 else:
~\Anaconda3\lib\site-packages\distributed\utils.py in f()
258 yield gen.moment
259 thread_state.asynchronous = True
--> 260 result[0] = yield make_coro()
261 except Exception as exc:
262 error[0] = sys.exc_info()
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~\Anaconda3\lib\site-packages\tornado\concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~\Anaconda3\lib\site-packages\tornado\util.py in raise_exc_info(exc_info)
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
~\Anaconda3\lib\site-packages\distributed\client.py in _run(self, function, *args, **kwargs)
1947 args=dumps(args),
1948 kwargs=dumps(kwargs)),
-> 1949 workers=workers, nanny=nanny)
1950 results = {}
1951 for key, resp in responses.items():
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~\Anaconda3\lib\site-packages\tornado\concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~\Anaconda3\lib\site-packages\tornado\util.py in raise_exc_info(exc_info)
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
~\Anaconda3\lib\site-packages\distributed\core.py in send_recv_from_rpc(**kwargs)
550 try:
551 comm = yield self.live_comm()
--> 552 result = yield send_recv(comm=comm, op=key, **kwargs)
553 except (RPCClosed, CommClosedError) as e:
554 raise e.__class__("%s: while trying to call remote method %r"
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~\Anaconda3\lib\site-packages\tornado\concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~\Anaconda3\lib\site-packages\tornado\util.py in raise_exc_info(exc_info)
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
~\Anaconda3\lib\site-packages\distributed\core.py in send_recv(comm, reply, deserialize, serializers, deserializers, **kwargs)
430 yield comm.write(msg, serializers=serializers, on_error='raise')
431 if reply:
--> 432 response = yield comm.read(deserializers=deserializers)
433 else:
434 response = None
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~\Anaconda3\lib\site-packages\tornado\concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~\Anaconda3\lib\site-packages\tornado\util.py in raise_exc_info(exc_info)
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1061 if exc_info is not None:
1062 try:
-> 1063 yielded = self.gen.throw(*exc_info)
1064 finally:
1065 # Break up a reference to itself
~\Anaconda3\lib\site-packages\distributed\comm\tcp.py in read(self, deserializers)
201 msg = yield from_frames(frames,
202 deserialize=self.deserialize,
--> 203 deserializers=deserializers)
204 except EOFError:
205 # Frames possibly garbled or truncated by communication error
~\Anaconda3\lib\site-packages\tornado\gen.py in run(self)
1053
1054 try:
-> 1055 value = future.result()
1056 except Exception:
1057 self.had_exception = True
~\Anaconda3\lib\site-packages\tornado\concurrent.py in result(self, timeout)
236 if self._exc_info is not None:
237 try:
--> 238 raise_exc_info(self._exc_info)
239 finally:
240 self = None
~\Anaconda3\lib\site-packages\tornado\util.py in raise_exc_info(exc_info)
~\Anaconda3\lib\site-packages\tornado\gen.py in wrapper(*args, **kwargs)
305 try:
306 orig_stack_contexts = stack_context._state.contexts
--> 307 yielded = next(result)
308 if stack_context._state.contexts is not orig_stack_contexts:
309 yielded = TracebackFuture()
~\Anaconda3\lib\site-packages\distributed\comm\utils.py in from_frames(frames, deserialize, deserializers)
77 res = yield offload(_from_frames)
78 else:
---> 79 res = _from_frames()
80
81 raise gen.Return(res)
~\Anaconda3\lib\site-packages\distributed\comm\utils.py in _from_frames()
63 return protocol.loads(frames,
64 deserialize=deserialize,
---> 65 deserializers=deserializers)
66 except EOFError:
67 if size > 1000:
~\Anaconda3\lib\site-packages\distributed\protocol\core.py in loads(frames, deserialize, deserializers)
120 fs = decompress(head, fs)
121 fs = merge_frames(head, fs)
--> 122 value = _deserialize(head, fs, deserializers=deserializers)
123 else:
124 value = Serialized(head, fs)
~\Anaconda3\lib\site-packages\distributed\protocol\serialize.py in deserialize(header, frames, deserializers)
233 raise TypeError("Data serialized with %s but only able to deserialize "
234 "data with %s" % (name, str(list(deserializers))))
--> 235 dumps, loads = families[name]
236 return loads(header, frames)
237
KeyError: None
from dask-kubernetes.
`families` has to do with the new type of serialisation carried out in the latest version. That you are having trouble there suggests to me that you have a version mismatch with distributed. You should try running client.check_versions.
from dask-kubernetes.
Sorry, get_versions
from dask-kubernetes.
No worries, thanks! ;-)
client.get_versions()
gives:
{'client': {'host': [('python', '3.6.2.final.0'),
('python-bits', 64),
('OS', 'Windows'),
('OS-release', '10'),
('machine', 'AMD64'),
('processor', 'Intel64 Family 6 Model 142 Stepping 9, GenuineIntel'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'None'),
('LOCALE', 'None.None')],
'packages': {'optional': [('numpy', '1.13.1'),
('pandas', '0.22.0'),
('bokeh', '0.12.7'),
('lz4', None),
('blosc', None)],
'required': [('dask', '0.18.1'),
('distributed', '1.22.0'),
('msgpack', '0.4.8'),
('cloudpickle', '0.4.0'),
('tornado', '4.5.2'),
('toolz', '0.8.2')]}},
'scheduler': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.2'),
('pandas', '0.22.0'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.17.4'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.2'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}},
'workers': {'tcp://10.52.0.5:37425': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.5'),
('pandas', '0.23.2'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.18.1'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.3'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}},
'tcp://10.52.1.8:38399': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.5'),
('pandas', '0.23.2'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.18.1'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.3'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}},
'tcp://10.52.2.6:36785': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.5'),
('pandas', '0.23.2'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.18.1'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.3'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}},
'tcp://10.52.3.9:40582': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.5'),
('pandas', '0.23.2'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.18.1'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.3'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}},
'tcp://10.52.4.5:35159': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.2'),
('pandas', '0.22.0'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.17.4'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.2'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}},
'tcp://10.52.4.7:34746': {'host': (('python', '3.6.0.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.4.111+'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'optional': (('numpy', '1.14.5'),
('pandas', '0.23.2'),
('bokeh', '0.12.15'),
('lz4', None),
('blosc', '1.5.1')),
'required': (('dask', '0.18.1'),
('distributed', '1.21.8'),
('msgpack', '0.5.6'),
('cloudpickle', '0.5.3'),
('tornado', '5.0.2'),
('toolz', '0.9.0'))}}}}
from dask-kubernetes.
Workers: ('dask', '0.18.1'), ('distributed', '1.21.8')
Scheduler: ('dask', '0.17.4'), ('distributed', '1.21.8')
Client: ('dask', '0.18.1'),('distributed', '1.22.0')
You should get these versions to match.
from dask-kubernetes.
from dask-kubernetes.
Hm. The scheduler and workers are packaged in the helm (kubernetes) chart, and I haven't changed anything. Is this a bug? How can I correct the versions?
from dask-kubernetes.
Should I get the versions to match horizontally or vertically :-) or both?
from dask-kubernetes.
from dask-kubernetes.
from dask-kubernetes.
Okay, I guess I have to edit the config.yaml to achieve this?
from dask-kubernetes.
That's it: I swapped the package order in the conda install list for the scheduler / worker. Beautiful, thanks for the support; gcs and s3 are working!
from dask-kubernetes.
Related Issues (20)
- Allow graceful shutdown HOT 3
- Dask dashboard not loading HOT 4
- Env var duplication HOT 2
- Ability to add different scheduler address to workers outside of standard format HOT 2
- Add a Changelog HOT 4
- Cluster creation constantly failing because of existing scheduler in "Terminating" status HOT 3
- Does dask-kubernetes compatible with newer version of k8rs? HOT 4
- Can not connect to k8s websocket deployed in Rancher HOT 5
- Update dask-kubernetes to a newer kr8s HOT 4
- Add Python 3.12 support HOT 1
- TOCTOU Bug while scaling down workers HOT 5
- Worker RestartPolicy not setable HOT 2
- Dask cluster creation issue with TLS HOT 1
- KubeCluster is shut down automatically even if shutdown_on_close is False HOT 1
- Go code failing to lint
- Dask Cluster with name longer than 53 chars is stuck in Created state, cannot be deleted
- Cannot Overwrite DASK_SCHEDULER_ADDRESS in Worker env HOT 1
- ConnectionClosedError during Dask Cluster Creation with k8s HOT 1
- Missing idleTimeout key in daskcluster_autoshutdown HOT 8
- Add IngressSpec besides ServiceSpec to Scheduler HOT 2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from dask-kubernetes.