devintdha / spark-nlp-starter-scripts Goto Github PK
View Code? Open in Web Editor NEW
License: Apache License 2.0
Hi Devin - I ran the updated script from yesterday, but now need help with this issue:
Py4JJavaError Traceback (most recent call last)
in
9 #.setInputCols(["document"])
10 #.setOutputCol("sentence")
---> 11 sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")
12 .setInputCols(["document"])
13 .setOutputCol("sentence")
~/opt/anaconda3/lib/python3.8/site-packages/sparknlp/annotator.py in pretrained(name, lang, remote_loc)
3119 def pretrained(name="sentence_detector_dl", lang="en", remote_loc=None):
3120 from sparknlp.pretrained import ResourceDownloader
-> 3121 return ResourceDownloader.downloadModel(SentenceDetectorDLModel, name, lang, remote_loc)
3122
3123
~/opt/anaconda3/lib/python3.8/site-packages/sparknlp/pretrained.py in downloadModel(reader, name, language, remote_loc, j_dwn)
45 def downloadModel(reader, name, language, remote_loc=None, j_dwn='PythonResourceDownloader'):
46 print(name + " download started this may take some time.")
---> 47 file_size = _internal._GetResourceSize(name, language, remote_loc).apply()
48 if file_size == "-1":
49 print("Can not find the model to download please check the name!")
~/opt/anaconda3/lib/python3.8/site-packages/sparknlp/internal.py in init(self, name, language, remote_loc)
204 class _GetResourceSize(ExtendedJavaWrapper):
205 def init(self, name, language, remote_loc):
--> 206 super(_GetResourceSize, self).init(
207 "com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize", name, language, remote_loc)
208
~/opt/anaconda3/lib/python3.8/site-packages/sparknlp/internal.py in init(self, java_obj, *args)
142 super(ExtendedJavaWrapper, self).init(java_obj)
143 self.sc = SparkContext._active_spark_context
--> 144 self._java_obj = self.new_java_obj(java_obj, *args)
145 self.java_obj = self._java_obj
146
~/opt/anaconda3/lib/python3.8/site-packages/sparknlp/internal.py in new_java_obj(self, java_class, *args)
152
153 def new_java_obj(self, java_class, *args):
--> 154 return self._new_java_obj(java_class, *args)
155
156 def new_java_array(self, pylist, java_class):
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
67 java_obj = getattr(java_obj, name)
68 java_args = [_py2java(sc, arg) for arg in args]
---> 69 return java_obj(*java_args)
70
71 @staticmethod
~/opt/anaconda3/lib/python3.8/site-packages/py4j/java_gateway.py in call(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
126 def deco(*a, **kw):
127 try:
--> 128 return f(*a, **kw)
129 except py4j.protocol.Py4JJavaError as e:
130 converted = convert_exception(e.java_exception)
~/opt/anaconda3/lib/python3.8/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling z:com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize.
: com.amazonaws.services.s3.model.AmazonS3Exception: Access Denied (Service: Amazon S3; Status Code: 403; Error Code: AccessDenied; Request ID: DA6V9X53ZDK9R1Z6; S3 Extended Request ID: M3m0sRb7IkM0WQxxP4myXJY1wZO4QuXetnvdT3f/07pBK8V6s5RLEsZQ6tLjKJome855Xsk58WM=), S3 Extended Request ID: M3m0sRb7IkM0WQxxP4myXJY1wZO4QuXetnvdT3f/07pBK8V6s5RLEsZQ6tLjKJome855Xsk58WM=
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1712)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1113)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:770)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:744)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:726)
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:686)
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:668)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:532)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:512)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4921)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4867)
at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1467)
at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1326)
at com.johnsnowlabs.nlp.pretrained.S3ResourceDownloader.downloadMetadataIfNeed(S3ResourceDownloader.scala:84)
at com.johnsnowlabs.nlp.pretrained.S3ResourceDownloader.resolveLink(S3ResourceDownloader.scala:96)
at com.johnsnowlabs.nlp.pretrained.S3ResourceDownloader.getDownloadSize(S3ResourceDownloader.scala:174)
at com.johnsnowlabs.nlp.pretrained.ResourceDownloader$.getDownloadSize(ResourceDownloader.scala:423)
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader$.getDownloadSize(ResourceDownloader.scala:521)
at com.johnsnowlabs.nlp.pretrained.PythonResourceDownloader.getDownloadSize(ResourceDownloader.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Running !bash ./spark-nlp-setup-macOS-catalina.sh gave me this error:
/removed/
Is this an error with the license?
AnalysisException Traceback (most recent call last)
in
----> 1 result = pipeline().transform(pdf_example_df).cache()
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/ml/base.py in transform(self, dataset, params)
168 return self.copy(params)._transform(dataset)
169 else:
--> 170 return self._transform(dataset)
171 else:
172 raise ValueError("Params must be a param map but got %s." % type(params))
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/ml/pipeline.py in _transform(self, dataset)
260 def _transform(self, dataset):
261 for t in self.stages:
--> 262 dataset = t.transform(dataset)
263 return dataset
264
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/ml/base.py in transform(self, dataset, params)
168 return self.copy(params)._transform(dataset)
169 else:
--> 170 return self._transform(dataset)
171 else:
172 raise ValueError("Params must be a param map but got %s." % type(params))
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/ml/wrapper.py in _transform(self, dataset)
336 def _transform(self, dataset):
337 self._transfer_params_to_java()
--> 338 return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx)
339
340
~/opt/anaconda3/lib/python3.8/site-packages/py4j/java_gateway.py in call(self, *args)
1302
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1306
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/sql/utils.py in deco(*a, **kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
~/opt/anaconda3/lib/python3.8/site-packages/pyspark/sql/utils.py in raise_from(e)
AnalysisException: You're using untyped Scala UDF, which does not have the input type information. Spark may blindly pass null to the Scala closure with primitive-type argument, and the closure will see the default value of the Java type for the null argument, e.g. udf((x: Int) => x, IntegerType)
, the result is 0 for null input. To get rid of this error, you could:
udf((x: Int) => x)
udf(new UDF1[String, Integer] { override def call(s: String): Integer = s.length() }, IntegerType)
, if input types are all non-primitive.

A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents code.
Tencent's open source team in China.