Thanks for this awesome library! I'm still quite new to all this, so this is probably something simple. I'm just trying to train and load the categorizer. I'm using Python 3.9.
import spacy
import classy_classification
import json
# Read the training data; the context manager closes the file handle as soon
# as the JSON has been parsed instead of leaving it open.
with open('./../aitrain_small.json', 'r') as training_file:
    data = json.load(training_file)

# Load the base pipeline and append the "text_categorizer" component,
# passing it the training data and the "spacy" model setting.
nlp = spacy.load("en_core_web_md")
nlp.add_pipe(
    "text_categorizer",
    config={
        "data": data,
        "model": "spacy",
    },
)

# Check the categorizer on one sentence, then write the pipeline to disk.
print(nlp("Ability to multi-task in a fast paced detail oriented environment.")._.cats)
nlp.to_disk('small_text_cat')
This seems to save successfully.
Trying to load this separately is when the error happens.
import spacy
import classy_classification
# Reload the pipeline that was previously written to 'small_text_cat'.
nlp = spacy.load("small_text_cat")
# Run one sentence through the pipeline and show its predicted categories.
doc = nlp("Ability to multi-task in a fast paced detail oriented environment.")
print(doc._.cats)
Here's the error:
Traceback (most recent call last):
File "/Users/manicho/Mine/Projects/AIVizi/trainingData/usesaved.py", line 6, in <module>
nlp = spacy.load("small_text_cat")
File "/opt/homebrew/lib/python3.9/site-packages/spacy/__init__.py", line 51, in load
return util.load_model(
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 422, in load_model
return load_model_from_path(Path(name), **kwargs) # type: ignore[arg-type]
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 488, in load_model_from_path
nlp = load_model_from_config(
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 528, in load_model_from_config
nlp = lang_cls.from_config(
File "/opt/homebrew/lib/python3.9/site-packages/spacy/language.py", line 1783, in from_config
nlp.add_pipe(
File "/opt/homebrew/lib/python3.9/site-packages/spacy/language.py", line 792, in add_pipe
pipe_component = self.create_pipe(
File "/opt/homebrew/lib/python3.9/site-packages/spacy/language.py", line 674, in create_pipe
resolved = registry.resolve(cfg, validate=validate)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/config.py", line 746, in resolve
resolved, _ = cls._make(
File "/opt/homebrew/lib/python3.9/site-packages/thinc/config.py", line 795, in _make
filled, _, resolved = cls._fill(
File "/opt/homebrew/lib/python3.9/site-packages/thinc/config.py", line 867, in _fill
getter_result = getter(*args, **kwargs)
File "/opt/homebrew/lib/python3.9/site-packages/classy_classification/__init__.py", line 41, in make_text_categorizer
return classySpacyInternal(
File "/opt/homebrew/lib/python3.9/site-packages/classy_classification/classifiers/spacy_internal.py", line 22, in __init__
self.set_training_data()
File "/opt/homebrew/lib/python3.9/site-packages/classy_classification/classifiers/classy_skeleton.py", line 113, in set_training_data
self.X = self.get_embeddings(X)
File "/opt/homebrew/lib/python3.9/site-packages/classy_classification/classifiers/spacy_internal.py", line 36, in get_embeddings
embeddings = [self.get_embeddings_from_doc(doc) for doc in docs]
File "/opt/homebrew/lib/python3.9/site-packages/classy_classification/classifiers/spacy_internal.py", line 36, in <listcomp>
embeddings = [self.get_embeddings_from_doc(doc) for doc in docs]
File "/opt/homebrew/lib/python3.9/site-packages/spacy/language.py", line 1576, in pipe
for doc in docs:
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/transition_parser.pyx", line 230, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1551, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/pipe.pyx", line 53, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/pipe.pyx", line 53, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 73, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1551, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/transition_parser.pyx", line 230, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1551, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 73, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1551, in minibatch
batch = list(itertools.islice(items, int(batch_size)))
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1602, in _pipe
yield from proc.pipe(docs, **kwargs)
File "spacy/pipeline/trainable_pipe.pyx", line 79, in pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/util.py", line 1621, in raise_error
raise e
File "spacy/pipeline/trainable_pipe.pyx", line 75, in spacy.pipeline.trainable_pipe.TrainablePipe.pipe
File "/opt/homebrew/lib/python3.9/site-packages/spacy/pipeline/tok2vec.py", line 125, in predict
tokvecs = self.model.predict(docs)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 315, in predict
return self._func(self, X, is_train=False)[0]
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/concatenate.py", line 44, in forward
Ys, callbacks = zip(*[layer(X, is_train=is_train) for layer in model.layers])
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/concatenate.py", line 44, in <listcomp>
Ys, callbacks = zip(*[layer(X, is_train=is_train) for layer in model.layers])
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/with_array.py", line 30, in forward
return _ragged_forward(
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/with_array.py", line 90, in _ragged_forward
Y, get_dX = layer(Xr.dataXd, is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/concatenate.py", line 44, in forward
Ys, callbacks = zip(*[layer(X, is_train=is_train) for layer in model.layers])
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/concatenate.py", line 44, in <listcomp>
Ys, callbacks = zip(*[layer(X, is_train=is_train) for layer in model.layers])
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/chain.py", line 54, in forward
Y, inc_layer_grad = layer(X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 291, in __call__
return self._func(self, X, is_train=is_train)
File "/opt/homebrew/lib/python3.9/site-packages/thinc/layers/hashembed.py", line 61, in forward
vectors = cast(Floats2d, model.get_param("E"))
File "/opt/homebrew/lib/python3.9/site-packages/thinc/model.py", line 216, in get_param
raise KeyError(
KeyError: "Parameter 'E' for model 'hashembed' has not been allocated yet."
Looking at the KeyError, it looks like it's something with the Tok2Vec pipeline, but I'm unsure how to rectify this. I'm guessing I'm doing something wrong in general when saving and loading, and I'm hoping you can point me in the right direction. Thanks in advance!