When a model is created with clean_outliers=True, calling model.predict_model_docs() raises "IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)". With clean_outliers=False, predict_model_docs() runs without any issues. It appears that clean_outliers() is creating a dimension mismatch?
model = lbl2vec.Lbl2TransformerVec(transformer_model=transformer_model_loop, label_names=labels, keywords_list=keys,
documents=df['name'].apply(str.lower), device=torch.device('cuda'), similarity_threshold=.5, clean_outliers=True)
model.fit()
torch.set_default_tensor_type('torch.cuda.FloatTensor')
## Produces issues with clean_outliers=True
model_out = model_loop.predict_model_docs()
Error: IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Lbl2TransformerVec - INFO - Calculate document<->label similarities
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-6-c5d23d521a48> in <module>
---> 15 model_out = model.predict_model_docs()
~/.local/lib/python3.8/site-packages/lbl2vec/lbl2transformervec.py in predict_model_docs(self, doc_idxs)
333 self.logger.info('Calculate document<->label similarities')
334 # calculate document vector <-> label vector similarities
--> 335 labeled_docs = self._get_document_label_similarities(labeled_docs=labeled_docs, doc_key_column=doc_key_column,
336 most_similar_label_column=most_similar_label_column,
337 highest_similarity_score_column=highest_similarity_score_column)
~/.local/lib/python3.8/site-packages/lbl2vec/lbl2transformervec.py in _get_document_label_similarities(self, labeled_docs, doc_key_column, most_similar_label_column, highest_similarity_score_column)
532 label_similarities = []
533 for label_vector in list(self.labels['label_vector_from_docs']):
--> 534 similarities = top_similar_vectors(key_vector=label_vector, candidate_vectors=list(labeled_docs['doc_vec']))
535 similarities.sort(key=lambda x: x[1])
536 similarities = [elem[0] for elem in similarities]
~/.local/lib/python3.8/site-packages/lbl2vec/utils.py in top_similar_vectors(key_vector, candidate_vectors)
178 A descending sorted of tuples of (cos_similarity, list_idx) by cosine similarities for each candidate vector in the list
179 '''
--> 180 cos_scores = util.cos_sim(key_vector, np.asarray(candidate_vectors))[0]
181 top_results = torch.topk(cos_scores, k=len(candidate_vectors))
182 top_cos_scores = top_results[0].detach().cpu().numpy()
~/.local/lib/python3.8/site-packages/sentence_transformers/util.py in cos_sim(a, b)
45 b = b.unsqueeze(0)
46
---> 47 a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
48 b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
49 return torch.mm(a_norm, b_norm.transpose(0, 1))
~/.local/lib/python3.8/site-packages/torch/nn/functional.py in normalize(input, p, dim, eps, out)
4630 return handle_torch_function(normalize, (input, out), input, p=p, dim=dim, eps=eps, out=out)
4631 if out is None:
-> 4632 denom = input.norm(p, dim, keepdim=True).clamp_min(eps).expand_as(input)
4633 return input / denom
4634 else:
~/.local/lib/python3.8/site-packages/torch/_tensor.py in norm(self, p, dim, keepdim, dtype)
636 Tensor.norm, (self,), self, p=p, dim=dim, keepdim=keepdim, dtype=dtype
637 )
--> 638 return torch.norm(self, p, dim, keepdim, dtype=dtype)
639
640 def solve(self, other):
~/.local/lib/python3.8/site-packages/torch/functional.py in norm(input, p, dim, keepdim, out, dtype)
1527 if out is None:
1528 if dtype is None:
-> 1529 return _VF.norm(input, p, _dim, keepdim=keepdim) # type: ignore[attr-defined]
1530 else:
1531 return _VF.norm(input, p, _dim, keepdim=keepdim, dtype=dtype) # type: ignore[attr-defined]
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)