talmago / spacy_crfsuite Goto Github PK
View Code? Open in Web Editor NEWsequence tagging with spaCy and crfsuite
Home Page: https://pypi.org/project/spacy-crfsuite
License: MIT License
sequence tagging with spaCy and crfsuite
Home Page: https://pypi.org/project/spacy-crfsuite
License: MIT License
Hi,
Thanks for your clarification on the issue which I have raised later. It helped me a lot.
Could you please explain how to continue training an existing model? Say I have trained the model on 1,000 data points and, to improve its accuracy further, I want to train it on another 500 data points. How can I do this?
Hi, I get an attribute-not-defined error (AttributeError) while running the Jupyter notebook file.
Please find the code and error messages as listed below.
from typing import List, Dict, Optional
from spacy_crfsuite.crf_extractor import CRFExtractor, CRFToken
from spacy_crfsuite.tokenizer import Tokenizer, SpacyTokenizer
from spacy_crfsuite.bilou import get_entity_offsets
def to_crfsuite(
    examples: List[Dict],
    crf_extractor: Optional[CRFExtractor] = None,
    tokenizer: Optional[SpacyTokenizer] = None,
) -> List[List[CRFToken]]:
    """Convert training examples into a CRF dataset.

    Falsy examples are skipped. An example that has no "tokens" key but has
    a "text" key is tokenized, and the result is stored back on the example
    under "tokens" (the input dict is modified in place). An example with
    neither key is skipped, with a warning printed when `wasabi` is
    importable.

    Args:
        examples (list): training examples.
        crf_extractor (CRFExtractor): optional, crf component. A new
            `CRFExtractor` is created when omitted.
        tokenizer (Tokenizer): optional, tokenizer. Default is `SpacyTokenizer`.

    Returns:
        List[List[CRFToken]], one entry per example that was converted.
    """
    if not tokenizer:
        tokenizer = SpacyTokenizer()
    assert isinstance(tokenizer, Tokenizer)

    if not crf_extractor:
        crf_extractor = CRFExtractor()
    assert isinstance(crf_extractor, CRFExtractor)

    crf_rows = []
    for example in examples:
        if not example:
            continue

        if "tokens" not in example:
            if "text" not in example:
                # Nothing to tokenize: warn on a best-effort basis and move on.
                try:
                    from wasabi import msg

                    msg.warn(f"Empty example: {example}")
                except ImportError:
                    pass
                continue
            example["tokens"] = tokenizer.tokenize(example, attribute="text")

        offsets = get_entity_offsets(example)
        crf_rows.append(crf_extractor.from_json_to_crf(example, offsets))

    return crf_rows
# Create the CRF component with explicit "c1"/"c2" settings
# (presumably the CRF regularization coefficients -- confirm against CRFExtractor).
crf_extractor = CRFExtractor({"c1": 0.01, "c2": 0.06})
# Convert the raw examples into a CRF dataset; `train_data` is not defined in this snippet.
train_dataset = to_crfsuite(train_data, crf_extractor=crf_extractor)
# Train the extractor on the converted dataset.
crf_extractor.train(train_dataset)
AttributeError Traceback (most recent call last)
in
51
52 crf_extractor = CRFExtractor({"c1": 0.01, "c2": 0.06})
---> 53 train_dataset = to_crfsuite(train_data, crf_extractor=crf_extractor)
54 crf_extractor.train(train_dataset)
in to_crfsuite(examples, crf_extractor, tokenizer)
20 """
21 #tokenizer = tokenizer or SpacyTokenizer()
---> 22 tokenizer = SpacyTokenizer()
23 assert isinstance(tokenizer, Tokenizer)
24
~\Desktop\spacycrf\spacy_crfsuite\tokenizer.py in __init__(self, nlp)
61 class SpacyTokenizer(Tokenizer):
62 def __init__(self, nlp=None):
---> 63 self.nlp = nlp or spacy.blank("en")
64
65 def tokenize(self, message: Dict, attribute: Text = "text") -> List[Token]:
~\AppData\Local\Continuum\anaconda3\envs\mypy\lib\site-packages\spacy\__init__.py in blank(name, **kwargs)
33 def blank(name, **kwargs):
34 LangClass = util.get_lang_class(name)
---> 35 return LangClass(**kwargs)
36
37
~\AppData\Local\Continuum\anaconda3\envs\mypy\lib\site-packages\spacy\language.py in __init__(self, vocab, make_doc, max_length, meta, **kwargs)
167 RETURNS (Language): The newly constructed object.
168 """
--> 169 user_factories = util.registry.factories.get_all()
170 self.factories.update(user_factories)
171 self._meta = dict(meta)
~\AppData\Local\Continuum\anaconda3\envs\mypy\lib\site-packages\catalogue.py in get_all(self)
110 result = OrderedDict()
111 if self.entry_points:
--> 112 result.update(self.get_entry_points())
113 for keys, value in REGISTRY.items():
114 if len(self.namespace) == len(keys) - 1 and all(
~\AppData\Local\Continuum\anaconda3\envs\mypy\lib\site-packages\catalogue.py in get_entry_points(self)
125 result = {}
126 for entry_point in AVAILABLE_ENTRY_POINTS.get(self.entry_point_namespace, []):
--> 127 result[entry_point.name] = entry_point.load()
128 return result
129
~\AppData\Local\Continuum\anaconda3\envs\mypy\lib\site-packages\importlib_metadata\__init__.py in load(self)
95 module = import_module(match.group('module'))
96 attrs = filter(None, (match.group('attr') or '').split('.'))
---> 97 return functools.reduce(getattr, attrs, module)
98
99 @property
AttributeError: module 'spacy_crfsuite.crf_extractor' has no attribute 'CRFEntityExtractorFactory'
Hi , I can see the potential of this project but it lacks clear documentation.
Could you please explain in detail how to train the model, and walk through the markdown example?
It's good work.
Since adding custom pipeline components works differently in spaCy 3.0 than in spaCy 2, could you add sample code showing how to use this library with spaCy 3.0?
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.