hit-scir / ltp Goto Github PK
View Code? Open in Web Editor NEW — Language Technology Platform
Home Page: http://ltp.ai
Language Technology Platform
Home Page: http://ltp.ai
我分词想在你们模型的基础上增加一些自定义词库。。这个怎么处理呢?
以下列表中的文件LTP已经不再使用
__util/conversion_utf.h
__util/decode_gbk.h
__util/EncodeUtil.cpp
__util/EncodeUtil.h
__util/gbk_u16.h
__util/IniReader.cpp
__util/IniReader.h
__util/Logger.cpp
__util/Logger.h
__util/md5.cpp
__util/md5.h
__util/SBC2DBC.cpp
__util/SBC2DBC.h
__util/TextProcess.cpp
__util/TextProcess.h
__util/Timer.h
extractord单件在运行结束时没有合理释放,valgrind memcheck报告内存泄露。
单件正确的实现方法应参考:http://stackoverflow.com/questions/1008019/c-singleton-design-pattern
current performance of ltp-server is list below.
here goes some problems:
我的代码如下:
作者能提供一个srl调用例子么?
/*
* NodeJS LTP 扩展
* @author 蜗眼
* 第一次写C/C++ 见笑了。。。
*/
#include < v8.h > #include "Xml4nlp.h"#include "Ltp.h"#include "segment_dll.h"#include "postag_dll.h"#include "ner_dll.h"#include "parser_dll.h"#include "SRL_DLL.h"#include < iostream > #include < string > #include < node.h > using namespace node;
using namespace v8;
using namespace std;
//using namespace ltp::strutils::codecs;
#include "DepSRL.h"static DepSRL g_depSRL;
/*
 * Runs the full LTP pipeline (segment -> postag -> NER -> dependency parse
 * -> SRL) on a fixed demo sentence and prints every intermediate result to
 * stdout. Returns v8 Undefined.
 * NOTE(review): model paths are hard-coded for one local machine — presumably
 * they should come from JS arguments; confirm with the addon's callers.
 */
Handle < Value > Method(const Arguments & args) {
  HandleScope scope;

  // --- Word segmentation (with a user dictionary) ---
  const char * path = "/Users/iceet/Mine/Bill/tools/xls/ltp_data/cws.model";
  const char * selfs = "/Users/iceet/Mine/Bill/src/BillNLP/self.dic";
  void * engine = segmentor_create_segmentor(path, selfs);
  vector < string > words;
  int len = segmentor_segment(engine, "我们都是**人。", words);
  for (int i = 0; i < len; ++ i) {
    std :: cout << words[i] << "|";
  }
  std :: cout << std :: endl;
  segmentor_release_segmentor(engine);

  // --- Part-of-speech tagging ---
  void * engine1 = postagger_create_postagger("/Users/iceet/Mine/Bill/tools/xls/ltp_data/pos.model");
  std :: vector < std :: string > tags;
  postagger_postag(engine1, words, tags);
  // size_t index avoids the signed/unsigned comparison against tags.size().
  for (size_t i = 0; i < tags.size(); ++ i) {
    std :: cout << words[i] << "/" << tags[i];
    if (i + 1 == tags.size()) std :: cout << std :: endl;
    else std :: cout << " ";
  }
  postagger_release_postagger(engine1);

  // --- Named entity recognition ---
  void * engin2 = ner_create_recognizer("/Users/iceet/Mine/Bill/tools/xls/ltp_data/ner.model");
  std :: vector < string > vec;
  ner_recognize(engin2, words, tags, vec);
  for (size_t i = 0; i < vec.size(); ++ i) {
    std :: cout << vec[i] << "<<" << i;
    if (i + 1 == vec.size()) std :: cout << std :: endl;
    else std :: cout << " ";
  }
  // FIX: the recognizer was never released — resource leak.
  ner_release_recognizer(engin2);

  // --- Dependency parsing ---
  void * engine3 = parser_create_parser("/Users/iceet/Mine/Bill/tools/xls/ltp_data/parser.model");
  vector < int > heads;
  vector < std :: string > deprels;
  vector < pair < int, string > > parser;
  parser_parse(engine3, words, tags, heads, deprels);
  for (size_t i = 0; i < heads.size(); ++ i) {
    std :: cout << words[i] << "\t" << tags[i] << "\t" << heads[i] << "\t" << deprels[i] << std :: endl;
    cout << heads[i] << endl;
    // Collect (head, relation) pairs in the shape the SRL call expects.
    parser.push_back(make_pair(heads[i], deprels[i]));
  }
  // FIX: the parser was never released — resource leak.
  parser_release_parser(engine3);

  // --- Semantic role labeling ---
  SRL_LoadResource("/Users/iceet/Mine/Bill/tools/xls/ltp_data/srl/");
  vector < pair < int, vector < pair < const char * , pair < int, int > > > > > vecSRLResult;
  SRL(words, tags, vec, parser, vecSRLResult);
  cout << vecSRLResult.size() << endl;
  for (size_t j = 0; j < vecSRLResult.size(); ++j) {
    for (size_t k = 0; k < vecSRLResult[j].second.size(); ++k) {
      std :: cout << k << endl;
    }
    cout << "--" << endl;
  }
  // FIX: SRL resources were loaded but never released — resource leak.
  SRL_ReleaseResource();

  return scope.Close(Undefined());
}
// Addon initializer: exposes Method() to JavaScript as exports.analyze.
void init(Handle < Object > exports) {
exports -> Set(String :: NewSymbol("analyze"),
FunctionTemplate :: New(Method) -> GetFunction());
}
// Registers this file as the node addon entry point; "BillNLP" is the module name.
NODE_MODULE(BillNLP, init)
目前中英文混合文本应该非常常见了,不知道LTP有没有考虑去实现这块。
doc(二进制)格式文档无法追踪更改 不适合在github和开源项目中使用 建议使用TeX或Markdown等重新制作
我是用cpp写的nodejs扩展,然后我这个编译透过了。在调用的时候提示这样。。
帮我分析下是什么原因呢。。
THX
node t.js
[TRACE] 2013/09/24 17:35:03 Loading segmentor model from "ltp_data/cws.model" ...
[TRACE] 2013/09/24 17:35:03 segmentor model is loaded.
[TRACE] 2013/09/24 17:35:03 Loading postagger model from "ltp_data/pos.model" ...
dyld: lazy symbol binding failed: Symbol not found: __Z26postagger_create_postaggerPKc
Referenced from: /Users/iceet/Mine/ltp/src/node/build/Release/hello.node
Expected in: dynamic lookup
dyld: Symbol not found: __Z26postagger_create_postaggerPKc
Referenced from: /Users/iceet/Mine/ltp/src/node/build/Release/hello.node
Expected in: dynamic lookup
我尝试用如下代码解密例句“我们都是**人。”
//依存句法分析接口
//parse
void * engine3 = parser_create_parser("/Users/iceet/Mine/Bill/tools/xls/ltp_data/parser.model");
vector<int> heads;
vector<std::string> deprels;
vector< pair<int, string> > parse;
parser_parse(engine3, words, postags, heads, deprels);
for (int i = 0; i < heads.size(); ++ i) {
//std::cout << words[i] << "\t" << tags[i] << "\t"
// << heads[i] << "\t" << deprels[i] << std::endl;
//parser[i].first = heads[i];
// heads[i] = heads[i];
cout << heads[i] << deprels[i] << endl;
// int parentIdx = atoi( heads[i].c_str() );
//parser[i].second = deprels[i];
parse.push_back(make_pair(heads[i],deprels[i]));
}
期望得到结果A:
我们/r 2 SBV
都/d 2 ADV
是/v -1 HED
**/ns 4 ATT
人/n 2 VOB
。/wp 2 WP
实际上得到的结果B:
我们/r 3 SBV
都/d 3 ADV
是/v 0 HED
**/ns 5 ATT
人/n 3 VOB
。/wp 3 WP
发现在 parser_dll.cpp 的 62 行
int len = inst->size();
heads.resize(len - 1);
deprels.resize(len - 1);
for (int i = 1; i < len; ++ i) {
heads[i - 1] = inst->predicted_heads[i];//这里是否应该减去1?
deprels[i - 1] = ltp::parser::Parser::model->deprels.at(
inst->predicted_deprelsidx[i]);
}
在ltp中如何启用这个 wsd参数呢。。
现在的返回结果里面没有这个参数了
RT
Scanning dependencies of target postagger
[ 40%] Building CXX object src/_svmtagger/CMakeFiles/postagger.dir/dict.cpp.o
In file included from /usr/include/sys/signal.h:148,
from /usr/include/sys/wait.h:116,
from /usr/include/stdlib.h:65,
from /usr/include/c++/4.2.1/cstdlib:72,
from /usr/include/c++/4.2.1/bits/stl_algobase.h:68,
from /usr/include/c++/4.2.1/bits/char_traits.h:46,
from /usr/include/c++/4.2.1/string:47,
from /usr/local/include/boost/regex/v4/cregex.hpp:207,
from /usr/local/include/boost/cregex.hpp:27,
from /Users/wanxiang/Documents/workspace/ltp/src/_svmtagger/er.h:24,
from /Users/wanxiang/Documents/workspace/ltp/src/_svmtagger/dict.cpp:27:
/usr/include/sys/_structs.h:218: error: conflicting declaration ‘typedef struct __darwin_sigaltstack stack_t’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/swindow.h:44: error: ‘struct stack_t’ has a previous declaration as ‘struct stack_t’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In member function ‘void dictionary::dictWrite(char)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:136: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In member function ‘void dictionary::dictCreate(FILE, int, int)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:250: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In member function ‘void dictionary::dictRepairFromFile(char)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:325: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In member function ‘void dictionary::dictAddBackup(char)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:462: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In constructor ‘dictionary::dictionary(char, char)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:582: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:586: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In constructor ‘dictionary::dictionary(char)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:593: warning: deprecated conversion from string constant to ‘char’
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp: In constructor ‘dictionary::dictionary(char, int, int)’:
/Users/wanxiang/Documents/workspace/ltp/src/svmtagger/dict.cpp:601: warning: deprecated conversion from string constant to ‘char’
make[3]: *** [src/_svmtagger/CMakeFiles/postagger.dir/dict.cpp.o] Error 1
make[2]: *** [src/_svmtagger/CMakeFiles/postagger.dir/all] Error 2
make[1]: *** [all] Error 2
make: *** [all] Error 2
现在实现的逻辑是同时将所有模块的模型load进来。
这个设计并不是很美观,考虑到有些用户只想做分词,但是却连parser的模型也加载进来。
在配置文件中指定加载的模型以及要做的任务。
ltp-3.1.0和github上最新的co试过了都有这个问题
$ bin/ltp_test ltp_data/ltp.cnf dp 1.txt
[TRACE] 2014/04/03 12:02:18 Loading segmentor model from "ltp_data/cws.model" ...
[TRACE] 2014/04/03 12:02:18 segmentor model is loaded.
[WARNING] 2014/04/03 12:02:18 No "postagger-model" config is found
[TRACE] 2014/04/03 12:02:18 Loading parser resource from "ltp_data/parser.model"
1792
-1
[ERROR] 2014/04/03 12:02:18 /home/feng/fun/ltp/src/__ltp_dll/LTPResource.cpp: line 183: LoadParserResource(): Failed to create parser
[ERROR] 2014/04/03 12:02:18 /home/feng/fun/ltp/src/__ltp_dll/Ltp.cpp: line 128: ReadConfFile(): in LTP::parser, failed to load parser resource
Failed to load LTP
[TRACE] 2014/04/03 12:02:18 segmentor model is released.
步进调试发现是feat_opt.use_sibling被置位了,但是parser.model里面却只有一个collections
$ xxd parser.model |grep collections
0000700: 636f 6c6c 6563 7469 6f6e 7300 0000 0000 collections.....
http://q.weibo.com/849045 欢迎大家讨论有关于LTP的问题或者提出建议和意见。
相应的 Travis CI 也有同样的问题。
系统 windows 7,
visual studio 2008完全安装,
cmake 2.8.11.2,
报错如下:
CMake Error at thirdparty/maxent/CMakeLists.txt:80 (add_executable):
Cannot find source file:
getopt.c
Tried extensions .c .C .c++ .cc .cpp .cxx .m .M .mm .h .hh .h++ .hm .hpp
.hxx .in .txx
Wanxiangs-MacBook-Pro:ltp wanxiang$ make
[ 10%] Built target crfpp
[ 17%] Built target maxent
[ 21%] Built target tinyxml
[ 22%] Building CXX object src/__util/CMakeFiles/util.dir/Logger.cpp.o
In file included from /Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:10:
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.h:71: error: ‘MAX_PATH’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.h:76: error: ‘semaphore’ does not name a type
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.h:77: error: ‘semaphore’ has not been declared
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.h:78: error: ‘semaphore’ has not been declared
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.h:79: error: ‘semaphore’ has not been declared
/Users/wanxiang/Documents/workspace/ltp/src/_util/Logger.cpp: In constructor ‘CLogger::CLogger(const char, int)’:
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:16: error: ‘m_csLogger’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp: At global scope:
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:32: error: variable or field ‘InitializeCriticalSection’ declared void
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:32: error: ‘semaphore’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:32: error: ‘s’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:42: error: variable or field ‘EnterCriticalSection’ declared void
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:42: error: ‘semaphore’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:42: error: ‘s’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:66: error: variable or field ‘LeaveCriticalSection’ declared void
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:66: error: ‘semaphore’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:66: error: ‘s’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/_util/Logger.cpp: In member function ‘void CLogger::Log(int, const char, _va_list_tag)’:
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:99: error: ‘m_csLogger’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:108: error: ‘m_OutputBuf’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:141: error: ‘m_OutputBuf’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:158: error: ‘m_OutputBuf’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:176: error: ‘m_OutputBuf’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.cpp:190: error: ‘m_OutputBuf’ was not declared in this scope
/Users/wanxiang/Documents/workspace/ltp/src/__util/Logger.h: At global scope:
/Users/wanxiang/Documents/workspace/ltp/src/_util/Logger.h:77: warning: inline function ‘static void CLogger::InitializeCriticalSection(int)’ used but never defined
/Users/wanxiang/Documents/workspace/ltp/src/_util/Logger.h:78: warning: inline function ‘static void CLogger::EnterCriticalSection(int)’ used but never defined
/Users/wanxiang/Documents/workspace/ltp/src/_util/Logger.h:79: warning: inline function ‘static void CLogger::LeaveCriticalSection(int)’ used but never defined
make[3]: *** [src/__util/CMakeFiles/util.dir/Logger.cpp.o] Error 1
make[2]: *** [src/__util/CMakeFiles/util.dir/all] Error 2
make[1]: *** [all] Error 2
make: *** [all] Error 2
整理相关文档,撰写Wiki等。
正常运行多线程ltp_server不会有问题,因为理论上讲server不应该停机。但是在multi_cws_cmdline上会出现写在segmentor::rulebase里面的两个正则表达式析构时出core。
在PoSTagging的结果中含有“z”词性(依照北大标注规范),但是在Parser的训练数据中没有"z"词性。
一个解决方法是用自动词性+god dep-relation训练一个parser model。
另外,是否需要z词性还是需要再讨论。
编译成功后。执行测试:
./bin/ltp_test "ws" "test_data/test_gb.txt"
有错误:
terminate called after throwing an instance of 'std::string'
请把 _svmtagger/weight.cpp里面的相应处改为如下,否则mac编译会报 can not delete char[100]
class weight_struct_t {
public:
char *key = new char[100];
hash_t *hash;
@Oneplus 帮忙看看
我复制的你们的代码,在linux下命名为seg.cc
#include <iostream>
#include <string>
#include "segment_dll.h"
// Minimal LTP segmentation demo: loads the model given on the command line,
// segments one fixed sentence, and prints the tokens separated by '|'.
int main(int argc, char * argv[]) {
  // The model path is required as the first argument.
  if (argc < 2) {
    std::cerr << "cws [model path]" << std::endl;
    return 1;
  }

  // Load the segmentor model; bail out if creation fails.
  void * segmentor = segmentor_create_segmentor(argv[1]);
  if (!segmentor) {
    return -1;
  }

  // Segment the demo sentence into tokens.
  std::vector<std::string> tokens;
  const int token_count = segmentor_segment(segmentor,
      "爱上一匹野马,可我的家里没有草原。", tokens);

  // Emit each token followed by a '|' delimiter, then a newline.
  for (int idx = 0; idx < token_count; ++ idx) {
    std::cout << tokens[idx] << "|";
  }
  std::cout << std::endl;

  // Release the model before exiting.
  segmentor_release_segmentor(segmentor);
  return 0;
}
然后执行
g++ seg.cc segmentor.a -o seg
好像编译通不过(ps:我对linux下c++编程不熟悉)
segmentor.cpp:(.text._ZN5boost9re_detail12perl_matcherIN9__gnu_cxx17__normal_iteratorIPKcSsEESaINS_9sub_matchIS6_EEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE8find_impEv[boost::re_detail::perl_matcher<__gnu_cxx::__normal_iterator<char const*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<boost::sub_match<__gnu_cxx::__normal_iterator<char const*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >, boost::regex_traits<char, boost::cpp_regex_traits<char> > >::find_imp()]+0x18f): undefined reference to `boost::re_detail::put_mem_block(void*)'
segmentor.cpp:(.text._ZN5boost9re_detail12perl_matcherIN9__gnu_cxx17__normal_iteratorIPKcSsEESaINS_9sub_matchIS6_EEENS_12regex_traitsIcNS_16cpp_regex_traitsIcEEEEE8find_impEv[boost::re_detail::perl_matcher<__gnu_cxx::__normal_iterator<char const*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<boost::sub_match<__gnu_cxx::__normal_iterator<char const*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >, boost::regex_traits<char, boost::cpp_regex_traits<char> > >::find_imp()]+0x361): undefined reference to `boost::re_detail::put_mem_block(void*)'
collect2: ld returned 1 exit status
As I know, main reason for compiling error is the wrongly defined macro
ifdef __WIN32__
these kind of macro is designed for MSVC, however mingw
under windows also triggered this macro.
another issue is the usage of hashmap under mingw
is still a mystery to me.
Hi all,
The srl module has been updated, as well as a new maxent package. Some details are shown below:
请问与3.0.0 alpha 相匹配的最新模型文件,哪里可以下载到??
系统是ubuntu12.04
Input sentence is: **你好?
-->>debug make xml
[XML4NLP ERROR REPORT]
description : Error document empty.
location :
row : 0
col : 0
===================
[ERROR] 2013/09/02 08:51:15 /home/parallels/ltp/src/__ltp_dll/Ltp.cpp: line 147: splitSentence_dummy(): in LTP::splitsent, There is no paragraph in doc,
[ERROR] 2013/09/02 08:51:15 /home/parallels/ltp/src/__ltp_dll/Ltp.cpp: line 148: splitSentence_dummy(): you may have loaded a blank file or have not loaded a file yet.
[ERROR] 2013/09/02 08:51:15 /home/parallels/ltp/src/__ltp_dll/Ltp.cpp: line 182: wordseg(): in LTP::wordseg, failed to perform split sentence preprocess.
[ERROR] 2013/09/02 08:51:15 /home/parallels/ltp/src/__ltp_dll/Ltp.cpp: line 233: postag(): in LTP::postag, failed to perform word segment preprocess
[ERROR] 2013/09/02 08:51:15 /home/parallels/ltp/src/__ltp_dll/Ltp.cpp: line 284: ner(): in LTP::ner, failed to perform postag preprocess
[ERROR] 2013/09/02 08:51:15 /home/parallels/ltp/src/__ltp_dll/Ltp.cpp: line 397: srl(): in LTP::srl, failed to perform ner preprocess
Result is: HTTP/1.1 200 OK
【视频日媒称越南因南海争端停播**央视节目越南停播央视日媒新浪视频】
最后一个】会标错
Ⅰ、大豆及其制品。
Ⅰ会标错
应该加入一个特殊字符识别的特征。
我下载了新版本的源码然后编译:
[ 34%] Building CXX object src/segmentor/CMakeFiles/otcws.dir/otcws.cpp.o
In file included from 。。。/LTP/src/segmentor/otcws.cpp:2:
。。。/LTP/src/utils/cfgparser.hpp:11:10: fatal error:
'tr1/unordered_map' file not found
^
1 error generated.
make[3]: *** [src/segmentor/CMakeFiles/otcws.dir/otcws.cpp.o] Error 1
make[2]: *** [src/segmentor/CMakeFiles/otcws.dir/all] Error 2
make[1]: *** [all] Error 2
make: *** [all] Error 2
现在LTP 完整运行需要的内存大概在1400M-2000M左右,,
这个有考虑把内存降低的打算么?
已经用不上了。
src/segmentor/segment_dll.cpp
line 67-71
在根目录下的CMakeLists.txt
会在win32环境下排斥编译unittest和ltp_server,但是在win64下仍旧会编译这两个模块。
字母和数字混合时,错误的将字母和数字分隔开,如“惠普d2015打印机”中,“的”和“2015”分成了两个词。
RT
您好,文档中的的模型没有下载链接,能给个链接吗?
谢谢!
there should be a CXX_FLAGS -DHAVE_CONFIG_H
set in thirdparty/maxent/CMakeFiles.
在mac和linux上编译出了ltp_test,和ltp_test2,还有ltp_test_xml,这3个可执行程序怎么用,另外linux上有python的接口吗?还是必须自己写程序调用链接库的函数?
在这个行数里面的: SRLBaselineExt::ExtractPrgFeatures(vector< vector >& vecPrgFeatures) 的83行。。
我的调用如下:
SRL_LoadResource("/Users/iceet/Mine/Bill/tools/xls/ltp_data/srl/");
vector< pair< int, vector< pair<const char *, pair< int, int > > > > > vecSRLResult;
std::cout << "==ing===" << endl;
SRL(words, tags, vec, parser, vecSRLResult);//这里执行的时候报错了
std::cout << "=======e" << std::endl;
最终定位到代码 SRLBaselineExt.cpp
for (size_t row = 1; row <= row_count; ++row)
{
vector<string> instance;
for (size_t i = 0; i < m_prgFeatureNumbers.size(); ++i)
{
string feature = m_prgFeaturePrefixes[i] + "@"
+ vec_feature_values[i][row]; //这里有问题
instance.push_back(feature);
}
vecPrgFeatures.push_back(instance);
}
ltp::utility::Template
是整个ltp里面最常用的基础数据结构,提供从特征模板中实例化特征字符串的功能。例如,特征模板T=3={w0}-{p0}
。在w0=am
,p0=v
的情况下被实例化为3=am-v
。
特征模板的实现方法是将特征模板拆解成若干tokens(3=, w0, -, p0)
,将这些tokens存储在Template_Token_Cache的单件里。然后将Template转换为token对应的index。在这个例子里,模板T被表示为(0,1,2,3)
的index列表。
每次实例化前需要定义一个Template::Data的类型,用以存储每个实例化的token。Template::Data对于Token_Cache建立一个副本,然后在调用set
方法时将相应key实例化为value。render模板的过程变成将模板里的index列表对应的token拼接的过程。
旧版的实现在设置key
的时候需要一步查找。由于认为token的个数比较少,这一过程直接使用线性查找。最近的unittest中发现这一过程可以采用hashmap替换从而获得更快的速度。
这一修改将会直接影响各模块的速度(训练,解码)。从初步实验来看,这一修改在二阶sibling模型上能够获得40%的速度提升,使得分析速度从7.35句/s提高到12.7句/s(因测试机不同而有差异)。还需进一步进行测试。
句子:你好勤快
理想分词结果:你 好 勤快
实际分词结果:你好 勤快
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Some thing interesting about web. New door for the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.