Giter Club home page Giter Club logo

Comments (4)

bachan avatar bachan commented on July 26, 2024

You have most likely edited your build_java.sh unintentionally. Run `git diff` to check whether any files in the source tree have been modified.

from coccoc-tokenizer.

dinhan92 avatar dinhan92 commented on July 26, 2024

Here is the output when I run git diff:
root@Admin:/mnt/c/Dev Programs/CTokenizer/coccoc-tokenizer/build# git diff
diff --git a/.clang-format b/.clang-format
index d6b3ff3..0425316 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,22 +1,22 @@
-# learn more at http://clang.llvm.org/docs/ClangFormatStyleOptions.html

-AccessModifierOffset: -8
-AlignAfterOpenBracket: false
-AlignEscapedNewlinesLeft: false
-AllowShortFunctionsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: true
-AlwaysBreakTemplateDeclarations: true
:...skipping...
diff --git a/.clang-format b/.clang-format
index d6b3ff3..0425316 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,22 +1,22 @@
-# learn more at http://clang.llvm.org/docs/ClangFormatStyleOptions.html

-AccessModifierOffset: -8
-AlignAfterOpenBracket: false
-AlignEscapedNewlinesLeft: false
-AllowShortFunctionsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: false
-BinPackParameters: false
-BreakBeforeBraces: Allman
-BreakBeforeTernaryOperators: true
-ColumnLimit: 120
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ContinuationIndentWidth: 8
-IndentFunctionDeclarationAfterType: true
-IndentWidth: 8
:...skipping...
diff --git a/.clang-format b/.clang-format
index d6b3ff3..0425316 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,22 +1,22 @@
-# learn more at http://clang.llvm.org/docs/ClangFormatStyleOptions.html

-AccessModifierOffset: -8
-AlignAfterOpenBracket: false
-AlignEscapedNewlinesLeft: false
-AllowShortFunctionsOnASingleLine: false
-AllowShortIfStatementsOnASingleLine: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: false
-BinPackParameters: false
-BreakBeforeBraces: Allman
-BreakBeforeTernaryOperators: true
-ColumnLimit: 120
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ContinuationIndentWidth: 8
-IndentFunctionDeclarationAfterType: true
-IndentWidth: 8
-SpaceAfterCStyleCast: true
-SpacesInAngles: true
-SpacesInContainerLiterals: true
-SpacesInParentheses: false
-UseTab: Always
+# learn more at http://clang.llvm.org/docs/ClangFormatStyleOptions.html^M
+^M
+AccessModifierOffset: -8^M
+AlignAfterOpenBracket: false^M
+AlignEscapedNewlinesLeft: false^M
+AllowShortFunctionsOnASingleLine: false^M
+AllowShortIfStatementsOnASingleLine: true^M
+AlwaysBreakTemplateDeclarations: true^M
+BinPackArguments: false^M
+BinPackParameters: false^M
+BreakBeforeBraces: Allman^M
+BreakBeforeTernaryOperators: true^M
+ColumnLimit: 120^M
+ConstructorInitializerAllOnOneLineOrOnePerLine: true^M
+ContinuationIndentWidth: 8^M
+IndentFunctionDeclarationAfterType: true^M
+IndentWidth: 8^M
+SpaceAfterCStyleCast: true^M
+SpacesInAngles: true^M
+SpacesInContainerLiterals: true^M
+SpacesInParentheses: false^M
+UseTab: Always^M
diff --git a/.gitignore b/.gitignore
index 27b9e69..e9757de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,5 @@
-build
-install
-python/*.c
-python/*.cpp
-python/*.html
+build^M
+install^M
+python/*.c^M
+python/*.cpp^M
+python/*.html^M
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7feaeee..8bb6ec5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,77 +1,77 @@
-PROJECT (coccoc-tokenizer)
-CMAKE_MINIMUM_REQUIRED (VERSION 2.8)
-INCLUDE (CMakeMacro.cmake)

-INCLUDE_DIRECTORIES (${PROJECT_BINARY_DIR}/auto)
-INCLUDE_DIRECTORIES (tokenizer)

-ADD_EXECUTABLE (dict_compiler utils/dict_compiler.cpp)
-ADD_EXECUTABLE (tokenizer utils/tokenizer.cpp)
-ADD_EXECUTABLE (vn_lang_tool utils/vn_lang_tool.cpp)

-SET (MULTITERM_DICT_DUMP "multiterm_trie.dump")
-SET (SYLLABLE_DICT_DUMP "syllable_trie.dump")
-SET (NONTONE_PAIR_DICT_DUMP "nontone_pair_freq_map.dump")

-LIST (APPEND DICT_DUMPS ${MULTITERM_DICT_DUMP})
-LIST (APPEND DICT_DUMPS ${SYLLABLE_DICT_DUMP})
-LIST (APPEND DICT_DUMPS ${NONTONE_PAIR_DICT_DUMP})

-# We don't use GLOB here because it also takes hidden files and we are too lazy to cope with it
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/acronyms")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/chemical_comp")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/Freq2NontoneUniFile")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/keyword.freq")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/nontone_pair_freq")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/special_token.strong")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/special_token.weak")
-LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/vndic_multiterm")

-ADD_CUSTOM_TARGET (compile_dict ALL DEPENDS ${DICT_DUMPS})
-ADD_CUSTOM_COMMAND (

  •   OUTPUT ${DICT_DUMPS}
    
  •   COMMAND ${CMAKE_BINARY_DIR}/dict_compiler "${CMAKE_SOURCE_DIR}/dicts" "${CMAKE_BINARY_DIR}"
    
  •   DEPENDS dict_compiler ${DICT_SOURCES}
    
  •   VERBATIM
    

-)

-INSTALL (TARGETS tokenizer DESTINATION bin)
-INSTALL (TARGETS vn_lang_tool DESTINATION bin)
-INSTALL (TARGETS dict_compiler DESTINATION bin)
-INSTALL (DIRECTORY tokenizer/ DESTINATION include/tokenizer FILES_MATCHING PATTERN "*.h")
-INSTALL (DIRECTORY tokenizer/ DESTINATION include/tokenizer FILES_MATCHING PATTERN "*.hpp")
-INSTALL (DIRECTORY tokenizer/ DESTINATION include/tokenizer FILES_MATCHING PATTERN "*.tcc")
-INSTALL_TEMPLATE (tokenizer/config.h.in DESTINATION include/tokenizer)
-INSTALL (DIRECTORY dicts/ DESTINATION share/tokenizer/dicts_text)
-INSTALL (DIRECTORY dicts/vn_lang_tool/ DESTINATION share/tokenizer/dicts)
-INSTALL (FILES ${CMAKE_BINARY_DIR}/${MULTITERM_DICT_DUMP} DESTINATION share/tokenizer/dicts)
-INSTALL (FILES ${CMAKE_BINARY_DIR}/${SYLLABLE_DICT_DUMP} DESTINATION share/tokenizer/dicts)
-INSTALL (FILES ${CMAKE_BINARY_DIR}/${NONTONE_PAIR_DICT_DUMP} DESTINATION share/tokenizer/dicts)

-IF (${BUILD_JAVA})

  •   ADD_CUSTOM_TARGET (compile_java ALL DEPENDS ${CMAKE_BINARY_DIR}/coccoc-tokenizer.jar)
    
  •   ADD_CUSTOM_COMMAND (
    
  •           OUTPUT ${CMAKE_BINARY_DIR}/coccoc-tokenizer.jar
    
  •           COMMAND ${CMAKE_SOURCE_DIR}/java/build_java.sh ${CMAKE_BINARY_DIR}
    
  •           VERBATIM
    
  •   )
    
  •   INSTALL (FILES ${CMAKE_BINARY_DIR}/coccoc-tokenizer.jar DESTINATION share/java)
    
  •   IF(CMAKE_SYSTEM_NAME STREQUAL Darwin)
    
  •           INSTALL (FILES ${CMAKE_BINARY_DIR}/libcoccoc_tokenizer_jni.dylib DESTINATION lib)
    
  •   ELSE ()
    
  •           INSTALL (FILES ${CMAKE_BINARY_DIR}/libcoccoc_tokenizer_jni.so DESTINATION lib)
    
  •   ENDIF ()
    

-ENDIF ()

-IF (${BUILD_PYTHON})

  •   # XXX Some build files in this target are generated inside source tree, should fix later
    
  •   ADD_CUSTOM_TARGET (compile_python ALL DEPENDS ${CMAKE_BINARY_DIR}/python/lib)
    
  •   ADD_CUSTOM_COMMAND (
    
  •           OUTPUT ${CMAKE_BINARY_DIR}/python/lib
    
  •           COMMAND ${CMAKE_SOURCE_DIR}/python/build_python.sh ${CMAKE_BINARY_DIR} install --prefix=${CMAKE_BINARY_DIR}/python
    
  •           VERBATIM
    
  •   )
    
  •   INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/python/lib/ DESTINATION lib)
    

-ENDIF ()
+PROJECT (coccoc-tokenizer)^M
+CMAKE_MINIMUM_REQUIRED (VERSION 2.8)^M
+INCLUDE (CMakeMacro.cmake)^M
+^M
+INCLUDE_DIRECTORIES (${PROJECT_BINARY_DIR}/auto)^M
+INCLUDE_DIRECTORIES (tokenizer)^M
+^M
+ADD_EXECUTABLE (dict_compiler utils/dict_compiler.cpp)^M
+ADD_EXECUTABLE (tokenizer utils/tokenizer.cpp)^M
+ADD_EXECUTABLE (vn_lang_tool utils/vn_lang_tool.cpp)^M
+^M
+SET (MULTITERM_DICT_DUMP "multiterm_trie.dump")^M
+SET (SYLLABLE_DICT_DUMP "syllable_trie.dump")^M
+SET (NONTONE_PAIR_DICT_DUMP "nontone_pair_freq_map.dump")^M
+^M
+LIST (APPEND DICT_DUMPS ${MULTITERM_DICT_DUMP})^M
+LIST (APPEND DICT_DUMPS ${SYLLABLE_DICT_DUMP})^M
+LIST (APPEND DICT_DUMPS ${NONTONE_PAIR_DICT_DUMP})^M
+^M
+# We don't use GLOB here because it also takes hidden files and we are too lazy to cope with it^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/acronyms")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/chemical_comp")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/Freq2NontoneUniFile")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/keyword.freq")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/nontone_pair_freq")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/special_token.strong")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/special_token.weak")^M
+LIST (APPEND DICT_SOURCES "${CMAKE_SOURCE_DIR}/dicts/tokenizer/vndic_multiterm")^M
+^M
+ADD_CUSTOM_TARGET (compile_dict ALL DEPENDS ${DICT_DUMPS})^M
+ADD_CUSTOM_COMMAND (^M

  •   OUTPUT ${DICT_DUMPS}^M
    
  •   COMMAND ${CMAKE_BINARY_DIR}/dict_compiler "${CMAKE_SOURCE_DIR}/dicts" "${CMAKE_BINARY_DIR}"^M
    
  •   DEPENDS dict_compiler ${DICT_SOURCES}^M
    
  •   VERBATIM^M
    

+)^M
+^M
+INSTALL (TARGETS tokenizer DESTINATION bin)^M
+INSTALL (TARGETS vn_lang_tool DESTINATION bin)^M
+INSTALL (TARGETS dict_compiler DESTINATION bin)^M
+INSTALL (DIRECTORY tokenizer/ DESTINATION include/tokenizer FILES_MATCHING PATTERN "*.h")^M
+INSTALL (DIRECTORY tokenizer/ DESTINATION include/tokenizer FILES_MATCHING PATTERN "*.hpp")^M
+INSTALL (DIRECTORY tokenizer/ DESTINATION include/tokenizer FILES_MATCHING PATTERN "*.tcc")^M
+INSTALL_TEMPLATE (tokenizer/config.h.in DESTINATION include/tokenizer)^M
+INSTALL (DIRECTORY dicts/ DESTINATION share/tokenizer/dicts_text)^M
+INSTALL (DIRECTORY dicts/vn_lang_tool/ DESTINATION share/tokenizer/dicts)^M
+INSTALL (FILES ${CMAKE_BINARY_DIR}/${MULTITERM_DICT_DUMP} DESTINATION share/tokenizer/dicts)^M
+INSTALL (FILES ${CMAKE_BINARY_DIR}/${SYLLABLE_DICT_DUMP} DESTINATION share/tokenizer/dicts)^M
+INSTALL (FILES ${CMAKE_BINARY_DIR}/${NONTONE_PAIR_DICT_DUMP} DESTINATION share/tokenizer/dicts)^M
+^M
+IF (${BUILD_JAVA})^M

  •   ADD_CUSTOM_TARGET (compile_java ALL DEPENDS ${CMAKE_BINARY_DIR}/coccoc-tokenizer.jar)^M
    
  •   ADD_CUSTOM_COMMAND (^M
    
  •           OUTPUT ${CMAKE_BINARY_DIR}/coccoc-tokenizer.jar^M
    
  •           COMMAND ${CMAKE_SOURCE_DIR}/java/build_java.sh ${CMAKE_BINARY_DIR}^M
    
  •           VERBATIM^M
    
  •   )^M
    
  •   INSTALL (FILES ${CMAKE_BINARY_DIR}/coccoc-tokenizer.jar DESTINATION share/java)^M
    

+^M

  •   IF(CMAKE_SYSTEM_NAME STREQUAL Darwin)^M
    
  •           INSTALL (FILES ${CMAKE_BINARY_DIR}/libcoccoc_tokenizer_jni.dylib DESTINATION lib)^M
    
  •   ELSE ()^M
    
  •           INSTALL (FILES ${CMAKE_BINARY_DIR}/libcoccoc_tokenizer_jni.so DESTINATION lib)^M
    
  •   ENDIF ()^M
    

+^M
+ENDIF ()^M
+^M
+IF (${BUILD_PYTHON})^M

  •   # XXX Some build files in this target are generated inside source tree, should fix later^M
    
  •   ADD_CUSTOM_TARGET (compile_python ALL DEPENDS ${CMAKE_BINARY_DIR}/python/lib)^M
    
  •   ADD_CUSTOM_COMMAND (^M
    
  •           OUTPUT ${CMAKE_BINARY_DIR}/python/lib^M
    
  •           COMMAND ${CMAKE_SOURCE_DIR}/python/build_python.sh ${CMAKE_BINARY_DIR} install --prefix=${CMAKE_BINARY_DIR}/python^M
    
  •           VERBATIM^M
    
  •   )^M
    
  •   INSTALL (DIRECTORY ${CMAKE_BINARY_DIR}/python/lib/ DESTINATION lib)^M
    

+ENDIF ()^M
diff --git a/CMakeMacro.cmake b/CMakeMacro.cmake
index bd0aaf6..4591db2 100644
--- a/CMakeMacro.cmake
+++ b/CMakeMacro.cmake
@@ -1,236 +1,236 @@
-SET (FLAGS_DEFAULT "-fPIC -pipe")
-SET (FLAGS_WARNING "-Wall -Werror -Wno-long-long -Wno-variadic-macros -Wno-strict-aliasing")# -Wextra -pedantic")
-SET (FLAGS_CXX_LANG "-std=c++11 -Wno-deprecated")
-SET (FLAGS_RELEASE "-O3 -DNDEBUG") # -fomit-frame-pointer -funroll-loops
-SET (FLAGS_DEBUG "-ggdb")

-# This is needed because debian package builder sets -DCMAKE_BUILD_TYPE=None
-IF (CMAKE_BUILD_TYPE STREQUAL None)

  • SET (CMAKE_BUILD_TYPE Release)
    -ENDIF ()

-SET (CMAKE_C_FLAGS_DEBUG "${FLAGS_DEFAULT} ${FLAGS_WARNING} ${FLAGS_DEBUG}")
-SET (CMAKE_C_FLAGS_RELEASE "${FLAGS_DEFAULT} ${FLAGS_WARNING} ${FLAGS_DEBUG} ${FLAGS_RELEASE}")

-SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${FLAGS_CXX_LANG}")
-SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${FLAGS_CXX_LANG}")

-IF (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)

  • SET (CMAKE_BUILD_TYPE RELEASE)
  • SET (CMAKE_BUILD_TYPE RELEASE CACHE STRING "Build type" FORCE)
    -ENDIF (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)

-###############################################################################

-# Enable printf format macros from <inttypes.h> in C++ code.
-ADD_DEFINITIONS (-D__STDC_FORMAT_MACROS)

-# Enable type limit macros from <stdint.h> in C++ code.
-ADD_DEFINITIONS (-D__STDC_LIMIT_MACROS)

-# Enable 64-bit off_t type to work with big files.
-ADD_DEFINITIONS (-D_FILE_OFFSET_BITS=64)

-# Make FIND_LIBRARY search for static libs first and make it search inside lib64/
-# directory in addition to the usual lib/ one.

-SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_SUFFIX})
-SET (CMAKE_FIND_LIBRARY_PREFIXES ${CMAKE_STATIC_LIBRARY_PREFIX} ${CMAKE_SHARED_LIBRARY_PREFIX})
-SET (FIND_LIBRARY_USE_LIB64_PATHS TRUE)
-SET (LINK_SEARCH_END_STATIC TRUE)

-# Include source tree root, include directory inside it and build tree root,
-# which is for files, generated by cmake from templates (e.g. autogenerated
-# C/C++ includes).

-INCLUDE_DIRECTORIES (${PROJECT_BINARY_DIR})
-INCLUDE_DIRECTORIES (${PROJECT_SOURCE_DIR})

-###############################################################################
-# USE_PROGRAM (bin)
-# -----------------------------------------------------------------------------
-# Find program [bin] using standard FIND_PROGRAM command and save its path into
-# variable named BIN_[bin].

-MACRO (USE_PROGRAM bin)

  • FIND_PROGRAM (BIN_${bin} ${bin})
  • IF (BIN_${bin})
  • MESSAGE (STATUS "FOUND ${BIN_${bin}}")
  • ELSE ()
  • MESSAGE (STATUS "ERROR ${BIN_${bin}}")
  • ENDIF ()
    -ENDMACRO (USE_PROGRAM)

-# USE_INCLUDE (inc [FIND_PATH_ARGS ...])
-# -----------------------------------------------------------------------------
-# Find include [inc] using standard FIND_PATH command and save its dirname into
-# variable named INC_[inc]. Also include its dirname into project.

-MACRO (USE_INCLUDE inc)

  • FIND_PATH (INC_${inc} ${inc} ${ARGN})
  • IF (INC_${inc})
  • MESSAGE (STATUS "FOUND ${INC_${inc}}/${inc}") # SHOULD BE BOLD GREEN
  • INCLUDE_DIRECTORIES (${INC_${inc}})
  • ELSE ()
  • MESSAGE (STATUS "ERROR ${INC_${inc}}/${inc}") # SHOULD BE BOLD RED
  • ENDIF ()
    -ENDMACRO (USE_INCLUDE)

-# USE_LIBRARY (lib [FIND_LIBRARY_ARGS ...])
-# -----------------------------------------------------------------------------
-# Find library [lib] using standard FIND_LIBRARY command and save its path into
-# variable named LIB_[lib].

-MACRO (USE_LIBRARY lib)

  • FIND_LIBRARY (LIB_${lib} ${lib} ${ARGN})
  • IF (LIB_${lib})
  • MESSAGE (STATUS "FOUND ${LIB_${lib}}") # SHOULD BE BOLD GREEN
  • ELSE ()
  • MESSAGE (STATUS "ERROR ${LIB_${lib}}") # SHOULD BE BOLD RED
  • ENDIF ()
    -ENDMACRO (USE_LIBRARY)

-# USE_PACKAGE (var lib inc [FIND_PATH_ARGS ...])
-# -----------------------------------------------------------------------------
-# Find package using USE_LIBRARY and USE_INCLUDE macros.

-MACRO (USE_PACKAGE lib inc)

  • USE_LIBRARY (${lib} ${ARGN})
  • USE_INCLUDE (${inc} ${ARGN})
    -ENDMACRO (USE_PACKAGE)

-MACRO (USE_PACKAGE_STATIC lib inc)

  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX})
  • USE_PACKAGE (${lib} ${inc} ${ARGN})
  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_SUFFIX})
    -ENDMACRO (USE_PACKAGE_STATIC)

-MACRO (USE_PACKAGE_SHARED lib inc)

  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX})
  • USE_PACKAGE (${lib} ${inc} ${ARGN})
  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_SUFFIX})
    -ENDMACRO (USE_PACKAGE_SHARED)

-# USE_SUBPATH (var sub)
-# -----------------------------------------------------------------------------
-# Find subpath [sub] using standard FIND_PATH command and save its dirname into
-# variable named [var].

-MACRO (USE_SUBPATH var sub)

  • FIND_PATH (${var}_PREFIX ${sub} ONLY_CMAKE_FIND_ROOT_PATH)
  • IF (${var}_PREFIX)
  • GET_FILENAME_COMPONENT (${var} "${${var}_PREFIX}/${sub}" PATH)
  • MESSAGE (STATUS "FOUND ${var}=${${var}}")
  • ELSE (${var}_PREFIX)
  • MESSAGE (STATUS "ERROR ${var}")
  • ENDIF (${var}_PREFIX)
    -ENDMACRO (USE_SUBPATH)

-###############################################################################
-# MAKE_LIBRARY (apath <SHARED|STATIC> [LIBRARIES_TO_LINK_WITH [...]])
-# -----------------------------------------------------------------------------
-# Make library of SHARED or STATIC type from source code inside the [apath]
-# subfolder and install it and all header files from the subfolder.

-MACRO (MAKE_LIBRARY apath atype)

  • GET_FILENAME_COMPONENT (${apath}_NAME "${apath}" NAME)
  • AUX_SOURCE_DIRECTORY (${apath} SRC_${${apath}_NAME})
  • ADD_LIBRARY (${${apath}_NAME} ${atype} ${SRC_${${apath}_NAME}})
  • IF (${ARGC} GREATER 2)
  • TARGET_LINK_LIBRARIES (${${apath}_NAME} ${ARGN})
  • ENDIF (${ARGC} GREATER 2)
  • # TODO SET_TARGET_PROPERTIES (...)

  • INSTALL (TARGETS ${${apath}_NAME} DESTINATION lib)
  • INSTALL (DIRECTORY ${apath} DESTINATION include FILES_MATCHING PATTERN "*.h")
  • INSTALL (DIRECTORY ${apath} DESTINATION include FILES_MATCHING PATTERN "*.hpp")
  • INSTALL (DIRECTORY ${apath} DESTINATION include FILES_MATCHING PATTERN "*.tcc")
    -ENDMACRO (MAKE_LIBRARY)

-# MAKE_SHARED (apath [LIBRARIES_TO_LINK_WITH [...]])
-# -----------------------------------------------------------------------------
-# Make SHARED library with MAKE_LIBRARY macro.

-MACRO (MAKE_SHARED apath)

  • MAKE_LIBRARY (${apath} SHARED ${ARGN})
    -ENDMACRO (MAKE_SHARED)

-# MAKE_STATIC (apath [LIBRARIES_TO_LINK_WITH [...]])
-# -----------------------------------------------------------------------------
-# Make STATIC library with MAKE_LIBRARY macro.

-MACRO (MAKE_STATIC apath)

  • MAKE_LIBRARY (${apath} STATIC ${ARGN})
    -ENDMACRO (MAKE_STATIC)

-# MAKE_PROGRAM (apath)
-# -----------------------------------------------------------------------------
-# Make program (executable) from source code inside the [apath] subfolder and
-# install it.

-MACRO (MAKE_PROGRAM apath)

  • GET_FILENAME_COMPONENT (${apath}_NAME "${apath}" NAME)
  • AUX_SOURCE_DIRECTORY (${apath} SRC_${${apath}_NAME})
  • ADD_EXECUTABLE (${${apath}_NAME} ${SRC_${${apath}_NAME}})
  • IF (${ARGC} GREATER 1)
  • TARGET_LINK_LIBRARIES (${${apath}_NAME} ${ARGN})
  • ENDIF (${ARGC} GREATER 1)
  • INSTALL (TARGETS ${${apath}_NAME} DESTINATION bin)
    -ENDMACRO (MAKE_PROGRAM)

-# MAKE_TEST (apath)
-# -----------------------------------------------------------------------------
-# Make test from source code inside the [apath] subfolder.

-MACRO (MAKE_TEST apath)

  • GET_FILENAME_COMPONENT (${apath}_NAME "${apath}" NAME)
  • AUX_SOURCE_DIRECTORY (${apath} SRC_test_${${apath}_NAME})
  • ADD_EXECUTABLE (test_${${apath}_NAME} ${SRC_test_${${apath}_NAME}})
  • IF (${ARGC} GREATER 1)
  • TARGET_LINK_LIBRARIES (test_${${apath}_NAME} ${ARGN})
  • ENDIF (${ARGC} GREATER 1)
  • ADD_TEST (test_${${apath}_NAME} test_${${apath}_NAME}})
    -ENDMACRO (MAKE_TEST)

-# INSTALL_TEMPLATE (sub [INSTALL_ARGS [...]])
-# -----------------------------------------------------------------------------
-# Install template files (*.in) with one line of code, all arguments except the
-# first one will be left untouched and proxied to INSTALL (FILES) call.

-MACRO (INSTALL_TEMPLATE sub)

  • STRING (REGEX REPLACE "\.in$" "" ${sub}_NOIN ${sub})
  • CONFIGURE_FILE (${sub} ${PROJECT_BINARY_DIR}/auto/${${sub}_NOIN})
  • INSTALL (FILES ${PROJECT_BINARY_DIR}/auto/${${sub}_NOIN} ${ARGN})
    -ENDMACRO (INSTALL_TEMPLATE)

-###############################################################################

-function(PROTOBUF_GENERATE_CPP SRCS HDRS)

  • if(NOT ARGN)
  • message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
  • return()
  • endif(NOT ARGN)
  • set(${SRCS})
  • set(${HDRS})
  • foreach(FIL ${ARGN})
  • get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
  • string(REPLACE ".proto" "" FIL_WE ${FIL})
  • list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
  • list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
  • add_custom_command(
  •  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
    
  •         "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
    
  •  COMMAND protoc
    
  •  ARGS --cpp_out  ${CMAKE_CURRENT_BINARY_DIR} --proto_path ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL}
    
  •  DEPENDS ${ABS_FIL}
    
  •  COMMENT "Running C++ protocol buffer compiler on ${FIL}"
    
  •  VERBATIM )
    
  • endforeach()
  • set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
  • set(${SRCS} ${${SRCS}} PARENT_SCOPE)
  • set(${HDRS} ${${HDRS}} PARENT_SCOPE)
    -endfunction()

+SET (FLAGS_DEFAULT "-fPIC -pipe")^M
+SET (FLAGS_WARNING "-Wall -Werror -Wno-long-long -Wno-variadic-macros -Wno-strict-aliasing")# -Wextra -pedantic")^M
+SET (FLAGS_CXX_LANG "-std=c++11 -Wno-deprecated")^M
+SET (FLAGS_RELEASE "-O3 -DNDEBUG") # -fomit-frame-pointer -funroll-loops^M
+SET (FLAGS_DEBUG "-ggdb")^M
+^M
+# This is needed because debian package builder sets -DCMAKE_BUILD_TYPE=None^M
+IF (CMAKE_BUILD_TYPE STREQUAL None)^M

  • SET (CMAKE_BUILD_TYPE Release)^M
    +ENDIF ()^M
    +^M
    +SET (CMAKE_C_FLAGS_DEBUG "${FLAGS_DEFAULT} ${FLAGS_WARNING} ${FLAGS_DEBUG}")^M
    +SET (CMAKE_C_FLAGS_RELEASE "${FLAGS_DEFAULT} ${FLAGS_WARNING} ${FLAGS_DEBUG} ${FLAGS_RELEASE}")^M
    +^M
    +SET (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} ${FLAGS_CXX_LANG}")^M
    +SET (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${FLAGS_CXX_LANG}")^M
    +^M
    +IF (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)^M
  • SET (CMAKE_BUILD_TYPE RELEASE)^M
  • SET (CMAKE_BUILD_TYPE RELEASE CACHE STRING "Build type" FORCE)^M
    +ENDIF (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)^M
    +^M
    +###############################################################################^M
    +^M
    +# Enable printf format macros from <inttypes.h> in C++ code.^M
    +ADD_DEFINITIONS (-D__STDC_FORMAT_MACROS)^M
    +^M
    +# Enable type limit macros from <stdint.h> in C++ code.^M
    +ADD_DEFINITIONS (-D__STDC_LIMIT_MACROS)^M
    +^M
    +# Enable 64-bit off_t type to work with big files.^M
    +ADD_DEFINITIONS (-D_FILE_OFFSET_BITS=64)^M
    +^M
    +# Make FIND_LIBRARY search for static libs first and make it search inside lib64/^M
    +# directory in addition to the usual lib/ one.^M
    +^M
    +SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_SUFFIX})^M
    +SET (CMAKE_FIND_LIBRARY_PREFIXES ${CMAKE_STATIC_LIBRARY_PREFIX} ${CMAKE_SHARED_LIBRARY_PREFIX})^M
    +SET (FIND_LIBRARY_USE_LIB64_PATHS TRUE)^M
    +SET (LINK_SEARCH_END_STATIC TRUE)^M
    +^M
    +# Include source tree root, include directory inside it and build tree root,^M
    +# which is for files, generated by cmake from templates (e.g. autogenerated^M
    +# C/C++ includes).^M
    +^M
    +INCLUDE_DIRECTORIES (${PROJECT_BINARY_DIR})^M
    +INCLUDE_DIRECTORIES (${PROJECT_SOURCE_DIR})^M
    +^M
    +###############################################################################^M
    +# USE_PROGRAM (bin)^M
    +# -----------------------------------------------------------------------------^M
    +# Find program [bin] using standard FIND_PROGRAM command and save its path into^M
    +# variable named BIN_[bin].^M
    +^M
    +MACRO (USE_PROGRAM bin)^M
  • FIND_PROGRAM (BIN_${bin} ${bin})^M
  • IF (BIN_${bin})^M
  • MESSAGE (STATUS "FOUND ${BIN_${bin}}")^M
  • ELSE ()^M
  • MESSAGE (STATUS "ERROR ${BIN_${bin}}")^M
  • ENDIF ()^M
    +ENDMACRO (USE_PROGRAM)^M
    +^M
    +# USE_INCLUDE (inc [FIND_PATH_ARGS ...])^M
    +# -----------------------------------------------------------------------------^M
    +# Find include [inc] using standard FIND_PATH command and save its dirname into^M
    +# variable named INC_[inc]. Also include its dirname into project.^M
    +^M
    +MACRO (USE_INCLUDE inc)^M
  • FIND_PATH (INC_${inc} ${inc} ${ARGN})^M
  • IF (INC_${inc})^M
  • MESSAGE (STATUS "FOUND ${INC_${inc}}/${inc}") # SHOULD BE BOLD GREEN^M
  • INCLUDE_DIRECTORIES (${INC_${inc}})^M
  • ELSE ()^M
  • MESSAGE (STATUS "ERROR ${INC_${inc}}/${inc}") # SHOULD BE BOLD RED^M
  • ENDIF ()^M
    +ENDMACRO (USE_INCLUDE)^M
    +^M
    +# USE_LIBRARY (lib [FIND_LIBRARY_ARGS ...])^M
    +# -----------------------------------------------------------------------------^M
    +# Find library [lib] using standard FIND_LIBRARY command and save its path into^M
    +# variable named LIB_[lib].^M
    +^M
    +MACRO (USE_LIBRARY lib)^M
  • FIND_LIBRARY (LIB_${lib} ${lib} ${ARGN})^M
  • IF (LIB_${lib})^M
  • MESSAGE (STATUS "FOUND ${LIB_${lib}}") # SHOULD BE BOLD GREEN^M
  • ELSE ()^M
  • MESSAGE (STATUS "ERROR ${LIB_${lib}}") # SHOULD BE BOLD RED^M
  • ENDIF ()^M
    +ENDMACRO (USE_LIBRARY)^M
    +^M
    +# USE_PACKAGE (var lib inc [FIND_PATH_ARGS ...])^M
    +# -----------------------------------------------------------------------------^M
    +# Find package using USE_LIBRARY and USE_INCLUDE macros.^M
    +^M
    +MACRO (USE_PACKAGE lib inc)^M
  • USE_LIBRARY (${lib} ${ARGN})^M
  • USE_INCLUDE (${inc} ${ARGN})^M
    +ENDMACRO (USE_PACKAGE)^M
    +^M
    +MACRO (USE_PACKAGE_STATIC lib inc)^M
  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX})^M
  • USE_PACKAGE (${lib} ${inc} ${ARGN})^M
  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_SUFFIX})^M
    +ENDMACRO (USE_PACKAGE_STATIC)^M
    +^M
    +MACRO (USE_PACKAGE_SHARED lib inc)^M
  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX})^M
  • USE_PACKAGE (${lib} ${inc} ${ARGN})^M
  • SET (CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_SUFFIX})^M
    +ENDMACRO (USE_PACKAGE_SHARED)^M
    +^M
    +# USE_SUBPATH (var sub)^M
    +# -----------------------------------------------------------------------------^M
    +# Find subpath [sub] using standard FIND_PATH command and save its dirname into^M
    +# variable named [var].^M
    +^M
    +MACRO (USE_SUBPATH var sub)^M
  • FIND_PATH (${var}_PREFIX ${sub} ONLY_CMAKE_FIND_ROOT_PATH)^M
  • IF (${var}_PREFIX)^M
  • GET_FILENAME_COMPONENT (${var} "${${var}_PREFIX}/${sub}" PATH)^M
  • MESSAGE (STATUS "FOUND ${var}=${${var}}")^M
  • ELSE (${var}_PREFIX)^M
  • MESSAGE (STATUS "ERROR ${var}")^M
  • ENDIF (${var}_PREFIX)^M
    +ENDMACRO (USE_SUBPATH)^M
    +^M
    +###############################################################################^M
    +# MAKE_LIBRARY (apath <SHARED|STATIC> [LIBRARIES_TO_LINK_WITH [...]])^M
    +# -----------------------------------------------------------------------------^M
    +# Make library of SHARED or STATIC type from source code inside the [apath]^M
    +# subfolder and install it and all header files from the subfolder.^M
    +^M
    +MACRO (MAKE_LIBRARY apath atype)^M
  • GET_FILENAME_COMPONENT (${apath}_NAME "${apath}" NAME)^M
  • AUX_SOURCE_DIRECTORY (${apath} SRC_${${apath}_NAME})^M
  • ADD_LIBRARY (${${apath}_NAME} ${atype} ${SRC_${${apath}_NAME}})^M
  • IF (${ARGC} GREATER 2)^M
  • TARGET_LINK_LIBRARIES (${${apath}_NAME} ${ARGN})^M
  • ENDIF (${ARGC} GREATER 2)^M
  • # TODO SET_TARGET_PROPERTIES (...)^M

  • INSTALL (TARGETS ${${apath}_NAME} DESTINATION lib)^M
  • INSTALL (DIRECTORY ${apath} DESTINATION include FILES_MATCHING PATTERN "*.h")^M
  • INSTALL (DIRECTORY ${apath} DESTINATION include FILES_MATCHING PATTERN "*.hpp")^M
  • INSTALL (DIRECTORY ${apath} DESTINATION include FILES_MATCHING PATTERN "*.tcc")^M
    +ENDMACRO (MAKE_LIBRARY)^M
    +^M
    +# MAKE_SHARED (apath [LIBRARIES_TO_LINK_WITH [...]])^M
    +# -----------------------------------------------------------------------------^M
    +# Make SHARED library with MAKE_LIBRARY macro.^M
    +^M
    +MACRO (MAKE_SHARED apath)^M
  • MAKE_LIBRARY (${apath} SHARED ${ARGN})^M
    +ENDMACRO (MAKE_SHARED)^M
    +^M
    +# MAKE_STATIC (apath [LIBRARIES_TO_LINK_WITH [...]])^M
    +# -----------------------------------------------------------------------------^M
    +# Make STATIC library with MAKE_LIBRARY macro.^M
    +^M
    +MACRO (MAKE_STATIC apath)^M
  • MAKE_LIBRARY (${apath} STATIC ${ARGN})^M
    +ENDMACRO (MAKE_STATIC)^M
    +^M
    +# MAKE_PROGRAM (apath)^M
    +# -----------------------------------------------------------------------------^M
    +# Make program (executable) from source code inside the [apath] subfolder and^M
    +# install it.^M
    +^M
    +MACRO (MAKE_PROGRAM apath)^M
  • GET_FILENAME_COMPONENT (${apath}_NAME "${apath}" NAME)^M
  • AUX_SOURCE_DIRECTORY (${apath} SRC_${${apath}_NAME})^M
  • ADD_EXECUTABLE (${${apath}_NAME} ${SRC_${${apath}_NAME}})^M
  • IF (${ARGC} GREATER 1)^M
  • TARGET_LINK_LIBRARIES (${${apath}_NAME} ${ARGN})^M
  • ENDIF (${ARGC} GREATER 1)^M
  • INSTALL (TARGETS ${${apath}_NAME} DESTINATION bin)^M
    +ENDMACRO (MAKE_PROGRAM)^M
    +^M
    +# MAKE_TEST (apath)^M
    +# -----------------------------------------------------------------------------^M
    +# Make test from source code inside the [apath] subfolder.^M
    +^M
    +MACRO (MAKE_TEST apath)^M
  • GET_FILENAME_COMPONENT (${apath}_NAME "${apath}" NAME)^M
  • AUX_SOURCE_DIRECTORY (${apath} SRC_test_${${apath}_NAME})^M
  • ADD_EXECUTABLE (test_${${apath}_NAME} ${SRC_test_${${apath}_NAME}})^M
  • IF (${ARGC} GREATER 1)^M
  • TARGET_LINK_LIBRARIES (test_${${apath}_NAME} ${ARGN})^M
  • ENDIF (${ARGC} GREATER 1)^M
  • ADD_TEST (test_${${apath}_NAME} test_${${apath}_NAME}})^M
    +ENDMACRO (MAKE_TEST)^M
    +^M
    +# INSTALL_TEMPLATE (sub [INSTALL_ARGS [...]])^M
    +# -----------------------------------------------------------------------------^M
    +# Install template files (*.in) with one line of code, all arguments except the^M
    +# first one will be left untouched and proxied to INSTALL (FILES) call.^M
    +^M
    +MACRO (INSTALL_TEMPLATE sub)^M
  • STRING (REGEX REPLACE "\.in$" "" ${sub}_NOIN ${sub})^M
  • CONFIGURE_FILE (${sub} ${PROJECT_BINARY_DIR}/auto/${${sub}_NOIN})^M
  • INSTALL (FILES ${PROJECT_BINARY_DIR}/auto/${${sub}_NOIN} ${ARGN})^M
    +ENDMACRO (INSTALL_TEMPLATE)^M
    +^M
    +###############################################################################^M
    +^M
    +function(PROTOBUF_GENERATE_CPP SRCS HDRS)^M
  • if(NOT ARGN)^M
  • message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")^M
  • return()^M
  • endif(NOT ARGN)^M
    +^M
  • set(${SRCS})^M
  • set(${HDRS})^M
  • foreach(FIL ${ARGN})^M
  • get_filename_component(ABS_FIL ${FIL} ABSOLUTE)^M
  • string(REPLACE ".proto" "" FIL_WE ${FIL})^M
    +^M
  • list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")^M
  • list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")^M
    +^M
  • add_custom_command(^M
  •  OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"^M
    
  •         "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"^M
    
  •  COMMAND protoc^M
    
  •  ARGS --cpp_out  ${CMAKE_CURRENT_BINARY_DIR} --proto_path ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL}^M
    
  •  DEPENDS ${ABS_FIL}^M
    
  •  COMMENT "Running C++ protocol buffer compiler on ${FIL}"^M
    
  •  VERBATIM )^M
    
  • endforeach()^M
    +^M
  • set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)^M
  • set(${SRCS} ${${SRCS}} PARENT_SCOPE)^M
  • set(${HDRS} ${${HDRS}} PARENT_SCOPE)^M
    +endfunction()^M
    +^M
    diff --git a/LICENSE b/LICENSE
    index 0a04128..11e8067 100644
    --- a/LICENSE
    +++ b/LICENSE
    @@ -1,165 +1,165 @@
  •               GNU LESSER GENERAL PUBLIC LICENSE
    
  •                   Version 3, 29 June 2007
    
  • Copyright (C) 2007 Free Software Foundation, Inc. https://fsf.org/
  • Everyone is permitted to copy and distribute verbatim copies
  • of this license document, but changing it is not allowed.
  • This version of the GNU Lesser General Public License incorporates
    -the terms and conditions of version 3 of the GNU General Public
    -License, supplemented by the additional permissions listed below.
    1. Additional Definitions.
  • As used herein, "this License" refers to version 3 of the GNU Lesser
    -General Public License, and the "GNU GPL" refers to version 3 of the GNU
    -General Public License.
  • "The Library" refers to a covered work governed by this License,
    -other than an Application or a Combined Work as defined below.
  • An "Application" is any work that makes use of an interface provided
    -by the Library, but which is not otherwise based on the Library.
    -Defining a subclass of a class defined by the Library is deemed a mode
    -of using an interface provided by the Library.
  • A "Combined Work" is a work produced by combining or linking an
    -Application with the Library. The particular version of the Library
    -with which the Combined Work was made is also called the "Linked
    -Version".
  • The "Minimal Corresponding Source" for a Combined Work means the
    -Corresponding Source for the Combined Work, excluding any source code
    -for portions of the Combined Work that, considered in isolation, are
    -based on the Application, and not on the Linked Version.
  • The "Corresponding Application Code" for a Combined Work means the
    -object code and/or source code for the Application, including any data
    -and utility programs needed for reproducing the Combined Work from the
    -Application, but excluding the System Libraries of the Combined Work.
    1. Exception to Section 3 of the GNU GPL.
  • You may convey a covered work under sections 3 and 4 of this License
    -without being bound by section 3 of the GNU GPL.
    1. Conveying Modified Versions.
  • If you modify a copy of the Library, and, in your modifications, a
    -facility refers to a function or data to be supplied by an Application
    -that uses the facility (other than as an argument passed when the
    -facility is invoked), then you may convey a copy of the modified
    -version:
  • a) under this License, provided that you make a good faith effort to
  • ensure that, in the event an Application does not supply the
  • function or data, the facility still operates, and performs
  • whatever part of its purpose remains meaningful, or
  • b) under the GNU GPL, with none of the additional permissions of
  • this License applicable to that copy.
    1. Object Code Incorporating Material from Library Header Files.
  • The object code form of an Application may incorporate material from
    -a header file that is part of the Library. You may convey such object
    -code under terms of your choice, provided that, if the incorporated
    -material is not limited to numerical parameters, data structure
    -layouts and accessors, or small macros, inline functions and templates
    -(ten or fewer lines in length), you do both of the following:
  • a) Give prominent notice with each copy of the object code that the
  • Library is used in it and that the Library and its use are
  • covered by this License.
  • b) Accompany the object code with a copy of the GNU GPL and this license
  • document.
    1. Combined Works.
  • You may convey a Combined Work under terms of your choice that,
    -taken together, effectively do not restrict modification of the
    -portions of the Library contained in the Combined Work and reverse
    -engineering for debugging such modifications, if you also do each of
    -the following:
  • a) Give prominent notice with each copy of the Combined Work that
  • the Library is used in it and that the Library and its use are
  • covered by this License.
  • b) Accompany the Combined Work with a copy of the GNU GPL and this license
  • document.
  • c) For a Combined Work that displays copyright notices during
  • execution, include the copyright notice for the Library among
  • these notices, as well as a reference directing the user to the
  • copies of the GNU GPL and this license document.
  • d) Do one of the following:
  •   0) Convey the Minimal Corresponding Source under the terms of this
    
  •   License, and the Corresponding Application Code in a form
    
  •   suitable for, and under terms that permit, the user to
    
  •   recombine or relink the Application with a modified version of
    
  •   the Linked Version to produce a modified Combined Work, in the
    
  •   manner specified by section 6 of the GNU GPL for conveying
    
  •   Corresponding Source.
    
  •   1) Use a suitable shared library mechanism for linking with the
    
  •   Library.  A suitable mechanism is one that (a) uses at run time
    
  •   a copy of the Library already present on the user's computer
    
  •   system, and (b) will operate properly with a modified version
    
  •   of the Library that is interface-compatible with the Linked
    
  •   Version.
    
  • e) Provide Installation Information, but only if you would otherwise
  • be required to provide such information under section 6 of the
  • GNU GPL, and only to the extent that such information is
  • necessary to install and execute a modified version of the
  • Combined Work produced by recombining or relinking the
  • Application with a modified version of the Linked Version. (If
  • you use option 4d0, the Installation Information must accompany
  • the Minimal Corresponding Source and Corresponding Application
  • Code. If you use option 4d1, you must provide the Installation
  • Information in the manner specified by section 6 of the GNU GPL
  • for conveying Corresponding Source.)
    1. Combined Libraries.
  • You may place library facilities that are a work based on the
    -Library side by side in a single library together with other library
    -facilities that are not Applications and are not covered by this
    -License, and convey such a combined library under terms of your
    -choice, if you do both of the following:
  • a) Accompany the combined library with a copy of the same work based
  • on the Library, uncombined with any other library facilities,
  • conveyed under the terms of this License.
  • b) Give prominent notice with the combined library that part of it
  • is a work based on the Library, and explaining where to find the
  • accompanying uncombined form of the same work.
    1. Revised Versions of the GNU Lesser General Public License.
  • The Free Software Foundation may publish revised and/or new versions
    -of the GNU Lesser General Public License from time to time. Such new
    -versions will be similar in spirit to the present version, but may
    -differ in detail to address new problems or concerns.
  • Each version is given a distinguishing version number. If the
    -Library as you received it specifies that a certain numbered version
    -of the GNU Lesser General Public License "or any later version"
    -applies to it, you have the option of following the terms and
    -conditions either of that published version or of any later version
    -published by the Free Software Foundation. If the Library as you
    -received it does not specify a version number of the GNU Lesser
    -General Public License, you may choose any version of the GNU Lesser
    -General Public License ever published by the Free Software Foundation.
  • If the Library as you received it specifies that a proxy can decide
    -whether future versions of the GNU Lesser General Public License shall
    -apply, that proxy's public statement of acceptance of any version is
    -permanent authorization for you to choose that version for the
    -Library.
  •               GNU LESSER GENERAL PUBLIC LICENSE^M
    
  •                   Version 3, 29 June 2007^M
    

+^M

  • Copyright (C) 2007 Free Software Foundation, Inc. https://fsf.org/^M
  • Everyone is permitted to copy and distribute verbatim copies^M
  • of this license document, but changing it is not allowed.^M
    +^M
    +^M
  • This version of the GNU Lesser General Public License incorporates^M
    +the terms and conditions of version 3 of the GNU General Public^M
    +License, supplemented by the additional permissions listed below.^M
    +^M
    1. Additional Definitions.^M
      +^M
  • As used herein, "this License" refers to version 3 of the GNU Lesser^M
    +General Public License, and the "GNU GPL" refers to version 3 of the GNU^M
    +General Public License.^M
    +^M
  • "The Library" refers to a covered work governed by this License,^M
    +other than an Application or a Combined Work as defined below.^M
    +^M
  • An "Application" is any work that makes use of an interface provided^M
    +by the Library, but which is not otherwise based on the Library.^M
    +Defining a subclass of a class defined by the Library is deemed a mode^M
    +of using an interface provided by the Library.^M
    +^M
  • A "Combined Work" is a work produced by combining or linking an^M
    +Application with the Library. The particular version of the Library^M
    +with which the Combined Work was made is also called the "Linked^M
    +Version".^M
    +^M
  • The "Minimal Corresponding Source" for a Combined Work means the^M
    +Corresponding Source for the Combined Work, excluding any source code^M
    +for portions of the Combined Work that, considered in isolation, are^M
    +based on the Application, and not on the Linked Version.^M
    +^M
  • The "Corresponding Application Code" for a Combined Work means the^M
    +object code and/or source code for the Application, including any data^M
    +and utility programs needed for reproducing the Combined Work from the^M
    +Application, but excluding the System Libraries of the Combined Work.^M
    +^M
    1. Exception to Section 3 of the GNU GPL.^M
      +^M
  • You may convey a covered work under sections 3 and 4 of this License^M
    +without being bound by section 3 of the GNU GPL.^M
    +^M
    1. Conveying Modified Versions.^M
      +^M
  • If you modify a copy of the Library, and, in your modifications, a^M
    +facility refers to a function or data to be supplied by an Application^M
    +that uses the facility (other than as an argument passed when the^M
    +facility is invoked), then you may convey a copy of the modified^M
    +version:^M
    +^M
  • a) under this License, provided that you make a good faith effort to^M
  • ensure that, in the event an Application does not supply the^M
  • function or data, the facility still operates, and performs^M
  • whatever part of its purpose remains meaningful, or^M
    +^M
  • b) under the GNU GPL, with none of the additional permissions of^M
  • this License applicable to that copy.^M
    +^M
    1. Object Code Incorporating Material from Library Header Files.^M
      +^M
  • The object code form of an Application may incorporate material from^M
    +a header file that is part of the Library. You may convey such object^M
    +code under terms of your choice, provided that, if the incorporated^M
    +material is not limited to numerical parameters, data structure^M
    +layouts and accessors, or small macros, inline functions and templates^M
    +(ten or fewer lines in length), you do both of the following:^M
    +^M
  • a) Give prominent notice with each copy of the object code that the^M
  • Library is used in it and that the Library and its use are^M
  • covered by this License.^M
    +^M
  • b) Accompany the object code with a copy of the GNU GPL and this license^M
  • document.^M
    +^M
    1. Combined Works.^M
      +^M
  • You may convey a Combined Work under terms of your choice that,^M
    +taken together, effectively do not restrict modification of the^M
    +portions of the Library contained in the Combined Work and reverse^M
    +engineering for debugging such modifications, if you also do each of^M
    +the following:^M
    +^M
  • a) Give prominent notice with each copy of the Combined Work that^M
  • the Library is used in it and that the Library and its use are^M
  • covered by this License.^M
    +^M
  • b) Accompany the Combined Work with a copy of the GNU GPL and this license^M
  • document.^M
    +^M
  • c) For a Combined Work that displays copyright notices during^M
  • execution, include the copyright notice for the Library among^M
  • these notices, as well as a reference directing the user to the^M
  • copies of the GNU GPL and this license document.^M
    +^M
  • d) Do one of the following:^M
    +^M
  •   0) Convey the Minimal Corresponding Source under the terms of this^M
    
  •   License, and the Corresponding Application Code in a form^M
    
  •   suitable for, and under terms that permit, the user to^M
    
  •   recombine or relink the Application with a modified version of^M
    
  •   the Linked Version to produce a modified Combined Work, in the^M
    
  •   manner specified by section 6 of the GNU GPL for conveying^M
    
  •   Corresponding Source.^M
    

+^M

  •   1) Use a suitable shared library mechanism for linking with the^M
    
  •   Library.  A suitable mechanism is one that (a) uses at run time^M
    
  •   a copy of the Library already present on the user's computer^M
    
  •   system, and (b) will operate properly with a modified version^M
    
  •   of the Library that is interface-compatible with the Linked^M
    
  •   Version.^M
    

+^M

  • e) Provide Installation Information, but only if you would otherwise^M
  • be required to provide such information under section 6 of the^M
  • GNU GPL, and only to the extent that such information is^M
  • necessary to install and execute a modified version of the^M
  • Combined Work produced by recombining or relinking the^M
  • Application with a modified version of the Linked Version. (If^M
  • you use option 4d0, the Installation Information must accompany^M
  • the Minimal Corresponding Source and Corresponding Application^M
  • Code. If you use option 4d1, you must provide the Installation^M
  • Information in the manner specified by section 6 of the GNU GPL^M
  • for conveying Corresponding Source.)^M
    +^M
    1. Combined Libraries.^M
      +^M
  • You may place library facilities that are a work based on the^M
    +Library side by side in a single library together with other library^M
    +facilities that are not Applications and are not covered by this^M
    +License, and convey such a combined library under terms of your^M
    +choice, if you do both of the following:^M
    +^M
  • a) Accompany the combined library with a copy of the same work based^M
  • on the Library, uncombined with any other library facilities,^M
  • conveyed under the terms of this License.^M
    +^M
  • b) Give prominent notice with the combined library that part of it^M
  • is a work based on the Library, and explaining where to find the^M
  • accompanying uncombined form of the same work.^M
    +^M
    1. Revised Versions of the GNU Lesser General Public License.^M
      +^M
  • The Free Software Foundation may publish revised and/or new versions^M
    +of the GNU Lesser General Public License from time to time. Such new^M
    +versions will be similar in spirit to the present version, but may^M
    +differ in detail to address new problems or concerns.^M
    +^M
  • Each version is given a distinguishing version number. If the^M
    +Library as you received it specifies that a certain numbered version^M
    +of the GNU Lesser General Public License "or any later version"^M
    +applies to it, you have the option of following the terms and^M
    +conditions either of that published version or of any later version^M
    +published by the Free Software Foundation. If the Library as you^M
    +received it does not specify a version number of the GNU Lesser^M
    +General Public License, you may choose any version of the GNU Lesser^M
    +General Public License ever published by the Free Software Foundation.^M
    +^M
  • If the Library as you received it specifies that a proxy can decide^M
    +whether future versions of the GNU Lesser General Public License shall^M
    +apply, that proxy's public statement of acceptance of any version is^M
    +permanent authorization for you to choose that version for the^M
    +Library.^M
    diff --git a/README.md b/README.md
    index 68c401a..b94959e 100644
    --- a/README.md
    +++ b/README.md
    @@ -1,236 +1,236 @@
    -# C++ tokenizer for Vietnamese

-This project provides tokenizer library for Vietnamese language and 2 command line tools for tokenization and some simple Vietnamese-specific operations with text (i.e. remove diacritics). It is used in Cốc Cốc Search and Ads systems and the main goal in its development was to reach high performance while keeping the quality reasonable for search ranking needs.

-### Installing

-Building from source and installing into sandbox (or into the system):

- -$ mkdir build && cd build -$ cmake .. -# make install -

-To include java bindings:

- -$ mkdir build && cd build -$ cmake -DBUILD_JAVA=1 .. -# make install -

-To include python bindings - install cython package and compile wrapper code (only Python3 is supported):

- -$ mkdir build && cd build -$ cmake -DBUILD_PYTHON=1 .. -# make install -

-Building debian package can be done with debhelper tools:

- -$ dpkg-buildpackage <options> # from source tree root -

-If you want to build and install everything into your sandbox, you can use something like this (it will build everything and install into ~/.local, which is considered as a standard sandbox PREFIX by many applications and frameworks):
- -$ mdkir build && cd build -$ cmake -DBUILD_JAVA=1 -DBUILD_PYTHON=1 -DCMAKE_INSTALL_PREFIX=~/.local .. -$ make install -

-## Using the tools

-Both tools will show their usage with --help option. Both tools can accept either command line arguments or stdin as an input (if both provided, command line arguments are preferred). If stdin is used, each line is considered as one separate argument. The output format is TAB-separated tokens of the original phrase (note that Vietnamese tokens can have whitespaces inside). There's a few examples of usage below.

-Tokenize command line argument:

- -$ tokenizer "Từng bước để trở thành một lập trình viên giỏi" -từng bước để trở thành một lập trình viên giỏi -

-Note that it may take one or two seconds for tokenizer to load due to one comparably big dictionary used to tokenize "sticky phrases" (when people write words without spacing). You can disable it by using -n option and the tokenizer will be up in no time. The default behaviour about "sticky phrases" is to only try to split them within urls or domains. With -n you can disable it completely and with -u you can force using it for the whole text. Compare:

-```
-$ tokenizer "toisongohanoi, tôi đăng ký trên thegioididong.vn"
-toisongohanoi tôi đăng ký trên the gioi di dong vn

-$ tokenizer -n "toisongohanoi, tôi đăng ký trên thegioididong.vn"
-toisongohanoi tôi đăng ký trên thegioididong vn

-$ tokenizer -u "toisongohanoi, tôi đăng ký trên thegioididong.vn"
-toi song o ha noi tôi đăng ký trên the gioi di dong vn
-```

-To avoid reloading dictionaries for every phrase, you can pass phrases from stdin. Here's an example (note that the first line of output is empty - that means empty result for "/" input line):

-```
-$ echo -ne "/\nanh yêu em\nbún chả ở nhà hàng Quán Ăn Ngon ko ngon\n" | tokenizer

-anh yêu em
-bún chả ở nhà hàng quán ăn ngon ko ngon
-```

-Whitespaces and punctuations are ignored during normal tokenization, but are kept during tokenization for transformation, which is used internally by Coc Coc search engine. To keep punctuations during normal tokenization, except those in segmented URLs, use -k. To run tokenization for transformation, use -t - notice that this will format result by replacing spaces in multi-syllable tokens with _ and _ with ~.

-```
-$ tokenizer "toisongohanoi, tôi đăng ký trên thegioididong.vn" -k
-toisongohanoi , tôi đăng ký trên the gioi di dong vn

-$ tokenizer "toisongohanoi, tôi đăng ký trên thegioididong.vn" -t
-toisongohanoi , tôi đăng_ký trên the_gioi di_dong vn
-```

-The usage of vn_lang_tool is pretty similar, you can see full list of options for both tools by using:

- -$ tokenizer --help -$ vn_lang_tool --help -

-## Using the library

-Use the code of both tools as an example of usage for a library, they are pretty straightforward and easy to understand:

- -utils/tokenizer.cpp # for tokenizer tool -utils/vn_lang_tool.cpp # for vn_lang_tool -

-Here's a short code snippet from there:

-```cpp
-// initialize tokenizer, exit in case of failure
-if (0 > Tokenizer::instance().initialize(opts.dict_path, !opts.no_sticky))
-{

  •   exit(EXIT_FAILURE);
    

-}

-// tokenize given text, two additional options are:
-// - bool for_transforming - this option is Cốc Cốc specific kept for backwards compatibility
-// - int tokenize_options - TOKENIZE_NORMAL, TOKENIZE_HOST or TOKENIZE_URL,
-// just use Tokenizer::TOKENIZE_NORMAL if unsure
-std::vector< FullToken > res = Tokenizer::instance().segment(text, false, opts.tokenize_option);

-for (FullToken t : res)
-{

  •   // do something with tokens
    

-}
-```

-Note that you can call segment() function of the same Tokenizer instance multiple times and in parallel from multiple threads.

-Here's a short explanation of fields in FullToken structure:

-```cpp
-struct Token {

  •   // position of the start of normalized token (in chars)
    
  •   int32_t normalized_start;
    
  •   // position of the end of normalized token (in chars)
    
  •   int32_t normalized_end;
    
  •   // position of the start of token in original text (in bytes)
    
  •   int32_t original_start;
    
  •   // position of the end of token in original text (in bytes)
    
  •   int32_t original_end;
    
  •   // token type (WORD, NUMBER, SPACE or PUNCT)
    
  •   int32_t type;
    
  •   // token segmentation type (this field is Cốc Cốc specific and kept for backwards compatibility)
    
  •   int32_t seg_type;
    

-}

-struct FullToken : Token {

  •   // normalized token text
    
  •   std::string text;
    

-}

-```

-## Using Java bindings

-A java interface is provided to be used in java projects. Internally it utilizes JNI and the Unsafe API to connect Java and C++. You can find an example of its usage in Tokenizer class's main function:

- -java/src/java/Tokenizer.java -

-To run this test class from source tree, use the following command:

- -$ LD_LIBRARY_PATH=build java -cp build/coccoc-tokenizer.jar com.coccoc.Tokenizer "một câu văn tiếng Việt" -

-Normally LD_LIBRARY_PATH should point to a directory with libcoccoc_tokenizer_jni.so binary. If you have already installed deb package or make install-ed everything into your system, LD_LIBRARY_PATH is not needed as the binary will be taken from your system (/usr/lib or similar).

-## Using Python bindings

-```python
-from CocCocTokenizer import PyTokenizer

-# load_nontone_data is True by default
-T = PyTokenizer(load_nontone_data=True)

-# tokenize_option:
-# 0: TOKENIZE_NORMAL (default)
-# 1: TOKENIZE_HOST
-# 2: TOKENIZE_URL
-print(T.word_tokenize("xin chào, tôi là người Việt Nam", tokenize_option=0))

-# output: ['xin', 'chào', ',', 'tôi', 'là', 'người', 'Việt_Nam']
-```

-## Other languages

-Bindings for other languages are not yet implemented but it will be nice if someone can help to write them.

-## Benchmark

-The library provides high speed tokenization which is a requirement for performance critical applications.

-The benchmark is done on a typical laptop with Intel Core i5-5200U processor:
-- Dataset: 1.203.165 Vietnamese Wikipedia articles (Link)
-- Output: 106.042.935 tokens out of 630.252.179 characters
-- Processing time: 41 seconds
-- Speed: 15M characters / second, or 2.5M tokens / second
-- RAM consumption is around 300Mb

-## Quality Comparison

-The tokenizer tool has a special output format which is similar to other existing tools for tokenization of Vietnamese texts - it preserves all the original text and just marks multi-syllable tokens with underscores instead of spaces. Compare:

-```
-$ tokenizer 'Lan hỏi: "điều kiện gì?".'
-lan hỏi điều kiện gì

-$ tokenizer -f original 'Lan hỏi: "điều kiện gì?".'
-Lan hỏi: "điều_kiện gì?".
-```

-Using the following testset for comparison with underthesea and RDRsegmenter, we get significantly lower result, but for most of the cases the observed differences are not important for search ranking quality. Below you can find few examples of such differences. Please, be aware of them when using this library.

-```
-original : Em út theo anh cả vào miền Nam.
-coccoc-tokenizer : Em_út theo anh_cả vào miền_Nam.
-underthesea : Em_út theo anh cả vào miền Nam.
-RDRsegmenter : Em_út theo anh_cả vào miền Nam.

-original : kết quả cuộc thi phóng sự - ký sự 2004 của báo Tuổi Trẻ.
-coccoc-tokenizer : kết_quả cuộc_thi phóng_sự - ký_sự 2004 của báo Tuổi_Trẻ.
-underthesea : kết_quả cuộc thi phóng_sự - ký_sự 2004 của báo Tuổi_Trẻ.
-RDRsegmenter : kết_quả cuộc thi phóng_sự - ký_sự 2004 của báo Tuổi_Trẻ.

-original : cô bé lớn lên dưới mái lều tranh rách nát, trong một gia đình có bốn thế hệ phải xách bị gậy đi ăn xin.
-coccoc-tokenizer : cô_bé lớn lên dưới mái lều tranh rách_nát, trong một gia_đình có bốn thế_hệ phải xách bị gậy đi ăn_xin.
-underthesea : cô bé lớn lên dưới mái lều tranh rách_nát, trong một gia_đình có bốn thế_hệ phải xách bị_gậy đi ăn_xin.
-RDRsegmenter : cô bé lớn lên dưới mái lều tranh rách_nát, trong một gia_đình có bốn thế_hệ phải xách bị_gậy đi ăn_xin.
-```

-We also don't apply any named entity recognition mechanisms within the tokenizer and have few rare cases where we fail to solve ambiguity correctly. We thus didn't want to provide exact quality comparison results as probably the goals and potential use cases of this library and of those similar ones mentioned above are different and thus precise comparison doesn't make much sense.

-## Future Plans

-We'd love to introduce bindings for Python and maybe other languages later and we'd be happy if somebody can help us doing that. We are also thinking about adding POS tagger and more complex linguistic features later.

-If you find any issues or have any suggestions regarding further upgrades, please, report them here or write us through github.
+# C++ tokenizer for Vietnamese^M
+^M
+This project provides tokenizer library for Vietnamese language and 2 command line tools for tokenization and some simple Vietnamese-specific operations with text (i.e. remove diacritics). It is used in Cốc Cốc Search and Ads systems and the main goal in its development was to reach high performance while keeping the quality reasonable for search ranking needs.^M
+^M
@bachan Can you tell me where I edited it? As far as I remember, I did not change it.

from coccoc-tokenizer.

dinhan92 avatar dinhan92 commented on July 26, 2024

@bachan How do I revert the file build_java.sh to its original state?

from coccoc-tokenizer.

bachan avatar bachan commented on July 26, 2024

Just remove everything, clone the repo again, and carefully follow the guidelines in the README.

from coccoc-tokenizer.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games that makes everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.