File tree 2 files changed +13
-0
lines changed
2 files changed +13
-0
lines changed Original file line number Diff line number Diff line change @@ -88,6 +88,8 @@ set(TOKENIZERS_CPP_CARGO_SOURCE_PATH ${TOKENIZERS_CPP_ROOT}/rust)
88
88
# Declare msgpack's Boost switch as OFF before the msgpack subdirectory is added
# (presumably so the vendored msgpack build does not require Boost - confirm
# against msgpack/CMakeLists.txt).
option (MSGPACK_USE_BOOST "Use Boost libraries" OFF )
89
89
add_subdirectory (msgpack)
90
90
91
# Opt-in switch, OFF by default. When set to ON, the MLC_ENABLE_SENTENCEPIECE_TOKENIZER
# compile definition is added to tokenizer_cpp_objs further down in this file; the
# C++ sources test that macro with #ifdef.
+ option (MLC_ENABLE_SENTENCEPIECE_TOKENIZER "Enable SentencePiece tokenizer" OFF )
92
+
91
93
if (MSVC )
92
94
set (TOKENIZERS_RUST_LIB "${TOKENIZERS_CPP_CARGO_BINARY_DIR} /tokenizers_c.lib" )
93
95
else ()
@@ -120,6 +122,9 @@ add_library(tokenizer_cpp_objs OBJECT ${TOKENIZER_CPP_SRCS})
120
122
target_include_directories (tokenizer_cpp_objs PRIVATE sentencepiece/src)
121
123
target_include_directories (tokenizer_cpp_objs PRIVATE msgpack/include )
122
124
target_include_directories (tokenizer_cpp_objs PUBLIC ${TOKENIZERS_CPP_INCLUDE} )
125
# Propagate the feature switch to the C++ sources as a preprocessor definition.
# The option is evaluated as a CMake boolean so that every truthy spelling
# (ON, 1, TRUE, YES, Y) enables it, not only the literal string "ON".
+ if (MLC_ENABLE_SENTENCEPIECE_TOKENIZER)
126
+ target_compile_definitions (tokenizer_cpp_objs PUBLIC MLC_ENABLE_SENTENCEPIECE_TOKENIZER)
127
+ endif ()
123
128
target_link_libraries (tokenizer_cpp_objs PRIVATE msgpack-cxx)
124
129
125
130
# sentencepiece config
Original file line number Diff line number Diff line change 10
10
11
11
namespace tokenizers {
12
12
13
+ #ifdef MLC_ENABLE_SENTENCEPIECE_TOKENIZER
13
14
class SentencePieceTokenizer : public Tokenizer {
14
15
public:
15
16
explicit SentencePieceTokenizer (const std::string& model_blob) {
@@ -46,4 +47,11 @@ class SentencePieceTokenizer : public Tokenizer {
46
47
// Factory for the SentencePiece backend: constructs a SentencePieceTokenizer from
// model_blob and returns it through a unique_ptr to the Tokenizer base class.
std::unique_ptr<Tokenizer> Tokenizer::FromBlobSentencePiece (const std::string& model_blob) {
47
48
return std::make_unique<SentencePieceTokenizer>(model_blob);
48
49
}
50
+ #else
51
// Stub compiled when MLC_ENABLE_SENTENCEPIECE_TOKENIZER is not defined. It keeps the
// symbol available but never returns a tokenizer; model_blob is ignored.
+ std::unique_ptr<Tokenizer> Tokenizer::FromBlobSentencePiece (const std::string& model_blob) {
52
// Fails immediately in builds where assert is active (it is a no-op under NDEBUG).
+ assert (false );
53
// NOTE(review): a bare `throw;` with no exception currently being handled calls
// std::terminate rather than raising something a caller can catch. Consider
// throwing a concrete exception type with a message naming the disabled feature
// (requires the matching #include at the top of this file).
+ throw ;
54
+ }
55
+ #endif // MLC_ENABLE_SENTENCEPIECE_TOKENIZER
56
+
49
57
} // namespace tokenizers
You can’t perform that action at this time.
0 commit comments