File tree 3 files changed +19
-2
lines changed
3 files changed +19
-2
lines changed Original file line number Diff line number Diff line change @@ -15,7 +15,11 @@ namespace tokenizers {
15
15
*/
16
16
class HFTokenizer : public Tokenizer {
17
17
public:
18
- explicit HFTokenizer (TokenizerHandle handle) : handle_(handle) {}
18
+ explicit HFTokenizer (TokenizerHandle handle) : handle_(handle) {
19
+ #ifdef COMPILE_WASM_RUNTIME
20
+ setenv("TOKENIZERS_PARALLELISM", "false", true);
21
+ #endif
22
+ }
19
23
20
24
HFTokenizer (const HFTokenizer&) = delete ;
21
25
HFTokenizer (HFTokenizer&& other) { std::swap (other.handle_ , handle_); }
Original file line number Diff line number Diff line change @@ -5,7 +5,7 @@ rustup target add wasm32-unknown-emscripten
5
5
6
6
mkdir -p build
7
7
cd build
8
- emcmake cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=" -O3"
8
+ emcmake cmake ../.. -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS=" -O3 -DCOMPILE_WASM_RUNTIME "
9
9
emmake make tokenizers_cpp tokenizers_c sentencepiece-static -j8
10
10
cd ..
11
11
Original file line number Diff line number Diff line change @@ -48,8 +48,21 @@ async function testLlamaTokenizer() {
48
48
}
49
49
}
50
50
51
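+ // Regression test: unless the C++ library is built with -DCOMPILE_WASM_RUNTIME (which sets TOKENIZERS_PARALLELISM=false), encoding with this tokenizer triggers parallel processing, leading to an error.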
52
+ async function testBertTokenizer ( ) {
53
+ console . log ( "Bert Tokenizer" ) ;
54
+ const modelBuffer = await ( await
55
+ fetch ( "https://huggingface.co/Snowflake/snowflake-arctic-embed-l/raw/main/tokenizer.json" )
56
+ ) . arrayBuffer ( ) ;
57
+ const tok = await Tokenizer . fromJSON ( modelBuffer ) ;
58
+ const text = "What is the capital of Canada?" ;
59
+ const ids = tok . encode ( text ) ;
60
+ console . log ( ids ) ;
61
+ }
62
+
51
63
async function main ( ) {
52
64
await testJSONTokenizer ( )
65
+ await testBertTokenizer ( ) ;
53
66
await testLlamaTokenizer ( )
54
67
}
55
68
You can’t perform that action at this time.
0 commit comments