Skip to content

Commit 0605670

Browse files
committed
feat: Added useRawBinaryStrings option to Decoder to allow override of default UTF-8 behaviour
1 parent 1fc7622 commit 0605670

File tree

4 files changed

+95
-23
lines changed

4 files changed

+95
-23
lines changed

.vscode/settings.json

+7-7
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
"typescript.tsdk": "node_modules/typescript/lib",
33
"files.eol": "\n",
44
"editor.tabSize": 2,
5+
"editor.defaultFormatter": "esbenp.prettier-vscode",
6+
"editor.formatOnSave": false,
57
"editor.codeActionsOnSave": {
6-
"source.fixAll.eslint": true
8+
"source.fixAll.eslint": true,
9+
"source.fixAll": "always"
710
},
8-
"cSpell.words": [
9-
"instanceof",
10-
"tsdoc",
11-
"typeof",
12-
"whatwg"
13-
]
11+
"cSpell.words": ["instanceof", "tsdoc", "typeof", "whatwg"],
12+
"mochaExplorer.files": "test/**/*.test.{ts,js}",
13+
"mochaExplorer.require": ["ts-node/register", "tsconfig-paths/register"]
1414
}

README.md

+16-12
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,16 @@ Name|Type|Default
114114
extensionCodec | ExtensionCodec | `ExtensionCodec.defaultCodec`
115115
context | user-defined | -
116116
useBigInt64 | boolean | false
117+
useRawBinaryStrings | boolean | false
117118
maxDepth | number | `100`
118119
initialBufferSize | number | `2048`
119120
sortKeys | boolean | false
120121
forceFloat32 | boolean | false
121122
forceIntegerToFloat | boolean | false
122123
ignoreUndefined | boolean | false
123124

125+
To skip UTF-8 decoding of strings, set `useRawBinaryStrings` to `true`. In this case, string values are returned as raw `Uint8Array`s instead of being decoded; map keys are still decoded as strings.
126+
124127
### `decode(buffer: ArrayLike<number> | BufferSource, options?: DecoderOptions): unknown`
125128

126129
It decodes `buffer` that includes a MessagePack-encoded object, and returns the decoded object typed `unknown`.
@@ -498,18 +501,19 @@ null, undefined|nil|null (*1)
498501
boolean (true, false)|bool family|boolean (true, false)
499502
number (53-bit int)|int family|number
500503
number (64-bit float)|float family|number
501-
string|str family|string
502-
ArrayBufferView |bin family|Uint8Array (*2)
504+
string|str family|string (*2)
505+
ArrayBufferView |bin family|Uint8Array (*3)
503506
Array|array family|Array
504-
Object|map family|Object (*3)
505-
Date|timestamp ext family|Date (*4)
506-
bigint|N/A|N/A (*5)
507+
Object|map family|Object (*4)
508+
Date|timestamp ext family|Date (*5)
509+
bigint|N/A|N/A (*6)
507510

508-
* *1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`
509-
* *2 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
510-
* *3 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
511-
* *4 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
512-
* *5 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it.
511+
* *1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`.
512+
* *2 If you'd like to skip UTF-8 decoding of strings, set `useRawBinaryStrings: true`. In this case, string values are returned as raw `Uint8Array`s; map keys are still decoded as strings.
513+
* *3 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`.
514+
* *4 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript.
515+
* *5 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.
516+
* *6 bigint is not supported in `useBigInt64: false` mode, but you can define an extension codec for it.
513517

514518
If you set `useBigInt64: true`, the following mapping is used:
515519

@@ -519,15 +523,15 @@ null, undefined|nil|null
519523
boolean (true, false)|bool family|boolean (true, false)
520524
**number (32-bit int)**|int family|number
521525
**number (except for the above)**|float family|number
522-
**bigint**|int64 / uint64|bigint (*6)
526+
**bigint**|int64 / uint64|bigint (*7)
523527
string|str family|string
524528
ArrayBufferView |bin family|Uint8Array
525529
Array|array family|Array
526530
Object|map family|Object
527531
Date|timestamp ext family|Date
528532

529533

530-
* *6 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
534+
* *7 If the bigint is larger than the max value of uint64 or smaller than the min value of int64, then the behavior is undefined.
531535

532536
## Prerequisites
533537

src/Decoder.ts

+23-4
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,16 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
2020
*/
2121
useBigInt64: boolean;
2222

23+
/**
24+
* By default, string values will be decoded as UTF-8 strings. However, if this option is true,
25+
* string values will be returned as Uint8Arrays without additional decoding.
26+
*
27+
* This is useful if the strings may contain invalid UTF-8 sequences.
28+
*
29+
* Note that this option only applies to string values, not map keys. Additionally, when
30+
* enabled, raw string length is limited by the maxBinLength option.
31+
*/
32+
useRawBinaryStrings: boolean;
2333
/**
2434
* Maximum string length.
2535
*
@@ -195,6 +205,7 @@ export class Decoder<ContextType = undefined> {
195205
private readonly extensionCodec: ExtensionCodecType<ContextType>;
196206
private readonly context: ContextType;
197207
private readonly useBigInt64: boolean;
208+
private readonly useRawBinaryStrings: boolean;
198209
private readonly maxStrLength: number;
199210
private readonly maxBinLength: number;
200211
private readonly maxArrayLength: number;
@@ -215,6 +226,7 @@ export class Decoder<ContextType = undefined> {
215226
this.context = (options as { context: ContextType } | undefined)?.context as ContextType; // needs a type assertion because DecoderOptions has no context property when ContextType is undefined
216227

217228
this.useBigInt64 = options?.useBigInt64 ?? false;
229+
this.useRawBinaryStrings = options?.useRawBinaryStrings ?? false;
218230
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
219231
this.maxBinLength = options?.maxBinLength ?? UINT32_MAX;
220232
this.maxArrayLength = options?.maxArrayLength ?? UINT32_MAX;
@@ -399,7 +411,7 @@ export class Decoder<ContextType = undefined> {
399411
} else {
400412
// fixstr (101x xxxx) 0xa0 - 0xbf
401413
const byteLength = headByte - 0xa0;
402-
object = this.decodeUtf8String(byteLength, 0);
414+
object = this.decodeString(byteLength, 0);
403415
}
404416
} else if (headByte === 0xc0) {
405417
// nil
@@ -451,15 +463,15 @@ export class Decoder<ContextType = undefined> {
451463
} else if (headByte === 0xd9) {
452464
// str 8
453465
const byteLength = this.lookU8();
454-
object = this.decodeUtf8String(byteLength, 1);
466+
object = this.decodeString(byteLength, 1);
455467
} else if (headByte === 0xda) {
456468
// str 16
457469
const byteLength = this.lookU16();
458-
object = this.decodeUtf8String(byteLength, 2);
470+
object = this.decodeString(byteLength, 2);
459471
} else if (headByte === 0xdb) {
460472
// str 32
461473
const byteLength = this.lookU32();
462-
object = this.decodeUtf8String(byteLength, 4);
474+
object = this.decodeString(byteLength, 4);
463475
} else if (headByte === 0xdc) {
464476
// array 16
465477
const size = this.readU16();
@@ -637,6 +649,13 @@ export class Decoder<ContextType = undefined> {
637649
this.stack.pushArrayState(size);
638650
}
639651

652+
private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
653+
if (!this.useRawBinaryStrings || this.stateIsMapKey()) {
654+
return this.decodeUtf8String(byteLength, headerOffset);
655+
}
656+
return this.decodeBinary(byteLength, headerOffset);
657+
}
658+
640659
private decodeUtf8String(byteLength: number, headerOffset: number): string {
641660
if (byteLength > this.maxStrLength) {
642661
throw new DecodeError(

test/decode-raw-strings.test.ts

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import assert from "assert";
2+
import { encode, decode } from "../src";
3+
import type { DecoderOptions } from "../src";
4+
5+
describe("decode with useRawBinaryStrings specified", () => {
6+
const options = { useRawBinaryStrings: true } satisfies DecoderOptions;
7+
8+
it("decodes string as binary", () => {
9+
const actual = decode(encode("foo"), options);
10+
const expected = Uint8Array.from([0x66, 0x6f, 0x6f]);
11+
assert.deepStrictEqual(actual, expected);
12+
});
13+
14+
it("decodes invalid UTF-8 string as binary", () => {
15+
const invalidUtf8String = Uint8Array.from([
16+
61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50, 176,
17+
184, 221, 66, 188, 171, 36, 135, 121,
18+
]);
19+
const encoded = Uint8Array.from([
20+
196, 32, 61, 180, 118, 220, 39, 166, 43, 68, 219, 116, 105, 84, 121, 46, 122, 136, 233, 221, 15, 174, 247, 19, 50,
21+
176, 184, 221, 66, 188, 171, 36, 135, 121,
22+
]);
23+
24+
const actual = decode(encoded, options);
25+
assert.deepStrictEqual(actual, invalidUtf8String);
26+
});
27+
28+
it("decodes object keys as strings", () => {
29+
const actual = decode(encode({ key: "foo" }), options);
30+
const expected = { key: Uint8Array.from([0x66, 0x6f, 0x6f]) };
31+
assert.deepStrictEqual(actual, expected);
32+
});
33+
34+
it("ignores maxStrLength", () => {
35+
const lengthLimitedOptions = { ...options, maxStrLength: 1 } satisfies DecoderOptions;
36+
37+
const actual = decode(encode("foo"), lengthLimitedOptions);
38+
const expected = Uint8Array.from([0x66, 0x6f, 0x6f]);
39+
assert.deepStrictEqual(actual, expected);
40+
});
41+
42+
it("respects maxBinLength", () => {
43+
const lengthLimitedOptions = { ...options, maxBinLength: 1 } satisfies DecoderOptions;
44+
45+
assert.throws(() => {
46+
decode(encode("foo"), lengthLimitedOptions);
47+
}, /max length exceeded/i);
48+
});
49+
});

0 commit comments

Comments
 (0)