Skip to content

Commit 6600a96

Browse files
authored
Add ability to encode raw binary strings (#6)
1 parent c72fa85 commit 6600a96

8 files changed

+339
-149
lines changed

README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ NodeJS `Buffer` is also acceptable because it is a subclass of `Uint8Array`.
162162
| intMode | IntMode | `IntMode.AS_ENCODED` if `useBigInt64` is `true` or `IntMode.UNSAFE_NUMBER` otherwise |
163163
| rawBinaryStringKeys | boolean | false |
164164
| rawBinaryStringValues | boolean | false |
165+
| useRawBinaryStringClass | boolean | false |
165166
| useMap | boolean | false |
166167
| supportObjectNumberKeys | boolean | false |
167168
| maxStrLength | number | `4_294_967_295` (UINT32_MAX) |
@@ -174,7 +175,7 @@ You can use `max${Type}Length` to limit the length of each type decoded.
174175

175176
`intMode` determines whether decoded integers should be returned as numbers or bigints in different circumstances. The possible values are [described below](#intmode).
176177

177-
To skip UTF-8 decoding of strings, one or both of `rawBinaryStringKeys` and `rawBinaryStringValues` can be set to `true`. If enabled, strings are decoded into `Uint8Array`. `rawBinaryStringKeys` affects only map keys, while `rawBinaryStringValues` affect all other string values.
178+
To skip UTF-8 decoding of strings, one or both of `rawBinaryStringKeys` and `rawBinaryStringValues` can be set to `true`. If enabled, strings are decoded into `Uint8Array`, or a `RawBinaryString` which wraps a `Uint8Array` if `useRawBinaryStringClass` is true. `rawBinaryStringKeys` affects only map keys, while `rawBinaryStringValues` affects all other string values. You may want to enable `useRawBinaryStringClass` if you want to distinguish between regular strings and binary strings, or if you wish to re-encode the object, since `RawBinaryString` instances will be encoded as regular strings.
178179

179180
If `useMap` is enabled, maps are decoded into the `Map` container instead of plain objects. `Map` objects support a wider range of key types. Plain objects only support string keys (though you can enable `supportObjectNumberKeys` to coerce number keys to strings), while `Map` objects support strings, numbers, bigints, and Uint8Arrays.
180181

@@ -549,7 +550,7 @@ The mapping of integers varies on the setting of `intMode`.
549550

550551
- \*1 Both `null` and `undefined` are mapped to `nil` (`0xC0`) type, and are decoded into `null`
551552
- \*2 MessagePack ints are decoded as either numbers or bigints depending on the [IntMode](#intmode) used during decoding.
552-
- \*3 If you'd like to skip UTF-8 decoding of strings, enable one of `rawBinaryStringKeys` or `rawBinaryStringValues`. In that case, strings are decoded into `Uint8Array`.
553+
- \*3 If you'd like to skip UTF-8 decoding of strings, enable one of `rawBinaryStringKeys` or `rawBinaryStringValues`. In that case, strings are decoded into a `Uint8Array` or a `RawBinaryString`, depending on the value of `useRawBinaryStringClass`.
553554
- \*4 Any `ArrayBufferView`s including NodeJS's `Buffer` are mapped to `bin` family, and are decoded into `Uint8Array`
554555
- \*5 In handling `Object`, it is regarded as `Record<string, unknown>` in terms of TypeScript
555556
- \*6 MessagePack timestamps may have nanoseconds, which will be lost when they are decoded into JavaScript `Date`. This behavior can be overridden by registering `-1` for the extension codec.

src/Decoder.ts

+27-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { prettyByte } from "./utils/prettyByte";
22
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
33
import { IntMode, getInt64, getUint64, convertSafeIntegerToMode, UINT32_MAX } from "./utils/int";
44
import { utf8Decode } from "./utils/utf8";
5-
import { createDataView, ensureUint8Array } from "./utils/typedArrays";
5+
import { createDataView, ensureUint8Array, RawBinaryString } from "./utils/typedArrays";
66
import { CachedKeyDecoder, KeyDecoder } from "./CachedKeyDecoder";
77
import { DecodeError } from "./DecodeError";
88
import type { ContextOf } from "./context";
@@ -53,6 +53,17 @@ export type DecoderOptions<ContextType = undefined> = Readonly<
5353
*/
5454
rawBinaryStringKeys: boolean;
5555

56+
/**
57+
* If true, the decoder will use the RawBinaryString class to store raw binary strings created
58+
* during decoding from the rawBinaryStringValues and rawBinaryStringKeys options. If false, it
59+
* will use Uint8Arrays.
60+
*
61+
* Defaults to false.
62+
*
63+
* Has no effect if rawBinaryStringValues and rawBinaryStringKeys are both false.
64+
*/
65+
useRawBinaryStringClass: boolean;
66+
5667
/**
5768
* If true, the decoder will use the Map object to store map values. If false, it will use plain
5869
* objects. Defaults to false.
@@ -126,7 +137,13 @@ type MapKeyType = string | number | bigint | Uint8Array;
126137

127138
function isValidMapKeyType(key: unknown, useMap: boolean, supportObjectNumberKeys: boolean): key is MapKeyType {
128139
if (useMap) {
129-
return typeof key === "string" || typeof key === "number" || typeof key === "bigint" || key instanceof Uint8Array;
140+
return (
141+
typeof key === "string" ||
142+
typeof key === "number" ||
143+
typeof key === "bigint" ||
144+
key instanceof Uint8Array ||
145+
key instanceof RawBinaryString
146+
);
130147
}
131148
// Plain objects support a more limited set of key types
132149
return typeof key === "string" || (supportObjectNumberKeys && typeof key === "number");
@@ -261,6 +278,7 @@ export class Decoder<ContextType = undefined> {
261278
private readonly intMode: IntMode;
262279
private readonly rawBinaryStringValues: boolean;
263280
private readonly rawBinaryStringKeys: boolean;
281+
private readonly useRawBinaryStringClass: boolean;
264282
private readonly useMap: boolean;
265283
private readonly supportObjectNumberKeys: boolean;
266284
private readonly maxStrLength: number;
@@ -285,6 +303,7 @@ export class Decoder<ContextType = undefined> {
285303
this.intMode = options?.intMode ?? (options?.useBigInt64 ? IntMode.AS_ENCODED : IntMode.UNSAFE_NUMBER);
286304
this.rawBinaryStringValues = options?.rawBinaryStringValues ?? false;
287305
this.rawBinaryStringKeys = options?.rawBinaryStringKeys ?? false;
306+
this.useRawBinaryStringClass = options?.useRawBinaryStringClass ?? false;
288307
this.useMap = options?.useMap ?? false;
289308
this.supportObjectNumberKeys = options?.supportObjectNumberKeys ?? false;
290309
this.maxStrLength = options?.maxStrLength ?? UINT32_MAX;
@@ -716,9 +735,13 @@ export class Decoder<ContextType = undefined> {
716735
this.stack.pushArrayState(size);
717736
}
718737

719-
private decodeString(byteLength: number, headerOffset: number): string | Uint8Array {
738+
private decodeString(byteLength: number, headerOffset: number): string | Uint8Array | RawBinaryString {
720739
if (this.stateIsMapKey() ? this.rawBinaryStringKeys : this.rawBinaryStringValues) {
721-
return this.decodeBinary(byteLength, headerOffset);
740+
const decoded = this.decodeBinary(byteLength, headerOffset);
741+
if (this.useRawBinaryStringClass) {
742+
return new RawBinaryString(decoded);
743+
}
744+
return decoded;
722745
}
723746
return this.decodeUtf8String(byteLength, headerOffset);
724747
}

src/Encoder.ts

+20-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { utf8Count, utf8Encode } from "./utils/utf8";
22
import { ExtensionCodec, ExtensionCodecType } from "./ExtensionCodec";
33
import { setInt64, setUint64 } from "./utils/int";
4-
import { ensureUint8Array, compareUint8Arrays } from "./utils/typedArrays";
4+
import { ensureUint8Array, compareUint8Arrays, RawBinaryString } from "./utils/typedArrays";
55
import type { ExtData } from "./ExtData";
66
import type { ContextOf } from "./context";
77

@@ -326,6 +326,8 @@ export class Encoder<ContextType = undefined> {
326326
this.encodeArray(object, depth);
327327
} else if (ArrayBuffer.isView(object)) {
328328
this.encodeBinary(object);
329+
} else if (object instanceof RawBinaryString) {
330+
this.encodeBinaryAsString(object);
329331
} else if (typeof object === "bigint") {
330332
// this is here instead of in doEncode so that we can try encoding with an extension first,
331333
// otherwise we would break existing extensions for bigints
@@ -361,6 +363,13 @@ export class Encoder<ContextType = undefined> {
361363
this.writeU8a(bytes);
362364
}
363365

366+
/**
 * Writes the bytes wrapped by a RawBinaryString using a string header rather than
 * the binary path taken by encodeBinary.
 *
 * The wrapped bytes are written as given; no UTF-8 encoding step is applied here.
 *
 * @param binaryString - Wrapper whose `rawBinaryValue` view supplies the payload bytes.
 */
private encodeBinaryAsString(binaryString: RawBinaryString) {
367+
const object = binaryString.rawBinaryValue;
368+
// The string header is sized from the byte length of the wrapped view.
this.writeStringHeader(object.byteLength);
369+
// rawBinaryValue is typed as ArrayBufferView, so it is passed through ensureUint8Array before writing.
const bytes = ensureUint8Array(object);
370+
this.writeU8a(bytes);
371+
}
372+
364373
private encodeArray(object: Array<unknown>, depth: number) {
365374
const size = object.length;
366375
if (size < 16) {
@@ -397,6 +406,7 @@ export class Encoder<ContextType = undefined> {
397406
private sortMapKeys(keys: Array<unknown>): Array<unknown> {
398407
const numericKeys: Array<number | bigint> = [];
399408
const stringKeys: Array<string> = [];
409+
const rawStringKeys: Array<RawBinaryString> = [];
400410
const binaryKeys: Array<Uint8Array> = [];
401411
for (const key of keys) {
402412
if (typeof key === "number") {
@@ -410,15 +420,20 @@ export class Encoder<ContextType = undefined> {
410420
stringKeys.push(key);
411421
} else if (ArrayBuffer.isView(key)) {
412422
binaryKeys.push(ensureUint8Array(key));
423+
} else if (key instanceof RawBinaryString) {
424+
rawStringKeys.push(key);
413425
} else {
414426
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
415427
}
416428
}
417429
numericKeys.sort((a, b) => (a < b ? -1 : a > b ? 1 : 0)); // Avoid using === to compare numbers and bigints
418430
stringKeys.sort();
431+
rawStringKeys.sort((a, b) =>
432+
compareUint8Arrays(ensureUint8Array(a.rawBinaryValue), ensureUint8Array(b.rawBinaryValue)),
433+
);
419434
binaryKeys.sort(compareUint8Arrays);
420-
// At the moment this arbitrarily orders the keys as numeric, string, binary
421-
return ([] as Array<unknown>).concat(numericKeys, stringKeys, binaryKeys);
435+
// At the moment this arbitrarily orders the keys as numeric, string, raw string, binary
436+
return ([] as Array<unknown>).concat(numericKeys, stringKeys, rawStringKeys, binaryKeys);
422437
}
423438

424439
private encodeMapObject(object: Record<string, unknown>, depth: number) {
@@ -464,6 +479,8 @@ export class Encoder<ContextType = undefined> {
464479
this.encodeBigInt(key);
465480
} else if (ArrayBuffer.isView(key)) {
466481
this.encodeBinary(key);
482+
} else if (key instanceof RawBinaryString) {
483+
this.encodeBinaryAsString(key);
467484
} else {
468485
throw new Error(`Unsupported map key type: ${Object.prototype.toString.apply(key)}`);
469486
}

src/index.ts

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ import { Encoder } from "./Encoder";
2626
export { Encoder };
2727
import type { EncoderOptions } from "./Encoder";
2828
export type { EncoderOptions };
29+
import { RawBinaryString } from "./utils/typedArrays";
30+
export { RawBinaryString };
2931

3032
// Utilities for Extension Types:
3133

src/utils/typedArrays.ts

+19
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,22 @@ export function compareUint8Arrays(a: Uint8Array, b: Uint8Array): number {
3030
}
3131
return a.length - b.length;
3232
}
33+
34+
/**
35+
* Represents a binary value that should be encoded as if it were a string.
36+
*
37+
* Effectively, this is a string that has already been UTF-8 encoded to a binary string. This is
38+
* useful if you need to encode a value as a string, but that value contains invalid UTF-8 sequences;
39+
* ideally this situation should be avoided and the value should be encoded as binary, not string,
40+
* but this may be necessary for compatibility with non-ideal systems.
41+
*/
42+
export class RawBinaryString {
43+
/**
44+
* Create a new RawBinaryString from an ArrayBufferView.
*
* @param rawBinaryValue - The view to wrap; exposed unchanged as the public readonly
*   property of the same name.
* @throws TypeError if `rawBinaryValue` fails the `ArrayBuffer.isView` check.
45+
*/
46+
public constructor(public readonly rawBinaryValue: ArrayBufferView) {
47+
// Runtime check in addition to the static parameter type.
if (!ArrayBuffer.isView(rawBinaryValue)) {
48+
throw new TypeError("RawBinaryString: rawBinaryValue must be an ArrayBufferView");
49+
}
50+
}
51+
}

test/decode-raw-strings.test.ts

-132
This file was deleted.

0 commit comments

Comments
 (0)