Skip to content

Commit 75f6a37

Browse files
authored
Revert "PYTHON-5126 & PYTHON-5280 Addresses issues raised in DRIVERS-3097 and DRIVERS-3123 " (#2337)
1 parent aa41e70 commit 75f6a37

File tree

4 files changed

+20 -55 lines changed

4 files changed

+20 -55 lines changed

bson/binary.py

-15
Original file line number | Diff line number | Diff line change
@@ -462,10 +462,6 @@ def from_vector(
462462
raise ValueError(f"{padding=}. It must be in [0,1, ..7].")
463463
if padding and not vector:
464464
raise ValueError("Empty vector with non-zero padding.")
465-
if padding and not (vector[-1] & ((1 << padding) - 1)) == 0: # type: ignore
466-
raise ValueError(
467-
"If padding p is provided, all bits in the final byte lower than p must be 0."
468-
)
469465
elif dtype == BinaryVectorDtype.FLOAT32: # pack floats as float32
470466
format_str = "f"
471467
if padding:
@@ -494,11 +490,6 @@ def as_vector(self) -> BinaryVector:
494490
dtype = BinaryVectorDtype(dtype)
495491
n_values = len(self) - position
496492

497-
if padding and dtype != BinaryVectorDtype.PACKED_BIT:
498-
raise ValueError(
499-
f"Corrupt data. Padding ({padding}) must be 0 for all but PACKED_BIT dtypes. ({dtype=})"
500-
)
501-
502493
if dtype == BinaryVectorDtype.INT8:
503494
dtype_format = "b"
504495
format_string = f"<{n_values}{dtype_format}"
@@ -522,12 +513,6 @@ def as_vector(self) -> BinaryVector:
522513
dtype_format = "B"
523514
format_string = f"<{n_values}{dtype_format}"
524515
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
525-
if padding and not n_values:
526-
raise ValueError("Corrupt data. Vector has a padding P, but no data.")
527-
if padding and n_values and not (unpacked_uint8s[-1] & ((1 << padding) - 1)) == 0:
528-
raise ValueError(
529-
"Corrupt data. Vector has a padding P, but bits in the final byte lower than P are non-zero."
530-
)
531516
return BinaryVector(unpacked_uint8s, dtype, padding)
532517

533518
else:

test/bson_binary_vector/packed_bit.json

+6 -15
Original file line number | Diff line number | Diff line change
@@ -21,32 +21,23 @@
2121
"canonical_bson": "1600000005766563746F7200040000000910007F0700"
2222
},
2323
{
24-
"description": "PACKED_BIT with padding",
24+
"description": "Empty Vector PACKED_BIT",
2525
"valid": true,
26-
"vector": [127, 8],
26+
"vector": [],
2727
"dtype_hex": "0x10",
2828
"dtype_alias": "PACKED_BIT",
29-
"padding": 3,
30-
"canonical_bson": "1600000005766563746F7200040000000910037F0800"
29+
"padding": 0,
30+
"canonical_bson": "1400000005766563746F72000200000009100000"
3131
},
3232
{
33-
"description": "PACKED_BIT with inconsistent padding",
34-
"valid": false,
33+
"description": "PACKED_BIT with padding",
34+
"valid": true,
3535
"vector": [127, 7],
3636
"dtype_hex": "0x10",
3737
"dtype_alias": "PACKED_BIT",
3838
"padding": 3,
3939
"canonical_bson": "1600000005766563746F7200040000000910037F0700"
4040
},
41-
{
42-
"description": "Empty Vector PACKED_BIT",
43-
"valid": true,
44-
"vector": [],
45-
"dtype_hex": "0x10",
46-
"dtype_alias": "PACKED_BIT",
47-
"padding": 0,
48-
"canonical_bson": "1400000005766563746F72000200000009100000"
49-
},
5041
{
5142
"description": "Overflow Vector PACKED_BIT",
5243
"valid": false,

test/test_bson.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -739,7 +739,7 @@ def test_vector(self):
739739
"""Tests of subtype 9"""
740740
# We start with valid cases, across the 3 dtypes implemented.
741741
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
742-
list_vector = [127, 8]
742+
list_vector = [127, 7]
743743
# As INT8, vector has length 2
744744
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
745745
vector = binary_vector.as_vector()
@@ -764,18 +764,18 @@ def test_vector(self):
764764
uncompressed = ""
765765
for val in list_vector:
766766
uncompressed += format(val, "08b")
767-
assert uncompressed[:-padding] == "0111111100001"
767+
assert uncompressed[:-padding] == "0111111100000"
768768

769769
# It is worthwhile explicitly showing the values encoded to BSON
770770
padded_doc = {"padded_vec": padded_vec}
771771
assert (
772772
encode(padded_doc)
773-
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00"
773+
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
774774
)
775775
# and dumped to json
776776
assert (
777777
json_util.dumps(padded_doc)
778-
== '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}'
778+
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
779779
)
780780

781781
# FLOAT32 is also implemented

test/test_bson_binary_vector.py

+10 -21
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,11 @@ def create_test(case_spec):
4848
def run_test(self):
4949
for test_case in case_spec.get("tests", []):
5050
description = test_case["description"]
51-
vector_exp = test_case.get("vector", None)
51+
vector_exp = test_case.get("vector", [])
5252
dtype_hex_exp = test_case["dtype_hex"]
5353
dtype_alias_exp = test_case.get("dtype_alias")
5454
padding_exp = test_case.get("padding", 0)
55-
canonical_bson_exp = test_case.get("canonical_bson", None)
55+
canonical_bson_exp = test_case.get("canonical_bson")
5656
# Convert dtype hex string into bytes
5757
dtype_exp = BinaryVectorDtype(int(dtype_hex_exp, 16).to_bytes(1, byteorder="little"))
5858

@@ -85,25 +85,14 @@ def run_test(self):
8585
self.assertEqual(cB_obs, canonical_bson_exp, description)
8686

8787
else:
88-
"""
89-
#### To prove correct in an invalid case (`valid:false`), one MUST
90-
- if the vector field is present, raise an exception when attempting to encode a document from the numeric values,
91-
dtype, and padding.
92-
- if the canonical_bson field is present, raise an exception when attempting to deserialize it into the corresponding
93-
numeric values, as the field contains corrupted data.
94-
"""
95-
# Tests Binary.from_vector()
96-
if vector_exp is not None:
97-
with self.assertRaises((struct.error, ValueError), msg=description):
98-
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
99-
100-
# Tests Binary.as_vector()
101-
if canonical_bson_exp is not None:
102-
with self.assertRaises((struct.error, ValueError), msg=description):
103-
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
104-
decoded_doc = decode(cB_exp)
105-
binary_obs = decoded_doc[test_key]
106-
binary_obs.as_vector()
88+
with self.assertRaises((struct.error, ValueError), msg=description):
89+
# Tests Binary.from_vector
90+
Binary.from_vector(vector_exp, dtype_exp, padding_exp)
91+
# Tests Binary.as_vector
92+
cB_exp = binascii.unhexlify(canonical_bson_exp.encode("utf8"))
93+
decoded_doc = decode(cB_exp)
94+
binary_obs = decoded_doc[test_key]
95+
binary_obs.as_vector()
10796

10897
return run_test
10998

0 commit comments

Comments
 (0)