From 4f49f0185b0bbc527cf1be9395ff7e6ea99bb015 Mon Sep 17 00:00:00 2001 From: Anay Moitra Date: Fri, 25 Apr 2025 20:57:16 -0400 Subject: [PATCH 1/2] BUG: Raise ValueError for non-string columns in read_json orient='table' (GH19129) --- pandas/io/json/_table_schema.py | 5 +++++ .../tests/io/json/test_json_table_schema.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 7879be18b52c9..b5ee32220d7b7 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -372,6 +372,11 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame: pandas.read_json """ table = ujson_loads(json, precise_float=precise_float) + fields = table["schema"]["fields"] + + if any(not isinstance(field["name"], str) for field in fields): + raise ValueError("All column names must be strings when using orient='table'.") + col_order = [field["name"] for field in table["schema"]["fields"]] df = DataFrame(table["data"], columns=col_order)[col_order] diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 7936982e4a055..d3d6b4fc4e30e 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -881,3 +881,22 @@ def test_read_json_table_orient_period_depr_freq(self, freq): out = StringIO(df.to_json(orient="table")) result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) + + def test_read_json_table_non_string_column_names(self) -> None: + bad_json = json.dumps({ + "schema": { + "fields": [ + {"name": 0, "type": "integer"}, + {"name": 1, "type": "string"} + ], + "primaryKey": [], + "pandas_version": "1.0.0" + }, + "data": [ + [1, "a"], + [2, "b"] + ] + }) + + with pytest.raises(ValueError, match="All column names must be strings when using orient='table'"): + pd.read_json(StringIO(bad_json), orient="table") From 1bba16b9014b428d76dd819a468e64b20557560c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 26 Apr 2025 01:56:58 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tests/io/json/test_json_table_schema.py | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index d3d6b4fc4e30e..dba615f532d70 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -883,20 +883,22 @@ def test_read_json_table_orient_period_depr_freq(self, freq): tm.assert_frame_equal(df, result) def test_read_json_table_non_string_column_names(self) -> None: - bad_json = json.dumps({ - "schema": { - "fields": [ - {"name": 0, "type": "integer"}, - {"name": 1, "type": "string"} - ], - "primaryKey": [], - "pandas_version": "1.0.0" - }, - "data": [ - [1, "a"], - [2, "b"] - ] - }) + bad_json = json.dumps( + { + "schema": { + "fields": [ + {"name": 0, "type": "integer"}, + {"name": 1, "type": "string"}, + ], + "primaryKey": [], + "pandas_version": "1.0.0", + }, + "data": [[1, "a"], [2, "b"]], + } + ) - with pytest.raises(ValueError, match="All column names must be strings when using orient='table'"): + with pytest.raises( + ValueError, + match="All column names must be strings when using orient='table'", + ): pd.read_json(StringIO(bad_json), orient="table")