
Fix JSON orient='table' issues with numeric column names #25488


Closed
wants to merge 12 commits
19 changes: 12 additions & 7 deletions pandas/io/json/json.py
@@ -162,24 +162,24 @@ def _write(self, obj, orient, double_precision, ensure_ascii,


class JSONTableWriter(FrameWriter):
_default_orient = 'records'
_default_orient = 'values'

def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, index, default_handler=None):
"""
Adds a `schema` attribute with the Table Schema, resets
the index (can't do in caller, because the schema inference needs
to know what the index is, forces orient to records, and forces
to know what the index is, forces orient to values, and forces
date_format to 'iso'.
"""
super(JSONTableWriter, self).__init__(
obj, orient, date_format, double_precision, ensure_ascii,
date_unit, index, default_handler=default_handler)

if date_format != 'iso':
msg = ("Trying to write with `orient='table'` and "
"`date_format='{fmt}'`. Table Schema requires dates "
"to be formatted with `date_format='iso'`"
msg = ("Trying to write with orient='table' and "
"date_format='{fmt}'. Table Schema requires dates "
"to be formatted with date_format='iso'"
.format(fmt=date_format))
raise ValueError(msg)

@@ -211,7 +211,7 @@ def __init__(self, obj, orient, date_format, double_precision,
else:
self.obj = obj.reset_index(drop=False)
self.date_format = 'iso'
self.orient = 'records'
self.orient = 'values'
self.index = index

def _write(self, obj, orient, double_precision, ensure_ascii,
@@ -221,7 +221,12 @@ def _write(self, obj, orient, double_precision, ensure_ascii,
ensure_ascii, date_unit,
iso_dates,
default_handler)
serialized = '{{"schema": {schema}, "data": {data}}}'.format(
# add column names
column_names = dumps(obj.columns)
if len(data) > 2:
column_names = column_names + ','
data = data[0] + column_names + data[1:]
serialized = '{{"schema":{schema},"data":{data}}}'.format(
schema=dumps(self.schema), data=data)
return serialized

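For context, the new `_write` serialises the frame as with `orient='values'` and then splices a header row of column names into the front of that JSON array. A minimal stand-alone sketch of that string surgery, using the standard-library `json` module in place of pandas' own serializer:

```python
from json import dumps

# Stand-ins: `data` is what the parent writer produces for orient='values',
# `columns` are the frame's column labels (numeric labels stay JSON numbers).
columns = [0, 1]
data = dumps([[10, 20], [30, 40]])        # '[[10, 20], [30, 40]]'

column_names = dumps(columns)             # '[0, 1]'
if len(data) > 2:                         # more than the empty array '[]' -> needs a comma
    column_names = column_names + ','
data = data[0] + column_names + data[1:]  # splice the header row after the opening '['

print(data)                               # [[0, 1],[10, 20], [30, 40]]
# The writer then wraps this as '{"schema": <table schema>, "data": <data>}'.
```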
23 changes: 16 additions & 7 deletions pandas/io/json/table_schema.py
@@ -255,7 +255,7 @@ def build_table_schema(data, index=True, primary_key=None, version=True):
schema['primaryKey'] = primary_key

if version:
schema['pandas_version'] = '0.20.0'
schema['pandas_version'] = '0.25.0'
return schema


@@ -296,21 +296,28 @@ def parse_table_schema(json, precise_float):
pandas.read_json
"""
table = loads(json, precise_float=precise_float)
col_order = [field['name'] for field in table['schema']['fields']]
df = DataFrame(table['data'], columns=col_order)[col_order]
version = table['schema']['pandas_version']
if version == '0.20.0':
# Each table row is represented by a dict
col_order = [field['name'] for field in table['schema']['fields']]
df = DataFrame(table['data'], columns=col_order)[col_order]
elif version == '0.25.0':
# Each table row is represented by a list
col_order = table['data'][0]
df = DataFrame(table['data'][1:], columns=col_order)[col_order]

dtypes = {field['name']: convert_json_field_to_pandas_type(field)
for field in table['schema']['fields']}

# Cannot directly use as_type with timezone data on object; raise for now
if any(str(x).startswith('datetime64[ns, ') for x in dtypes.values()):
raise NotImplementedError('table="orient" can not yet read timezone '
'data')
raise NotImplementedError("orient='table' can not yet read timezone "
"data")

# No ISO constructor for Timedelta as of yet, so need to raise
if 'timedelta64' in dtypes.values():
raise NotImplementedError('table="orient" can not yet read '
'ISO-formatted Timedelta data')
raise NotImplementedError("orient='table' can not yet read "
"ISO-formatted Timedelta data")

df = df.astype(dtypes)

@@ -322,5 +329,7 @@ def parse_table_schema(json, precise_float):
else:
df.index.names = [None if x.startswith('level_') else x for x in
df.index.names]
# Reset columns dtype
df.columns = df.columns.values.tolist()

return df
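To make the version dispatch above concrete, here is a small sketch (simplified payloads, plain pandas calls rather than the reader internals) of the two `data` layouts `parse_table_schema` now has to accept:

```python
import pandas as pd

# pandas_version '0.20.0': every row is an object keyed by column name
old_data = [{"index": 0, "a": 1}, {"index": 1, "a": 2}]
col_order = ["index", "a"]                       # taken from schema['fields'] in the real reader
df_old = pd.DataFrame(old_data, columns=col_order)[col_order]

# pandas_version '0.25.0': a header row of column names followed by list rows
new_data = [["index", "a"], [0, 1], [1, 2]]
col_order = new_data[0]                          # taken from the leading header row
df_new = pd.DataFrame(new_data[1:], columns=col_order)[col_order]

assert df_old.equals(df_new)                     # both layouts decode to the same frame
```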
65 changes: 20 additions & 45 deletions pandas/tests/io/json/test_json_table_schema.py
@@ -208,8 +208,8 @@ def test_build_series(self):

expected = OrderedDict([
('schema', schema),
('data', [OrderedDict([('id', 0), ('a', 1)]),
OrderedDict([('id', 1), ('a', 2)])])])
('data', [['id', 'a'], [0, 1], [1, 2]])
])
assert result == expected

def test_to_json(self):
@@ -243,32 +243,15 @@ def test_to_json(self):
'fields': fields,
'primaryKey': ['idx'],
}
data = [
OrderedDict([('idx', 0), ('A', 1), ('B', 'a'),
('C', '2016-01-01T00:00:00.000Z'),
('D', 'P0DT1H0M0S'),
('E', 'a'), ('F', 'a'), ('G', 1.),
('H', '2016-01-01T06:00:00.000Z')
]),
OrderedDict([('idx', 1), ('A', 2), ('B', 'b'),
('C', '2016-01-02T00:00:00.000Z'),
('D', 'P0DT1H1M0S'),
('E', 'b'), ('F', 'b'), ('G', 2.),
('H', '2016-01-02T06:00:00.000Z')
]),
OrderedDict([('idx', 2), ('A', 3), ('B', 'c'),
('C', '2016-01-03T00:00:00.000Z'),
('D', 'P0DT1H2M0S'),
('E', 'c'), ('F', 'c'), ('G', 3.),
('H', '2016-01-03T06:00:00.000Z')
]),
OrderedDict([('idx', 3), ('A', 4), ('B', 'c'),
('C', '2016-01-04T00:00:00.000Z'),
('D', 'P0DT1H3M0S'),
('E', 'c'), ('F', 'c'), ('G', 4.),
('H', '2016-01-04T06:00:00.000Z')
]),
]
data = [['idx', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'],
[0, 1, 'a', '2016-01-01T00:00:00.000Z', 'P0DT1H0M0S', 'a', 'a',
1., '2016-01-01T06:00:00.000Z'],
[1, 2, 'b', '2016-01-02T00:00:00.000Z', 'P0DT1H1M0S', 'b', 'b',
2., '2016-01-02T06:00:00.000Z'],
[2, 3, 'c', '2016-01-03T00:00:00.000Z', 'P0DT1H2M0S', 'c', 'c',
3., '2016-01-03T06:00:00.000Z'],
[3, 4, 'c', '2016-01-04T00:00:00.000Z', 'P0DT1H3M0S', 'c', 'c',
4., '2016-01-04T06:00:00.000Z']]
expected = OrderedDict([('schema', schema), ('data', data)])
assert result == expected

@@ -277,16 +260,14 @@ def test_to_json_float_index(self):
result = data.to_json(orient='table', date_format='iso')
result = json.loads(result, object_pairs_hook=OrderedDict)
result['schema'].pop('pandas_version')

expected = (
OrderedDict([('schema', {
expected = (OrderedDict([
('schema', {
'fields': [{'name': 'index', 'type': 'number'},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']
}),
('data', [OrderedDict([('index', 1.0), ('values', 1)]),
OrderedDict([('index', 2.0), ('values', 1)])])])
)
('data', [['index', 'values'], [1.0, 1], [2.0, 1]])
]))
assert result == expected

def test_to_json_period_index(self):
@@ -300,10 +281,9 @@ def test_to_json_period_index(self):
{'name': 'values', 'type': 'integer'}]

schema = {'fields': fields, 'primaryKey': ['index']}
data = [OrderedDict([('index', '2015-11-01T00:00:00.000Z'),
('values', 1)]),
OrderedDict([('index', '2016-02-01T00:00:00.000Z'),
('values', 1)])]
data = [['index', 'values'],
['2015-11-01T00:00:00.000Z', 1],
['2016-02-01T00:00:00.000Z', 1]]
expected = OrderedDict([('schema', schema), ('data', data)])
assert result == expected

@@ -320,10 +300,7 @@ def test_to_json_categorical_index(self):
'ordered': False},
{'name': 'values', 'type': 'integer'}],
'primaryKey': ['index']}),
('data', [
OrderedDict([('index', 'a'),
('values', 1)]),
OrderedDict([('index', 'b'), ('values', 1)])])])
('data', [['index', 'values'], ['a', 1], ['b', 1]])])
)
assert result == expected

@@ -428,9 +405,7 @@ def test_categorical(self):
expected = OrderedDict([
('schema', {'fields': fields,
'primaryKey': ['idx']}),
('data', [OrderedDict([('idx', 0), ('values', 'a')]),
OrderedDict([('idx', 1), ('values', 'b')]),
OrderedDict([('idx', 2), ('values', 'a')])])])
('data', [['idx', 'values'], [0, 'a'], [1, 'b'], [2, 'a']])])
assert result == expected

@pytest.mark.parametrize('idx,nm,prop', [
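Taken together, the updated expectations describe a lossless round trip for numeric labels. A hedged usage sketch, assuming a pandas build with this patch applied (released pandas at the time did not preserve such labels through `orient='table'`):

```python
import pandas as pd
from pandas.util.testing import assert_frame_equal

expected = pd.DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=[1, 2])

dfjson = expected.to_json(orient='table')   # header row keeps 1 and 2 as JSON numbers
result = pd.read_json(dfjson, orient='table')

assert_frame_equal(result, expected)        # labels come back numeric, not as '1'/'2'
```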
58 changes: 48 additions & 10 deletions pandas/tests/io/json/test_pandas.py
@@ -1197,9 +1197,10 @@ def test_data_frame_size_after_to_json(self):

@pytest.mark.parametrize('index', [None, [1, 2], [1., 2.], ['a', 'b'],
['1', '2'], ['1.', '2.']])
@pytest.mark.parametrize('columns', [['a', 'b'], ['1', '2'], ['1.', '2.']])
@pytest.mark.parametrize('columns', [None, [1, 2], [1., 2.], ['a', 'b'],
Member

So I don't know that we want to do this. Is it valid JSON in the table spec to have column names that are non-string?

Understood you have gotten this to round trip but if it violates the Table spec for JSON then I'd rather raise as commented previously

Contributor Author

@WillAyd as I already commented in #19129, after having tried to start a discussion before doing the PR:

I was suggesting not just "raising a more descriptive ValueError" (sic), but changing the implementation of the JSON serialization for orient='table'.

Could you please tell me where the JSON table spec claims that column names MUST be strings?

I am going to make a longer comment to further justify my PR.

Member

> Could you please tell me where the JSON table spec claims that column names MUST be strings?

Not specific to the table spec as much as just JSON itself. See the description of an object here:

https://json.org

Contributor Author (@albertvillanova, Mar 1, 2019)

@WillAyd you are talking about different things:

  • JSON spec imposes that in a JSON object (composed of key-value pairs), its keys must be strings
  • but we are talking about column names, not JSON object keys

And my question is: where the JSON table spec claims that COLUMN NAMES (not JSON object keys) must be strings?

Contributor

@pwalsh can you comment on this? Is the name field in a Field Descriptor expected to be a string?
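For reference, the distinction being debated can be reproduced with the standard-library `json` module alone: keys of a JSON object are coerced to strings, while elements of a JSON array are not, which is why a header row can carry numeric column names where per-row objects cannot:

```python
import json

print(json.dumps({1: 'x', 2: 'y'}))      # {"1": "x", "2": "y"}  -> object keys become strings
print(json.dumps([[1, 2], ['x', 'y']]))  # [[1, 2], ["x", "y"]]  -> numbers in arrays survive
```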

['1', '2'], ['1.', '2.']])
def test_from_json_to_json_table_index_and_columns(self, index, columns):
# GH25433 GH25435
# GH19129 GH25433 GH25435
expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns)
dfjson = expected.to_json(orient='table')
result = pd.read_json(dfjson, orient='table')
@@ -1229,6 +1230,50 @@ def test_read_json_table_convert_axes_raises(self):
with pytest.raises(ValueError, match=msg):
pd.read_json(dfjson, orient='table', convert_axes=True)

@pytest.mark.parametrize('index, dfjson', [
(None,
'{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"a",'
'"type":"integer"},{"name":"b","type":"number"},{"name":"c",'
'"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},'
'"data":[{"index":0,"a":1,"b":3.0,"c":"5"},{"index":1,"a":2,"b":4.0,'
'"c":"6"}]}'),
([1, 2],
'{"schema":{"fields":[{"name":"index","type":"integer"},{"name":"a",'
'"type":"integer"},{"name":"b","type":"number"},{"name":"c",'
'"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},'
'"data":[{"index":1,"a":1,"b":3.0,"c":"5"},{"index":2,"a":2,"b":4.0,'
'"c":"6"}]}'),
([1., 2.],
'{"schema":{"fields":[{"name":"index","type":"number"},{"name":"a",'
'"type":"integer"},{"name":"b","type":"number"},{"name":"c",'
'"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},'
'"data":[{"index":1.0,"a":1,"b":3.0,"c":"5"},{"index":2.0,"a":2,'
'"b":4.0,"c":"6"}]}'),
(['a', 'b'],
'{"schema":{"fields":[{"name":"index","type":"string"},{"name":"a",'
'"type":"integer"},{"name":"b","type":"number"},{"name":"c",'
'"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},'
'"data":[{"index":"a","a":1,"b":3.0,"c":"5"},{"index":"b","a":2,'
'"b":4.0,"c":"6"}]}'),
(['1', '2'],
'{"schema":{"fields":[{"name":"index","type":"string"},{"name":"a",'
'"type":"integer"},{"name":"b","type":"number"},{"name":"c",'
'"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},'
'"data":[{"index":"1","a":1,"b":3.0,"c":"5"},{"index":"2","a":2,'
'"b":4.0,"c":"6"}]}'),
(['1.', '2.'],
'{"schema":{"fields":[{"name":"index","type":"string"},{"name":"a",'
'"type":"integer"},{"name":"b","type":"number"},{"name":"c",'
'"type":"string"}],"primaryKey":["index"],"pandas_version":"0.20.0"},'
'"data":[{"index":"1.","a":1,"b":3.0,"c":"5"},{"index":"2.","a":2,'
'"b":4.0,"c":"6"}]}')
])
def test_read_json_table_version_0_20_0(self, index, dfjson):
expected = pd.DataFrame([[1, 3., '5'], [2, 4., '6']],
index=index, columns=['a', 'b', 'c'])
result = pd.read_json(dfjson, orient='table')
assert_frame_equal(result, expected)

@pytest.mark.parametrize('data, expected', [
(DataFrame([[1, 2], [4, 5]], columns=['a', 'b']),
{'columns': ['a', 'b'], 'data': [[1, 2], [4, 5]]}),
@@ -1265,16 +1310,9 @@ def test_index_false_to_json_split(self, data, expected):
def test_index_false_to_json_table(self, data):
# GH 17394
# Testing index=False in to_json with orient='table'

result = data.to_json(orient='table', index=False)
result = json.loads(result)

expected = {
'schema': pd.io.json.build_table_schema(data, index=False),
'data': DataFrame(data).to_dict(orient='records')
}

assert result == expected
assert 'primaryKey' not in result['schema']

@pytest.mark.parametrize('orient', [
'records', 'index', 'columns', 'values'