@@ -98,9 +98,7 @@ def write(self, df, path, compression='snappy',
               coerce_timestamps='ms', **kwargs):
         self.validate_dataframe(df)
         if self._pyarrow_lt_070:
-            self._validate_write_lt_070(
-                df, path, compression, coerce_timestamps, **kwargs
-            )
+            self._validate_write_lt_070(df)
         path, _, _ = get_filepath_or_buffer(path)
 
         if self._pyarrow_lt_060:
@@ -116,48 +114,46 @@ def write(self, df, path, compression='snappy',
 
     def read(self, path, columns=None, **kwargs):
         path, _, _ = get_filepath_or_buffer(path)
-        parquet_file = self.api.parquet.ParquetFile(path)
         if self._pyarrow_lt_070:
-            return self._read_lt_070(path, parquet_file, columns, **kwargs)
+            return self.api.parquet.read_pandas(path, columns=columns,
+                                                **kwargs).to_pandas()
         kwargs['use_pandas_metadata'] = True
-        return parquet_file.read(columns=columns, **kwargs).to_pandas()
+        return self.api.parquet.read_table(path, columns=columns,
+                                           **kwargs).to_pandas()
 
-    def _validate_write_lt_070(self, df, path, compression='snappy',
-                               coerce_timestamps='ms', **kwargs):
+    def _validate_write_lt_070(self, df):
         # Compatibility shim for pyarrow < 0.7.0
         # TODO: Remove in pandas 0.22.0
         from pandas.core.indexes.multi import MultiIndex
         if isinstance(df.index, MultiIndex):
             msg = (
-                "Mulit-index DataFrames are only supported "
+                "Multi-index DataFrames are only supported "
                 "with pyarrow >= 0.7.0"
             )
             raise ValueError(msg)
         # Validate index
         if not isinstance(df.index, Int64Index):
             msg = (
-                "parquet does not support serializing {} for the index;"
-                "you can .reset_index() to make the index into column(s)"
+                "pyarrow < 0.7.0 does not support serializing {} for the "
+                "index; you can .reset_index() to make the index into "
+                "column(s), or install the latest version of pyarrow or "
+                "fastparquet."
             )
             raise ValueError(msg.format(type(df.index)))
         if not df.index.equals(RangeIndex(len(df))):
             raise ValueError(
-                "parquet does not support serializing a non-default index "
-                "for the index; you can .reset_index() to make the index "
-                "into column(s)"
+                "pyarrow < 0.7.0 does not support serializing a non-default "
+                "index; you can .reset_index() to make the index into "
+                "column(s), or install the latest version of pyarrow or "
+                "fastparquet."
             )
         if df.index.name is not None:
             raise ValueError(
-                "parquet does not serialize index meta-data "
-                "on a default index"
+                "pyarrow < 0.7.0 does not serialize indexes with a name; you "
+                "can set the index.name to None or install the latest version "
+                "of pyarrow or fastparquet."
             )
 
-    def _read_lt_070(self, path, parquet_file, columns, **kwargs):
-        # Compatibility shim for pyarrow < 0.7.0
-        # TODO: Remove in pandas 0.22.0
-        kwargs['columns'] = columns
-        return self.api.parquet.read_pandas(path, **kwargs).to_pandas()
-
 
 class FastParquetImpl(BaseImpl):
 
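Not part of the diff: a minimal usage sketch of the behaviour the new read/write paths target, assuming pyarrow >= 0.7.0 is installed. The file name, data, and index name are illustrative only.

```python
# Round-trip sketch (illustrative): with pyarrow >= 0.7.0 a named,
# non-default index is preserved via the parquet pandas metadata; with
# pyarrow < 0.7.0 the write-side shim above raises ValueError instead.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]},
                  index=pd.Index([10, 20, 30], name="my_index"))

df.to_parquet("example.parquet", engine="pyarrow")
result = pd.read_parquet("example.parquet", engine="pyarrow")

# On pyarrow < 0.7.0 the same write fails validation; call .reset_index()
# first, or upgrade pyarrow (or use the fastparquet engine).
```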