1- import os
2- import pytest
3-
41import numpy as np
5- from pandas . compat import zip
2+ import pytest
63
74import pandas as pd
8- from pandas import (DataFrame , Series , isna , to_datetime , DatetimeIndex , Index ,
9- Timestamp , Interval , IntervalIndex , Categorical ,
10- cut , qcut , date_range , timedelta_range , NaT ,
11- TimedeltaIndex )
12- from pandas .tseries .offsets import Nano , Day
13- import pandas .util .testing as tm
5+ from pandas import (
6+ Categorical , DataFrame , DatetimeIndex , Index , Interval , IntervalIndex ,
7+ Series , TimedeltaIndex , Timestamp , cut , date_range , isna , qcut ,
8+ timedelta_range , to_datetime )
149from pandas .api .types import CategoricalDtype as CDT
15-
16- from pandas .core .algorithms import quantile
1710import pandas .core .reshape .tile as tmod
11+ import pandas .util .testing as tm
1812
1913
2014def test_simple ():
@@ -211,44 +205,6 @@ def test_inf_handling():
211205 assert result_ser [0 ] == Interval (- np .inf , 2 )
212206
213207
214- def test_qcut ():
215- arr = np .random .randn (1000 )
216-
217- # We store the bins as Index that have been
218- # rounded to comparisons are a bit tricky.
219- labels , bins = qcut (arr , 4 , retbins = True )
220- ex_bins = quantile (arr , [0 , .25 , .5 , .75 , 1. ])
221-
222- result = labels .categories .left .values
223- assert np .allclose (result , ex_bins [:- 1 ], atol = 1e-2 )
224-
225- result = labels .categories .right .values
226- assert np .allclose (result , ex_bins [1 :], atol = 1e-2 )
227-
228- ex_levels = cut (arr , ex_bins , include_lowest = True )
229- tm .assert_categorical_equal (labels , ex_levels )
230-
231-
232- def test_qcut_bounds ():
233- arr = np .random .randn (1000 )
234-
235- factor = qcut (arr , 10 , labels = False )
236- assert len (np .unique (factor )) == 10
237-
238-
239- def test_qcut_specify_quantiles ():
240- arr = np .random .randn (100 )
241- factor = qcut (arr , [0 , .25 , .5 , .75 , 1. ])
242-
243- expected = qcut (arr , 4 )
244- tm .assert_categorical_equal (factor , expected )
245-
246-
247- def test_qcut_all_bins_same ():
248- with pytest .raises (ValueError , match = "edges.*unique" ):
249- qcut ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], 3 )
250-
251-
252208def test_cut_out_of_bounds ():
253209 arr = np .random .randn (100 )
254210 result = cut (arr , [- 1 , 0 , 1 ])
@@ -286,31 +242,6 @@ def test_cut_pass_labels_compat():
286242 tm .assert_categorical_equal (result , exp )
287243
288244
289- def test_qcut_include_lowest ():
290- values = np .arange (10 )
291- ii = qcut (values , 4 )
292-
293- ex_levels = IntervalIndex ([Interval (- 0.001 , 2.25 ), Interval (2.25 , 4.5 ),
294- Interval (4.5 , 6.75 ), Interval (6.75 , 9 )])
295- tm .assert_index_equal (ii .categories , ex_levels )
296-
297-
298- def test_qcut_nas ():
299- arr = np .random .randn (100 )
300- arr [:20 ] = np .nan
301-
302- result = qcut (arr , 4 )
303- assert isna (result [:20 ]).all ()
304-
305-
306- def test_qcut_index ():
307- result = qcut ([0 , 2 ], 2 )
308- intervals = [Interval (- 0.001 , 1 ), Interval (1 , 2 )]
309-
310- expected = Categorical (intervals , ordered = True )
311- tm .assert_categorical_equal (result , expected )
312-
313-
314245@pytest .mark .parametrize ("x" , [np .arange (11. ), np .arange (11. ) / 1e10 ])
315246def test_round_frac_just_works (x ):
316247 # It works.
@@ -329,30 +260,6 @@ def test_round_frac(val, precision, expected):
329260 assert result == expected
330261
331262
332- def test_qcut_binning_issues (datapath ):
333- # see gh-1978, gh-1979
334- cut_file = datapath (os .path .join ("reshape" , "data" , "cut_data.csv" ))
335- arr = np .loadtxt (cut_file )
336- result = qcut (arr , 20 )
337-
338- starts = []
339- ends = []
340-
341- for lev in np .unique (result ):
342- s = lev .left
343- e = lev .right
344- assert s != e
345-
346- starts .append (float (s ))
347- ends .append (float (e ))
348-
349- for (sp , sn ), (ep , en ) in zip (zip (starts [:- 1 ], starts [1 :]),
350- zip (ends [:- 1 ], ends [1 :])):
351- assert sp < sn
352- assert ep < en
353- assert ep <= sn
354-
355-
356263def test_cut_return_intervals ():
357264 ser = Series ([0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ])
358265 result = cut (ser , 3 )
@@ -365,17 +272,6 @@ def test_cut_return_intervals():
365272 tm .assert_series_equal (result , expected )
366273
367274
368- def test_qcut_return_intervals ():
369- ser = Series ([0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ])
370- res = qcut (ser , [0 , 0.333 , 0.666 , 1 ])
371-
372- exp_levels = np .array ([Interval (- 0.001 , 2.664 ),
373- Interval (2.664 , 5.328 ), Interval (5.328 , 8 )])
374- exp = Series (exp_levels .take ([0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 , 2 ])).astype (
375- CDT (ordered = True ))
376- tm .assert_series_equal (res , exp )
377-
378-
379275def test_series_ret_bins ():
380276 # see gh-8589
381277 ser = Series (np .arange (4 ))
@@ -406,47 +302,6 @@ def test_cut_duplicates_bin(kwargs, msg):
406302 tm .assert_series_equal (result , expected )
407303
408304
409- @pytest .mark .parametrize ("kwargs,msg" , [
410- (dict (duplicates = "drop" ), None ),
411- (dict (), "Bin edges must be unique" ),
412- (dict (duplicates = "raise" ), "Bin edges must be unique" ),
413- (dict (duplicates = "foo" ), "invalid value for 'duplicates' parameter" )
414- ])
415- def test_qcut_duplicates_bin (kwargs , msg ):
416- # see gh-7751
417- values = [0 , 0 , 0 , 0 , 1 , 2 , 3 ]
418-
419- if msg is not None :
420- with pytest .raises (ValueError , match = msg ):
421- qcut (values , 3 , ** kwargs )
422- else :
423- result = qcut (values , 3 , ** kwargs )
424- expected = IntervalIndex ([Interval (- 0.001 , 1 ), Interval (1 , 3 )])
425- tm .assert_index_equal (result .categories , expected )
426-
427-
428- @pytest .mark .parametrize ("data,start,end" , [
429- (9.0 , 8.999 , 9.0 ),
430- (0.0 , - 0.001 , 0.0 ),
431- (- 9.0 , - 9.001 , - 9.0 ),
432- ])
433- @pytest .mark .parametrize ("length" , [1 , 2 ])
434- @pytest .mark .parametrize ("labels" , [None , False ])
435- def test_single_quantile (data , start , end , length , labels ):
436- # see gh-15431
437- ser = Series ([data ] * length )
438- result = qcut (ser , 1 , labels = labels )
439-
440- if labels is None :
441- intervals = IntervalIndex ([Interval (start , end )] *
442- length , closed = "right" )
443- expected = Series (intervals ).astype (CDT (ordered = True ))
444- else :
445- expected = Series ([0 ] * length )
446-
447- tm .assert_series_equal (result , expected )
448-
449-
450305@pytest .mark .parametrize ("data" , [9.0 , - 9.0 , 0.0 ])
451306@pytest .mark .parametrize ("length" , [1 , 2 ])
452307def test_single_bin (data , length ):
@@ -474,21 +329,6 @@ def test_cut_read_only(array_1_writeable, array_2_writeable):
474329 cut (hundred_elements , array_2 ))
475330
476331
477- @pytest .mark .parametrize ("ser" , [
478- Series (DatetimeIndex (["20180101" , NaT , "20180103" ])),
479- Series (TimedeltaIndex (["0 days" , NaT , "2 days" ]))],
480- ids = lambda x : str (x .dtype ))
481- def test_qcut_nat (ser ):
482- # see gh-19768
483- intervals = IntervalIndex .from_tuples ([
484- (ser [0 ] - Nano (), ser [2 ] - Day ()),
485- np .nan , (ser [2 ] - Day (), ser [2 ])])
486- expected = Series (Categorical (intervals , ordered = True ))
487-
488- result = qcut (ser , 2 )
489- tm .assert_series_equal (result , expected )
490-
491-
492332@pytest .mark .parametrize ("conv" , [
493333 lambda v : Timestamp (v ),
494334 lambda v : to_datetime (v ),
@@ -558,24 +398,6 @@ def test_datetime_tz_cut(bins, box):
558398 tm .assert_series_equal (result , expected )
559399
560400
561- @pytest .mark .parametrize ("bins" , [3 , np .linspace (0 , 1 , 4 )])
562- def test_datetime_tz_qcut (bins ):
563- # see gh-19872
564- tz = "US/Eastern"
565- ser = Series (date_range ("20130101" , periods = 3 , tz = tz ))
566-
567- result = qcut (ser , bins )
568- expected = Series (IntervalIndex ([
569- Interval (Timestamp ("2012-12-31 23:59:59.999999999" , tz = tz ),
570- Timestamp ("2013-01-01 16:00:00" , tz = tz )),
571- Interval (Timestamp ("2013-01-01 16:00:00" , tz = tz ),
572- Timestamp ("2013-01-02 08:00:00" , tz = tz )),
573- Interval (Timestamp ("2013-01-02 08:00:00" , tz = tz ),
574- Timestamp ("2013-01-03 00:00:00" , tz = tz ))])).astype (
575- CDT (ordered = True ))
576- tm .assert_series_equal (result , expected )
577-
578-
579401def test_datetime_nan_error ():
580402 msg = "bins must be of datetime64 dtype"
581403
@@ -623,15 +445,3 @@ def test_timedelta_cut_roundtrip():
623445 "2 days 00:00:00" ,
624446 "3 days 00:00:00" ])
625447 tm .assert_index_equal (result_bins , expected_bins )
626-
627-
628- @pytest .mark .parametrize ("arg,expected_bins" , [
629- [timedelta_range ("1day" , periods = 3 ),
630- TimedeltaIndex (["1 days" , "2 days" , "3 days" ])],
631- [date_range ("20180101" , periods = 3 ),
632- DatetimeIndex (["2018-01-01" , "2018-01-02" , "2018-01-03" ])]])
633- def test_date_like_qcut_bins (arg , expected_bins ):
634- # see gh-19891
635- ser = Series (arg )
636- result , result_bins = qcut (ser , 2 , retbins = True )
637- tm .assert_index_equal (result_bins , expected_bins )
0 commit comments