1111import pytest
1212
1313from pandas import (DataFrame , compat , option_context )
14- from pandas .compat import StringIO , lrange , u
14+ from pandas .compat import StringIO , lrange , u , PYPY
1515import pandas .io .formats .format as fmt
1616import pandas as pd
1717
@@ -323,23 +323,6 @@ def test_info_memory_usage(self):
323323 # excluded column with object dtype, so estimate is accurate
324324 assert not re .match (r"memory usage: [^+]+\+" , res [- 1 ])
325325
326- df_with_object_index = pd .DataFrame ({'a' : [1 ]}, index = ['foo' ])
327- df_with_object_index .info (buf = buf , memory_usage = True )
328- res = buf .getvalue ().splitlines ()
329- assert re .match (r"memory usage: [^+]+\+" , res [- 1 ])
330-
331- df_with_object_index .info (buf = buf , memory_usage = 'deep' )
332- res = buf .getvalue ().splitlines ()
333- assert re .match (r"memory usage: [^+]+$" , res [- 1 ])
334-
335- assert (df_with_object_index .memory_usage (
336- index = True , deep = True ).sum () > df_with_object_index .memory_usage (
337- index = True ).sum ())
338-
339- df_object = pd .DataFrame ({'a' : ['a' ]})
340- assert (df_object .memory_usage (deep = True ).sum () >
341- df_object .memory_usage ().sum ())
342-
343326 # Test a DataFrame with duplicate columns
344327 dtypes = ['int64' , 'int64' , 'int64' , 'float64' ]
345328 data = {}
@@ -349,6 +332,15 @@ def test_info_memory_usage(self):
349332 df = DataFrame (data )
350333 df .columns = dtypes
351334
335+ df_with_object_index = pd .DataFrame ({'a' : [1 ]}, index = ['foo' ])
336+ df_with_object_index .info (buf = buf , memory_usage = True )
337+ res = buf .getvalue ().splitlines ()
338+ assert re .match (r"memory usage: [^+]+\+" , res [- 1 ])
339+
340+ df_with_object_index .info (buf = buf , memory_usage = 'deep' )
341+ res = buf .getvalue ().splitlines ()
342+ assert re .match (r"memory usage: [^+]+$" , res [- 1 ])
343+
352344 # Ensure df size is as expected
353345 # (cols * rows * bytes) + index size
354346 df_size = df .memory_usage ().sum ()
@@ -377,9 +369,47 @@ def test_info_memory_usage(self):
377369 df .memory_usage (index = True )
378370 df .index .values .nbytes
379371
372+ mem = df .memory_usage (deep = True ).sum ()
373+
374+ @pytest .mark .skipif (PYPY , reason = "on PyPy deep=True does not change result" )
375+ def test_info_memory_usage_deep_not_pypy (self ):
376+ buf = StringIO ()
377+ df_with_object_index = pd .DataFrame ({'a' : [1 ]}, index = ['foo' ])
378+ df_with_object_index .info (buf = buf , memory_usage = True )
379+ assert (df_with_object_index .memory_usage (
380+ index = True , deep = True ).sum () >
381+ df_with_object_index .memory_usage (
382+ index = True ).sum ())
383+
384+ df_object = pd .DataFrame ({'a' : ['a' ]})
385+ assert (df_object .memory_usage (deep = True ).sum () >
386+ df_object .memory_usage ().sum ())
387+
388+ @pytest .mark .skipif (not PYPY , reason = "on PyPy deep=True does not change result" )
389+ def test_info_memory_usage_deep_pypy (self ):
390+ buf = StringIO ()
391+ df_with_object_index = pd .DataFrame ({'a' : [1 ]}, index = ['foo' ])
392+ assert (df_with_object_index .memory_usage (
393+ index = True , deep = True ).sum () ==
394+ df_with_object_index .memory_usage (
395+ index = True ).sum ())
396+
397+ df_object = pd .DataFrame ({'a' : ['a' ]})
398+ assert (df_object .memory_usage (deep = True ).sum () ==
399+ df_object .memory_usage ().sum ())
400+
401+ @pytest .mark .skipif (PYPY , reason = "PyPy getsizeof() fails by design" )
402+ def test_usage_via_getsizeof (self ):
403+ df = DataFrame (
404+ data = 1 ,
405+ index = pd .MultiIndex .from_product (
406+ [['a' ], range (1000 )]),
407+ columns = ['A' ]
408+ )
409+ mem = df .memory_usage (deep = True ).sum ()
380410 # sys.getsizeof will call the .memory_usage with
381411 # deep=True, and add on some GC overhead
382- diff = df . memory_usage ( deep = True ). sum () - sys .getsizeof (df )
412+ diff = mem - sys .getsizeof (df )
383413 assert abs (diff ) < 100
384414
385415 def test_info_memory_usage_qualified (self ):
0 commit comments