2727from pandas .core .dtypes .cast import (
2828 find_common_type ,
2929 infer_dtype_from_scalar ,
30- maybe_infer_to_datetimelike ,
3130 maybe_promote ,
3231)
3332from pandas .core .dtypes .common import (
3433 DT64NS_DTYPE ,
3534 is_dtype_equal ,
3635 is_extension_array_dtype ,
3736 is_list_like ,
38- is_object_dtype ,
3937)
4038from pandas .core .dtypes .concat import concat_compat
4139from pandas .core .dtypes .dtypes import ExtensionDtype
@@ -328,7 +326,7 @@ def _verify_integrity(self) -> None:
328326 if block .shape [1 :] != mgr_shape [1 :]:
329327 raise construction_error (tot_items , block .shape [1 :], self .axes )
330328 if len (self .items ) != tot_items :
331- raise AssertionError (
329+ raise ValueError (
332330 "Number of manager items must equal union of "
333331 f"block items\n # manager items: { len (self .items )} , # "
334332 f"tot_items: { tot_items } "
@@ -1671,48 +1669,14 @@ def create_block_manager_from_arrays(
16711669 # ensure we dont have any PandasArrays when we call get_block_type
16721670 # Note: just calling extract_array breaks tests that patch PandasArray._typ.
16731671 arrays = [x if not isinstance (x , ABCPandasArray ) else x .to_numpy () for x in arrays ]
1672+ blocks = _form_blocks (arrays , names , axes )
16741673 try :
1675- blocks = _form_blocks (arrays , names , axes )
1676- mgr = BlockManager (blocks , axes )
1677- mgr ._consolidate_inplace ()
1678- return mgr
1679- except ValueError as e :
1680- raise construction_error (len (arrays ), arrays [0 ].shape , axes , e )
1681-
1682-
1683- def create_block_manager_from_array (
1684- array , axes : List [Index ], dtype : Optional [Dtype ] = None
1685- ) -> BlockManager :
1686- assert isinstance (axes , list )
1687- assert all (isinstance (x , Index ) for x in axes )
1688-
1689- # ensure we dont have any PandasArrays when we call get_block_type
1690- # Note: just calling extract_array breaks tests that patch PandasArray._typ.
1691- array = array if not isinstance (array , ABCPandasArray ) else array .to_numpy ()
1692-
1693- try :
1694- # if we don't have a dtype specified, then try to convert objects
1695- # on the entire block; this is to convert if we have datetimelike's
1696- # embedded in an object type
1697- if dtype is None and is_object_dtype (array .dtype ):
1698- maybe_datetime = [
1699- maybe_infer_to_datetimelike (instance ) for instance in array
1700- ]
1701- # don't convert (and copy) the objects if no type conversion occurs
1702- if any (
1703- not is_dtype_equal (instance .dtype , array .dtype )
1704- for instance in maybe_datetime
1705- ):
1706- blocks = _form_blocks (maybe_datetime , axes [0 ], axes )
1707- else :
1708- blocks = [make_block (array , slice (0 , len (axes [0 ])))]
1709- else :
1710- blocks = [make_block (array , slice (0 , len (axes [0 ])), dtype = dtype )]
17111674 mgr = BlockManager (blocks , axes )
17121675 mgr ._consolidate_inplace ()
17131676 return mgr
17141677 except ValueError as e :
1715- raise construction_error (array .shape [0 ], array .shape [1 :], axes , e )
1678+ tot_items = sum (b .shape [0 ] for b in blocks )
1679+ raise construction_error (tot_items , blocks [0 ].shape [1 :], axes , e )
17161680
17171681
17181682def construction_error (tot_items , block_shape , axes , e = None ):
@@ -1743,32 +1707,41 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
17431707 # put "leftover" items in float bucket, where else?
17441708 # generalize?
17451709
1746- if len (arrays ) != len (names ):
1747- raise ValueError (
1748- f"Number of arrays ({ len (arrays )} ) "
1749- f"does not match index length ({ len (names )} )"
1750- )
1751-
17521710 items_dict : DefaultDict [str , List ] = defaultdict (list )
17531711 extra_locs = []
17541712
1755- names_idx = names
1756- if names_idx .equals (axes [0 ]):
1757- names_indexer = np .arange (len (names_idx ))
1713+ if len (arrays ) == 1 :
1714+ first = arrays [0 ]
1715+ block_type = get_block_type (first )
1716+ if first .ndim == 1 :
1717+ end = 1
1718+ else :
1719+ end = len (first )
1720+ items_dict [block_type .__name__ ].append (((0 , end ), first ))
17581721 else :
1759- assert names_idx .intersection (axes [0 ]).is_unique
1760- names_indexer = names_idx .get_indexer_for (axes [0 ])
1761-
1762- for i , name_idx in enumerate (names_indexer ):
1763- if name_idx == - 1 :
1764- extra_locs .append (i )
1765- continue
1722+ names_idx = names
1723+ if names_idx .equals (axes [0 ]):
1724+ names_indexer = np .arange (len (names_idx ))
1725+ else :
1726+ assert names_idx .intersection (axes [0 ]).is_unique
1727+ names_indexer = names_idx .get_indexer_for (axes [0 ])
1728+
1729+ i = 0
1730+ for name_idx in names_indexer :
1731+ if name_idx == - 1 :
1732+ extra_locs .append (i )
1733+ i += 1
1734+ continue
17661735
1767- k = names [name_idx ]
1768- v = arrays [name_idx ]
1736+ v = arrays [name_idx ]
17691737
1770- block_type = get_block_type (v )
1771- items_dict [block_type .__name__ ].append ((i , k , v ))
1738+ if v .ndim == 2 :
1739+ ei = i + v .shape [1 ]
1740+ else :
1741+ ei = i + 1
1742+ block_type = get_block_type (v )
1743+ items_dict [block_type .__name__ ].append (((i , ei ), v ))
1744+ i = ei
17721745
17731746 blocks : List [Block ] = []
17741747 if len (items_dict ["FloatBlock" ]):
@@ -1789,8 +1762,8 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
17891762
17901763 if len (items_dict ["DatetimeTZBlock" ]):
17911764 dttz_blocks = [
1792- make_block (array , klass = DatetimeTZBlock , placement = i , ndim = 2 )
1793- for i , _ , array in items_dict ["DatetimeTZBlock" ]
1765+ make_block (array , klass = DatetimeTZBlock , placement = slice ( * i ) , ndim = 2 )
1766+ for i , array in items_dict ["DatetimeTZBlock" ]
17941767 ]
17951768 blocks .extend (dttz_blocks )
17961769
@@ -1800,23 +1773,25 @@ def _form_blocks(arrays, names: Index, axes) -> List[Block]:
18001773
18011774 if len (items_dict ["CategoricalBlock" ]) > 0 :
18021775 cat_blocks = [
1803- make_block (array , klass = CategoricalBlock , placement = i , ndim = 2 )
1804- for i , _ , array in items_dict ["CategoricalBlock" ]
1776+ make_block (array , klass = CategoricalBlock , placement = slice ( * i ) , ndim = 2 )
1777+ for i , array in items_dict ["CategoricalBlock" ]
18051778 ]
18061779 blocks .extend (cat_blocks )
18071780
18081781 if len (items_dict ["ExtensionBlock" ]):
18091782 external_blocks = [
1810- make_block (array , klass = ExtensionBlock , placement = i , ndim = 2 )
1811- for i , _ , array in items_dict ["ExtensionBlock" ]
1783+ make_block (array , klass = ExtensionBlock , placement = slice ( * i ) , ndim = 2 )
1784+ for i , array in items_dict ["ExtensionBlock" ]
18121785 ]
18131786
18141787 blocks .extend (external_blocks )
18151788
18161789 if len (items_dict ["ObjectValuesExtensionBlock" ]):
18171790 external_blocks = [
1818- make_block (array , klass = ObjectValuesExtensionBlock , placement = i , ndim = 2 )
1819- for i , _ , array in items_dict ["ObjectValuesExtensionBlock" ]
1791+ make_block (
1792+ array , klass = ObjectValuesExtensionBlock , placement = slice (* i ), ndim = 2
1793+ )
1794+ for i , array in items_dict ["ObjectValuesExtensionBlock" ]
18201795 ]
18211796
18221797 blocks .extend (external_blocks )
@@ -1849,10 +1824,10 @@ def _simple_blockify(tuples, dtype) -> List[Block]:
18491824 return [block ]
18501825
18511826
1852- def _multi_blockify (tuples , dtype : Optional [ Dtype ] = None ):
1827+ def _multi_blockify (tuples ):
18531828 """ return an array of blocks that potentially have different dtypes """
18541829 # group by dtype
1855- grouper = itertools .groupby (tuples , lambda x : x [2 ].dtype )
1830+ grouper = itertools .groupby (tuples , lambda x : x [1 ].dtype )
18561831
18571832 new_blocks = []
18581833 for dtype , tup_block in grouper :
@@ -1880,16 +1855,25 @@ def _shape_compat(x) -> Shape:
18801855 else :
18811856 return x .shape
18821857
1883- placement , names , arrays = zip (* tuples )
1858+ placement , arrays = zip (* tuples )
18841859
18851860 first = arrays [0 ]
1886- shape = (len (arrays ),) + _shape_compat (first )
18871861
1862+ if len (arrays ) == 1 :
1863+ if dtype is None or is_dtype_equal (first .dtype , dtype ):
1864+ arr = _asarray_compat (first )
1865+ else :
1866+ arr = _asarray_compat (first ).astype (dtype )
1867+ if len (_shape_compat (first )) < 2 :
1868+ arr = arr .reshape (1 , - 1 )
1869+ return arr , slice (* placement [0 ])
1870+
1871+ shape = (len (arrays ),) + _shape_compat (first )
18881872 stacked = np .empty (shape , dtype = dtype )
18891873 for i , arr in enumerate (arrays ):
18901874 stacked [i ] = _asarray_compat (arr )
18911875
1892- return stacked , placement
1876+ return stacked , list ( itertools . chain ( * ( list ( range ( * p )) for p in placement )))
18931877
18941878
18951879def _interleaved_dtype (blocks : Sequence [Block ]) -> Optional [DtypeObj ]:
0 commit comments