Skip to content
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -293,10 +293,11 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^

- Bug in :meth:`DataFrame.unstack` where missing levels led to incorrect index names (:issue:`37510`)
- Bug in :func:`concat` incorrectly casting to ``object`` dtype in some cases when one or more of the operands is empty (:issue:`38843`)
-


Sparse
^^^^^^

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def is_nonempty(x) -> bool:
# marginal given that it would still require shape & dtype calculation and
# np.concatenate which has them both implemented is compiled.
non_empties = [x for x in to_concat if is_nonempty(x)]
if non_empties and axis == 0:
if non_empties:
to_concat = non_empties

typs = _get_dtype_kinds(to_concat)
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/indexing/test_partial.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,21 @@ def test_partial_setting_mixed_dtype(self):
with pytest.raises(ValueError, match=msg):
df.loc[0] = [1, 2, 3]

# TODO: #15657, these are left as object and not coerced
@pytest.mark.parametrize("dtype", [None, "int64", "Int64"])
def test_loc_setitem_expanding_empty(self, dtype):
df = DataFrame(columns=["A", "B"])
df.loc[3] = [6, 7]

exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object")
value = [6, 7]
if dtype == "int64":
value = np.array(value, dtype=dtype)
elif dtype == "Int64":
value = pd.array(value, dtype=dtype)

df.loc[3] = value

exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=dtype)
if dtype is not None:
exp = exp.astype(dtype)
tm.assert_frame_equal(df, exp)

def test_series_partial_set(self):
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/reshape/concat/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,11 +474,12 @@ def test_concat_will_upcast(dt, pdt):
assert x.values.dtype == "float64"


def test_concat_empty_and_non_empty_frame_regression():
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
def test_concat_empty_and_non_empty_frame_regression(dtype):
# GH 18178 regression test
df1 = DataFrame({"foo": [1]})
df1 = DataFrame({"foo": [1]}).astype(dtype)
df2 = DataFrame({"foo": []})
expected = DataFrame({"foo": [1.0]})
expected = df1
result = pd.concat([df1, df2])
tm.assert_frame_equal(result, expected)

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/reshape/concat/test_empty.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,15 @@ def test_concat_empty_series_dtypes_sparse(self):
expected = pd.SparseDtype("object")
assert result.dtype == expected

def test_concat_empty_df_object_dtype(self):
@pytest.mark.parametrize("dtype", ["int64", "Int64"])
def test_concat_empty_df_object_dtype(self, dtype):
# GH 9149
df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
df_1["Row"] = df_1["Row"].astype(dtype)
df_2 = DataFrame(columns=df_1.columns)
result = pd.concat([df_1, df_2], axis=0)
expected = df_1.astype(object)
expected = df_1.copy()
expected["EmptyCol"] = expected["EmptyCol"].astype(object) # TODO: why does only the all-NaN "EmptyCol" end up as object dtype after concat with the empty frame?
tm.assert_frame_equal(result, expected)

def test_concat_empty_dataframe_dtypes(self):
Expand Down