preserve kwargs order on assign func for py36plus - #14207 (#17632)

bobhaffner · jreback · commit 965c1c89b6df · 2017-09-24T09:22:13.000-04:00
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
@@ -162,6 +162,7 @@ Other Enhancements
 - :func:`MultiIndex.is_monotonic_decreasing` has been implemented.  Previously returned ``False`` in all cases. (:issue:`16554`)
 - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`)
 - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
+- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`)
 
 
 .. _whatsnew_0210.api_breaking:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -82,6 +82,7 @@
 from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u,
                            OrderedDict, raise_with_traceback)
 from pandas import compat
+from pandas.compat import PY36
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender, Substitution
 from pandas.util._validators import validate_bool_kwarg
@@ -2575,12 +2576,12 @@ def assign(self, **kwargs):
 
         Notes
         -----
-        Since ``kwargs`` is a dictionary, the order of your
-        arguments may not be preserved. To make things predicatable,
-        the columns are inserted in alphabetical order, at the end of
-        your DataFrame. Assigning multiple columns within the same
-        ``assign`` is possible, but you cannot reference other columns
-        created within the same ``assign`` call.
+        For Python 3.6 and above, the columns are inserted in the order of
+        ``**kwargs``. For Python 3.5 and earlier, since ``**kwargs`` is
+        unordered, the columns are inserted in alphabetical order at the end
+        of your DataFrame. Assigning multiple columns within the same
+        ``assign`` is possible, but you cannot reference other columns
+        created within the same ``assign`` call.
 
         Examples
         --------
@@ -2620,14 +2621,18 @@ def assign(self, **kwargs):
         data = self.copy()
 
         # do all calculations first...
-        results = {}
+        results = OrderedDict()
         for k, v in kwargs.items():
             results[k] = com._apply_if_callable(v, data)
 
+        # preserve order for 3.6 and later, but sort by key for 3.5 and earlier
+        if PY36:
+            results = results.items()
+        else:
+            results = sorted(results.items())
         # ... and then assign
-        for k, v in sorted(results.items()):
+        for k, v in results:
             data[k] = v
-
         return data
 
     def _sanitize_column(self, key, value, broadcast=True):
diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py
@@ -4,6 +4,7 @@
 import pytest
 from pandas.compat import range, lrange
 import numpy as np
+from pandas.compat import PY36
 
 from pandas import DataFrame, Series, Index, MultiIndex
 
@@ -61,14 +62,23 @@ def test_assign_multiple(self):
                               [3, 6, 9, 3, 6]], columns=list('ABCDE'))
         assert_frame_equal(result, expected)
 
-    def test_assign_alphabetical(self):
+    def test_assign_order(self):
         # GH 9818
         df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
         result = df.assign(D=df.A + df.B, C=df.A - df.B)
-        expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]],
-                             columns=list('ABCD'))
+
+        if PY36:
+            expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]],
+                                 columns=list('ABDC'))
+        else:
+            expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]],
+                                 columns=list('ABCD'))
         assert_frame_equal(result, expected)
         result = df.assign(C=df.A - df.B, D=df.A + df.B)
+
+        expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]],
+                             columns=list('ABCD'))
+
         assert_frame_equal(result, expected)
 
     def test_assign_bad(self):