22
33import numpy as np
44
5- import pandas as pd
65from pandas .core .arrays import ExtensionArray
76
8- from ._utils import refactorize
9-
107
118class NumPyBackedExtensionArrayMixin (ExtensionArray ):
129 @property
@@ -18,6 +15,10 @@ def dtype(self):
1815 def _constructor_from_sequence (cls , scalars ):
1916 return cls (scalars )
2017
18+ @classmethod
19+ def _from_factorized (cls , values , original ):
20+ return cls (values )
21+
@property
def shape(self):
    """Return the array shape as a one-element tuple.

    The single entry is ``len(self.data)``.
    """
    return (len(self.data),)
@@ -68,65 +69,3 @@ def unique(self):
6869 _ , indices = np .unique (self .data , return_index = True )
6970 data = self .data .take (np .sort (indices ))
7071 return self ._from_ndarray (data )
71-
def factorize(self, na_sentinel=-1):
    """Factorize an IPArray into integer labels and unique values.

    Calling :meth:`pandas.Series.factorize` or :meth:`pandas.factorize`
    will dispatch to this method.

    Parameters
    ----------
    na_sentinel : int, default -1
        The value in `labels` to use for indicating missing values in
        `self`.

    Returns
    -------
    labels : ndarray
        An integer-type ndarray the same length as `self`. Each newly-
        observed value in `self` will be assigned the next integer.
        Missing values in self are assigned `na_sentinel`.
    uniques : IPArray
        The unique values in `self` in order of appearance, not including
        the missing value ``IPv4Address('0.0.0.0')``.

    See Also
    --------
    pandas.factorize, pandas.Series.factorize

    Examples
    --------
    >>> arr = IPArray([2, 2, 0, 1, 2, 2**64 + 1])
    >>> arr
    IPArray(['0.0.0.2', '0.0.0.2', '0.0.0.0', '0.0.0.1',
             '0.0.0.2', '::1:0:0:0:1'])

    >>> labels, uniques = arr.factorize()
    >>> labels
    array([ 0,  0, -1,  1,  0,  2])

    Notice that `uniques` does not include the missing value.

    >>> uniques
    IPArray(['0.0.0.2', '0.0.0.1', '::1:0:0:0:1'])
    """
    # The plan: factorize `self.data` directly, which has two unfortunate
    # issues:
    #   1. It requires casting to object.
    #   2. It gets the NA logic wrong, since (0, 0) isn't NA to pandas.
    # Nothing can be done about 1 for now. For 2, the result is patched up
    # with a little post-factorization cleanup below.
    labels, uniques = pd.factorize(self.data)
    mask = self.isna()
    any_na = mask.any()

    if any_na:
        # argmax on a boolean mask gives the position of the first True,
        # i.e. the first missing entry.
        first_na = mask.argmax()
        # Rewrites `labels` in place.
        refactorize(labels, first_na, na_sentinel=na_sentinel)

    # `uniques` is an ndarray of tuples: convert to our record type, then
    # wrap it in the same array class as `self`.
    uniques = type(self)(uniques.astype(self.dtype._record_type))
    if any_na:
        # The missing value may still be among the uniques; drop it.
        uniques = uniques[~uniques.isna()]
    return labels, uniques
0 commit comments