|
11 | 11 | from .pandas_vb_common import setup # noqa |
12 | 12 |
|
13 | 13 |
|
# Maps a benchmark dtype to the groupby methods that must not be timed for
# it. GroupByMethods.setup raises NotImplementedError for these
# combinations, which skips the benchmark.
method_blacklist = {
    'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
               'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
               'var', 'mad', 'describe', 'std'}
}
| 19 | + |
| 20 | + |
14 | 21 | class ApplyDictReturn(object): |
15 | 22 | goal_time = 0.2 |
16 | 23 |
|
@@ -153,6 +160,7 @@ def time_frame_nth_any(self, df): |
def time_frame_nth(self, df):
    """Time ``nth(0)`` on ``df`` grouped by its column labelled ``0``."""
    # The result is discarded on purpose: only the call is being timed.
    df.groupby(0).nth(0)
155 | 162 |
|
| 163 | + |
def time_series_nth_any(self, df):
    """Time ``nth(0, dropna='any')`` on column ``1`` grouped by column ``0``."""
    # The result is discarded on purpose: only the call is being timed.
    df[1].groupby(df[0]).nth(0, dropna='any')
158 | 166 |
|
@@ -369,23 +377,27 @@ class GroupByMethods(object): |
goal_time = 0.2

# Benchmark parameter grid: the first list in ``params`` holds the ``dtype``
# values, the second the ``method`` names; ``setup`` receives one value from
# each, in the order given by ``param_names``.
param_names = ['dtype', 'method']
params = [['int', 'float', 'object'],
          ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
           'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
           'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
           'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
           'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
378 | 386 |
|
def setup(self, dtype, method):
    """Build the bound groupby method for one ``(dtype, method)`` pair.

    Stores ``getattr(df.groupby('key')['values'], method)`` on
    ``self.df_groupby_method``.

    Parameters
    ----------
    dtype : str
        Selects how the grouping key is built: ``'int'``, ``'float'`` or
        ``'object'``.
    method : str
        Name of the groupby method to look up.

    Raises
    ------
    NotImplementedError
        If ``method`` is listed in ``method_blacklist`` for ``dtype``; this
        is how the benchmark is skipped.
    ValueError
        If ``dtype`` is not one of the supported values.
    """
    if method in method_blacklist.get(dtype, {}):
        raise NotImplementedError  # skip benchmark
    ngroups = 1000
    size = ngroups * 2
    rng = np.arange(ngroups)
    values = rng.take(np.random.randint(0, ngroups, size=size))
    if dtype == 'int':
        key = np.random.randint(0, size, size=size)
    elif dtype == 'float':
        key = np.concatenate([np.random.random(ngroups) * 0.1,
                              np.random.random(ngroups) * 10.0])
    elif dtype == 'object':
        # A single constant label, so every row has the same key.
        key = ['foo'] * size
    else:
        # Fail with a clear message instead of an unbound ``key`` below.
        raise ValueError('unsupported dtype: {!r}'.format(dtype))

    df = DataFrame({'values': values, 'key': key})
    self.df_groupby_method = getattr(df.groupby('key')['values'], method)
|
0 commit comments