-
-
Couldn't load subscription status.
- Fork 33.3k
Closed
Labels
interpreter-core (Objects, Python, Grammar, and Parser dirs); type-feature (A feature request or enhancement)
Description
Feature or enhancement
Right now PyUnicode_Count from
cpython/Objects/unicodeobject.c
Lines 8968 to 9040 in cbdeda8
| Py_ssize_t | |
| PyUnicode_Count(PyObject *str, | |
| PyObject *substr, | |
| Py_ssize_t start, | |
| Py_ssize_t end) | |
| { | |
| Py_ssize_t result; | |
| int kind1, kind2; | |
| const void *buf1 = NULL, *buf2 = NULL; | |
| Py_ssize_t len1, len2; | |
| if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0) | |
| return -1; | |
| kind1 = PyUnicode_KIND(str); | |
| kind2 = PyUnicode_KIND(substr); | |
| if (kind1 < kind2) | |
| return 0; | |
| len1 = PyUnicode_GET_LENGTH(str); | |
| len2 = PyUnicode_GET_LENGTH(substr); | |
| ADJUST_INDICES(start, end, len1); | |
| if (end - start < len2) | |
| return 0; | |
| buf1 = PyUnicode_DATA(str); | |
| buf2 = PyUnicode_DATA(substr); | |
| if (kind2 != kind1) { | |
| buf2 = unicode_askind(kind2, buf2, len2, kind1); | |
| if (!buf2) | |
| goto onError; | |
| } | |
| switch (kind1) { | |
| case PyUnicode_1BYTE_KIND: | |
| if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr)) | |
| result = asciilib_count( | |
| ((const Py_UCS1*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| else | |
| result = ucs1lib_count( | |
| ((const Py_UCS1*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| break; | |
| case PyUnicode_2BYTE_KIND: | |
| result = ucs2lib_count( | |
| ((const Py_UCS2*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| break; | |
| case PyUnicode_4BYTE_KIND: | |
| result = ucs4lib_count( | |
| ((const Py_UCS4*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| break; | |
| default: | |
| Py_UNREACHABLE(); | |
| } | |
| assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); | |
| if (kind2 != kind1) | |
| PyMem_Free((void *)buf2); | |
| return result; | |
| onError: | |
| assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); | |
| if (kind2 != kind1) | |
| PyMem_Free((void *)buf2); | |
| return -1; | |
| } |
unicode_count from cpython/Objects/unicodeobject.c
Lines 10854 to 10916 in cbdeda8
| static PyObject * | |
| unicode_count(PyObject *self, PyObject *args) | |
| { | |
| PyObject *substring = NULL; /* initialize to fix a compiler warning */ | |
| Py_ssize_t start = 0; | |
| Py_ssize_t end = PY_SSIZE_T_MAX; | |
| PyObject *result; | |
| int kind1, kind2; | |
| const void *buf1, *buf2; | |
| Py_ssize_t len1, len2, iresult; | |
| if (!parse_args_finds_unicode("count", args, &substring, &start, &end)) | |
| return NULL; | |
| kind1 = PyUnicode_KIND(self); | |
| kind2 = PyUnicode_KIND(substring); | |
| if (kind1 < kind2) | |
| return PyLong_FromLong(0); | |
| len1 = PyUnicode_GET_LENGTH(self); | |
| len2 = PyUnicode_GET_LENGTH(substring); | |
| ADJUST_INDICES(start, end, len1); | |
| if (end - start < len2) | |
| return PyLong_FromLong(0); | |
| buf1 = PyUnicode_DATA(self); | |
| buf2 = PyUnicode_DATA(substring); | |
| if (kind2 != kind1) { | |
| buf2 = unicode_askind(kind2, buf2, len2, kind1); | |
| if (!buf2) | |
| return NULL; | |
| } | |
| switch (kind1) { | |
| case PyUnicode_1BYTE_KIND: | |
| iresult = ucs1lib_count( | |
| ((const Py_UCS1*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| break; | |
| case PyUnicode_2BYTE_KIND: | |
| iresult = ucs2lib_count( | |
| ((const Py_UCS2*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| break; | |
| case PyUnicode_4BYTE_KIND: | |
| iresult = ucs4lib_count( | |
| ((const Py_UCS4*)buf1) + start, end - start, | |
| buf2, len2, PY_SSIZE_T_MAX | |
| ); | |
| break; | |
| default: | |
| Py_UNREACHABLE(); | |
| } | |
| result = PyLong_FromSsize_t(iresult); | |
| assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring))); | |
| if (kind2 != kind1) | |
| PyMem_Free((void *)buf2); | |
| return result; | |
| } |
They can be unified, because they do the same thing.
Pitch
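Apparently unicode_count missed an optimization in 2011: PyUnicode_Count calls asciilib_count when both strings are ASCII, whereas unicode_count always calls ucs1lib_count for 1-byte strings. Otherwise the two functions are equivalent (except for argument parsing and converting the return value). Merging them could add the optimization to unicode_count.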
If you want to work on that, note that there's also anylib_count that duplicates the main switch.
Previous discussion
Link: #96929
PR in the works.
Metadata
Metadata
Assignees
Labels
interpreter-core (Objects, Python, Grammar, and Parser dirs); type-feature (A feature request or enhancement)