Skip to content
Merged
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 76 additions & 34 deletions Objects/longobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ static inline stwodigits
medium_value(PyLongObject *x)
{
assert(IS_MEDIUM_VALUE(x));
return ((stwodigits)Py_SIZE(x)) * x->ob_digit[0];
Py_ssize_t s = Py_SIZE(x);
stwodigits d = x->ob_digit[0];
return s == 1 ? d : s == -1 ? -d : 0;
}

#define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS)
Expand All @@ -45,7 +47,7 @@ static inline int is_medium_int(stwodigits x)
return x_plus_mask < ((twodigits)PyLong_MASK) + PyLong_BASE;
}

static PyObject *
static inline PyObject *
get_small_int(sdigit ival)
{
assert(IS_SMALL_INT(ival));
Expand All @@ -54,7 +56,7 @@ get_small_int(sdigit ival)
return v;
}

static PyLongObject *
static inline PyLongObject *
maybe_small_long(PyLongObject *v)
{
if (v && IS_MEDIUM_VALUE(v)) {
Expand Down Expand Up @@ -114,14 +116,15 @@ PyLongObject *
_PyLong_New(Py_ssize_t size)
{
PyLongObject *result;
if (size > (Py_ssize_t)MAX_LONG_DIGITS) {
Py_ssize_t size_abs = Py_ABS(size);
if (size_abs > (Py_ssize_t)MAX_LONG_DIGITS) {
PyErr_SetString(PyExc_OverflowError,
"too many digits in integer");
return NULL;
}
/* Fast operations for single digit integers (including zero)
* assume that there is always at least one digit present. */
Py_ssize_t ndigits = size ? size : 1;
Py_ssize_t ndigits = size_abs ? size_abs : 1;
/* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
sizeof(digit)*size. Previous incarnations of this code used
sizeof(PyVarObject) instead of the offsetof, but this risks being
Expand Down Expand Up @@ -4491,38 +4494,65 @@ long_rshift1(PyLongObject *a, Py_ssize_t wordshift, digit remshift)
{
PyLongObject *z = NULL;
Py_ssize_t newsize, hishift, i, j;
digit lomask, himask;
digit lomask, himask, omitmark;

if (Py_SIZE(a) < 0) {
/* Right shifting negative numbers is harder */
PyLongObject *a1, *a2;
a1 = (PyLongObject *) long_invert(a);
if (a1 == NULL)
return NULL;
a2 = (PyLongObject *) long_rshift1(a1, wordshift, remshift);
Py_DECREF(a1);
if (a2 == NULL)
return NULL;
z = (PyLongObject *) long_invert(a2);
Py_DECREF(a2);
/* for a positive integrate x
-x >> m = -(x >> m) when the dropped bits are all zeros,
= -(x >> m) -1 otherwise. */

if (IS_MEDIUM_VALUE(a)) {
stwodigits sval;
if (wordshift > 0) {
return get_small_int(-(Py_SIZE(a) < 0));
}
sval = Py_ARITHMETIC_RIGHT_SHIFT(stwodigits, medium_value(a), remshift);
if (IS_SMALL_INT(sval)) {
return get_small_int((sdigit)sval);
}
z = _PyLong_New(Py_SIZE(a));
z->ob_digit[0] = (digit)(a->ob_digit[0] >> remshift);
if (Py_SIZE(a) < 0 && (a->ob_digit[0] & ~(PyLong_MASK << remshift))) {
z->ob_digit[0] += 1;
}
return (PyObject *)z;
}
else {
newsize = Py_SIZE(a) - wordshift;
if (newsize <= 0)
return PyLong_FromLong(0);
hishift = PyLong_SHIFT - remshift;
lomask = ((digit)1 << hishift) - 1;
himask = PyLong_MASK ^ lomask;
z = _PyLong_New(newsize);
if (z == NULL)
return NULL;
for (i = 0, j = wordshift; i < newsize; i++, j++) {
z->ob_digit[i] = (a->ob_digit[j] >> remshift) & lomask;
if (i+1 < newsize)
z->ob_digit[i] |= (a->ob_digit[j+1] << hishift) & himask;

newsize = Py_ABS(Py_SIZE(a)) - wordshift;
if (newsize <= 0) {
return PyLong_FromLong(0);
}
z = _PyLong_New(newsize);
if (z == NULL) {
return NULL;
}

hishift = PyLong_SHIFT - remshift;
lomask = ((digit)1 << hishift) - 1;
himask = PyLong_MASK ^ lomask;
for (i = 0, j = wordshift; i < newsize; i++, j++) {
z->ob_digit[i] = (a->ob_digit[j] >> remshift) & lomask;
if (i + 1 < newsize) {
z->ob_digit[i] |= (a->ob_digit[j + 1] << hishift) & himask;
}
z = maybe_small_long(long_normalize(z));
}

if (Py_SIZE(a) < 0) {
Py_SET_SIZE(z, -newsize);
omitmark = a->ob_digit[wordshift] & (((digit)1 << remshift) - 1);
for (j = 0; omitmark == 0 && j < wordshift; j++) {
omitmark |= a->ob_digit[j];
}
if (omitmark) {
PyLongObject *z0 = z;
z = (PyLongObject *) long_sub(z0, (PyLongObject *)_PyLong_GetOne());
Py_DECREF(z0);
if (z == NULL) {
return NULL;
}
}
}

z = maybe_small_long(long_normalize(z));
return (PyObject *)z;
}

Expand Down Expand Up @@ -4565,11 +4595,23 @@ _PyLong_Rshift(PyObject *a, size_t shiftby)
static PyObject *
long_lshift1(PyLongObject *a, Py_ssize_t wordshift, digit remshift)
{
/* This version due to Tim Peters */
PyLongObject *z = NULL;
Py_ssize_t oldsize, newsize, i, j;
twodigits accum;

if (wordshift == 0 && IS_MEDIUM_VALUE(a) &&
(a->ob_digit[0] & ~(PyLong_MASK >> remshift)) == 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the (a->ob_digit[0] & ~(PyLong_MASK >> remshift)) == 0 check necessary?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's necessary because (2**20) << 20 won't fit in a single 30/32-bit digit, and this checks for cases like that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I guess it's not necessary if we use _PyLong_FromSTwoDigits

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's necessary to ensure the shifting result fits twodigits or "medium value" before using _PyLong_FromSTwoDigits or something else.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium_value(a) << remshift will always fit in an stwodigits, though. I think it's safe to drop this condition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, Okay. Thanks.

stwodigits sval = (stwodigits)((twodigits)medium_value(a) << remshift);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest splitting into negative and nonnegative cases. It'll be easier to read that way, and a good compiler can probably figure out how to recombine them if that's more efficient for the processor. (And the code as written isn't strictly portable. There are some crazy corner cases in C's integer handling: the left operand of << here doesn't even necessarily have unsigned type, because if twodigits is smaller than int (which is certainly allowed by the standard) then the left operand gets promoted to an int. We're unlikely to run into this case in practice, but we might as well go for the direct and obviously correct spelling anyway.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The checking a->ob_digit[0] & ~(PyLong_MASK >> remshift) == 0 ensures no bit overflowing or discarding, that is valid bits would stay in the range covered by twodigit.

Besides, I'm sorry but I couldn't catch the idea about "splitting into negative and nonnegative cases" - it's both C's left shifting that applied to "medium value". Could you please explain it further?

Copy link
Member

@mdickinson mdickinson Dec 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the problem is that it takes some mental effort to understand why casting to twodigits, shifting, and then casting back to stwodigits gives the correct result in the negative case. Moreover, there are corner cases (admittedly unlikely to occur in practice, but still theoretically possible) where it's still undefined behaviour, if the integer promotions promote an twodigits to int. See C99 §6.3.1.1, paragraph 2.

Because of those two things, I'd prefer a more direct spelling: if medium_value(a) is nonnegative, do medium_value(a) << remshift (without any casting). If medium_value(a) is negative, do -((-medium_value(a)) << remshift) (again without any casting). So something like stwodigits sval = m < 0 ? -(-m << remshift) : m << remshift, where m is medium_value(a).

// bypass undefined shift operator behavior
if (IS_SMALL_INT(sval)) {
return get_small_int((sdigit)sval);
}
z = _PyLong_New(Py_SIZE(a));
z->ob_digit[0] = (digit)(a->ob_digit[0] << remshift);
return (PyObject *)z;
}

/* This version due to Tim Peters */
oldsize = Py_ABS(Py_SIZE(a));
newsize = oldsize + wordshift;
if (remshift)
Expand Down