Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,22 +47,6 @@ class CodeGen final : public CodeGenInterface

private:
#if defined(TARGET_XARCH)
// Bit masks used in negating a float or double number.
// This is to avoid creating more than one data constant for these bitmasks when a
// method has more than one GT_NEG operation on floating point values.
CORINFO_FIELD_HANDLE negBitmaskFlt;
CORINFO_FIELD_HANDLE negBitmaskDbl;

// Bit masks used in computing Math.Abs() of a float or double number.
CORINFO_FIELD_HANDLE absBitmaskFlt;
CORINFO_FIELD_HANDLE absBitmaskDbl;

// Bit mask used in zeroing the 3rd element of a SIMD12
CORINFO_FIELD_HANDLE zroSimd12Elm3;

// Bit mask used in U8 -> double conversion to adjust the result.
CORINFO_FIELD_HANDLE u8ToDblBitmask;

// Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
void genSSE2BitwiseOp(GenTree* treeNode);

Expand Down
9 changes: 0 additions & 9 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,15 +196,6 @@ void CodeGenInterface::CopyRegisterInfo()
CodeGen::CodeGen(Compiler* theCompiler)
: CodeGenInterface(theCompiler)
{
#if defined(TARGET_XARCH)
negBitmaskFlt = nullptr;
negBitmaskDbl = nullptr;
absBitmaskFlt = nullptr;
absBitmaskDbl = nullptr;
zroSimd12Elm3 = nullptr;
u8ToDblBitmask = nullptr;
#endif // defined(TARGET_XARCH)

#if defined(FEATURE_PUT_STRUCT_ARG_STK) && !defined(TARGET_X86)
m_stkArgVarNum = BAD_VAR_NUM;
#endif
Expand Down
96 changes: 47 additions & 49 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7532,47 +7532,53 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
// integral to floating-point conversions all have RMW semantics if VEX support
// is not available

bool isRMW = !compiler->canUseVexEncoding();
inst_RV_RV_TT(ins, emitTypeSize(srcType), targetReg, targetReg, op1, isRMW, INS_OPTS_NONE);
const bool isRMW = !compiler->canUseVexEncoding();

// Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
// will interpret ULONG value as LONG. Hence we need to adjust the
// result if sign-bit of srcType is set.
if (srcType == TYP_ULONG)
{
// The instruction sequence below is less accurate than what clang
// and gcc generate. However, we keep the current sequence for backward compatibility.
// If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule
// should be also updated for consistent conversion result.
assert(dstType == TYP_DOUBLE);
assert(op1->isUsedFromReg());

// Set the flags without modifying op1.
// test op1Reg, op1Reg
inst_RV_RV(INS_test, op1->GetRegNum(), op1->GetRegNum(), srcType);
// The following LONG->DOUBLE cast machinery is based on clang's implementation
// with "-ffp-model=strict" flag:
//
// mov tmp1, arg (8 byte)
// shr tmp1
// mov tmp2, arg (4 byte)
// and tmp2, 1
// or tmp2, tmp1
// test arg, arg
// cmovns tmp2, arg
// cvtsi2sd xmm0, tmp2
// jns .LABEL
// addsd xmm0, xmm0
//.LABEL
//
regNumber argReg = treeNode->gtGetOp1()->GetRegNum();
regNumber tmpReg1 = internalRegisters.Extract(treeNode);
regNumber tmpReg2 = internalRegisters.Extract(treeNode);

inst_Mov(TYP_LONG, tmpReg1, argReg, /* canSkip */ false, EA_8BYTE);
inst_RV_SH(INS_shr, EA_8BYTE, tmpReg1, 1);
inst_Mov(TYP_INT, tmpReg2, argReg, /* canSkip */ false, EA_4BYTE);
GetEmitter()->emitIns_R_I(INS_and, EA_4BYTE, tmpReg2, 1);
GetEmitter()->emitIns_R_R(INS_or, EA_8BYTE, tmpReg2, tmpReg1);
GetEmitter()->emitIns_R_R(INS_test, EA_8BYTE, argReg, argReg);
GetEmitter()->emitIns_R_R(INS_cmovns, EA_8BYTE, tmpReg2, argReg);
GetEmitter()->emitIns_R_R(ins, EA_8BYTE, targetReg, tmpReg2);

// No need to adjust result if op1 >= 0 i.e. positive
// Jge label
BasicBlock* label = genCreateTempLabel();
inst_JMP(EJ_jge, label);

// Adjust the result
// result = result + 0x43f00000 00000000
// addsd resultReg, 0x43f00000 00000000
CORINFO_FIELD_HANDLE* cns = &u8ToDblBitmask;
if (*cns == nullptr)
{
double d;
static_assert_no_msg(sizeof(double) == sizeof(__int64));
*((__int64*)&d) = 0x43f0000000000000LL;

*cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE);
}
GetEmitter()->emitIns_SIMD_R_R_C(INS_addsd, EA_8BYTE, targetReg, targetReg, *cns, 0, INS_OPTS_NONE);

inst_JMP(EJ_jns, label);
GetEmitter()->emitIns_R_R(INS_addsd, EA_8BYTE, targetReg, targetReg);
genDefineTempLabel(label);
}

else
{
inst_RV_RV_TT(ins, emitTypeSize(srcType), targetReg, targetReg, op1, isRMW, INS_OPTS_NONE);
}
genProduceReg(treeNode);
}

Expand Down Expand Up @@ -7899,54 +7905,46 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode)
{
regNumber targetReg = treeNode->GetRegNum();
regNumber operandReg = genConsumeReg(treeNode->gtGetOp1());
emitAttr size = emitTypeSize(treeNode);

assert(varTypeIsFloating(treeNode->TypeGet()));
assert(treeNode->gtGetOp1()->isUsedFromReg());

CORINFO_FIELD_HANDLE* maskFld = nullptr;
UINT64 mask = 0;
instruction ins = INS_invalid;
CORINFO_FIELD_HANDLE maskFld = NO_FIELD_HANDLE;
UINT64 mask = 0;
instruction ins = INS_invalid;

if (treeNode->OperIs(GT_NEG))
{
// Neg(x) = flip the sign bit.
// Neg(f) = f ^ 0x80000000 x4 (packed)
// Neg(d) = d ^ 0x8000000000000000 x2 (packed)
ins = INS_xorps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL;
maskFld = treeNode->TypeIs(TYP_FLOAT) ? &negBitmaskFlt : &negBitmaskDbl;
ins = INS_xorps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x8000000080000000UL : 0x8000000000000000UL;
}
else if (treeNode->OperIs(GT_INTRINSIC))
{
assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs);
// Abs(x) = set sign-bit to zero
// Abs(f) = f & 0x7fffffff x4 (packed)
// Abs(d) = d & 0x7fffffffffffffff x2 (packed)
ins = INS_andps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7FFFFFFF7FFFFFFFUL : 0x7FFFFFFFFFFFFFFFUL;
maskFld = treeNode->TypeIs(TYP_FLOAT) ? &absBitmaskFlt : &absBitmaskDbl;
ins = INS_andps;
mask = treeNode->TypeIs(TYP_FLOAT) ? 0x7FFFFFFF7FFFFFFFUL : 0x7FFFFFFFFFFFFFFFUL;
}
else
{
assert(!"genSSE2BitwiseOp: unsupported oper");
}

if (*maskFld == nullptr)
{
simd16_t constValue;

constValue.u64[0] = mask;
constValue.u64[1] = mask;

simd16_t constValue;
constValue.u64[0] = mask;
constValue.u64[1] = mask;
#if defined(FEATURE_SIMD)
*maskFld = GetEmitter()->emitSimd16Const(constValue);
maskFld = GetEmitter()->emitSimd16Const(constValue);
#else
*maskFld = GetEmitter()->emitBlkConst(&constValue, 16, 16, treeNode->TypeGet());
maskFld = GetEmitter()->emitBlkConst(&constValue, 16, 16, treeNode->TypeGet());
#endif
}

GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, *maskFld, 0, INS_OPTS_NONE);
GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, maskFld, 0, INS_OPTS_NONE);
}

//-----------------------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2827,6 +2827,15 @@ int LinearScan::BuildCast(GenTreeCast* cast)
const var_types srcType = genActualType(src->TypeGet());
const var_types castType = cast->gtCastType;

if ((srcType == TYP_LONG) && (castType == TYP_DOUBLE) &&
!compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
// We need two extra temp regs for LONG->DOUBLE cast
// if we don't have AVX512F available.
buildInternalIntRegisterDefForNode(cast);
buildInternalIntRegisterDefForNode(cast);
}

regMaskTP candidates = RBM_NONE;
#ifdef TARGET_X86
if (varTypeIsByte(castType))
Expand Down
19 changes: 6 additions & 13 deletions src/coreclr/jit/simdcodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,19 +536,12 @@ void CodeGen::genSimd12UpperClear(regNumber tgtReg)
else
{
// Preserve element 0, 1, and 2; Zero element 3

if (zroSimd12Elm3 == NO_FIELD_HANDLE)
{
simd16_t constValue;

constValue.u32[0] = 0xFFFFFFFF;
constValue.u32[1] = 0xFFFFFFFF;
constValue.u32[2] = 0xFFFFFFFF;
constValue.u32[3] = 0x00000000;

zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue);
}

simd16_t constValue;
constValue.u32[0] = 0xFFFFFFFF;
constValue.u32[1] = 0xFFFFFFFF;
constValue.u32[2] = 0xFFFFFFFF;
constValue.u32[3] = 0x00000000;
CORINFO_FIELD_HANDLE zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue);
GetEmitter()->emitIns_SIMD_R_R_C(INS_andps, EA_16BYTE, tgtReg, tgtReg, zroSimd12Elm3, 0, INS_OPTS_NONE);
}
}
Expand Down
30 changes: 2 additions & 28 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// ===================================================================================================
Expand Down Expand Up @@ -2230,33 +2230,7 @@ unsigned CountDigits(double num, unsigned base /* = 10 */)

double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal)
{
__int64 s64 = uIntVal;
double d;
if (s64 < 0)
{
#if defined(TARGET_XARCH)
// RyuJIT codegen and clang (or gcc) may produce different results for casting uint64 to
// double, and the clang result is more accurate. For example,
// 1) (double)0x84595161401484A0UL --> 43e08b2a2c280290 (RyuJIT codegen or VC++)
// 2) (double)0x84595161401484A0UL --> 43e08b2a2c280291 (clang or gcc)
// If the folding optimization below is implemented by simple casting of (double)uint64_val
// and it is compiled by clang, casting result can be inconsistent, depending on whether
// the folding optimization is triggered or the codegen generates instructions for casting. //
// The current solution is to force the same math as the codegen does, so that casting
// result is always consistent.

// d = (double)(int64_t)uint64 + 0x1p64
uint64_t adjHex = 0x43F0000000000000UL;
d = (double)s64 + *(double*)&adjHex;
#else
d = (double)uIntVal;
#endif
}
else
{
d = (double)uIntVal;
}
return d;
return (double)uIntVal;
}

float FloatingPointUtils::convertUInt64ToFloat(unsigned __int64 u64)
Expand Down
34 changes: 34 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_102149/Runtime_102149.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
using System.Runtime.CompilerServices;
using Xunit;

public static class Runtime_102149
{
[Fact]
public static void Test()
{
if (BitConverter.DoubleToUInt64Bits(U8_to_R8(Const(9303915604039947368UL))) != 4890948523238023291UL)
throw new InvalidOperationException("Case 1 failed");

if (BitConverter.DoubleToUInt64Bits((double)Const(9303915604039947368UL)) != 4890948523238023291UL)
throw new InvalidOperationException("Case 2 failed");

if (NonConst(10648738977740919977d) != NonConst(10648738977740919977UL))
throw new InvalidOperationException("Case 3 failed");

if (Const(10648738977740919977d) != Const(10648738977740919977UL))
throw new InvalidOperationException("Case 4 failed");
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static T Const<T>(T val) => val;

[MethodImpl(MethodImplOptions.NoInlining)]
private static T NonConst<T>(T val) => val;

[MethodImpl(MethodImplOptions.NoInlining)]
private static double U8_to_R8(ulong x) => x;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>