@@ -145,6 +145,9 @@ static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
145145 4 , -10 , 4 , 8 , fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
// Each fltSemantics initializer lists, in order: maxExponent, minExponent,
// precision (in bits, counting the integer bit) and sizeInBits. The optional
// trailing fields select the non-finite behavior and the NaN encoding.
146146static constexpr fltSemantics semFloat8E3M4 = {3 , -2 , 5 , 8 };
147147static constexpr fltSemantics semFloatTF32 = {127 , -126 , 11 , 19 };
// Float8E8M0FN: an 8-bit, exponent-only format. Precision is 1 because only
// the integer bit exists; there is no stored significand. It has NaN but no
// infinities, and NaN is the all-ones encoding.
148+ static constexpr fltSemantics semFloat8E8M0FN = {
149+ 127 , -127 , 1 , 8 , fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
150+
148151static constexpr fltSemantics semFloat6E3M2FN = {
149152 4 , -2 , 3 , 6 , fltNonfiniteBehavior::FiniteOnly};
150153static constexpr fltSemantics semFloat6E2M3FN = {
@@ -222,6 +225,8 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
222225 return Float8E3M4 ();
223226 case S_FloatTF32:
224227 return FloatTF32 ();
228+ case S_Float8E8M0FN:
229+ return Float8E8M0FN ();
225230 case S_Float6E3M2FN:
226231 return Float6E3M2FN ();
227232 case S_Float6E2M3FN:
@@ -264,6 +269,8 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
264269 return S_Float8E3M4;
265270 else if (&Sem == &llvm::APFloat::FloatTF32 ())
266271 return S_FloatTF32;
272+ else if (&Sem == &llvm::APFloat::Float8E8M0FN ())
273+ return S_Float8E8M0FN;
267274 else if (&Sem == &llvm::APFloat::Float6E3M2FN ())
268275 return S_Float6E3M2FN;
269276 else if (&Sem == &llvm::APFloat::Float6E2M3FN ())
@@ -294,6 +301,7 @@ const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
294301}
// Accessors that expose the file-local semantics descriptors by reference.
// Callers identify a format by the address of the returned object, so each
// accessor must always return the same static instance.
295302const fltSemantics &APFloatBase::Float8E3M4 () { return semFloat8E3M4; }
296303const fltSemantics &APFloatBase::FloatTF32 () { return semFloatTF32; }
304+ const fltSemantics &APFloatBase::Float8E8M0FN () { return semFloat8E8M0FN; }
297305const fltSemantics &APFloatBase::Float6E3M2FN () { return semFloat6E3M2FN; }
298306const fltSemantics &APFloatBase::Float6E2M3FN () { return semFloat6E2M3FN; }
299307const fltSemantics &APFloatBase::Float4E2M1FN () { return semFloat4E2M1FN; }
@@ -396,6 +404,8 @@ static inline Error createError(const Twine &Err) {
396404}
397405
398406static constexpr inline unsigned int partCountForBits (unsigned int bits) {
407+ if (bits == 0 )
408+ return 1 ;
399409 return ((bits) + APFloatBase::integerPartWidth - 1 ) / APFloatBase::integerPartWidth;
400410}
401411
@@ -955,6 +965,12 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
955965 significand[part] = 0 ;
956966 }
957967
968+ // For the E8M0 types, precision is just 1 and
969+ // the NaNBit handling below is not relevant.
970+ // So, exit early.
971+ if (semantics == &semFloat8E8M0FN)
972+ return ;
973+
958974 unsigned QNaNBit = semantics->precision - 2 ;
959975
960976 if (SNaN) {
@@ -1007,6 +1023,10 @@ IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
10071023}
10081024
10091025bool IEEEFloat::isDenormal () const {
1026+ // No denormals in Float8E8M0FN
1027+ if (semantics == &semFloat8E8M0FN)
1028+ return false ;
1029+
10101030 return isFiniteNonZero () && (exponent == semantics->minExponent ) &&
10111031 (APInt::tcExtractBit (significandParts (),
10121032 semantics->precision - 1 ) == 0 );
@@ -1028,6 +1048,10 @@ bool IEEEFloat::isSmallestNormalized() const {
10281048bool IEEEFloat::isSignificandAllOnes () const {
10291049 // Test if the significand excluding the integral bit is all ones. This allows
10301050 // us to test for binade boundaries.
1051+ // For the E8M0 format, this is always false since there are no
1052+ // actual significand bits.
1053+ if (semantics == &semFloat8E8M0FN)
1054+ return false ;
10311055 const integerPart *Parts = significandParts ();
10321056 const unsigned PartCount = partCountForBits (semantics->precision );
10331057 for (unsigned i = 0 ; i < PartCount - 1 ; i++)
@@ -1075,6 +1099,11 @@ bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
10751099}
10761100
10771101bool IEEEFloat::isSignificandAllZeros () const {
1102+ // For the E8M0 format, this is always true since there are no
1103+ // actual significand bits.
1104+ if (semantics == &semFloat8E8M0FN)
1105+ return true ;
1106+
10781107 // Test if the significand excluding the integral bit is all zeros. This
10791108 // allows us to test for binade boundaries.
10801109 const integerPart *Parts = significandParts ();
@@ -1113,6 +1142,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
11131142}
11141143
11151144bool IEEEFloat::isLargest () const {
1145+ if (semantics == &semFloat8E8M0FN)
1146+ return isFiniteNonZero () && exponent == semantics->maxExponent ;
11161147 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
11171148 semantics->nanEncoding == fltNanEncoding::AllOnes) {
11181149 // The largest number by magnitude in our format will be the floating point
@@ -1165,6 +1196,12 @@ IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
11651196
11661197IEEEFloat::IEEEFloat (const fltSemantics &ourSemantics) {
11671198 initialize (&ourSemantics);
1199+ // The E8M0 type cannot represent the value zero.
1200+ // So, initialize with the closest representation instead.
1201+ if (semantics == &semFloat8E8M0FN) {
1202+ makeSmallestNormalized (false );
1203+ return ;
1204+ }
11681205 makeZero (false );
11691206}
11701207
@@ -1727,6 +1764,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
17271764 /* Canonicalize zeroes. */
17281765 if (omsb == 0 ) {
17291766 category = fcZero;
1767+ // The E8M0 type cannot represent the value zero and
1768+ // thus the category cannot be fcZero. So, get the
1769+ // closest representation to fcZero instead.
1770+ if (semantics == &semFloat8E8M0FN)
1771+ makeSmallestNormalized (false );
17301772 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
17311773 sign = false ;
17321774 }
@@ -2606,6 +2648,11 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
26062648 fs = opOK;
26072649 }
26082650
2651+ // The E8M0 type cannot represent the value zero and
2652+ // thus the category cannot be fcZero. So, get the
2653+ // closest representation to fcZero instead.
2654+ if (category == fcZero && semantics == &semFloat8E8M0FN)
2655+ makeSmallestNormalized (false );
26092656 return fs;
26102657}
26112658
@@ -3070,6 +3117,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
30703117 fs = opOK;
30713118 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
30723119 sign = false ;
3120+ // The E8M0 type cannot represent the value zero and
3121+ // thus the category cannot be fcZero. So, get the
3122+ // closest representation to fcZero instead.
3123+ if (semantics == &semFloat8E8M0FN)
3124+ makeSmallestNormalized (false );
30733125
30743126 /* Check whether the normalized exponent is high enough to overflow
30753127 max during the log-rebasing in the max-exponent check below. */
@@ -3533,15 +3585,15 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
35333585template <const fltSemantics &S>
35343586APInt IEEEFloat::convertIEEEFloatToAPInt () const {
35353587 assert (semantics == &S);
3536-
3537- constexpr int bias = -(S.minExponent - 1 );
3588+ const int bias = (semantics == &semFloat8E8M0FN) ?
3589+ -S. minExponent : -(S.minExponent - 1 );
35383590 constexpr unsigned int trailing_significand_bits = S.precision - 1 ;
35393591 constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
35403592 constexpr integerPart integer_bit =
35413593 integerPart{1 } << (trailing_significand_bits % integerPartWidth);
35423594 constexpr uint64_t significand_mask = integer_bit - 1 ;
3543- constexpr unsigned int exponent_bits =
3544- S.sizeInBits - 1 - trailing_significand_bits;
3595+ constexpr unsigned int exponent_bits = trailing_significand_bits ?
3596+ ( S.sizeInBits - 1 - trailing_significand_bits) : S. sizeInBits ;
35453597 static_assert (exponent_bits < 64 );
35463598 constexpr uint64_t exponent_mask = (uint64_t {1 } << exponent_bits) - 1 ;
35473599
@@ -3557,6 +3609,8 @@ APInt IEEEFloat::convertIEEEFloatToAPInt() const {
35573609 !(significandParts ()[integer_bit_part] & integer_bit))
35583610 myexponent = 0 ; // denormal
35593611 } else if (category == fcZero) {
3612+ if (semantics == &semFloat8E8M0FN)
3613+ llvm_unreachable (" semantics does not support zero!" );
35603614 myexponent = ::exponentZero (S) + bias;
35613615 mysignificand.fill (0 );
35623616 } else if (category == fcInfinity) {
@@ -3659,6 +3713,11 @@ APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
36593713 return convertIEEEFloatToAPInt<semFloatTF32>();
36603714}
36613715
// Returns the APInt encoding of a Float8E8M0FN value by delegating to the
// generic convertIEEEFloatToAPInt template instantiated for this format.
3716+ APInt IEEEFloat::convertFloat8E8M0FNAPFloatToAPInt () const {
// The significand of this format is expected to occupy a single part.
3717+ assert (partCount () == 1 );
3718+ return convertIEEEFloatToAPInt<semFloat8E8M0FN>();
3719+ }
3720+
36623721APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt () const {
36633722 assert (partCount () == 1 );
36643723 return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
@@ -3721,6 +3780,9 @@ APInt IEEEFloat::bitcastToAPInt() const {
37213780 if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
37223781 return convertFloatTF32APFloatToAPInt ();
37233782
3783+ if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FN)
3784+ return convertFloat8E8M0FNAPFloatToAPInt ();
3785+
37243786 if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
37253787 return convertFloat6E3M2FNAPFloatToAPInt ();
37263788
@@ -3819,6 +3881,40 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
38193881 }
38203882}
38213883
3884+ // The E8M0 format has the following characteristics:
3885+ // It is an 8-bit unsigned format with only exponents (no actual significand)
3886+ // No encodings for {zero, infinities or denorms}
3887+ // NaN is represented by all 1's
3888+ // Bias is 127
3889+ void IEEEFloat::initFromFloat8E8M0FNAPInt (const APInt &api) {
3890+ const uint64_t exponent_mask = 0xff ;
3891+ uint64_t val = api.getRawData ()[0 ];
3892+ uint64_t myexponent = (val & exponent_mask);
3893+
3894+ initialize (&semFloat8E8M0FN);
3895+ assert (partCount () == 1 );
3896+
3897+ // This format has unsigned representation only
3898+ sign = 0 ;
3899+
3900+ // Set the significand
3901+ // This format does not have any significand but the 'Pth' precision bit is
3902+ // always set to 1 for consistency in APFloat's internal representation.
3903+ uint64_t mysignificand = 1 ;
3904+ significandParts ()[0 ] = mysignificand;
3905+
3906+ // This format can either have a NaN or fcNormal
3907+ // All 1's i.e. 255 is a NaN
3908+ if (val == exponent_mask) {
3909+ category = fcNaN;
3910+ exponent = exponentNaN ();
3911+ return ;
3912+ }
3913+ // Handle fcNormal...
3914+ category = fcNormal;
3915+ exponent = myexponent - 127 ; // 127 is bias
3916+ return ;
3917+ }
38223918template <const fltSemantics &S>
38233919void IEEEFloat::initFromIEEEAPInt (const APInt &api) {
38243920 assert (api.getBitWidth () == S.sizeInBits );
@@ -3999,6 +4095,8 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
39994095 return initFromFloat8E3M4APInt (api);
40004096 if (Sem == &semFloatTF32)
40014097 return initFromFloatTF32APInt (api);
4098+ if (Sem == &semFloat8E8M0FN)
4099+ return initFromFloat8E8M0FNAPInt (api);
40024100 if (Sem == &semFloat6E3M2FN)
40034101 return initFromFloat6E3M2FNAPInt (api);
40044102 if (Sem == &semFloat6E2M3FN)
@@ -4032,6 +4130,13 @@ void IEEEFloat::makeLargest(bool Negative) {
40324130 significand[PartCount - 1 ] = (NumUnusedHighBits < integerPartWidth)
40334131 ? (~integerPart (0 ) >> NumUnusedHighBits)
40344132 : 0 ;
4133+ // For E8M0 format, we only have the 'internal' precision bit
4134+ // (aka 'P' the precision bit) which is always set to 1.
4135+ // Hence, the below logic of setting the LSB to 0 does not apply.
4136+ // For other cases, the LSB is meant to be any bit other than
4137+ // the Pth precision bit.
4138+ if (semantics == &semFloat8E8M0FN)
4139+ return ;
40354140
40364141 if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
40374142 semantics->nanEncoding == fltNanEncoding::AllOnes)
@@ -4509,6 +4614,11 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
45094614 exponent = 0 ;
45104615 if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
45114616 sign = false ;
4617+ // The E8M0 type cannot represent the value zero and
4618+ // thus the category cannot be fcZero. So, get the
4619+ // closest representation to fcZero instead.
4620+ if (semantics == &semFloat8E8M0FN)
4621+ makeSmallestNormalized (false );
45124622 break ;
45134623 }
45144624
@@ -4575,6 +4685,11 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
45754685 // denormal always increment since moving denormals and the numbers in the
45764686 // smallest normal binade have the same exponent in our representation.
45774687 bool WillCrossBinadeBoundary = !isDenormal () && isSignificandAllOnes ();
4688+ // The E8M0 format does not support Denorms.
4689+ // Since there are only exponents, any increment always crosses the
4690+ // 'BinadeBoundary'. So, make this true always.
4691+ if (semantics == &semFloat8E8M0FN)
4692+ WillCrossBinadeBoundary = true ;
45784693
45794694 if (WillCrossBinadeBoundary) {
45804695 integerPart *Parts = significandParts ();
@@ -4626,6 +4741,11 @@ void IEEEFloat::makeInf(bool Negative) {
46264741}
46274742
46284743void IEEEFloat::makeZero (bool Negative) {
4744+ // The E8M0 type cannot represent the value zero.
4745+ if (semantics == &semFloat8E8M0FN) {
4746+ assert (false && " This floating point format does not support Zero\n " );
4747+ return ;
4748+ }
46294749 category = fcZero;
46304750 sign = Negative;
46314751 if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
0 commit comments