llvm · djtodoro · Aug 22, 2025 · Jun 2, 2025 · Aug 4, 2025 · Aug 7, 2025
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -2340,6 +2340,9 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
 
   getAssembler().getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
 
+  // RISC-V instructions are always little-endian, even on BE systems.
+  bool ForceLE = getContext().getTargetTriple().isRISCV();
+
   // If we are showing fixups, create symbolic markers in the encoded
   // representation. We do this by making a per-bit map to the fixup item index,
   // then trying to display it as nicely as possible.
@@ -2394,7 +2397,10 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
         unsigned Bit = (Code[i] >> j) & 1;
 
         unsigned FixupBit;
-        if (MAI->isLittleEndian())
+        // RISC-V instructions are always little-endian.
+        // The FixupMap is indexed by actual bit positions in the LE
+        // instruction.
+        if (MAI->isLittleEndian() || ForceLE)
           FixupBit = i * 8 + j;
         else
           FixupBit = i * 8 + (7-j);

diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -4065,4 +4065,6 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVAsmParser() {
   RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
   RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
+  RegisterMCAsmParser<RISCVAsmParser> A(getTheRISCV32beTarget());
+  RegisterMCAsmParser<RISCVAsmParser> B(getTheRISCV64beTarget());
 }
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -74,6 +74,10 @@ LLVMInitializeRISCVDisassembler() {
                                          createRISCVDisassembler);
   TargetRegistry::RegisterMCDisassembler(getTheRISCV64Target(),
                                          createRISCVDisassembler);
+  TargetRegistry::RegisterMCDisassembler(getTheRISCV32beTarget(),
+                                         createRISCVDisassembler);
+  TargetRegistry::RegisterMCDisassembler(getTheRISCV64beTarget(),
+                                         createRISCVDisassembler);
 }
 
 static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -38,9 +38,11 @@ static cl::opt<bool>
                       "bytes of NOPs even in norvc code"));
 
 RISCVAsmBackend::RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI,
-                                 bool Is64Bit, const MCTargetOptions &Options)
-    : MCAsmBackend(llvm::endianness::little), STI(STI), OSABI(OSABI),
-      Is64Bit(Is64Bit), TargetOptions(Options) {
+                                 bool Is64Bit, bool IsLittleEndian,
+                                 const MCTargetOptions &Options)
+    : MCAsmBackend(IsLittleEndian ? llvm::endianness::little
+                                  : llvm::endianness::big),
+      STI(STI), OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
   RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
 }
 
@@ -374,7 +376,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCFragment &F,
   } else {
     PCBytes = 2;
     OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
-    support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint16_t>(OS, 0, Endian);
   }
   auto Offset = OS.tell() - PCBytes;
 
@@ -428,15 +430,15 @@ bool RISCVAsmBackend::relaxDwarfCFA(MCFragment &F, bool &WasRelaxed) const {
     AddFixups(0, {ELF::R_RISCV_SET6, ELF::R_RISCV_SUB6});
   } else if (isUInt<8>(Value)) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc1);
-    support::endian::write<uint8_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint8_t>(OS, 0, Endian);
     AddFixups(1, {ELF::R_RISCV_SET8, ELF::R_RISCV_SUB8});
   } else if (isUInt<16>(Value)) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc2);
-    support::endian::write<uint16_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint16_t>(OS, 0, Endian);
     AddFixups(1, {ELF::R_RISCV_SET16, ELF::R_RISCV_SUB16});
   } else if (isUInt<32>(Value)) {
     OS << uint8_t(dwarf::DW_CFA_advance_loc4);
-    support::endian::write<uint32_t>(OS, 0, llvm::endianness::little);
+    support::endian::write<uint32_t>(OS, 0, Endian);
     AddFixups(1, {ELF::R_RISCV_SET32, ELF::R_RISCV_SUB32});
   } else {
     llvm_unreachable("unsupported CFA encoding");
@@ -909,6 +911,22 @@ bool RISCVAsmBackend::addReloc(const MCFragment &F, const MCFixup &Fixup,
   return false;
 }
 
+// Returns true for the FK_Data_1/2/4/8 fixup kinds. Only these are
+// byte-swapped on big-endian targets; all other kinds are applied in
+// little-endian byte order, as RISC-V instructions are always little-endian.
+static bool isDataFixup(unsigned Kind) {
+  switch (Kind) {
+  default:
+    return false;
+
+  case FK_Data_1:
+  case FK_Data_2:
+  case FK_Data_4:
+  case FK_Data_8:
+    return true;
+  }
+}
+
 void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
                                  const MCValue &Target, uint8_t *Data,
                                  uint64_t Value, bool IsResolved) {
@@ -932,8 +950,11 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup,
 
   // For each byte of the fragment that the fixup touches, mask in the
   // bits from the fixup value.
+  // On big-endian targets, data fixups are stored most-significant byte first.
+  bool SwapValue = Endian == llvm::endianness::big && isDataFixup(Kind);
   for (unsigned i = 0; i != NumBytes; ++i) {
-    Data[i] |= uint8_t((Value >> (i * 8)) & 0xff);
+    unsigned Idx = SwapValue ? (NumBytes - 1 - i) : i;
+    Data[Idx] |= uint8_t((Value >> (i * 8)) & 0xff);
   }
 }
 
@@ -948,5 +969,6 @@ MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
                                           const MCTargetOptions &Options) {
   const Triple &TT = STI.getTargetTriple();
   uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
-  return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
+  return new RISCVAsmBackend(STI, OSABI, TT.isArch64Bit(), TT.isLittleEndian(),
+                             Options);
 }
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -35,7 +35,7 @@ class RISCVAsmBackend : public MCAsmBackend {
 
 public:
   RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
-                  const MCTargetOptions &Options);
+                  bool IsLittleEndian, const MCTargetOptions &Options);
   ~RISCVAsmBackend() override = default;
 
   std::optional<bool> evaluateFixup(const MCFragment &, MCFixup &, MCValue &,

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
 void RISCVMCAsmInfo::anchor() {}
 
 RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
+  IsLittleEndian = TT.isLittleEndian();
   CodePointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
   CommentString = "#";
   AlignmentIsInBytes = false;

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -376,7 +376,8 @@ static MCInstrAnalysis *createRISCVInstrAnalysis(const MCInstrInfo *Info) {
 
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVTargetMC() {
-  for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
+  for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target(),
+                    &getTheRISCV32beTarget(), &getTheRISCV64beTarget()}) {
     TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
     TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo);
     TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);

diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -611,6 +611,8 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVAsmPrinter() {
   RegisterAsmPrinter<RISCVAsmPrinter> X(getTheRISCV32Target());
   RegisterAsmPrinter<RISCVAsmPrinter> Y(getTheRISCV64Target());
+  RegisterAsmPrinter<RISCVAsmPrinter> A(getTheRISCV32beTarget());
+  RegisterAsmPrinter<RISCVAsmPrinter> B(getTheRISCV64beTarget());
 }
 
 void RISCVAsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -106,6 +106,8 @@ static cl::opt<bool>
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+  RegisterTargetMachine<RISCVTargetMachine> A(getTheRISCV32beTarget());
+  RegisterTargetMachine<RISCVTargetMachine> B(getTheRISCV64beTarget());
   auto *PR = PassRegistry::getPassRegistry();
   initializeGlobalISel(*PR);
   initializeRISCVO0PreLegalizerCombinerPass(*PR);
@@ -139,21 +141,44 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVAsmPrinterPass(*PR);
 }
 
-static StringRef computeDataLayout(const Triple &TT,
-                                   const TargetOptions &Options) {
-  StringRef ABIName = Options.MCOptions.getABIName();
-  if (TT.isArch64Bit()) {
-    if (ABIName == "lp64e")
-      return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S64";
+/// Build the data layout string for the RISC-V triple \p TT.
+///
+/// The pieces are appended in order: byte order ("e" little-endian, "E"
+/// big-endian), the "-m:e" mangling component, the XLEN-dependent pointer and
+/// integer components, and the stack alignment selected by the ABI name.
+static std::string computeDataLayout(const Triple &TT,
+                                     const TargetOptions &Opts) {
+  std::string Ret;
+
+  if (TT.isLittleEndian())
+    Ret += "e";
+  else
+    Ret += "E";
+
+  Ret += "-m:e";
 
-    return "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128";
+  // Pointer and integer sizes.
+  if (TT.isArch64Bit()) {
+    Ret += "-p:64:64-i64:64-i128:128";
+    Ret += "-n32:64";
+  } else {
+    assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
+    Ret += "-p:32:32-i64:64";
+    Ret += "-n32";
   }
-  assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
 
-  if (ABIName == "ilp32e")
-    return "e-m:e-p:32:32-i64:64-n32-S32";
+  // Stack alignment based on ABI. The reduced alignments apply only when the
+  // ABI matches the XLEN ("ilp32e" on RV32, "lp64e" on RV64); any other
+  // pairing uses the default -S128.
+  StringRef ABI = Opts.MCOptions.getABIName();
+  if (ABI == "ilp32e" && !TT.isArch64Bit())
+    Ret += "-S32";
+  else if (ABI == "lp64e" && TT.isArch64Bit())
+    Ret += "-S64";
+  else
+    Ret += "-S128";
 
-  return "e-m:e-p:32:32-i64:64-n32-S128";
+  return Ret;
 }
 
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,

diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -21,10 +21,24 @@ Target &llvm::getTheRISCV64Target() {
   return TheRISCV64Target;
 }
 
+Target &llvm::getTheRISCV32beTarget() {
+  static Target TheRISCV32beTarget; // The one Target instance for riscv32be.
+  return TheRISCV32beTarget;
+}
+
+Target &llvm::getTheRISCV64beTarget() {
+  static Target TheRISCV64beTarget; // The one Target instance for riscv64be.
+  return TheRISCV64beTarget;
+}
+
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeRISCVTargetInfo() {
   RegisterTarget<Triple::riscv32, /*HasJIT=*/true> X(
       getTheRISCV32Target(), "riscv32", "32-bit RISC-V", "RISCV");
   RegisterTarget<Triple::riscv64, /*HasJIT=*/true> Y(
       getTheRISCV64Target(), "riscv64", "64-bit RISC-V", "RISCV");
+  RegisterTarget<Triple::riscv32be> A(getTheRISCV32beTarget(), "riscv32be",
+                                      "32-bit big endian RISC-V", "RISCV");
+  RegisterTarget<Triple::riscv64be> B(getTheRISCV64beTarget(), "riscv64be",
+                                      "64-bit big endian RISC-V", "RISCV");
 }
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
@@ -15,6 +15,8 @@ class Target;
 
 Target &getTheRISCV32Target();
 Target &getTheRISCV64Target();
+Target &getTheRISCV32beTarget();
+Target &getTheRISCV64beTarget();
 
 } // namespace llvm
 

diff --git a/llvm/test/MC/Disassembler/RISCV/bigendian.txt b/llvm/test/MC/Disassembler/RISCV/bigendian.txt
@@ -0,0 +1,29 @@
+# RUN: llvm-mc --disassemble %s -triple=riscv32be -mattr=+c 2>&1 | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=riscv64be -mattr=+c 2>&1 | FileCheck %s
+
+# Test basic disassembly for big-endian RISC-V targets.
+# RISC-V instructions are always encoded little-endian, even on BE targets.
+
+[0x13,0x05,0x45,0x06]
+# CHECK: addi a0, a0, 100
+
+[0xb7,0x52,0x34,0x12]
+# CHECK: lui t0, 74565
+
+[0x03,0x26,0x05,0x00]
+# CHECK: lw a2, 0(a0)
+
+[0x23,0x22,0xc5,0x00]
+# CHECK: sw a2, 4(a0)
+
+[0xef,0x00,0x00,0x00]
+# CHECK: jal 0
+
+[0x63,0x00,0xb5,0x00]
+# CHECK: beq a0, a1, 0
+
+[0x01,0x00]
+# CHECK: nop
+
+[0x05,0x05]
+# CHECK: addi a0, a0, 1
diff --git a/llvm/test/MC/RISCV/Relocations/bigendian-fixups.s b/llvm/test/MC/RISCV/Relocations/bigendian-fixups.s
@@ -0,0 +1,101 @@
+# RUN: llvm-mc --triple=riscv32be %s --show-encoding \
+# RUN:     | FileCheck --check-prefixes=CHECK-FIXUP,CHECK-ENCODING %s
+# RUN: llvm-mc --filetype=obj --triple=riscv32be %s \
+# RUN:     | llvm-objdump -d - | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-mc --filetype=obj --triple=riscv32be %s \
+# RUN:     | llvm-readobj -r - | FileCheck --check-prefix=CHECK-REL %s
+
+# RUN: llvm-mc --triple=riscv64be %s --show-encoding \
+# RUN:     | FileCheck --check-prefixes=CHECK-FIXUP,CHECK-ENCODING %s
+# RUN: llvm-mc --filetype=obj --triple=riscv64be %s \
+# RUN:     | llvm-objdump -d - | FileCheck --check-prefix=CHECK-INSTR %s
+# RUN: llvm-mc --filetype=obj --triple=riscv64be %s \
+# RUN:     | llvm-readobj -r - | FileCheck --check-prefix=CHECK-REL %s
+
+## Checks that fixups that can be resolved within the same object file are
+## applied correctly on big-endian RISC-V targets.
+##
+## This test verifies that RISC-V instructions remain little-endian even on
+## big-endian systems. This is a fundamental property of RISC-V:
+## - Instructions are always little-endian
+## - Data can be big-endian or little-endian depending on the system
+
+.LBB0:
+addi t0, t0, 1
+# CHECK-ENCODING: encoding: [0x93,0x82,0x12,0x00]
+# CHECK-INSTR: addi t0, t0, 0x1
+
+lui t1, %hi(val)
+# CHECK-ENCODING: encoding: [0x37,0bAAAA0011,A,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %hi(val), kind: fixup_riscv_hi20
+# CHECK-INSTR: lui t1, 0x12345
+
+lw a0, %lo(val)(t1)
+# CHECK-ENCODING: encoding: [0x03,0x25,0bAAAA0011,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %lo(val), kind: fixup_riscv_lo12_i
+# CHECK-INSTR: lw a0, 0x678(t1)
+
+addi a1, t1, %lo(val)
+# CHECK-ENCODING: encoding: [0x93,0x05,0bAAAA0011,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %lo(val), kind: fixup_riscv_lo12_i
+# CHECK-INSTR: addi a1, t1, 0x678
+
+sw a0, %lo(val)(t1)
+# CHECK-ENCODING: encoding: [0x23'A',0x20'A',0xa3'A',A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %lo(val), kind: fixup_riscv_lo12_s
+# CHECK-INSTR: sw a0, 0x678(t1)
+
+1:
+auipc t1, %pcrel_hi(.LBB0)
+# CHECK-ENCODING: encoding: [0x17,0bAAAA0011,A,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %pcrel_hi(.LBB0), kind: fixup_riscv_pcrel_hi20
+# CHECK-INSTR: auipc t1, 0
+
+addi t1, t1, %pcrel_lo(1b)
+# CHECK-ENCODING: encoding: [0x13,0x03,0bAAAA0011,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %pcrel_lo({{.*}}), kind: fixup_riscv_pcrel_lo12_i
+# CHECK-INSTR: addi t1, t1, -0x14
+
+sw t1, %pcrel_lo(1b)(t1)
+# CHECK-ENCODING: encoding: [0x23'A',0x20'A',0x63'A',A]
+# CHECK-FIXUP: fixup A - offset: 0, value: %pcrel_lo({{.*}}), kind: fixup_riscv_pcrel_lo12_s
+# CHECK-INSTR: sw t1, -0x14(t1)
+
+jal zero, .LBB0
+# CHECK-ENCODING: encoding: [0x6f,0bAAAA0000,A,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_riscv_jal
+# CHECK-INSTR: j 0x0 <.text>
+
+jal zero, .LBB2
+# CHECK-ENCODING: encoding: [0x6f,0bAAAA0000,A,A]
+# CHECK-FIXUP: fixup A - offset: 0, value: .LBB2, kind: fixup_riscv_jal
+# CHECK-INSTR: j 0x50d18 <.text+0x50d18>
+
+beq a0, a1, .LBB0
+# CHECK-ENCODING: encoding: [0x63'A',A,0xb5'A',A]
+# CHECK-FIXUP: fixup A - offset: 0, value: .LBB0, kind: fixup_riscv_branch
+# CHECK-INSTR: beq a0, a1, 0x0 <.text>
+
+blt a0, a1, .LBB1
+# CHECK-ENCODING: encoding: [0x63'A',0x40'A',0xb5'A',A]
+# CHECK-FIXUP: fixup A - offset: 0, value: .LBB1, kind: fixup_riscv_branch
+# CHECK-INSTR: blt a0, a1, 0x480 <.text+0x480>
+
+.fill 1104
+
+.LBB1:
+
+.fill 329876
+addi zero, zero, 0
+.LBB2:
+
+.set val, 0x12345678
+
+# CHECK-REL-NOT: R_RISCV
+
+.data
+.align 3
+data_label:
+  .word val  # On BE: 0x12345678 stored as [0x12, 0x34, 0x56, 0x78]
+  .long val  # On BE: 0x12345678 stored as [0x12, 0x34, 0x56, 0x78]
+  .quad val  # On BE: 0x0000000012345678 stored as [0x00, 0x00, 0x00, 0x00, 0x12, 0x34, 0x56, 0x78]