From bb26778e5c66be5ad3923ade9587a33db84681b1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 5 Oct 2025 23:35:20 -0700 Subject: [PATCH] [RISCV][GISel] Force atomic G_LOAD/STORE to the GPR register bank. We don't have FPR isel patterns for G_LOAD/STORE so force to the GPR register bank. --- .../RISCV/GISel/RISCVRegisterBankInfo.cpp | 8 + .../RISCV/GlobalISel/atomic-load-store-fp.ll | 950 ++++++++++++++++++ 2 files changed, 958 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 597dd1271d394..9f9ae2f5c6dc6 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -324,6 +324,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = GPRValueMapping; + // Atomics always use GPR destinations. Don't refine any further. + if (cast(MI).isAtomic()) + break; + // Use FPR64 for s64 loads on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget().hasStdExtD()); @@ -358,6 +362,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = GPRValueMapping; + // Atomics always use GPR sources. Don't refine any further. + if (cast(MI).isAtomic()) + break; + // Use FPR64 for s64 stores on rv32. if (GPRSize == 32 && Size.getFixedValue() == 64) { assert(MF.getSubtarget().hasStdExtD()); diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll new file mode 100644 index 0000000000000..4ad2d2cc3e845 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll @@ -0,0 +1,950 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-WMO %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV32IA,RV32IA-TSO %s +; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-WMO %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso,+no-trailing-seq-cst-fence \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=RV64IA,RV64IA-TSO %s + + +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO-TRAILING-FENCE %s + +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO-TRAILING-FENCE %s +; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s + + +define float @atomic_load_f32_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: fmv.w.x fa0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: fmv.w.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic float, ptr %a unordered, align 4 + ret float %1 +} + +define float @atomic_load_f32_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: fmv.w.x fa0, a0 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: fmv.w.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic float, ptr %a monotonic, align 4 + ret float %1 +} + +define float @atomic_load_f32_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_f32_acquire: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_f32_acquire: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f32_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f32_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic float, ptr %a acquire, align 4 + ret float %1 +} + +define float @atomic_load_f32_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_4 +; RV32I-NEXT: fmv.w.x fa0, a0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_load_f32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, rw +; RV32IA-WMO-NEXT: lw a0, 0(a0) +; RV32IA-WMO-NEXT: fence r, rw +; RV32IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_load_f32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: lw a0, 0(a0) +; RV32IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_load_f32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: lw a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: lw a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic float, ptr %a seq_cst, align 4 + ret float %1 +} + +define double @atomic_load_f64_unordered(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: fmv.d.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic double, ptr %a unordered, align 8 + ret double %1 +} + +define double @atomic_load_f64_monotonic(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 0 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_load_f64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: ld a0, 0(a0) +; RV64IA-NEXT: fmv.d.x fa0, a0 +; RV64IA-NEXT: ret + %1 = load atomic double, ptr %a monotonic, align 8 + ret double %1 +} + +define double @atomic_load_f64_acquire(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_acquire: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 2 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_acquire: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 2 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_acquire: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f64_acquire: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f64_acquire: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_acquire: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic double, ptr %a acquire, align 8 + ret double %1 +} + +define double @atomic_load_f64_seq_cst(ptr %a) nounwind { +; RV32I-LABEL: atomic_load_f64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a1, 5 +; RV32I-NEXT: call __atomic_load_8 +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: fld fa0, 0(sp) +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_load_f64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: li a1, 5 +; RV32IA-NEXT: call __atomic_load_8 +; RV32IA-NEXT: sw a0, 0(sp) +; RV32IA-NEXT: sw a1, 4(sp) +; RV32IA-NEXT: fld fa0, 0(sp) +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_load_f64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 5 +; RV64I-NEXT: call __atomic_load_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_load_f64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, rw +; RV64IA-WMO-NEXT: ld a0, 0(a0) +; RV64IA-WMO-NEXT: fence r, rw +; RV64IA-WMO-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_load_f64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ld a0, 0(a0) +; RV64IA-TSO-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence r, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_load_f64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + %1 = load atomic double, ptr %a seq_cst, align 8 + ret double %1 +} + +define void @atomic_store_f32_unordered(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f32_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: fmv.x.w a1, fa0 +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f32_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.w a1, fa0 +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic float %b, ptr %a unordered, align 4 + ret void +} + +define void @atomic_store_f32_monotonic(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f32_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: fmv.x.w a1, fa0 +; RV32IA-NEXT: sw a1, 0(a0) +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f32_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.w a1, fa0 +; RV64IA-NEXT: sw a1, 0(a0) +; RV64IA-NEXT: ret + store atomic float %b, ptr %a monotonic, align 4 + ret void +} + +define void @atomic_store_f32_release(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 3 +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_f32_release: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_f32_release: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f32_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f32_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic float %b, ptr %a release, align 4 + ret void +} + +define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind { +; RV32I-LABEL: atomic_store_f32_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: li a2, 5 +; RV32I-NEXT: fmv.x.w a1, fa0 +; RV32I-NEXT: call __atomic_store_4 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-WMO-LABEL: atomic_store_f32_seq_cst: +; RV32IA-WMO: # %bb.0: +; RV32IA-WMO-NEXT: fence rw, w +; RV32IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-NEXT: sw a1, 0(a0) +; RV32IA-WMO-NEXT: ret +; +; RV32IA-TSO-LABEL: atomic_store_f32_seq_cst: +; RV32IA-TSO: # %bb.0: +; RV32IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-NEXT: sw a1, 0(a0) +; RV32IA-TSO-NEXT: fence rw, rw +; RV32IA-TSO-NEXT: ret +; +; RV64I-LABEL: atomic_store_f32_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_4 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f32_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-NEXT: sw a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f32_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-NEXT: sw a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV32IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV32IA-WMO-TRAILING-FENCE: # %bb.0: +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV32IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV32IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV32IA-TSO-TRAILING-FENCE: # %bb.0: +; RV32IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV32IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV32IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV32IA-TSO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f32_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic float %b, ptr %a seq_cst, align 4 + ret void +} + +define void @atomic_store_f64_unordered(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_unordered: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_unordered: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_unordered: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f64_unordered: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.d a1, fa0 +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic double %b, ptr %a unordered, align 8 + ret void +} + +define void @atomic_store_f64_monotonic(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_monotonic: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 0 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_monotonic: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 0 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_monotonic: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-LABEL: atomic_store_f64_monotonic: +; RV64IA: # %bb.0: +; RV64IA-NEXT: fmv.x.d a1, fa0 +; RV64IA-NEXT: sd a1, 0(a0) +; RV64IA-NEXT: ret + store atomic double %b, ptr %a monotonic, align 8 + ret void +} + +define void @atomic_store_f64_release(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_release: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 3 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_release: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 3 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_release: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 3 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f64_release: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f64_release: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_release: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_release: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic double %b, ptr %a release, align 8 + ret void +} + +define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind { +; RV32I-LABEL: atomic_store_f64_seq_cst: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: fsd fa0, 0(sp) +; RV32I-NEXT: lw a1, 0(sp) +; RV32I-NEXT: lw a2, 4(sp) +; RV32I-NEXT: li a3, 5 +; RV32I-NEXT: call __atomic_store_8 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IA-LABEL: atomic_store_f64_seq_cst: +; RV32IA: # %bb.0: +; RV32IA-NEXT: addi sp, sp, -16 +; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IA-NEXT: fsd fa0, 0(sp) +; RV32IA-NEXT: lw a1, 0(sp) +; RV32IA-NEXT: lw a2, 4(sp) +; RV32IA-NEXT: li a3, 5 +; RV32IA-NEXT: call __atomic_store_8 +; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IA-NEXT: addi sp, sp, 16 +; RV32IA-NEXT: ret +; +; RV64I-LABEL: atomic_store_f64_seq_cst: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a2, 5 +; RV64I-NEXT: call __atomic_store_8 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IA-WMO-LABEL: atomic_store_f64_seq_cst: +; RV64IA-WMO: # %bb.0: +; RV64IA-WMO-NEXT: fence rw, w +; RV64IA-WMO-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-NEXT: sd a1, 0(a0) +; RV64IA-WMO-NEXT: ret +; +; RV64IA-TSO-LABEL: atomic_store_f64_seq_cst: +; RV64IA-TSO: # %bb.0: +; RV64IA-TSO-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-NEXT: sd a1, 0(a0) +; RV64IA-TSO-NEXT: fence rw, rw +; RV64IA-TSO-NEXT: ret +; +; RV64IA-WMO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst: +; RV64IA-WMO-TRAILING-FENCE: # %bb.0: +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, w +; RV64IA-WMO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-WMO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-WMO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-WMO-TRAILING-FENCE-NEXT: ret +; +; RV64IA-TSO-TRAILING-FENCE-LABEL: atomic_store_f64_seq_cst: +; RV64IA-TSO-TRAILING-FENCE: # %bb.0: +; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0 +; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0) +; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw +; RV64IA-TSO-TRAILING-FENCE-NEXT: ret + store atomic double %b, ptr %a seq_cst, align 8 + ret void +}