From 845c1c97e774f76be079af4ed4462abb1ff14a87 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Mon, 6 Oct 2025 09:37:51 +0100 Subject: [PATCH 1/6] [llvm][Dwarf] Add LanguageDescription API that accounts for version Currently `llvm::dwarf::LanguageDescription` returns a stringified `DW_LNAME`. It would be useful to have an API that returns the language name for a particular `DW_LNAME_`/version pair. LLDB's use case is that it wants to display a human readable description of the language we got from debug-info in diagnostics. We could maintain a side-table in LLDB but thought this might generally be useful to live next to the `LanguageDescription` API. --- llvm/include/llvm/BinaryFormat/Dwarf.h | 5 ++ llvm/lib/BinaryFormat/Dwarf.cpp | 111 +++++++++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index ba74ab9515a75..dea8b485027fe 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -500,8 +500,13 @@ toDW_LNAME(SourceLanguage language) { return {}; } +/// Returns a version-independent language name. LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName name); +/// Returns a language name corresponding to the specified version. 
+LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName Name, + uint32_t Version); + inline bool isCPlusPlus(SourceLanguage S) { bool result = false; // Deliberately enumerate all the language options so we get a warning when diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp index 9690ff9107df8..e48df7035e1e7 100644 --- a/llvm/lib/BinaryFormat/Dwarf.cpp +++ b/llvm/lib/BinaryFormat/Dwarf.cpp @@ -472,6 +472,117 @@ StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName lname) { return "Unknown"; } +StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName Name, + uint32_t Version) { + switch (Name) { + // YYYY + case DW_LNAME_Ada: { + if (Version <= 1983) + return "Ada 83"; + if (Version <= 1995) + return "Ada 95"; + if (Version <= 2005) + return "Ada 2005"; + if (Version <= 2012) + return "Ada 2012"; + } break; + + case DW_LNAME_Cobol: { + if (Version <= 1974) + return "COBOL-74"; + if (Version <= 1985) + return "COBOL-85"; + } break; + + case DW_LNAME_Fortran: { + if (Version <= 1977) + return "FORTRAN 77"; + if (Version <= 1990) + return "FORTRAN 90"; + if (Version <= 1995) + return "Fortran 95"; + if (Version <= 2003) + return "Fortran 2003"; + if (Version <= 2008) + return "Fortran 2008"; + if (Version <= 2018) + return "Fortran 2018"; + } break; + + // YYYYMM + case DW_LNAME_C: { + if (Version == 0) + return "K&R C"; + if (Version <= 198912) + return "C89"; + if (Version <= 199901) + return "C99"; + if (Version <= 201112) + return "C11"; + if (Version <= 201710) + return "C17"; + } break; + + case DW_LNAME_C_plus_plus: { + if (Version == 0) + break; + if (Version <= 199711) + return "C++98"; + if (Version <= 200310) + return "C++03"; + if (Version <= 201103) + return "C++11"; + if (Version <= 201402) + return "C++14"; + if (Version <= 201703) + return "C++17"; + if (Version <= 202002) + return "C++20"; + } break; + + case DW_LNAME_ObjC_plus_plus: + case DW_LNAME_ObjC: + case DW_LNAME_Move: + case 
DW_LNAME_SYCL: + case DW_LNAME_BLISS: + case DW_LNAME_Crystal: + case DW_LNAME_D: + case DW_LNAME_Dylan: + case DW_LNAME_Go: + case DW_LNAME_Haskell: + case DW_LNAME_HLSL: + case DW_LNAME_Java: + case DW_LNAME_Julia: + case DW_LNAME_Kotlin: + case DW_LNAME_Modula2: + case DW_LNAME_Modula3: + case DW_LNAME_OCaml: + case DW_LNAME_OpenCL_C: + case DW_LNAME_Pascal: + case DW_LNAME_PLI: + case DW_LNAME_Python: + case DW_LNAME_RenderScript: + case DW_LNAME_Rust: + case DW_LNAME_Swift: + case DW_LNAME_UPC: + case DW_LNAME_Zig: + case DW_LNAME_Assembly: + case DW_LNAME_C_sharp: + case DW_LNAME_Mojo: + case DW_LNAME_GLSL: + case DW_LNAME_GLSL_ES: + case DW_LNAME_OpenCL_CPP: + case DW_LNAME_CPP_for_OpenCL: + case DW_LNAME_Ruby: + case DW_LNAME_Hylo: + case DW_LNAME_Metal: + break; + } + + // Fallback to un-versioned name. + return LanguageDescription(Name); +} + llvm::StringRef llvm::dwarf::SourceLanguageNameString(SourceLanguageName Lang) { switch (Lang) { #define HANDLE_DW_LNAME(ID, NAME, DESC, LOWER_BOUND) \ From 335d824a418a4918efba7b0e1056a7217a0785d7 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Mon, 6 Oct 2025 09:46:51 +0100 Subject: [PATCH 2/6] fixup! expand docs --- llvm/include/llvm/BinaryFormat/Dwarf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h index dea8b485027fe..815e85ddd7a92 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.h +++ b/llvm/include/llvm/BinaryFormat/Dwarf.h @@ -504,6 +504,8 @@ toDW_LNAME(SourceLanguage language) { LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName name); /// Returns a language name corresponding to the specified version. +/// If the version is not recognized for the specified language, returns +/// the version-independent name. 
LLVM_ABI llvm::StringRef LanguageDescription(SourceLanguageName Name, uint32_t Version); From 427784dde0a1a1f926f6589c292a622a4ffb7c77 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Mon, 6 Oct 2025 09:59:36 +0100 Subject: [PATCH 3/6] fixup! handle C version 0 --- llvm/lib/BinaryFormat/Dwarf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp index e48df7035e1e7..969047a8e5b5c 100644 --- a/llvm/lib/BinaryFormat/Dwarf.cpp +++ b/llvm/lib/BinaryFormat/Dwarf.cpp @@ -512,7 +512,7 @@ StringRef llvm::dwarf::LanguageDescription(dwarf::SourceLanguageName Name, // YYYYMM case DW_LNAME_C: { if (Version == 0) - return "K&R C"; + break; if (Version <= 198912) return "C89"; if (Version <= 199901) From 1c62bcc3eea2f4da3899d622abd06f0d91548cdf Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Mon, 6 Oct 2025 18:15:21 +0100 Subject: [PATCH 4/6] fixup! unittests --- .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index 373a58d259af5..ae7960e948801 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -2276,4 +2276,46 @@ TEST(DWARFDebugInfo, TestDWARF64UnitLength) { }); } +TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) { + // Tests for the llvm::dwarf::LanguageDescription API that + // takes a name *and* a version. + + // Unknown language. + EXPECT_EQ( + llvm::dwarf::LanguageDescription(static_cast(0)), + "Unknown"); + + // Test that specifying an invalid version falls back to a valid language name + // regardless. + EXPECT_EQ(llvm::dwarf::LanguageDescription( + static_cast(DW_LNAME_ObjC), 0), + "Objective C"); + EXPECT_EQ(llvm::dwarf::LanguageDescription( + static_cast(DW_LNAME_Julia), 0), + "Julia"); + + // Check some versions. 
+ EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711), + "C++98"); + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402), + "C++14"); + + // Versions round up. + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400), + "C++14"); + + // Version 0 for C and C++ is an unversioned name. + EXPECT_EQ(llvm::dwarf::LanguageDescription( + static_cast(DW_LNAME_C), 0), + "C (K&R and ISO)"); + EXPECT_EQ(llvm::dwarf::LanguageDescription( + static_cast(DW_LNAME_C_plus_plus), 0), + "ISO C++"); + + // Version 0 for other versioned languages may not be the unversioned name. + EXPECT_EQ(llvm::dwarf::LanguageDescription( + static_cast(DW_LNAME_Fortran), 0), + "FORTRAN 77"); +} + } // end anonymous namespace From 924d1118c223706287a3284e74eaf497e8112cce Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 8 Oct 2025 08:43:01 +0100 Subject: [PATCH 5/6] fixup! move tests, remove redundant casts --- llvm/unittests/BinaryFormat/DwarfTest.cpp | 38 ++++++++++++++++ .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 43 ------------------- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/llvm/unittests/BinaryFormat/DwarfTest.cpp b/llvm/unittests/BinaryFormat/DwarfTest.cpp index 1162eb709aa83..f4519f61adf85 100644 --- a/llvm/unittests/BinaryFormat/DwarfTest.cpp +++ b/llvm/unittests/BinaryFormat/DwarfTest.cpp @@ -254,4 +254,42 @@ TEST(DwarfTest, lname_SourceLanguageNameString) { EXPECT_EQ(SourceLanguageNameString(DW_LNAME_##NAME), xstr(DW_LNAME_##NAME)); #include "llvm/BinaryFormat/Dwarf.def" } + +TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) { + // Tests for the llvm::dwarf::LanguageDescription API that + // takes a name *and* a version. + + // Unknown language. 
+ EXPECT_EQ( + llvm::dwarf::LanguageDescription(static_cast(0)), + "Unknown"); + + EXPECT_EQ( + llvm::dwarf::LanguageDescription(static_cast(0), 0), + "Unknown"); + + // Test that specifying an invalid version falls back to a valid language name + // regardless. + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_ObjC, 0), "Objective C"); + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_Julia, 0), "Julia"); + + // Check some versions. + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711), + "C++98"); + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402), + "C++14"); + + // Versions round up. + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400), + "C++14"); + + // Version 0 for C and C++ is an unversioned name. + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C, 0), "C (K&R and ISO)"); + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 0), + "ISO C++"); + + // Version 0 for other versioned languages may not be the unversioned name. + EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_Fortran, 0), + "FORTRAN 77"); +} } // end namespace diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index ae7960e948801..49773196129a8 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -2275,47 +2275,4 @@ TEST(DWARFDebugInfo, TestDWARF64UnitLength) { ASSERT_EQ(0x1122334455667788ULL, CU.getLength()); }); } - -TEST(DWARFDebugInfo, TestLanguageDescription_Versioned) { - // Tests for the llvm::dwarf::LanguageDescription API that - // takes a name *and* a version. - - // Unknown language. - EXPECT_EQ( - llvm::dwarf::LanguageDescription(static_cast(0)), - "Unknown"); - - // Test that specifying an invalid version falls back to a valid language name - // regardless. 
- EXPECT_EQ(llvm::dwarf::LanguageDescription( - static_cast(DW_LNAME_ObjC), 0), - "Objective C"); - EXPECT_EQ(llvm::dwarf::LanguageDescription( - static_cast(DW_LNAME_Julia), 0), - "Julia"); - - // Check some versions. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 199711), - "C++98"); - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201402), - "C++14"); - - // Versions round up. - EXPECT_EQ(llvm::dwarf::LanguageDescription(DW_LNAME_C_plus_plus, 201400), - "C++14"); - - // Version 0 for C and C++ is an unversioned name. - EXPECT_EQ(llvm::dwarf::LanguageDescription( - static_cast(DW_LNAME_C), 0), - "C (K&R and ISO)"); - EXPECT_EQ(llvm::dwarf::LanguageDescription( - static_cast(DW_LNAME_C_plus_plus), 0), - "ISO C++"); - - // Version 0 for other versioned languages may not be the unversioned name. - EXPECT_EQ(llvm::dwarf::LanguageDescription( - static_cast(DW_LNAME_Fortran), 0), - "FORTRAN 77"); -} - } // end anonymous namespace From 70e3ed1094cb73bb2d2852b4a9e296e5358675db Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 8 Oct 2025 17:55:49 +0100 Subject: [PATCH 6/6] fixup! add back newline --- llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index 49773196129a8..373a58d259af5 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -2275,4 +2275,5 @@ TEST(DWARFDebugInfo, TestDWARF64UnitLength) { ASSERT_EQ(0x1122334455667788ULL, CU.getLength()); }); } + } // end anonymous namespace