diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp b/llvm/lib/ObjCopy/COFF/COFFObject.cpp index 5fa13391c908f..91cf7e32a7396 100644 --- a/llvm/lib/ObjCopy/COFF/COFFObject.cpp +++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp @@ -18,6 +18,8 @@ using namespace object; void Object::addSymbols(ArrayRef NewSymbols) { for (Symbol S : NewSymbols) { S.UniqueId = NextSymbolUniqueId++; + S.OriginalRawIndex = NextSymbolOriginalIndex; + NextSymbolOriginalIndex += 1 + S.Sym.NumberOfAuxSymbols; Symbols.emplace_back(S); } updateSymbols(); diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h b/llvm/lib/ObjCopy/COFF/COFFObject.h index cdd1f17fc6055..6b70add1bb1b7 100644 --- a/llvm/lib/ObjCopy/COFF/COFFObject.h +++ b/llvm/lib/ObjCopy/COFF/COFFObject.h @@ -89,6 +89,7 @@ struct Symbol { std::optional WeakTargetSymbolId; size_t UniqueId; size_t RawIndex; + size_t OriginalRawIndex; bool Referenced; }; @@ -140,6 +141,7 @@ struct Object { DenseMap SymbolMap; size_t NextSymbolUniqueId = 0; + size_t NextSymbolOriginalIndex = 0; std::vector
Sections; DenseMap SectionMap; diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp index 350c4aec572c9..fed67d67f13a7 100644 --- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp +++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp @@ -12,6 +12,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/COFF.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" #include "llvm/Support/ErrorHandling.h" #include @@ -92,6 +94,77 @@ Error COFFWriter::finalizeSymbolContents() { return Error::success(); } +Error COFFWriter::finalizeSymIdxContents() { + // CFGuards shouldn't be present in PE. + if (Obj.IsPE) + return Error::success(); + + // Currently handle only sections consisting only of .symidx. + // TODO: other sections such as .impcall and .hybmp$x require more complex + // handling as they have more complex layout. + auto IsSymIdxSection = [](StringRef Name) { + return Name == ".gljmp$y" || Name == ".giats$y" || Name == ".gfids$y" || + Name == ".gehcont$y"; + }; + + DenseMap SymIdMap; + SmallDenseMap SecIdMap; + for (Symbol &Sym : Obj.getMutableSymbols()) { + SymIdMap[Sym.OriginalRawIndex] = Sym.RawIndex; + + // We collect only definition symbols of the sections to update the + // checksums. + if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC && + Sym.Sym.NumberOfAuxSymbols == 1 && Sym.Sym.Value == 0 && + IsSymIdxSection(Sym.Name)) + SecIdMap[Sym.TargetSectionId] = + reinterpret_cast( + Sym.AuxData[0].Opaque); + } + + for (Section &Sec : Obj.getMutableSections()) { + if (!IsSymIdxSection(Sec.Name)) + continue; + + ArrayRef RawIds = Sec.getContents(); + // Nothing to do and also the checksum will be -1 instead of 0 if we + // recalculate it on empty input. + if (RawIds.size() == 0) + continue; + + auto SecDefIt = SecIdMap.find(Sec.UniqueId); + if (SecDefIt == SecIdMap.end()) + return createStringError(object_error::invalid_symbol_index, + "section '%s' does not have the corresponding " + "symbol or the symbol has unexpected format", + Sec.Name.str().c_str()); + + // Create updated content. + ArrayRef Ids( + reinterpret_cast(RawIds.data()), + RawIds.size() / 4); + std::vector NewIds; + for (support::ulittle32_t Id : Ids) { + auto SymIdIt = SymIdMap.find(Id); + if (SymIdIt == SymIdMap.end()) + return createStringError(object_error::invalid_symbol_index, + "section '%s' contains a .symidx (%d) that is " + "incorrect or was stripped", + Sec.Name.str().c_str(), Id.value()); + NewIds.push_back(support::ulittle32_t(SymIdIt->getSecond())); + } + ArrayRef NewRawIds(reinterpret_cast(NewIds.data()), + RawIds.size()); + // Update the checksum. + JamCRC JC(/*Init=*/0); + JC.update(NewRawIds); + SecDefIt->getSecond()->CheckSum = JC.getCRC(); + // Set new content. + Sec.setOwnedContents(NewRawIds.vec()); + } + return Error::success(); +} + void COFFWriter::layoutSections() { for (auto &S : Obj.getMutableSections()) { if (S.Header.SizeOfRawData > 0) @@ -183,6 +256,8 @@ Error COFFWriter::finalize(bool IsBigObj) { return E; if (Error E = finalizeSymbolContents()) return E; + if (Error E = finalizeSymIdxContents()) + return E; size_t SizeOfHeaders = 0; FileAlignment = 1; diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.h b/llvm/lib/ObjCopy/COFF/COFFWriter.h index b7dca69e9a81a..66d7f01c87f18 100644 --- a/llvm/lib/ObjCopy/COFF/COFFWriter.h +++ b/llvm/lib/ObjCopy/COFF/COFFWriter.h @@ -34,6 +34,7 @@ class COFFWriter { template std::pair finalizeSymbolTable(); Error finalizeRelocTargets(); Error finalizeSymbolContents(); + Error finalizeSymIdxContents(); void layoutSections(); Expected finalizeStringTable(); diff --git a/llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test b/llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test new file mode 100644 index 0000000000000..2b01116800091 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test @@ -0,0 +1,188 @@ +## Test that we bail out if a section consisting of symidx is invalid. + +## In this case, the symbol .gfids$y is not present at all. +# RUN: yaml2obj %s --docnum=1 -o %t1.in.o +# RUN: not llvm-objcopy --strip-debug %t1.in.o %t1.out.o 2>&1 | FileCheck %s --check-prefix=ERROR-NOSYM -DFILE=%t1.out.o + +# ERROR-NOSYM: error: '[[FILE]]': section '.gfids$y' does not have the corresponding symbol or the symbol has unexpected format + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + - Name: '.gfids$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '04000000' + SizeOfRawData: 8 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 4 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 1 + - Name: foo + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... + +## In this case, the symbol .giats$y has a non-zero offset. +# RUN: yaml2obj %s --docnum=2 -o %t2.in.o +# RUN: not llvm-objcopy --strip-debug %t2.in.o %t2.out.o 2>&1 | FileCheck %s --check-prefix=ERROR-OFFSET -DFILE=%t2.out.o + +# ERROR-OFFSET: error: '[[FILE]]': section '.giats$y' does not have the corresponding symbol or the symbol has unexpected format + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + - Name: '.giats$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '0600000010000000' + SizeOfRawData: 8 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 1 + - Name: '.giats$y' + Value: 42 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 1167279533 + Number: 5 + - Name: foo + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... + +## In this case, the symbol .gljmp$y has a non-static storage class. +# RUN: yaml2obj %s --docnum=3 -o %t3.in.o +# RUN: not llvm-objcopy --strip-debug %t3.in.o %t3.out.o 2>&1 | FileCheck %s --check-prefix=ERROR-EXTERNAL -DFILE=%t3.out.o + +# ERROR-EXTERNAL: error: '[[FILE]]': section '.gljmp$y' does not have the corresponding symbol or the symbol has unexpected format + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + - Name: '.gljmp$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '0600000010000000' + SizeOfRawData: 8 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 1 + - Name: '.gljmp$y' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: foo + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... + +## In this case, .gfids$y contains a symbol index that is not present in the +## symbol table. Generally the behavior should be the same for every section consisting +## of .symidx directives, e.g .giats$y, .gljmp$y and .gehcont$y. +# RUN: yaml2obj %s --docnum=4 -o %t4.in.o +# RUN: not llvm-objcopy --strip-debug %t4.in.o %t4.out.o 2>&1 | FileCheck %s --check-prefix=ERROR-SYMIDX -DFILE=%t4.out.o + +# ERROR-SYMIDX: error: '[[FILE]]': section '.gfids$y' contains a .symidx (16) that is incorrect or was stripped +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + - Name: '.gfids$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '0400000010000000' + SizeOfRawData: 8 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 1 + - Name: '.gfids$y' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 1167279533 + Number: 5 + - Name: foo + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +... diff --git a/llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test b/llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test new file mode 100644 index 0000000000000..04ec26afb644d --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test @@ -0,0 +1,173 @@ +## Check sections consisting only of .symidx directives. The test checks that +## indices in the sections are updated after stripping as the symbol table could +## be changed during stripping. +# RUN: yaml2obj %s -o %t.in.o + +# RUN: llvm-objcopy --strip-debug %t.in.o %t.out.o +# RUN: llvm-readobj -s -x '.gehcont$y' -x '.gfids$y' -x '.giats$y' -x '.gljmp$y' %t.out.o | FileCheck %s + +# CHECK: Symbols [ +# CHECK: Name: .text +# CHECK: Name: .gehcont$y +# CHECK: AuxSectionDef { +# CHECK: Checksum: 0x82EA2D2 +# CHECK: } +# CHECK: Name: $ehgcr_0_1 +# CHECK: Name: .gfids$y +# CHECK: AuxSectionDef { +# CHECK: Checksum: 0xAF00C48B +# CHECK: } +# CHECK: Name: .giats$y +# CHECK: AuxSectionDef { +# CHECK: Checksum: 0x4AD6BFB8 +# CHECK: } +# CHECK: Name: .gljmp$y +# CHECK: AuxSectionDef { +# CHECK: Checksum: 0xD457699C +# CHECK: } +# CHECK: Name: foo +# CHECK: ] + +# CHECK: Hex dump of section '.gehcont$y': +# CHECK-NEXT: 0x00000000 04000000 04000000 04000000 + +# CHECK: Hex dump of section '.gfids$y': +# CHECK-NEXT: 0x00000000 0b000000 0d000000 + +# CHECK: Hex dump of section '.giats$y': +# CHECK-NEXT: 0x00000000 0c000000 + +# CHECK: Hex dump of section '.gljmp$y': +# CHECK-NEXT: 0x00000000 0b000000 0c000000 0d000000 + + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: .text + Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ] + - Name: '.debug$S' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: 04000000F100000044656275672073656374696F6E20746F20626520737472697070656400 + SizeOfRawData: 37 + - Name: '.gehcont$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '060000000600000006000000' + SizeOfRawData: 12 + - Name: '.gfids$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '0d0000000f000000' + SizeOfRawData: 8 + - Name: '.giats$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '0e000000' + SizeOfRawData: 4 + - Name: '.gljmp$y' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ] + Alignment: 4 + SectionData: '0d0000000e0000000f000000' + SizeOfRawData: 12 +symbols: + - Name: .text + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 0 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0 + Number: 1 + - Name: '.debug$S' + Value: 0 + SectionNumber: 2 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 37 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 820498156 + Number: 2 + - Name: '.gehcont$y' + Value: 0 + SectionNumber: 3 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 12 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0x30E7CEEC + Number: 3 + - Name: '$ehgcr_0_1' + Value: 0 + SectionNumber: 1 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + - Name: '.gfids$y' + Value: 0 + SectionNumber: 4 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0x459345AD + Number: 4 + - Name: '.giats$y' + Value: 0 + SectionNumber: 5 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 8 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0x31852256 + Number: 5 + - Name: '.gljmp$y' + Value: 0 + SectionNumber: 6 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_STATIC + SectionDefinition: + Length: 16 + NumberOfRelocations: 0 + NumberOfLinenumbers: 0 + CheckSum: 0xC608680B + Number: 6 + - Name: foo + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: bar + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL + - Name: baz + Value: 0 + SectionNumber: 0 + SimpleType: IMAGE_SYM_TYPE_NULL + ComplexType: IMAGE_SYM_DTYPE_NULL + StorageClass: IMAGE_SYM_CLASS_EXTERNAL +...