diff --git a/clang/docs/ClangOffloadPackager.rst b/clang/docs/ClangOffloadPackager.rst index 2b985e260e302..481069b5e4235 100644 --- a/clang/docs/ClangOffloadPackager.rst +++ b/clang/docs/ClangOffloadPackager.rst @@ -112,6 +112,8 @@ the following values for the :ref:`offload kind` and the +------------+-------+---------------------------------------+ | OFK_HIP | 0x03 | The producer was HIP | +------------+-------+---------------------------------------+ + | OFK_SYCL | 0x04 | The producer was SYCL | + +------------+-------+---------------------------------------+ The flags are used to signify certain conditions, such as the presence of debugging information or whether or not LTO was used. The string entry table is diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 0c77c2b34216a..921c9a11d2d32 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -2,6 +2,7 @@ // REQUIRES: x86-registered-target // REQUIRES: nvptx-registered-target // REQUIRES: amdgpu-registered-target +// REQUIRES: spirv-registered-target // An externally visible variable so static libraries extract. __attribute__((visibility("protected"), used)) int x; @@ -9,6 +10,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o // RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.nvptx.bc // RUN: %clang -cc1 %s -triple amdgcn-amd-amdhsa -emit-llvm-bc -o %t.amdgpu.bc +// RUN: %clang -cc1 %s -triple spirv64-unknown-unknown -emit-llvm-bc -o %t.spirv.bc // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \ @@ -49,6 +51,14 @@ __attribute__((visibility("protected"), used)) int x; // AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -flto {{.*}}-save-temps +// RUN: clang-offload-packager -o %t.out \ +// RUN: --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic +// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o -fembed-offload-object=%t.out +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ +// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=SPIRV-LINK + +// SPIRV-LINK: clang{{.*}} -o {{.*}}.img --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch= + // RUN: clang-offload-packager -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 52d922abbcaec..7c09ce8c9e59d 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -464,7 +464,8 @@ fatbinary(ArrayRef> InputFiles, } // namespace amdgcn namespace generic { -Expected clang(ArrayRef InputFiles, const ArgList &Args) { +Expected clang(ArrayRef InputFiles, const ArgList &Args, + bool HasSYCLOffloadKind = false) { llvm::TimeTraceScope TimeScope("Clang"); // Use `clang` to invoke the appropriate device tools. Expected ClangPath = @@ -554,6 +555,17 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { if (Args.hasArg(OPT_embed_bitcode)) CmdArgs.push_back("-Wl,--lto-emit-llvm"); + // For linking device code with the SYCL offload kind, special handling is + // required. Passing --sycl-link to clang results in a call to + // clang-sycl-linker. Additional linker flags required by clang-sycl-linker + // will be communicated via the -Xlinker option. + if (HasSYCLOffloadKind) { + CmdArgs.push_back("--sycl-link"); + CmdArgs.append( + {"-Xlinker", Args.MakeArgString("-triple=" + Triple.getTriple())}); + CmdArgs.append({"-Xlinker", Args.MakeArgString("-arch=" + Arch)}); + } + for (StringRef Arg : Args.getAllArgValues(OPT_linker_arg_EQ)) CmdArgs.append({"-Xlinker", Args.MakeArgString(Arg)}); for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ)) @@ -567,7 +579,8 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { } // namespace generic Expected linkDevice(ArrayRef InputFiles, - const ArgList &Args) { + const ArgList &Args, + bool HasSYCLOffloadKind = false) { const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); switch (Triple.getArch()) { case Triple::nvptx: @@ -582,7 +595,7 @@ Expected linkDevice(ArrayRef InputFiles, case Triple::spirv64: case Triple::systemz: case Triple::loongarch64: - return generic::clang(InputFiles, Args); + return generic::clang(InputFiles, Args, HasSYCLOffloadKind); default: return createStringError(Triple.getArchName() + " linking is not supported"); @@ -924,9 +937,20 @@ Expected> linkAndWrapDeviceFiles( auto LinkerArgs = getLinkerArgs(Input, BaseArgs); DenseSet ActiveOffloadKinds; - for (const auto &File : Input) + // Currently, SYCL device code linking process differs from generic device + // code linking. + // TODO: Remove check for offload kind, once SYCL device code linking is + // aligned with generic linking. + bool HasSYCLOffloadKind = false; + bool HasNonSYCLOffloadKind = false; + for (const auto &File : Input) { if (File.getBinary()->getOffloadKind() != OFK_None) ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); + if (File.getBinary()->getOffloadKind() == OFK_SYCL) + HasSYCLOffloadKind = true; + else + HasNonSYCLOffloadKind = true; + } // Write any remaining device inputs to an output file. SmallVector InputFiles; @@ -937,6 +961,37 @@ Expected> linkAndWrapDeviceFiles( InputFiles.emplace_back(*FileNameOrErr); } + if (HasSYCLOffloadKind) { + // Link the remaining device files using the device linker. + auto OutputOrErr = linkDevice(InputFiles, LinkerArgs, HasSYCLOffloadKind); + if (!OutputOrErr) + return OutputOrErr.takeError(); + // Output is a packaged object of device images. Unpackage the images and + // copy them to Images[Kind] + ErrorOr> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(*OutputOrErr); + if (std::error_code EC = BufferOrErr.getError()) + return createFileError(*OutputOrErr, EC); + + MemoryBufferRef Buffer = **BufferOrErr; + SmallVector Binaries; + if (Error Err = extractOffloadBinaries(Buffer, Binaries)) + return std::move(Err); + for (auto &OffloadFile : Binaries) { + auto TheBinary = OffloadFile.getBinary(); + OffloadingImage TheImage{}; + TheImage.TheImageKind = TheBinary->getImageKind(); + TheImage.TheOffloadKind = TheBinary->getOffloadKind(); + TheImage.StringData["triple"] = TheBinary->getTriple(); + TheImage.StringData["arch"] = TheBinary->getArch(); + TheImage.Image = MemoryBuffer::getMemBufferCopy(TheBinary->getImage()); + Images[OFK_SYCL].emplace_back(std::move(TheImage)); + } + } + + if (!HasNonSYCLOffloadKind) + return Error::success(); + // Link the remaining device files using the device linker. auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); if (!OutputOrErr) @@ -944,6 +999,9 @@ Expected> linkAndWrapDeviceFiles( // Store the offloading image for each linked output file. for (OffloadKind Kind : ActiveOffloadKinds) { + // For SYCL, Offloading images were created inside clang-sycl-linker + if (Kind == OFK_SYCL) + continue; llvm::ErrorOr> FileOrErr = llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr); if (std::error_code EC = FileOrErr.getError()) { @@ -986,6 +1044,11 @@ Expected> linkAndWrapDeviceFiles( A.StringData["arch"] > B.StringData["arch"] || A.TheOffloadKind < B.TheOffloadKind; }); + if (Kind == OFK_SYCL) { + // TODO: Update once SYCL offload wrapping logic is available. + reportError( + createStringError("SYCL offload wrapping logic is not available")); + } auto BundledImagesOrErr = bundleLinkedOutput(Input, Args, Kind); if (!BundledImagesOrErr) return BundledImagesOrErr.takeError(); diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index c640deddc9e74..7c0b5de6ecb13 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -70,6 +70,8 @@ static StringRef OutputFile; /// Directory to dump SPIR-V IR if requested by user. static SmallString<128> SPIRVDumpDir; +using OffloadingImage = OffloadBinary::OffloadingImage; + static void printVersion(raw_ostream &OS) { OS << clang::getClangToolFullVersion("clang-sycl-linker") << '\n'; } @@ -168,10 +170,10 @@ Expected> getInput(const ArgList &Args) { /// are LLVM IR bitcode files. // TODO: Support SPIR-V IR files. Expected> getBitcodeModule(StringRef File, - LLVMContext &C) { + LLVMContext &Ctx) { SMDiagnostic Err; - auto M = getLazyIRFileModule(File, Err, C); + auto M = getLazyIRFileModule(File, Err, Ctx); if (M) return std::move(M); return createStringError(Err.getMessage()); @@ -211,16 +213,16 @@ Expected> getSYCLDeviceLibs(const ArgList &Args) { /// 3. Link all the images gathered in Step 2 with the output of Step 1 using /// linkInModule API. LinkOnlyNeeded flag is used. Expected linkDeviceCode(ArrayRef InputFiles, - const ArgList &Args, LLVMContext &C) { + const ArgList &Args, LLVMContext &Ctx) { llvm::TimeTraceScope TimeScope("SYCL link device code"); assert(InputFiles.size() && "No inputs to link"); - auto LinkerOutput = std::make_unique("sycl-device-link", C); + auto LinkerOutput = std::make_unique("sycl-device-link", Ctx); Linker L(*LinkerOutput); // Link SYCL device input files. for (auto &File : InputFiles) { - auto ModOrErr = getBitcodeModule(File, C); + auto ModOrErr = getBitcodeModule(File, Ctx); if (!ModOrErr) return ModOrErr.takeError(); if (L.linkInModule(std::move(*ModOrErr))) @@ -235,7 +237,7 @@ Expected linkDeviceCode(ArrayRef InputFiles, // Link in SYCL device library files. const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); for (auto &File : *SYCLDeviceLibFiles) { - auto LibMod = getBitcodeModule(File, C); + auto LibMod = getBitcodeModule(File, Ctx); if (!LibMod) return LibMod.takeError(); if ((*LibMod)->getTargetTriple() == Triple) { @@ -278,18 +280,18 @@ Expected linkDeviceCode(ArrayRef InputFiles, /// Converts 'File' from LLVM bitcode to SPIR-V format using SPIR-V backend. /// 'Args' encompasses all arguments required for linking device code and will /// be parsed to generate options required to be passed into the backend. -static Expected runSPIRVCodeGen(StringRef File, const ArgList &Args, - LLVMContext &C) { +static Error runSPIRVCodeGen(StringRef File, const ArgList &Args, + StringRef OutputFile, LLVMContext &Ctx) { llvm::TimeTraceScope TimeScope("SPIR-V code generation"); // Parse input module. - SMDiagnostic Err; - std::unique_ptr M = parseIRFile(File, Err, C); + SMDiagnostic E; + std::unique_ptr M = parseIRFile(File, E, Ctx); if (!M) - return createStringError(Err.getMessage()); + return createStringError(E.getMessage()); if (Error Err = M->materializeAll()) - return std::move(Err); + return Err; Triple TargetTriple(Args.getLastArgValue(OPT_triple_EQ)); M->setTargetTriple(TargetTriple); @@ -333,7 +335,7 @@ static Expected runSPIRVCodeGen(StringRef File, const ArgList &Args, errs() << formatv("SPIR-V Backend: input: {0}, output: {1}\n", File, OutputFile); - return OutputFile; + return Error::success(); } /// Performs the following steps: @@ -342,17 +344,61 @@ static Expected runSPIRVCodeGen(StringRef File, const ArgList &Args, Error runSYCLLink(ArrayRef Files, const ArgList &Args) { llvm::TimeTraceScope TimeScope("SYCL device linking"); - LLVMContext C; + LLVMContext Ctx; // Link all input bitcode files and SYCL device library files, if any. - auto LinkedFile = linkDeviceCode(Files, Args, C); + auto LinkedFile = linkDeviceCode(Files, Args, Ctx); if (!LinkedFile) reportError(LinkedFile.takeError()); + // TODO: SYCL post link functionality involves device code splitting and will + // result in multiple bitcode codes. + // The following lines are placeholders to represent multiple files and will + // be refactored once SYCL post link support is available. + SmallVector SplitModules; + SplitModules.emplace_back(*LinkedFile); + // SPIR-V code generation step. - auto SPVFile = runSPIRVCodeGen(*LinkedFile, Args, C); - if (!SPVFile) - return SPVFile.takeError(); + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + auto Stem = OutputFile.rsplit('.').first; + std::string SPVFile(Stem); + SPVFile.append("_" + utostr(I) + ".spv"); + auto Err = runSPIRVCodeGen(SplitModules[I], Args, SPVFile, Ctx); + if (Err) + return std::move(Err); + SplitModules[I] = SPVFile; + } + + // Write the final output into file. + int FD = -1; + if (std::error_code EC = sys::fs::openFileForWrite(OutputFile, FD)) + return errorCodeToError(EC); + llvm::raw_fd_ostream FS(FD, /*shouldClose=*/true); + + for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { + auto File = SplitModules[I]; + llvm::ErrorOr> FileOrErr = + llvm::MemoryBuffer::getFileOrSTDIN(File); + if (std::error_code EC = FileOrErr.getError()) { + if (DryRun) + FileOrErr = MemoryBuffer::getMemBuffer(""); + else + return createFileError(File, EC); + } + OffloadingImage TheImage{}; + TheImage.TheImageKind = IMG_Object; + TheImage.TheOffloadKind = OFK_SYCL; + TheImage.StringData["triple"] = + Args.MakeArgString(Args.getLastArgValue(OPT_triple_EQ)); + TheImage.StringData["arch"] = + Args.MakeArgString(Args.getLastArgValue(OPT_arch_EQ)); + TheImage.Image = std::move(*FileOrErr); + + llvm::SmallString<0> Buffer = OffloadBinary::write(TheImage); + if (Buffer.size() % OffloadBinary::getAlignment() != 0) + return createStringError("Offload binary has invalid size alignment"); + FS << Buffer; + } return Error::success(); } @@ -394,7 +440,7 @@ int main(int argc, char **argv) { DryRun = Args.hasArg(OPT_dry_run); SaveTemps = Args.hasArg(OPT_save_temps); - OutputFile = "a.spv"; + OutputFile = "a.out"; if (Args.hasArg(OPT_o)) OutputFile = Args.getLastArgValue(OPT_o); diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h index c02aec8d956ed..b6e615740d72c 100644 --- a/llvm/include/llvm/Object/OffloadBinary.h +++ b/llvm/include/llvm/Object/OffloadBinary.h @@ -35,6 +35,7 @@ enum OffloadKind : uint16_t { OFK_OpenMP, OFK_Cuda, OFK_HIP, + OFK_SYCL, OFK_LAST, }; diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp index 56687c9acb653..3fff6b6a09e08 100644 --- a/llvm/lib/Object/OffloadBinary.cpp +++ b/llvm/lib/Object/OffloadBinary.cpp @@ -301,6 +301,7 @@ OffloadKind object::getOffloadKind(StringRef Name) { .Case("openmp", OFK_OpenMP) .Case("cuda", OFK_Cuda) .Case("hip", OFK_HIP) + .Case("sycl", OFK_SYCL) .Default(OFK_None); } @@ -312,6 +313,8 @@ StringRef object::getOffloadKindName(OffloadKind Kind) { return "cuda"; case OFK_HIP: return "hip"; + case OFK_SYCL: + return "sycl"; default: return "none"; }