|
38 | 38 | #include "clang/AST/TypeOrdering.h" |
39 | 39 | #include "clang/Basic/BitmaskEnum.h" |
40 | 40 | #include "clang/Basic/Builtins.h" |
| 41 | +#include "clang/Basic/Cuda.h" |
41 | 42 | #include "clang/Basic/DarwinSDKInfo.h" |
42 | 43 | #include "clang/Basic/ExpressionTraits.h" |
43 | 44 | #include "clang/Basic/Module.h" |
@@ -183,6 +184,7 @@ class Preprocessor; |
183 | 184 | class PseudoDestructorTypeStorage; |
184 | 185 | class PseudoObjectExpr; |
185 | 186 | class QualType; |
| 187 | +class SemaCUDA; |
186 | 188 | class SemaHLSL; |
187 | 189 | class SemaOpenACC; |
188 | 190 | class SemaSYCL; |
@@ -435,14 +437,6 @@ enum class CXXSpecialMemberKind { |
435 | 437 | Invalid |
436 | 438 | }; |
437 | 439 |
|
438 | | -enum class CUDAFunctionTarget { |
439 | | - Device, |
440 | | - Global, |
441 | | - Host, |
442 | | - HostDevice, |
443 | | - InvalidTarget |
444 | | -}; |
445 | | - |
446 | 440 | /// Sema - This implements semantic analysis and AST building for C. |
447 | 441 | /// \nosubgrouping |
448 | 442 | class Sema final : public SemaBase { |
@@ -486,8 +480,7 @@ class Sema final : public SemaBase { |
486 | 480 | // 35. Code Completion (SemaCodeComplete.cpp) |
487 | 481 | // 36. FixIt Helpers (SemaFixItUtils.cpp) |
488 | 482 | // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp) |
489 | | - // 38. CUDA (SemaCUDA.cpp) |
490 | | - // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp) |
| 483 | + // 38. OpenMP Directives and Clauses (SemaOpenMP.cpp) |
491 | 484 |
|
492 | 485 | /// \name Semantic Analysis |
493 | 486 | /// Implementations are in Sema.cpp |
@@ -981,9 +974,19 @@ class Sema final : public SemaBase { |
981 | 974 | return DelayedDiagnostics.push(pool); |
982 | 975 | } |
983 | 976 |
|
| 977 | + /// Diagnostics that are emitted only if we discover that the given function |
| 978 | + /// must be codegen'ed. Because handling these correctly adds overhead to |
| 979 | + /// compilation, this is currently only enabled for CUDA compilations. |
| 980 | + SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags; |
| 981 | + |
984 | 982 | /// CurContext - This is the current declaration context of parsing. |
985 | 983 | DeclContext *CurContext; |
986 | 984 |
|
| 985 | + SemaCUDA &CUDA() { |
| 986 | + assert(CUDAPtr); |
| 987 | + return *CUDAPtr; |
| 988 | + } |
| 989 | + |
987 | 990 | SemaHLSL &HLSL() { |
988 | 991 | assert(HLSLPtr); |
989 | 992 | return *HLSLPtr; |
@@ -1029,6 +1032,7 @@ class Sema final : public SemaBase { |
1029 | 1032 |
|
1030 | 1033 | mutable IdentifierInfo *Ident_super; |
1031 | 1034 |
|
| 1035 | + std::unique_ptr<SemaCUDA> CUDAPtr; |
1032 | 1036 | std::unique_ptr<SemaHLSL> HLSLPtr; |
1033 | 1037 | std::unique_ptr<SemaOpenACC> OpenACCPtr; |
1034 | 1038 | std::unique_ptr<SemaSYCL> SYCLPtr; |
@@ -12908,258 +12912,6 @@ class Sema final : public SemaBase { |
12908 | 12912 | // |
12909 | 12913 | // |
12910 | 12914 |
|
12911 | | - /// \name CUDA |
12912 | | - /// Implementations are in SemaCUDA.cpp |
12913 | | - ///@{ |
12914 | | - |
12915 | | -public: |
12916 | | - /// Increments our count of the number of times we've seen a pragma forcing |
12917 | | - /// functions to be __host__ __device__. So long as this count is greater |
12918 | | - /// than zero, all functions encountered will be __host__ __device__. |
12919 | | - void PushForceCUDAHostDevice(); |
12920 | | - |
12921 | | - /// Decrements our count of the number of times we've seen a pragma forcing |
12922 | | - /// functions to be __host__ __device__. Returns false if the count is 0 |
12923 | | - /// before incrementing, so you can emit an error. |
12924 | | - bool PopForceCUDAHostDevice(); |
12925 | | - |
12926 | | - ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
12927 | | - MultiExprArg ExecConfig, |
12928 | | - SourceLocation GGGLoc); |
12929 | | - |
12930 | | - /// Diagnostics that are emitted only if we discover that the given function |
12931 | | - /// must be codegen'ed. Because handling these correctly adds overhead to |
12932 | | - /// compilation, this is currently only enabled for CUDA compilations. |
12933 | | - SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags; |
12934 | | - |
12935 | | - /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the |
12936 | | - /// key in a hashtable, both the FD and location are hashed. |
12937 | | - struct FunctionDeclAndLoc { |
12938 | | - CanonicalDeclPtr<const FunctionDecl> FD; |
12939 | | - SourceLocation Loc; |
12940 | | - }; |
12941 | | - |
12942 | | - /// FunctionDecls and SourceLocations for which CheckCUDACall has emitted a |
12943 | | - /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the |
12944 | | - /// same deferred diag twice. |
12945 | | - llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags; |
12946 | | - |
12947 | | - /// An inverse call graph, mapping known-emitted functions to one of their |
12948 | | - /// known-emitted callers (plus the location of the call). |
12949 | | - /// |
12950 | | - /// Functions that we can tell a priori must be emitted aren't added to this |
12951 | | - /// map. |
12952 | | - llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>, |
12953 | | - /* Caller = */ FunctionDeclAndLoc> |
12954 | | - DeviceKnownEmittedFns; |
12955 | | - |
12956 | | - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
12957 | | - /// context is "used as device code". |
12958 | | - /// |
12959 | | - /// - If CurContext is a __host__ function, does not emit any diagnostics |
12960 | | - /// unless \p EmitOnBothSides is true. |
12961 | | - /// - If CurContext is a __device__ or __global__ function, emits the |
12962 | | - /// diagnostics immediately. |
12963 | | - /// - If CurContext is a __host__ __device__ function and we are compiling for |
12964 | | - /// the device, creates a diagnostic which is emitted if and when we realize |
12965 | | - /// that the function will be codegen'ed. |
12966 | | - /// |
12967 | | - /// Example usage: |
12968 | | - /// |
12969 | | - /// // Variable-length arrays are not allowed in CUDA device code. |
12970 | | - /// if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) |
12971 | | - /// << llvm::to_underlying(CurrentCUDATarget())) |
12972 | | - /// return ExprError(); |
12973 | | - /// // Otherwise, continue parsing as normal. |
12974 | | - SemaDiagnosticBuilder CUDADiagIfDeviceCode(SourceLocation Loc, |
12975 | | - unsigned DiagID); |
12976 | | - |
12977 | | - /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current |
12978 | | - /// context is "used as host code". |
12979 | | - /// |
12980 | | - /// Same as CUDADiagIfDeviceCode, with "host" and "device" switched. |
12981 | | - SemaDiagnosticBuilder CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID); |
12982 | | - |
12983 | | - /// Determines whether the given function is a CUDA device/host/kernel/etc. |
12984 | | - /// function. |
12985 | | - /// |
12986 | | - /// Use this rather than examining the function's attributes yourself -- you |
12987 | | - /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null. |
12988 | | - CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D, |
12989 | | - bool IgnoreImplicitHDAttr = false); |
12990 | | - CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs); |
12991 | | - |
12992 | | - enum CUDAVariableTarget { |
12993 | | - CVT_Device, /// Emitted on device side with a shadow variable on host side |
12994 | | - CVT_Host, /// Emitted on host side only |
12995 | | - CVT_Both, /// Emitted on both sides with different addresses |
12996 | | - CVT_Unified, /// Emitted as a unified address, e.g. managed variables |
12997 | | - }; |
12998 | | - /// Determines whether the given variable is emitted on host or device side. |
12999 | | - CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D); |
13000 | | - |
13001 | | - /// Defines kinds of CUDA global host/device context where a function may be |
13002 | | - /// called. |
13003 | | - enum CUDATargetContextKind { |
13004 | | - CTCK_Unknown, /// Unknown context |
13005 | | - CTCK_InitGlobalVar, /// Function called during global variable |
13006 | | - /// initialization |
13007 | | - }; |
13008 | | - |
13009 | | - /// Define the current global CUDA host/device context where a function may be |
13010 | | - /// called. Only used when a function is called outside of any functions. |
13011 | | - struct CUDATargetContext { |
13012 | | - CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice; |
13013 | | - CUDATargetContextKind Kind = CTCK_Unknown; |
13014 | | - Decl *D = nullptr; |
13015 | | - } CurCUDATargetCtx; |
13016 | | - |
13017 | | - struct CUDATargetContextRAII { |
13018 | | - Sema &S; |
13019 | | - CUDATargetContext SavedCtx; |
13020 | | - CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D); |
13021 | | - ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; } |
13022 | | - }; |
13023 | | - |
13024 | | - /// Gets the CUDA target for the current context. |
13025 | | - CUDAFunctionTarget CurrentCUDATarget() { |
13026 | | - return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext)); |
13027 | | - } |
13028 | | - |
13029 | | - static bool isCUDAImplicitHostDeviceFunction(const FunctionDecl *D); |
13030 | | - |
13031 | | - // CUDA function call preference. Must be ordered numerically from |
13032 | | - // worst to best. |
13033 | | - enum CUDAFunctionPreference { |
13034 | | - CFP_Never, // Invalid caller/callee combination. |
13035 | | - CFP_WrongSide, // Calls from host-device to host or device |
13036 | | - // function that do not match current compilation |
13037 | | - // mode. |
13038 | | - CFP_HostDevice, // Any calls to host/device functions. |
13039 | | - CFP_SameSide, // Calls from host-device to host or device |
13040 | | - // function matching current compilation mode. |
13041 | | - CFP_Native, // host-to-host or device-to-device calls. |
13042 | | - }; |
13043 | | - |
13044 | | - /// Identifies relative preference of a given Caller/Callee |
13045 | | - /// combination, based on their host/device attributes. |
13046 | | - /// \param Caller function which needs address of \p Callee. |
13047 | | - /// nullptr in case of global context. |
13048 | | - /// \param Callee target function |
13049 | | - /// |
13050 | | - /// \returns preference value for particular Caller/Callee combination. |
13051 | | - CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller, |
13052 | | - const FunctionDecl *Callee); |
13053 | | - |
13054 | | - /// Determines whether Caller may invoke Callee, based on their CUDA |
13055 | | - /// host/device attributes. Returns false if the call is not allowed. |
13056 | | - /// |
13057 | | - /// Note: Will return true for CFP_WrongSide calls. These may appear in |
13058 | | - /// semantically correct CUDA programs, but only if they're never codegen'ed. |
13059 | | - bool IsAllowedCUDACall(const FunctionDecl *Caller, |
13060 | | - const FunctionDecl *Callee) { |
13061 | | - return IdentifyCUDAPreference(Caller, Callee) != CFP_Never; |
13062 | | - } |
13063 | | - |
13064 | | - /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD, |
13065 | | - /// depending on FD and the current compilation settings. |
13066 | | - void maybeAddCUDAHostDeviceAttrs(FunctionDecl *FD, |
13067 | | - const LookupResult &Previous); |
13068 | | - |
13069 | | - /// May add implicit CUDAConstantAttr attribute to VD, depending on VD |
13070 | | - /// and current compilation settings. |
13071 | | - void MaybeAddCUDAConstantAttr(VarDecl *VD); |
13072 | | - |
13073 | | - /// Check whether we're allowed to call Callee from the current context. |
13074 | | - /// |
13075 | | - /// - If the call is never allowed in a semantically-correct program |
13076 | | - /// (CFP_Never), emits an error and returns false. |
13077 | | - /// |
13078 | | - /// - If the call is allowed in semantically-correct programs, but only if |
13079 | | - /// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to |
13080 | | - /// be emitted if and when the caller is codegen'ed, and returns true. |
13081 | | - /// |
13082 | | - /// Will only create deferred diagnostics for a given SourceLocation once, |
13083 | | - /// so you can safely call this multiple times without generating duplicate |
13084 | | - /// deferred errors. |
13085 | | - /// |
13086 | | - /// - Otherwise, returns true without emitting any diagnostics. |
13087 | | - bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee); |
13088 | | - |
13089 | | - void CUDACheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture); |
13090 | | - |
13091 | | - /// Set __device__ or __host__ __device__ attributes on the given lambda |
13092 | | - /// operator() method. |
13093 | | - /// |
13094 | | - /// CUDA lambdas by default is host device function unless it has explicit |
13095 | | - /// host or device attribute. |
13096 | | - void CUDASetLambdaAttrs(CXXMethodDecl *Method); |
13097 | | - |
13098 | | - /// Record \p FD if it is a CUDA/HIP implicit host device function used on |
13099 | | - /// device side in device compilation. |
13100 | | - void CUDARecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD); |
13101 | | - |
13102 | | - /// Finds a function in \p Matches with highest calling priority |
13103 | | - /// from \p Caller context and erases all functions with lower |
13104 | | - /// calling priority. |
13105 | | - void EraseUnwantedCUDAMatches( |
13106 | | - const FunctionDecl *Caller, |
13107 | | - SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches); |
13108 | | - |
13109 | | - /// Given a implicit special member, infer its CUDA target from the |
13110 | | - /// calls it needs to make to underlying base/field special members. |
13111 | | - /// \param ClassDecl the class for which the member is being created. |
13112 | | - /// \param CSM the kind of special member. |
13113 | | - /// \param MemberDecl the special member itself. |
13114 | | - /// \param ConstRHS true if this is a copy operation with a const object on |
13115 | | - /// its RHS. |
13116 | | - /// \param Diagnose true if this call should emit diagnostics. |
13117 | | - /// \return true if there was an error inferring. |
13118 | | - /// The result of this call is implicit CUDA target attribute(s) attached to |
13119 | | - /// the member declaration. |
13120 | | - bool inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
13121 | | - CXXSpecialMemberKind CSM, |
13122 | | - CXXMethodDecl *MemberDecl, |
13123 | | - bool ConstRHS, bool Diagnose); |
13124 | | - |
13125 | | - /// \return true if \p CD can be considered empty according to CUDA |
13126 | | - /// (E.2.3.1 in CUDA 7.5 Programming guide). |
13127 | | - bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD); |
13128 | | - bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD); |
13129 | | - |
13130 | | - // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In |
13131 | | - // case of error emits appropriate diagnostic and invalidates \p Var. |
13132 | | - // |
13133 | | - // \details CUDA allows only empty constructors as initializers for global |
13134 | | - // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
13135 | | - // __shared__ variables whether they are local or not (they all are implicitly |
13136 | | - // static in CUDA). One exception is that CUDA allows constant initializers |
13137 | | - // for __constant__ and __device__ variables. |
13138 | | - void checkAllowedCUDAInitializer(VarDecl *VD); |
13139 | | - |
13140 | | - /// Check whether NewFD is a valid overload for CUDA. Emits |
13141 | | - /// diagnostics and invalidates NewFD if not. |
13142 | | - void checkCUDATargetOverload(FunctionDecl *NewFD, |
13143 | | - const LookupResult &Previous); |
13144 | | - /// Copies target attributes from the template TD to the function FD. |
13145 | | - void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD); |
13146 | | - |
13147 | | - /// Returns the name of the launch configuration function. This is the name |
13148 | | - /// of the function that will be called to configure kernel call, with the |
13149 | | - /// parameters specified via <<<>>>. |
13150 | | - std::string getCudaConfigureFuncName() const; |
13151 | | - |
13152 | | -private: |
13153 | | - unsigned ForceCUDAHostDeviceDepth = 0; |
13154 | | - |
13155 | | - ///@} |
13156 | | - |
13157 | | - // |
13158 | | - // |
13159 | | - // ------------------------------------------------------------------------- |
13160 | | - // |
13161 | | - // |
13162 | | - |
13163 | 12915 | /// \name OpenMP Directives and Clauses |
13164 | 12916 | /// Implementations are in SemaOpenMP.cpp |
13165 | 12917 | ///@{ |
@@ -14546,32 +14298,4 @@ std::unique_ptr<sema::RISCVIntrinsicManager> |
14546 | 14298 | CreateRISCVIntrinsicManager(Sema &S); |
14547 | 14299 | } // end namespace clang |
14548 | 14300 |
|
14549 | | -namespace llvm { |
14550 | | -// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its |
14551 | | -// SourceLocation. |
14552 | | -template <> struct DenseMapInfo<clang::Sema::FunctionDeclAndLoc> { |
14553 | | - using FunctionDeclAndLoc = clang::Sema::FunctionDeclAndLoc; |
14554 | | - using FDBaseInfo = |
14555 | | - DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>; |
14556 | | - |
14557 | | - static FunctionDeclAndLoc getEmptyKey() { |
14558 | | - return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()}; |
14559 | | - } |
14560 | | - |
14561 | | - static FunctionDeclAndLoc getTombstoneKey() { |
14562 | | - return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()}; |
14563 | | - } |
14564 | | - |
14565 | | - static unsigned getHashValue(const FunctionDeclAndLoc &FDL) { |
14566 | | - return hash_combine(FDBaseInfo::getHashValue(FDL.FD), |
14567 | | - FDL.Loc.getHashValue()); |
14568 | | - } |
14569 | | - |
14570 | | - static bool isEqual(const FunctionDeclAndLoc &LHS, |
14571 | | - const FunctionDeclAndLoc &RHS) { |
14572 | | - return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc; |
14573 | | - } |
14574 | | -}; |
14575 | | -} // namespace llvm |
14576 | | - |
14577 | 14301 | #endif |
0 commit comments