@@ -3702,6 +3702,19 @@ template <typename AttrT> static bool hasImplicitAttr(const ValueDecl *D) {
37023702 return D->isImplicit ();
37033703}
37043704
3705+ bool CodeGenModule::shouldEmitCUDAGlobalVar (const VarDecl *Global) const {
3706+ assert (LangOpts.CUDA && " Should not be called by non-CUDA languages" );
3707+ // We need to emit host-side 'shadows' for all global
3708+ // device-side variables because the CUDA runtime needs their
3709+ // size and host-side address in order to provide access to
3710+ // their device-side incarnations.
3711+ return !LangOpts.CUDAIsDevice || Global->hasAttr <CUDADeviceAttr>() ||
3712+ Global->hasAttr <CUDAConstantAttr>() ||
3713+ Global->hasAttr <CUDASharedAttr>() ||
3714+ Global->getType ()->isCUDADeviceBuiltinSurfaceType () ||
3715+ Global->getType ()->isCUDADeviceBuiltinTextureType ();
3716+ }
3717+
37053718void CodeGenModule::EmitGlobal (GlobalDecl GD) {
37063719 const auto *Global = cast<ValueDecl>(GD.getDecl ());
37073720
@@ -3726,36 +3739,27 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
37263739 // Non-constexpr non-lambda implicit host device functions are not emitted
37273740 // unless they are used on device side.
37283741 if (LangOpts.CUDA ) {
3729- if (LangOpts.CUDAIsDevice ) {
3742+ assert ((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3743+ " Expected Variable or Function" );
3744+ if (const auto *VD = dyn_cast<VarDecl>(Global)) {
3745+ if (!shouldEmitCUDAGlobalVar (VD))
3746+ return ;
3747+ } else if (LangOpts.CUDAIsDevice ) {
37303748 const auto *FD = dyn_cast<FunctionDecl>(Global);
37313749 if ((!Global->hasAttr <CUDADeviceAttr>() ||
3732- (LangOpts.OffloadImplicitHostDeviceTemplates && FD &&
3750+ (LangOpts.OffloadImplicitHostDeviceTemplates &&
37333751 hasImplicitAttr<CUDAHostAttr>(FD) &&
37343752 hasImplicitAttr<CUDADeviceAttr>(FD) && !FD->isConstexpr () &&
37353753 !isLambdaCallOperator (FD) &&
37363754 !getContext ().CUDAImplicitHostDeviceFunUsedByDevice .count (FD))) &&
37373755 !Global->hasAttr <CUDAGlobalAttr>() &&
3738- !Global->hasAttr <CUDAConstantAttr>() &&
3739- !Global->hasAttr <CUDASharedAttr>() &&
3740- !Global->getType ()->isCUDADeviceBuiltinSurfaceType () &&
3741- !Global->getType ()->isCUDADeviceBuiltinTextureType () &&
37423756 !(LangOpts.HIPStdPar && isa<FunctionDecl>(Global) &&
37433757 !Global->hasAttr <CUDAHostAttr>()))
37443758 return ;
3745- } else {
3746- // We need to emit host-side 'shadows' for all global
3747- // device-side variables because the CUDA runtime needs their
3748- // size and host-side address in order to provide access to
3749- // their device-side incarnations.
3750-
3751- // So device-only functions are the only things we skip.
3752- if (isa<FunctionDecl>(Global) && !Global->hasAttr <CUDAHostAttr>() &&
3753- Global->hasAttr <CUDADeviceAttr>())
3754- return ;
3755-
3756- assert ((isa<FunctionDecl>(Global) || isa<VarDecl>(Global)) &&
3757- " Expected Variable or Function" );
3758- }
3759+ // Device-only functions are the only things we skip.
3760+ } else if (!Global->hasAttr <CUDAHostAttr>() &&
3761+ Global->hasAttr <CUDADeviceAttr>())
3762+ return ;
37593763 }
37603764
37613765 if (LangOpts.OpenMP ) {
0 commit comments