@@ -149,11 +149,33 @@ static cl::opt<bool> ClMemProfMatchHotColdNew(
149149 " Match allocation profiles onto existing hot/cold operator new calls" ),
150150 cl::Hidden, cl::init(false ));
151151
// Hidden, off-by-default option. When it is set, the matching code records
// each allocation context it matches, and the pass prints one line per
// recorded context (allocation type, context id, total profiled size, and
// match status) to errs() once all functions have been processed.
152+ static cl::opt<bool >
153+ ClPrintMemProfMatchInfo (" memprof-print-match-info" ,
154+ cl::desc (" Print matching stats for each allocation "
155+ " context in this module's profiles" ),
156+ cl::Hidden, cl::init(false ));
157+
158+ // Instrumentation statistics
// Counts of memory reads/writes that were instrumented, and of stack
// reads/writes that were left uninstrumented.
152159STATISTIC (NumInstrumentedReads, " Number of instrumented reads" );
153160STATISTIC (NumInstrumentedWrites, " Number of instrumented writes" );
154161STATISTIC (NumSkippedStackReads, " Number of non-instrumented stack reads" );
155162STATISTIC (NumSkippedStackWrites, " Number of non-instrumented stack writes" );
163+
164+ // Matching statistics
// Counters updated while memory profiles are read and matched against the IR:
//  - NumOfMemProfMismatch: bumped when the profile lookup for a function
//    reports a hash mismatch.
//  - NumOfMemProfFunc: bumped once per function that gets past the profile
//    lookup error handling.
//  - NumOfMemProfAllocContextProfiles / NumOfMemProfCallSiteProfiles: bumped
//    once per entry in the function record's AllocSites / CallSites.
//  - NumOfMemProfMatchedAllocContexts: bumped once per profiled allocation
//    context whose frames match a call's inlined call stack.
//  - NumOfMemProfMatchedAllocs: bumped once per call for which at least one
//    context was matched.
//  - NumOfMemProfMatchedCallSites: bumped once per instruction that receives
//    callsite metadata from a matching profiled call site.
156165STATISTIC (NumOfMemProfMissing, " Number of functions without memory profile." );
166+ STATISTIC (NumOfMemProfMismatch,
167+ " Number of functions having mismatched memory profile hash." );
168+ STATISTIC (NumOfMemProfFunc, " Number of functions having valid memory profile." );
169+ STATISTIC (NumOfMemProfAllocContextProfiles,
170+ " Number of alloc contexts in memory profile." );
171+ STATISTIC (NumOfMemProfCallSiteProfiles,
172+ " Number of callsites in memory profile." );
173+ STATISTIC (NumOfMemProfMatchedAllocContexts,
174+ " Number of matched memory profile alloc contexts." );
175+ STATISTIC (NumOfMemProfMatchedAllocs,
176+ " Number of matched memory profile allocs." );
177+ STATISTIC (NumOfMemProfMatchedCallSites,
178+ " Number of matched memory profile callsites." );
157179
158180namespace {
159181
@@ -637,15 +659,30 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
637659 return computeStackId (Frame.Function , Frame.LineOffset , Frame.Column );
638660}
639661
640- static void addCallStack (CallStackTrie &AllocTrie,
641- const AllocationInfo *AllocInfo) {
662+ // Helper to generate a single hash id for a given callstack, used for emitting
663+ // matching statistics and useful for uniquing such statistics across modules.
664+ static uint64_t
665+ computeFullStackId (const SmallVectorImpl<memprof::Frame> &CallStack) {
666+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8 >, llvm::endianness::little>
667+ HashBuilder;
668+ for (auto &F : CallStack)
669+ HashBuilder.add (F.Function , F.LineOffset , F.Column );
670+ llvm::BLAKE3Result<8 > Hash = HashBuilder.final ();
671+ uint64_t Id;
672+ std::memcpy (&Id, Hash.data (), sizeof (Hash));
673+ return Id;
674+ }
675+
676+ static AllocationType addCallStack (CallStackTrie &AllocTrie,
677+ const AllocationInfo *AllocInfo) {
642678 SmallVector<uint64_t > StackIds;
643679 for (const auto &StackFrame : AllocInfo->CallStack )
644680 StackIds.push_back (computeStackId (StackFrame));
645681 auto AllocType = getAllocType (AllocInfo->Info .getTotalLifetimeAccessDensity (),
646682 AllocInfo->Info .getAllocCount (),
647683 AllocInfo->Info .getTotalLifetime ());
648684 AllocTrie.addCallStack (AllocType, StackIds);
685+ return AllocType;
649686}
650687
651688// Helper to compare the InlinedCallStack computed from an instruction's debug
@@ -701,9 +738,16 @@ static bool isNewWithHotColdVariant(Function *Callee,
701738 }
702739}
703740
704- static void readMemprof (Module &M, Function &F,
705- IndexedInstrProfReader *MemProfReader,
706- const TargetLibraryInfo &TLI) {
741+ struct AllocMatchInfo {
742+ uint64_t TotalSize = 0 ;
743+ AllocationType AllocType = AllocationType::None;
744+ bool Matched = false ;
745+ };
746+
747+ static void
748+ readMemprof (Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
749+ const TargetLibraryInfo &TLI,
750+ std::map<uint64_t , AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
707751 auto &Ctx = M.getContext ();
708752 // Previously we used getIRPGOFuncName() here. If F is local linkage,
709753 // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -727,6 +771,7 @@ static void readMemprof(Module &M, Function &F,
727771 SkipWarning = !PGOWarnMissing;
728772 LLVM_DEBUG (dbgs () << " unknown function" );
729773 } else if (Err == instrprof_error::hash_mismatch) {
774+ NumOfMemProfMismatch++;
730775 SkipWarning =
731776 NoPGOWarnMismatch ||
732777 (NoPGOWarnMismatchComdatWeak &&
@@ -748,6 +793,8 @@ static void readMemprof(Module &M, Function &F,
748793 return ;
749794 }
750795
796+ NumOfMemProfFunc++;
797+
751798 // Detect if there are non-zero column numbers in the profile. If not,
752799 // treat all column numbers as 0 when matching (i.e. ignore any non-zero
753800 // columns in the IR). The profiled binary might have been built with
@@ -762,6 +809,7 @@ static void readMemprof(Module &M, Function &F,
762809 std::map<uint64_t , std::set<std::pair<const std::vector<Frame> *, unsigned >>>
763810 LocHashToCallSites;
764811 for (auto &AI : MemProfRec->AllocSites ) {
812+ NumOfMemProfAllocContextProfiles++;
765813 // Associate the allocation info with the leaf frame. The later matching
766814 // code will match any inlined call sequences in the IR with a longer prefix
767815 // of call stack frames.
@@ -770,6 +818,7 @@ static void readMemprof(Module &M, Function &F,
770818 ProfileHasColumns |= AI.CallStack [0 ].Column ;
771819 }
772820 for (auto &CS : MemProfRec->CallSites ) {
821+ NumOfMemProfCallSiteProfiles++;
773822 // Need to record all frames from leaf up to and including this function,
774823 // as any of these may or may not have been inlined at this point.
775824 unsigned Idx = 0 ;
@@ -863,13 +912,23 @@ static void readMemprof(Module &M, Function &F,
863912 // If we found and thus matched all frames on the call, include
864913 // this MIB.
865914 if (stackFrameIncludesInlinedCallStack (AllocInfo->CallStack ,
866- InlinedCallStack))
867- addCallStack (AllocTrie, AllocInfo);
915+ InlinedCallStack)) {
916+ NumOfMemProfMatchedAllocContexts++;
917+ auto AllocType = addCallStack (AllocTrie, AllocInfo);
918+ // Record information about the allocation if match info printing
919+ // was requested.
920+ if (ClPrintMemProfMatchInfo) {
921+ auto FullStackId = computeFullStackId (AllocInfo->CallStack );
922+ FullStackIdToAllocMatchInfo[FullStackId] = {
923+ AllocInfo->Info .getTotalSize (), AllocType, /* Matched=*/ true };
924+ }
925+ }
868926 }
869927 // We might not have matched any to the full inlined call stack.
870928 // But if we did, create and attach metadata, or a function attribute if
871929 // all contexts have identical profiled behavior.
872930 if (!AllocTrie.empty ()) {
931+ NumOfMemProfMatchedAllocs++;
873932 // MemprofMDAttached will be false if a function attribute was
874933 // attached.
875934 bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata (CI);
@@ -897,6 +956,7 @@ static void readMemprof(Module &M, Function &F,
897956 // attach call stack metadata.
898957 if (stackFrameIncludesInlinedCallStack (
899958 *CallStackIdx.first , InlinedCallStack, CallStackIdx.second )) {
959+ NumOfMemProfMatchedCallSites++;
900960 addCallsiteMetadata (I, InlinedCallStack, Ctx);
901961 // Only need to find one with a matching call stack and add a single
902962 // callsite metadata.
@@ -942,12 +1002,25 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
9421002
9431003 auto &FAM = AM.getResult <FunctionAnalysisManagerModuleProxy>(M).getManager ();
9441004
1005+ // Map from the stack hash of each allocation context in the function
1006+ // profiles to the total profiled size (bytes), allocation type, and whether
1007+ // we matched it to an allocation in the IR.
1008+ std::map<uint64_t , AllocMatchInfo> FullStackIdToAllocMatchInfo;
1009+
9451010 for (auto &F : M) {
9461011 if (F.isDeclaration ())
9471012 continue ;
9481013
9491014 const TargetLibraryInfo &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
950- readMemprof (M, F, MemProfReader.get (), TLI);
1015+ readMemprof (M, F, MemProfReader.get (), TLI, FullStackIdToAllocMatchInfo);
1016+ }
1017+
1018+ if (ClPrintMemProfMatchInfo) {
1019+ for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
1020+ errs () << " MemProf " << getAllocTypeAttributeString (Info.AllocType )
1021+ << " context with id " << Id << " has total profiled size "
1022+ << Info.TotalSize << (Info.Matched ? " is" : " not" )
1023+ << " matched\n " ;
9511024 }
9521025
9531026 return PreservedAnalyses::none ();
0 commit comments