@@ -136,7 +136,11 @@ void jl_dump_llvm_opt_impl(void *s)
136136 **jl_ExecutionEngine->get_dump_llvm_opt_stream () = (JL_STREAM*)s;
137137}
138138
139- static void jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
139+ static int jl_add_to_ee (
140+ orc::ThreadSafeModule &M,
141+ const StringMap<orc::ThreadSafeModule*> &NewExports,
142+ DenseMap<orc::ThreadSafeModule*, int > &Queued,
143+ std::vector<orc::ThreadSafeModule*> &Stack);
140144static void jl_decorate_module (Module &M);
141145static uint64_t getAddressForFunction (StringRef fname);
142146
@@ -228,10 +232,13 @@ static jl_callptr_t _jl_compile_codeinst(
228232 }
229233 }
230234 }
235+ DenseMap<orc::ThreadSafeModule*, int > Queued;
236+ std::vector<orc::ThreadSafeModule*> Stack;
231237 for (auto &def : emitted) {
232238 // Add the results to the execution engine now
233239 orc::ThreadSafeModule &M = std::get<0 >(def.second );
234- jl_add_to_ee (M, NewExports);
240+ jl_add_to_ee (M, NewExports, Queued, Stack);
241+ assert (Queued.empty () && Stack.empty () && !M);
235242 }
236243 ++CompiledCodeinsts;
237244 MaxWorkqueueSize.updateMax (emitted.size ());
@@ -1700,76 +1707,70 @@ static void jl_decorate_module(Module &M) {
17001707#endif
17011708}
17021709
1710+ // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable: a module's index is its 1-based position on Stack
17031711static int jl_add_to_ee (
17041712 orc::ThreadSafeModule &M,
1705- StringMap<orc::ThreadSafeModule*> &NewExports,
1713+ const StringMap<orc::ThreadSafeModule*> &NewExports,
17061714 DenseMap<orc::ThreadSafeModule*, int > &Queued,
1707- std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1708- int depth)
1715+ std::vector<orc::ThreadSafeModule*> &Stack)
17091716{
1710- // DAG-sort (post-dominator) the compile to compute the minimum
1711- // merge-module sets for linkage
1717+ // First check if the TSM is empty (already compiled)
17121718 if (!M)
17131719 return 0 ;
1714- // First check and record if it's on the stack somewhere
1720+ // Next check and record if it is on the stack somewhere
17151721 {
1716- auto &Cycle = Queued[&M];
1717- if (Cycle )
1718- return Cycle ;
1719- ToMerge .push_back ({} );
1720- Cycle = depth ;
1722+ auto &Id = Queued[&M];
1723+ if (Id )
1724+ return Id ;
1725+ Stack .push_back (&M );
1726+ Id = Stack. size () ;
17211727 }
1728+ // Finally work out the SCC
1729+ int depth = Stack.size ();
17221730 int MergeUp = depth;
1723- // Compute the cycle-id
1731+ std::vector<orc::ThreadSafeModule*> Children;
17241732 M.withModuleDo ([&](Module &m) {
17251733 for (auto &F : m.global_objects ()) {
17261734 if (F.isDeclaration () && F.getLinkage () == GlobalValue::ExternalLinkage) {
17271735 auto Callee = NewExports.find (F.getName ());
17281736 if (Callee != NewExports.end ()) {
1729- auto &CM = Callee->second ;
1730- int Down = jl_add_to_ee (*CM, NewExports, Queued, ToMerge, depth + 1 );
1731- assert (Down <= depth);
1732- if (Down && Down < MergeUp)
1733- MergeUp = Down;
1737+ auto *CM = Callee->second ;
1738+ if (*CM && CM != &M) {
1739+ auto Down = Queued.find (CM);
1740+ if (Down != Queued.end ())
1741+ MergeUp = std::min (MergeUp, Down->second );
1742+ else
1743+ Children.push_back (CM);
1744+ }
17341745 }
17351746 }
17361747 }
17371748 });
1738- if (MergeUp == depth) {
1739- // Not in a cycle (or at the top of it)
1740- Queued.erase (&M);
1741- for (auto &CM : ToMerge.at (depth - 1 )) {
1742- assert (Queued.find (CM)->second == depth);
1743- Queued.erase (CM);
1744- jl_merge_module (M, std::move (*CM));
1745- }
1746- jl_ExecutionEngine->addModule (std::move (M));
1747- MergeUp = 0 ;
1749+ assert (MergeUp > 0 );
1750+ for (auto *CM : Children) {
1751+ int Down = jl_add_to_ee (*CM, NewExports, Queued, Stack);
1752+ assert (Down <= (int )Stack.size ());
1753+ if (Down)
1754+ MergeUp = std::min (MergeUp, Down);
17481755 }
1749- else {
1750- // Add our frame(s) to the top of the cycle
1751- Queued[&M] = MergeUp;
1752- auto &Top = ToMerge.at (MergeUp - 1 );
1753- Top.push_back (&M);
1754- for (auto &CM : ToMerge.at (depth - 1 )) {
1755- assert (Queued.find (CM)->second == depth);
1756- Queued[CM] = MergeUp;
1757- Top.push_back (CM);
1756+ if (MergeUp < depth)
1757+ return MergeUp;
1758+ while (1 ) {
1759+ // Not in a cycle (or at the top of it)
1760+ // remove SCC state and merge every CM from the cycle into M
1761+ orc::ThreadSafeModule *CM = Stack.back ();
1762+ Stack.pop_back ();
1763+ Queued.erase (CM);
1764+ if ((int )Stack.size () < depth) {
1765+ assert (&M == CM);
1766+ break ;
17581767 }
1768+ jl_merge_module (M, std::move (*CM));
17591769 }
1760- ToMerge.pop_back ();
1761- return MergeUp;
1762- }
1763-
1764- static void jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1765- {
1766- DenseMap<orc::ThreadSafeModule*, int > Queued;
1767- std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1768- jl_add_to_ee (M, NewExports, Queued, ToMerge, 1 );
1769- assert (!M);
1770+ jl_ExecutionEngine->addModule (std::move (M));
1771+ return 0 ;
17701772}
17711773
1772-
17731774static uint64_t getAddressForFunction (StringRef fname)
17741775{
17751776 auto addr = jl_ExecutionEngine->getFunctionAddress (fname);
0 commit comments