@@ -136,7 +136,11 @@ void jl_dump_llvm_opt_impl(void *s)
136136    **jl_ExecutionEngine->get_dump_llvm_opt_stream () = (JL_STREAM*)s;
137137}
138138
139- static  void  jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
139+ static  int  jl_add_to_ee (
140+         orc::ThreadSafeModule &M,  // module to hand to the execution engine; a no-op if it is already empty
141+         const  StringMap<orc::ThreadSafeModule*> &NewExports,  // symbol name -> module looked up for that symbol
142+         DenseMap<orc::ThreadSafeModule*, int > &Queued,  // in/out: module -> its 1-based position on Stack
143+         std::vector<orc::ThreadSafeModule*> &Stack);  // in/out: modules visited but not yet merged/added
140144static  void  jl_decorate_module (Module &M);
141145static  uint64_t  getAddressForFunction (StringRef fname);
142146
@@ -228,10 +232,13 @@ static jl_callptr_t _jl_compile_codeinst(
228232                }
229233            }
230234        }
235+         DenseMap<orc::ThreadSafeModule*, int > Queued;
236+         std::vector<orc::ThreadSafeModule*> Stack;
231237        for  (auto  &def : emitted) {
232238            //  Add the results to the execution engine now
233239            orc::ThreadSafeModule &M = std::get<0 >(def.second );
234-             jl_add_to_ee (M, NewExports);
240+             jl_add_to_ee (M, NewExports, Queued, Stack);
241+             assert (Queued.empty () && Stack.empty () && !M);
235242        }
236243        ++CompiledCodeinsts;
237244        MaxWorkqueueSize.updateMax (emitted.size ());
@@ -1704,76 +1711,72 @@ static void jl_decorate_module(Module &M) {
17041711#endif 
17051712}
17061713
1714+ //  Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
// Visits M and, recursively, the modules that NewExports associates with the
// external symbols M declares. Modules that turn out to reference each other in
// a cycle are merged into one module with jl_merge_module, and the result is
// passed to jl_ExecutionEngine->addModule.
//   M          - module to add; nothing happens if it is already empty.
//   NewExports - maps a symbol name to the module looked up for that symbol.
//   Queued     - maps each module currently on Stack to its 1-based position.
//   Stack      - modules visited but not yet merged/added, in visit order.
// Returns 0 when M was empty on entry or has now been added to the execution
// engine; returns M's recorded stack position when M is already queued;
// otherwise returns MergeUp (smaller than M's own position), the lowest stack
// position found among its callees, and leaves M on Stack.
17071715static  int  jl_add_to_ee (
17081716        orc::ThreadSafeModule &M,
1709-         StringMap<orc::ThreadSafeModule*> &NewExports,
1717+         const   StringMap<orc::ThreadSafeModule*> &NewExports,
17101718        DenseMap<orc::ThreadSafeModule*, int > &Queued,
1711-         std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1712-         int  depth)
1719+         std::vector<orc::ThreadSafeModule*> &Stack)
17131720{
1714-     //  DAG-sort (post-dominator) the compile to compute the minimum
1715-     //  merge-module sets for linkage
1721+     //  First check if the TSM is empty (already compiled)
17161722    if  (!M)
17171723        return  0 ;
1718-     //  First  check and record if it's  on the stack somewhere
1724+     //  Next  check and record if it is  on the stack somewhere
17191725    {
1720-         auto  &Cycle  = Queued[&M];
1721-         if  (Cycle )
1722-             return  Cycle ;
1723-         ToMerge .push_back ({} );
1724-         Cycle  = depth ;
1726+         auto  &Id  = Queued[&M];  // operator[] creates a zero entry for a module not seen before
1727+         if  (Id )
1728+             return  Id ;  // already queued: report its stack position to the caller
1729+         Stack .push_back (&M );
1730+         Id  = Stack. size () ;  // 1-based position, so that 0 can mean "not queued"
17251731    }
1732+     //  Finally work out the SCC
1733+     int  depth = Stack.size ();  // M's own position on the stack
17261734    int  MergeUp = depth;  // lowest stack position reached from M so far
1727-     //  Compute the cycle-id 
1735+     std::vector<orc::ThreadSafeModule*> Children;  // callees not yet queued; recursed into after withModuleDo returns
    // NOTE(review): presumably the recursion is deferred so that it does not run
    // inside M's withModuleDo callback -- confirm.
17281736    M.withModuleDo ([&](Module &m) {
17291737        for  (auto  &F : m.global_objects ()) {
17301738            if  (F.isDeclaration () && F.getLinkage () == GlobalValue::ExternalLinkage) {
17311739                auto  Callee = NewExports.find (F.getName ());
17321740                if  (Callee != NewExports.end ()) {
1733-                     auto  &CM = Callee->second ;
1734-                     int  Down = jl_add_to_ee (*CM, NewExports, Queued, ToMerge, depth + 1 );
1735-                     assert (Down <= depth);
1736-                     if  (Down && Down < MergeUp)
1737-                         MergeUp = Down;
1741+                     auto  *CM = Callee->second ;
1742+                     if  (*CM && CM != &M) {  // skip callees that are empty or are M itself
1743+                         auto  Down = Queued.find (CM);
1744+                         if  (Down != Queued.end ())
1745+                             MergeUp = std::min (MergeUp, Down->second );  // callee is on the stack: M joins its cycle
1746+                         else 
1747+                             Children.push_back (CM);
1748+                     }
17381749                }
17391750            }
17401751        }
17411752    });
1742-     if  (MergeUp == depth) {
1743-         //  Not in a cycle (or at the top of it)
1744-         Queued.erase (&M);
1745-         for  (auto  &CM : ToMerge.at (depth - 1 )) {
1746-             assert (Queued.find (CM)->second  == depth);
1747-             Queued.erase (CM);
1748-             jl_merge_module (M, std::move (*CM));
1749-         }
1750-         jl_ExecutionEngine->addModule (std::move (M));
1751-         MergeUp = 0 ;
1753+     assert (MergeUp > 0 );
1754+     for  (auto  *CM : Children) {
1755+         int  Down = jl_add_to_ee (*CM, NewExports, Queued, Stack);
1756+         assert (Down <= (int )Stack.size ());
1757+         if  (Down)  // 0 means the child was empty or has been added; nothing to merge up
1758+             MergeUp = std::min (MergeUp, Down);
17521759    }
1753-     else  {
1754-         //  Add our frame(s) to the top of the cycle
1755-         Queued[&M] = MergeUp;
1756-         auto  &Top = ToMerge.at (MergeUp - 1 );
1757-         Top.push_back (&M);
1758-         for  (auto  &CM : ToMerge.at (depth - 1 )) {
1759-             assert (Queued.find (CM)->second  == depth);
1760-             Queued[CM] = MergeUp;
1761-             Top.push_back (CM);
1760+     if  (MergeUp < depth)
1761+         return  MergeUp;  // M reaches a module lower on the stack; stay queued for that frame to merge
1762+     while  (1 ) {
1763+         //  Not in a cycle (or at the top of it)
1764+         //  remove SCC state and merge every CM from the cycle into M
1765+         orc::ThreadSafeModule *CM = Stack.back ();
1766+         auto  it = Queued.find (CM);
1767+         assert (it->second  == (int )Stack.size ());
1768+         Queued.erase (it);
1769+         Stack.pop_back ();
1770+         if  ((int )Stack.size () < depth) {  // the entry just popped was M's own
1771+             assert (&M == CM);
1772+             break ;
17621773        }
1774+         jl_merge_module (M, std::move (*CM));  // CM sat above M on the stack, so it belongs to M's cycle
17631775    }
1764-     ToMerge.pop_back ();
1765-     return  MergeUp;
1766- }
1767- 
1768- static  void  jl_add_to_ee (orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1769- {
1770-     DenseMap<orc::ThreadSafeModule*, int > Queued;
1771-     std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1772-     jl_add_to_ee (M, NewExports, Queued, ToMerge, 1 );
1773-     assert (!M);
1776+     jl_ExecutionEngine->addModule (std::move (M));
1777+     return  0 ;
17741778}
17751779
1776- 
17771780static  uint64_t  getAddressForFunction (StringRef fname)
17781781{
17791782    auto  addr = jl_ExecutionEngine->getFunctionAddress (fname);
0 commit comments