Skip to content

Commit b03439c

Browse files
authored
convert algorithms to SCC (#47866)
These places in the code can be made either more efficient (O(1)) or more correct by using something closer to Tarjan's published algorithm for finding strongly connected components (SCCs).
1 parent 4ff6288 commit b03439c

File tree

3 files changed

+111
-163
lines changed

3 files changed

+111
-163
lines changed

src/gf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3377,6 +3377,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
33773377
}
33783378
}
33793379
// then we'll merge those numbers to assign each item in the group the same number
3380+
// (similar to Kosaraju's SCC algorithm?)
33803381
uint32_t groupid = 0;
33813382
uint32_t grouphi = 0;
33823383
for (i = 0; i < len; i++) {

src/jitlayers.cpp

Lines changed: 52 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,11 @@ void jl_dump_llvm_opt_impl(void *s)
136136
**jl_ExecutionEngine->get_dump_llvm_opt_stream() = (JL_STREAM*)s;
137137
}
138138

139-
static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
139+
static int jl_add_to_ee(
140+
orc::ThreadSafeModule &M,
141+
const StringMap<orc::ThreadSafeModule*> &NewExports,
142+
DenseMap<orc::ThreadSafeModule*, int> &Queued,
143+
std::vector<orc::ThreadSafeModule*> &Stack);
140144
static void jl_decorate_module(Module &M);
141145
static uint64_t getAddressForFunction(StringRef fname);
142146

@@ -228,10 +232,13 @@ static jl_callptr_t _jl_compile_codeinst(
228232
}
229233
}
230234
}
235+
DenseMap<orc::ThreadSafeModule*, int> Queued;
236+
std::vector<orc::ThreadSafeModule*> Stack;
231237
for (auto &def : emitted) {
232238
// Add the results to the execution engine now
233239
orc::ThreadSafeModule &M = std::get<0>(def.second);
234-
jl_add_to_ee(M, NewExports);
240+
jl_add_to_ee(M, NewExports, Queued, Stack);
241+
assert(Queued.empty() && Stack.empty() && !M);
235242
}
236243
++CompiledCodeinsts;
237244
MaxWorkqueueSize.updateMax(emitted.size());
@@ -1704,76 +1711,72 @@ static void jl_decorate_module(Module &M) {
17041711
#endif
17051712
}
17061713

1714+
// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
17071715
static int jl_add_to_ee(
17081716
orc::ThreadSafeModule &M,
1709-
StringMap<orc::ThreadSafeModule*> &NewExports,
1717+
const StringMap<orc::ThreadSafeModule*> &NewExports,
17101718
DenseMap<orc::ThreadSafeModule*, int> &Queued,
1711-
std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1712-
int depth)
1719+
std::vector<orc::ThreadSafeModule*> &Stack)
17131720
{
1714-
// DAG-sort (post-dominator) the compile to compute the minimum
1715-
// merge-module sets for linkage
1721+
// First check if the TSM is empty (already compiled)
17161722
if (!M)
17171723
return 0;
1718-
// First check and record if it's on the stack somewhere
1724+
// Next check and record if it is on the stack somewhere
17191725
{
1720-
auto &Cycle = Queued[&M];
1721-
if (Cycle)
1722-
return Cycle;
1723-
ToMerge.push_back({});
1724-
Cycle = depth;
1726+
auto &Id = Queued[&M];
1727+
if (Id)
1728+
return Id;
1729+
Stack.push_back(&M);
1730+
Id = Stack.size();
17251731
}
1732+
// Finally work out the SCC
1733+
int depth = Stack.size();
17261734
int MergeUp = depth;
1727-
// Compute the cycle-id
1735+
std::vector<orc::ThreadSafeModule*> Children;
17281736
M.withModuleDo([&](Module &m) {
17291737
for (auto &F : m.global_objects()) {
17301738
if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
17311739
auto Callee = NewExports.find(F.getName());
17321740
if (Callee != NewExports.end()) {
1733-
auto &CM = Callee->second;
1734-
int Down = jl_add_to_ee(*CM, NewExports, Queued, ToMerge, depth + 1);
1735-
assert(Down <= depth);
1736-
if (Down && Down < MergeUp)
1737-
MergeUp = Down;
1741+
auto *CM = Callee->second;
1742+
if (*CM && CM != &M) {
1743+
auto Down = Queued.find(CM);
1744+
if (Down != Queued.end())
1745+
MergeUp = std::min(MergeUp, Down->second);
1746+
else
1747+
Children.push_back(CM);
1748+
}
17381749
}
17391750
}
17401751
}
17411752
});
1742-
if (MergeUp == depth) {
1743-
// Not in a cycle (or at the top of it)
1744-
Queued.erase(&M);
1745-
for (auto &CM : ToMerge.at(depth - 1)) {
1746-
assert(Queued.find(CM)->second == depth);
1747-
Queued.erase(CM);
1748-
jl_merge_module(M, std::move(*CM));
1749-
}
1750-
jl_ExecutionEngine->addModule(std::move(M));
1751-
MergeUp = 0;
1753+
assert(MergeUp > 0);
1754+
for (auto *CM : Children) {
1755+
int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack);
1756+
assert(Down <= (int)Stack.size());
1757+
if (Down)
1758+
MergeUp = std::min(MergeUp, Down);
17521759
}
1753-
else {
1754-
// Add our frame(s) to the top of the cycle
1755-
Queued[&M] = MergeUp;
1756-
auto &Top = ToMerge.at(MergeUp - 1);
1757-
Top.push_back(&M);
1758-
for (auto &CM : ToMerge.at(depth - 1)) {
1759-
assert(Queued.find(CM)->second == depth);
1760-
Queued[CM] = MergeUp;
1761-
Top.push_back(CM);
1760+
if (MergeUp < depth)
1761+
return MergeUp;
1762+
while (1) {
1763+
// Not in a cycle (or at the top of it)
1764+
// remove SCC state and merge every CM from the cycle into M
1765+
orc::ThreadSafeModule *CM = Stack.back();
1766+
auto it = Queued.find(CM);
1767+
assert(it->second == (int)Stack.size());
1768+
Queued.erase(it);
1769+
Stack.pop_back();
1770+
if ((int)Stack.size() < depth) {
1771+
assert(&M == CM);
1772+
break;
17621773
}
1774+
jl_merge_module(M, std::move(*CM));
17631775
}
1764-
ToMerge.pop_back();
1765-
return MergeUp;
1766-
}
1767-
1768-
static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1769-
{
1770-
DenseMap<orc::ThreadSafeModule*, int> Queued;
1771-
std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1772-
jl_add_to_ee(M, NewExports, Queued, ToMerge, 1);
1773-
assert(!M);
1776+
jl_ExecutionEngine->addModule(std::move(M));
1777+
return 0;
17741778
}
17751779

1776-
17771780
static uint64_t getAddressForFunction(StringRef fname)
17781781
{
17791782
auto addr = jl_ExecutionEngine->getFunctionAddress(fname);

0 commit comments

Comments
 (0)