Skip to content

Commit a4da5e1

Browse files
committed
convert algorithms to SCC
These places in the code can be made either more efficient (O(1)) or more correct by using an approach closer to Tarjan's published algorithm for strongly connected components (SCC).
1 parent 0457fde commit a4da5e1

File tree

3 files changed

+110
-163
lines changed

3 files changed

+110
-163
lines changed

src/gf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3255,6 +3255,7 @@ static jl_value_t *ml_matches(jl_methtable_t *mt,
32553255
}
32563256
}
32573257
// then we'll merge those numbers to assign each item in the group the same number
3258+
// (similar to Kosaraju's SCC algorithm?)
32583259
uint32_t groupid = 0;
32593260
uint32_t grouphi = 0;
32603261
for (i = 0; i < len; i++) {

src/jitlayers.cpp

Lines changed: 50 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,11 @@ void jl_dump_llvm_opt_impl(void *s)
136136
**jl_ExecutionEngine->get_dump_llvm_opt_stream() = (JL_STREAM*)s;
137137
}
138138

139-
static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
139+
static int jl_add_to_ee(
140+
orc::ThreadSafeModule &M,
141+
const StringMap<orc::ThreadSafeModule*> &NewExports,
142+
DenseMap<orc::ThreadSafeModule*, int> &Queued,
143+
std::vector<orc::ThreadSafeModule*> &Stack);
140144
static void jl_decorate_module(Module &M);
141145
static uint64_t getAddressForFunction(StringRef fname);
142146

@@ -228,10 +232,13 @@ static jl_callptr_t _jl_compile_codeinst(
228232
}
229233
}
230234
}
235+
DenseMap<orc::ThreadSafeModule*, int> Queued;
236+
std::vector<orc::ThreadSafeModule*> Stack;
231237
for (auto &def : emitted) {
232238
// Add the results to the execution engine now
233239
orc::ThreadSafeModule &M = std::get<0>(def.second);
234-
jl_add_to_ee(M, NewExports);
240+
jl_add_to_ee(M, NewExports, Queued, Stack);
241+
assert(Queued.empty() && Stack.empty() && !M);
235242
}
236243
++CompiledCodeinsts;
237244
MaxWorkqueueSize.updateMax(emitted.size());
@@ -1700,76 +1707,70 @@ static void jl_decorate_module(Module &M) {
17001707
#endif
17011708
}
17021709

1710+
// Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
17031711
static int jl_add_to_ee(
17041712
orc::ThreadSafeModule &M,
1705-
StringMap<orc::ThreadSafeModule*> &NewExports,
1713+
const StringMap<orc::ThreadSafeModule*> &NewExports,
17061714
DenseMap<orc::ThreadSafeModule*, int> &Queued,
1707-
std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
1708-
int depth)
1715+
std::vector<orc::ThreadSafeModule*> &Stack)
17091716
{
1710-
// DAG-sort (post-dominator) the compile to compute the minimum
1711-
// merge-module sets for linkage
1717+
// First check if the TSM is empty (already compiled)
17121718
if (!M)
17131719
return 0;
1714-
// First check and record if it's on the stack somewhere
1720+
// Next check and record if it is on the stack somewhere
17151721
{
1716-
auto &Cycle = Queued[&M];
1717-
if (Cycle)
1718-
return Cycle;
1719-
ToMerge.push_back({});
1720-
Cycle = depth;
1722+
auto &Id = Queued[&M];
1723+
if (Id)
1724+
return Id;
1725+
Stack.push_back(&M);
1726+
Id = Stack.size();
17211727
}
1728+
// Finally work out the SCC
1729+
int depth = Stack.size();
17221730
int MergeUp = depth;
1723-
// Compute the cycle-id
1731+
std::vector<orc::ThreadSafeModule*> Children;
17241732
M.withModuleDo([&](Module &m) {
17251733
for (auto &F : m.global_objects()) {
17261734
if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
17271735
auto Callee = NewExports.find(F.getName());
17281736
if (Callee != NewExports.end()) {
1729-
auto &CM = Callee->second;
1730-
int Down = jl_add_to_ee(*CM, NewExports, Queued, ToMerge, depth + 1);
1731-
assert(Down <= depth);
1732-
if (Down && Down < MergeUp)
1733-
MergeUp = Down;
1737+
auto *CM = Callee->second;
1738+
if (*CM && CM != &M) {
1739+
auto Down = Queued.find(CM);
1740+
if (Down != Queued.end())
1741+
MergeUp = std::min(MergeUp, Down->second);
1742+
else
1743+
Children.push_back(CM);
1744+
}
17341745
}
17351746
}
17361747
}
17371748
});
1738-
if (MergeUp == depth) {
1739-
// Not in a cycle (or at the top of it)
1740-
Queued.erase(&M);
1741-
for (auto &CM : ToMerge.at(depth - 1)) {
1742-
assert(Queued.find(CM)->second == depth);
1743-
Queued.erase(CM);
1744-
jl_merge_module(M, std::move(*CM));
1745-
}
1746-
jl_ExecutionEngine->addModule(std::move(M));
1747-
MergeUp = 0;
1749+
assert(MergeUp > 0);
1750+
for (auto *CM : Children) {
1751+
int Down = jl_add_to_ee(*CM, NewExports, Queued, Stack);
1752+
assert(Down <= (int)Stack.size());
1753+
if (Down)
1754+
MergeUp = std::min(MergeUp, Down);
17481755
}
1749-
else {
1750-
// Add our frame(s) to the top of the cycle
1751-
Queued[&M] = MergeUp;
1752-
auto &Top = ToMerge.at(MergeUp - 1);
1753-
Top.push_back(&M);
1754-
for (auto &CM : ToMerge.at(depth - 1)) {
1755-
assert(Queued.find(CM)->second == depth);
1756-
Queued[CM] = MergeUp;
1757-
Top.push_back(CM);
1756+
if (MergeUp < depth)
1757+
return MergeUp;
1758+
while (1) {
1759+
// Not in a cycle (or at the top of it)
1760+
// remove SCC state and merge every CM from the cycle into M
1761+
orc::ThreadSafeModule *CM = Stack.back();
1762+
Stack.pop_back();
1763+
Queued.erase(CM);
1764+
if ((int)Stack.size() < depth) {
1765+
assert(&M == CM);
1766+
break;
17581767
}
1768+
jl_merge_module(M, std::move(*CM));
17591769
}
1760-
ToMerge.pop_back();
1761-
return MergeUp;
1762-
}
1763-
1764-
static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
1765-
{
1766-
DenseMap<orc::ThreadSafeModule*, int> Queued;
1767-
std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
1768-
jl_add_to_ee(M, NewExports, Queued, ToMerge, 1);
1769-
assert(!M);
1770+
jl_ExecutionEngine->addModule(std::move(M));
1771+
return 0;
17701772
}
17711773

1772-
17731774
static uint64_t getAddressForFunction(StringRef fname)
17741775
{
17751776
auto addr = jl_ExecutionEngine->getFunctionAddress(fname);

0 commit comments

Comments
 (0)