Skip to content

Commit 9e745a3

Browse files
committed
rewriter: directly embed additional ref uses
1 parent 5d22247 commit 9e745a3

File tree

6 files changed

+166
-170
lines changed

6 files changed

+166
-170
lines changed

src/asm_writing/rewriter.cpp

Lines changed: 63 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -917,55 +917,75 @@ RewriterVar* Rewriter::loadConst(int64_t val, Location dest) {
917917
}
918918

919919
RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, llvm::ArrayRef<RewriterVar*> args,
920-
llvm::ArrayRef<RewriterVar*> args_xmm) {
920+
llvm::ArrayRef<RewriterVar*> args_xmm, llvm::ArrayRef<RewriterVar*> additional_uses) {
921921
STAT_TIMER(t0, "us_timer_rewriter", 10);
922922
RewriterVar* result = createNewVar();
923-
RewriterVar::SmallVector uses;
924-
for (RewriterVar* v : args) {
925-
assert(v != NULL);
926-
uses.push_back(v);
927-
}
928-
for (RewriterVar* v : args_xmm) {
929-
assert(v != NULL);
930-
uses.push_back(v);
931-
}
932923

933924
ActionType type;
934925
if (has_side_effects)
935926
type = ActionType::MUTATION;
936927
else
937928
type = ActionType::NORMAL;
938929

939-
// It's not nice to pass llvm::SmallVectors through a closure, especially with our SmallFunction
940-
// optimization, so just regionAlloc them and copy the data in:
941-
RewriterVar** _args = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args.size());
942-
memcpy(_args, args.begin(), sizeof(RewriterVar*) * args.size());
943-
RewriterVar** _args_xmm = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args_xmm.size());
944-
memcpy(_args_xmm, args_xmm.begin(), sizeof(RewriterVar*) * args_xmm.size());
945-
946-
int args_size = args.size();
947-
assert(args_xmm.size() <= 0x7fff);
948-
// Hack: pack this into a short to make sure it fits in the closure
949-
short xmm_args_size = args_xmm.size();
950930

951931
// TODO: we don't need to generate the decref info for calls which can't throw
952932
bool can_throw = true;
933+
auto args_array_ref = regionAllocArgs(args, args_xmm, additional_uses);
953934

954935
// Hack: explicitly order the closure arguments so they pad nicer
955-
addAction([args_size, xmm_args_size, has_side_effects, can_throw, this, result, func_addr, _args, _args_xmm]() {
956-
this->_call(result, has_side_effects, can_throw, func_addr, llvm::ArrayRef<RewriterVar*>(_args, args_size),
957-
llvm::ArrayRef<RewriterVar*>(_args_xmm, xmm_args_size));
958-
for (int i = 0; i < args_size; i++)
959-
_args[i]->bumpUse();
960-
for (int i = 0; i < xmm_args_size; i++)
961-
_args_xmm[i]->bumpUse();
962-
}, uses, type);
936+
struct LambdaClosure {
937+
RewriterVar** args_array;
938+
939+
struct {
940+
unsigned int has_side_effects : 1;
941+
unsigned int can_throw : 1;
942+
unsigned int num_args : 16;
943+
unsigned int num_args_xmm : 16;
944+
unsigned int num_additional_uses : 16;
945+
};
946+
947+
llvm::ArrayRef<RewriterVar*> allArgs() const {
948+
return llvm::makeArrayRef(args_array, num_args + num_args_xmm + num_additional_uses);
949+
}
950+
951+
llvm::ArrayRef<RewriterVar*> args() const { return allArgs().slice(0, num_args); }
952+
953+
llvm::ArrayRef<RewriterVar*> argsXmm() const { return allArgs().slice(num_args, num_args_xmm); }
954+
955+
llvm::ArrayRef<RewriterVar*> additionalUses() const {
956+
return allArgs().slice((int)num_args + (int)num_args_xmm, num_additional_uses);
957+
}
958+
959+
LambdaClosure(llvm::MutableArrayRef<RewriterVar*> args_array_ref, llvm::ArrayRef<RewriterVar*> _args,
960+
llvm::ArrayRef<RewriterVar*> _args_xmm, llvm::ArrayRef<RewriterVar*> _addition_uses,
961+
bool has_side_effects, bool can_throw)
962+
: args_array(args_array_ref.data()),
963+
has_side_effects(has_side_effects),
964+
can_throw(can_throw),
965+
num_args(_args.size()),
966+
num_args_xmm(_args_xmm.size()),
967+
num_additional_uses(_addition_uses.size()) {
968+
assert(_args.size() < 1 << 16);
969+
assert(_args_xmm.size() < 1 << 16);
970+
assert(_addition_uses.size() < 1 << 16);
971+
}
972+
973+
} lambda_closure(args_array_ref, args, args_xmm, additional_uses, has_side_effects, can_throw);
974+
assert(lambda_closure.args().size() == args.size());
975+
assert(lambda_closure.argsXmm().size() == args_xmm.size());
976+
assert(lambda_closure.additionalUses().size() == additional_uses.size());
977+
978+
addAction([this, result, func_addr, lambda_closure]() {
979+
this->_call(result, lambda_closure.has_side_effects, lambda_closure.can_throw, func_addr, lambda_closure.args(),
980+
lambda_closure.argsXmm(), lambda_closure.allArgs());
981+
}, lambda_closure.allArgs(), type);
963982

964983
return result;
965984
}
966985

967986
void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> args,
968-
llvm::ArrayRef<RewriterVar*> args_xmm, Location preserve) {
987+
llvm::ArrayRef<RewriterVar*> args_xmm, Location preserve,
988+
llvm::ArrayRef<RewriterVar*> stuff_to_bump) {
969989
if (has_side_effects)
970990
assert(done_guarding);
971991

@@ -1052,6 +1072,10 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> ar
10521072
}
10531073
#endif
10541074

1075+
for (auto&& use : stuff_to_bump) {
1076+
use->bumpUseEarlyIfPossible();
1077+
}
1078+
10551079
// Spill caller-saved registers:
10561080
for (auto check_reg : caller_save_registers) {
10571081
// check_reg.dump();
@@ -1114,7 +1138,8 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> ar
11141138
}
11151139

11161140
void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr,
1117-
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm) {
1141+
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm,
1142+
llvm::ArrayRef<RewriterVar*> stuff_to_bump) {
11181143
if (LOG_IC_ASSEMBLY)
11191144
assembler->comment("_call");
11201145

@@ -1123,17 +1148,7 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw,
11231148
if (failed)
11241149
return;
11251150

1126-
_setupCall(has_side_effects, args, args_xmm, assembler::R11);
1127-
1128-
// This duty is now on the caller:
1129-
/*
1130-
for (RewriterVar* arg : args) {
1131-
arg->bumpUse();
1132-
}
1133-
for (RewriterVar* arg_xmm : args_xmm) {
1134-
arg_xmm->bumpUse();
1135-
}
1136-
*/
1151+
_setupCall(has_side_effects, args, args_xmm, assembler::R11, stuff_to_bump);
11371152

11381153
assertConsistent();
11391154

@@ -1164,6 +1179,10 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw,
11641179

11651180
if (result)
11661181
result->releaseIfNoUses();
1182+
1183+
for (RewriterVar* arg : stuff_to_bump) {
1184+
arg->bumpUseLateIfNecessary();
1185+
}
11671186
}
11681187

11691188
std::vector<Location> Rewriter::getDecrefLocations() {
@@ -1286,12 +1305,7 @@ void RewriterVar::refConsumed(RewriterAction* action) {
12861305
last_refconsumed_numuses = uses.size();
12871306
if (!action)
12881307
action = rewriter->getLastAction();
1289-
action->consumed_refs.emplace_back(this);
1290-
}
1291-
1292-
void RewriterVar::refUsed() {
1293-
// TODO: This is a pretty silly implementation that might prevent other optimizations?
1294-
rewriter->addAction([=]() { this->bumpUse(); }, { this }, ActionType::NORMAL);
1308+
action->consumed_refs.push_front(this);
12951309
}
12961310

12971311
bool RewriterVar::needsDecref() {
@@ -1893,7 +1907,7 @@ void Rewriter::_checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val, asse
18931907
} else
18941908
assembler->cmp(var_reg, assembler::Immediate(exc_val), type);
18951909

1896-
_setupCall(false, RewriterVar::SmallVector(), RewriterVar::SmallVector());
1910+
_setupCall(false, {});
18971911
{
18981912
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
18991913
assembler->mov(assembler::Immediate((void*)throwCAPIException), assembler::R11);

src/asm_writing/rewriter.h

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define PYSTON_ASMWRITING_REWRITER_H
1717

1818
#include <deque>
19+
#include <forward_list>
1920
#include <list>
2021
#include <map>
2122
#include <memory>
@@ -192,8 +193,6 @@ class RewriterVar {
192193
// if no action is specified it will assume the last action consumed the reference
193194
void refConsumed(RewriterAction* action = NULL);
194195

195-
void refUsed();
196-
197196
// registerOwnedAttr tells the refcounter that a certain memory location holds a pointer
198197
// to an owned reference. This must be paired with a call to deregisterOwnedAttr
199198
// Call these right before emitting the store (for register) or decref (for deregister).
@@ -237,11 +236,11 @@ class RewriterVar {
237236
// /* some code */
238237
// bumpUseLateIfNecessary();
239238
void bumpUseEarlyIfPossible() {
240-
if (reftype != RefType::OWNED)
239+
if (reftype != RefType::OWNED && !hasScratchAllocation())
241240
bumpUse();
242241
}
243242
void bumpUseLateIfNecessary() {
244-
if (reftype == RefType::OWNED)
243+
if (reftype == RefType::OWNED || hasScratchAllocation())
245244
bumpUse();
246245
}
247246

@@ -339,8 +338,9 @@ template <int N = 24> class SmallFunction {
339338

340339
class RewriterAction {
341340
public:
342-
SmallFunction<56> action;
343-
std::vector<RewriterVar*> consumed_refs;
341+
SmallFunction<48> action;
342+
std::forward_list<RewriterVar*> consumed_refs;
343+
344344

345345
template <typename F> RewriterAction(F&& action) : action(std::forward<F>(action)) {}
346346

@@ -367,7 +367,33 @@ class Rewriter : public ICSlotRewrite::CommitHook {
367367

368368
protected:
369369
// Allocates `bytes` bytes of data. The allocation will get freed when the rewriter gets freed.
370-
void* regionAlloc(size_t bytes) { return allocator.Allocate(bytes, 16 /* alignment */); }
370+
void* regionAlloc(size_t bytes, int alignment = 16) { return allocator.Allocate(bytes, alignment); }
371+
template <typename T> llvm::MutableArrayRef<T> regionAlloc(size_t num_elements) {
372+
return llvm::MutableArrayRef<T>(allocator.Allocate<T>(num_elements), num_elements);
373+
}
374+
375+
// This takes a variable number of llvm::ArrayRef<RewriterVar*> and copies in all elements into a single contiguous
376+
// memory location.
377+
template <typename... Args>
378+
llvm::MutableArrayRef<RewriterVar*> regionAllocArgs(llvm::ArrayRef<RewriterVar*> arg1, Args... args) {
379+
size_t num_total_args = 0;
380+
for (auto&& array : { arg1, args... }) {
381+
num_total_args += array.size();
382+
}
383+
if (num_total_args == 0)
384+
return llvm::MutableArrayRef<RewriterVar*>();
385+
386+
auto args_array_ref = regionAlloc<RewriterVar*>(num_total_args);
387+
auto insert_point = args_array_ref;
388+
for (auto&& array : { arg1, args... }) {
389+
if (!array.empty()) {
390+
memcpy(insert_point.data(), array.data(), array.size() * sizeof(RewriterVar*));
391+
insert_point = insert_point.slice(array.size());
392+
}
393+
}
394+
assert(insert_point.size() == 0);
395+
return args_array_ref;
396+
}
371397

372398
// Helps generating the best code for loading a const integer value.
373399
// By keeping track of the last known value of every register and reusing it.
@@ -509,11 +535,13 @@ class Rewriter : public ICSlotRewrite::CommitHook {
509535
void _slowpathJump(bool condition_eq);
510536
void _trap();
511537
void _loadConst(RewriterVar* result, int64_t val);
512-
void _setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm,
513-
Location preserve = Location::any());
538+
void _setupCall(bool has_side_effects, llvm::ArrayRef<RewriterVar*> args = {},
539+
llvm::ArrayRef<RewriterVar*> args_xmm = {}, Location preserve = Location::any(),
540+
llvm::ArrayRef<RewriterVar*> stuff_to_bump = {});
514541
// _call does not call bumpUse on its arguments:
515542
void _call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr,
516-
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm = {});
543+
llvm::ArrayRef<RewriterVar*> args, llvm::ArrayRef<RewriterVar*> args_xmm = {},
544+
llvm::ArrayRef<RewriterVar*> stuff_to_bump = {});
517545
void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest);
518546
int _allocate(RewriterVar* result, int n);
519547
void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n);
@@ -612,7 +640,7 @@ class Rewriter : public ICSlotRewrite::CommitHook {
612640
// inline cache. (Extra allocations don't count even though they're potentially visible if you look
613641
// hard enough.)
614642
RewriterVar* call(bool has_side_effects, void* func_addr, llvm::ArrayRef<RewriterVar*> args = {},
615-
llvm::ArrayRef<RewriterVar*> args_xmm = {});
643+
llvm::ArrayRef<RewriterVar*> args_xmm = {}, llvm::ArrayRef<RewriterVar*> additional_uses = {});
616644
template <typename... Args>
617645
RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg1, Args... args) {
618646
return call(has_side_effects, func_addr, llvm::ArrayRef<RewriterVar*>({ arg1, args... }), {});

src/codegen/ast_interpreter.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,12 +1172,9 @@ Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std::
11721172
closure_var = jit->imm(0ul);
11731173
if (!passed_globals_var)
11741174
passed_globals_var = jit->imm(0ul);
1175-
rtn.var = jit->call(false, (void*)createFunctionFromMetadata, jit->imm(md), closure_var, passed_globals_var,
1176-
defaults_var, jit->imm(args->defaults.size()))->setType(RefType::OWNED);
1177-
1178-
for (auto d_var : defaults_vars) {
1179-
d_var->refUsed();
1180-
}
1175+
rtn.var = jit->call(false, (void*)createFunctionFromMetadata, { jit->imm(md), closure_var, passed_globals_var,
1176+
defaults_var, jit->imm(args->defaults.size()) },
1177+
{}, defaults_vars)->setType(RefType::OWNED);
11811178
}
11821179

11831180
rtn.o = createFunctionFromMetadata(md, closure, passed_globals, u.il);

0 commit comments

Comments
 (0)