From ce69b813739fdab87ab30f9acb13d1c2b433b8fd Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Thu, 26 May 2016 18:20:36 +0100 Subject: [PATCH 01/10] bjit: change PP sizes and some misc minor improvements --- src/codegen/ast_interpreter.cpp | 4 ++-- src/codegen/baseline_jit.cpp | 33 +++++++++++++++++++-------------- src/codegen/baseline_jit.h | 4 ++-- src/runtime/objmodel.cpp | 3 +++ 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/codegen/ast_interpreter.cpp b/src/codegen/ast_interpreter.cpp index b140f94b3..8dcb3bb3d 100644 --- a/src/codegen/ast_interpreter.cpp +++ b/src/codegen/ast_interpreter.cpp @@ -458,7 +458,7 @@ void ASTInterpreter::doStore(AST_Name* node, STOLEN(Value) value) { ScopeInfo::VarScopeType vst = node->lookup_type; if (vst == ScopeInfo::VarScopeType::GLOBAL) { if (jit) - jit->emitSetGlobal(frame_info.globals, name.getBox(), value); + jit->emitSetGlobal(name.getBox(), value, getMD()->source->scoping->areGlobalsFromModule()); setGlobal(frame_info.globals, name.getBox(), value.o); } else if (vst == ScopeInfo::VarScopeType::NAME) { if (jit) @@ -1661,7 +1661,7 @@ Value ASTInterpreter::visit_name(AST_Name* node) { assert(!node->is_kill); Value v; if (jit) - v.var = jit->emitGetGlobal(frame_info.globals, node->id.getBox()); + v.var = jit->emitGetGlobal(node->id.getBox()); v.o = getGlobal(frame_info.globals, node->id.getBox()); return v; diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index 05ccc856b..c7850de20 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -361,7 +361,7 @@ RewriterVar* JitFragmentWriter::emitExceptionMatches(RewriterVar* v, RewriterVar } RewriterVar* JitFragmentWriter::emitGetAttr(RewriterVar* obj, BoxedString* s, AST_expr* node) { - return emitPPCall((void*)getattr, { obj, imm(s) }, 2, 512, node, getTypeRecorderForNode(node)) + return emitPPCall((void*)getattr, { obj, imm(s) }, 2, 256, node, getTypeRecorderForNode(node)) .first->setType(RefType::OWNED); } @@ -393,23 +393,21 @@ RewriterVar* JitFragmentWriter::emitGetBoxedLocals() { } RewriterVar* JitFragmentWriter::emitGetClsAttr(RewriterVar* obj, BoxedString* s) { - return emitPPCall((void*)getclsattr, { obj, imm(s) }, 2, 512).first->setType(RefType::OWNED); + return emitPPCall((void*)getclsattr, { obj, imm(s) }, 2, 256).first->setType(RefType::OWNED); } -RewriterVar* JitFragmentWriter::emitGetGlobal(Box* global, BoxedString* s) { +RewriterVar* JitFragmentWriter::emitGetGlobal(BoxedString* s) { if (s->s() == "None") { RewriterVar* r = imm(None)->setType(RefType::BORROWED); return r; } - RewriterVar* args[] = { NULL, NULL }; - args[0] = imm(global); - args[1] = imm(s); - return emitPPCall((void*)getGlobal, args, 2, 512).first->setType(RefType::OWNED); + RewriterVar* globals = getInterp()->getAttr(ASTInterpreterJitInterface::getGlobalsOffset()); + return emitPPCall((void*)getGlobal, { globals, imm(s) }, 1, 128).first->setType(RefType::OWNED); } RewriterVar* JitFragmentWriter::emitGetItem(AST_expr* node, RewriterVar* value, RewriterVar* slice) { - return emitPPCall((void*)getitem, { value, slice }, 2, 512, node).first->setType(RefType::OWNED); + return emitPPCall((void*)getitem, { value, slice }, 1, 256, node).first->setType(RefType::OWNED); } RewriterVar* JitFragmentWriter::emitGetLocal(InternedString s, int vreg) { @@ -541,16 +539,18 @@ RewriterVar* JitFragmentWriter::emitYield(RewriterVar* v) { } void JitFragmentWriter::emitDelAttr(RewriterVar* target, BoxedString* attr) { - emitPPCall((void*)delattr, { target, imm(attr) }, 1, 512).first; + emitPPCall((void*)delattr, { target, imm(attr) }, 1, 144).first; } void JitFragmentWriter::emitDelGlobal(BoxedString* name) { RewriterVar* globals = getInterp()->getAttr(ASTInterpreterJitInterface::getGlobalsOffset()); - emitPPCall((void*)delGlobal, { globals, imm(name) }, 1, 512).first; + // does not get rewriten yet + // emitPPCall((void*)delGlobal, { globals, imm(name) }, 1, 512).first; + call(false, (void*)delGlobal, globals, imm(name)); } void JitFragmentWriter::emitDelItem(RewriterVar* target, RewriterVar* slice) { - emitPPCall((void*)delitem, { target, slice }, 1, 512).first; + emitPPCall((void*)delitem, { target, slice }, 1, 256).first; } void JitFragmentWriter::emitDelName(InternedString name) { @@ -623,7 +623,7 @@ void JitFragmentWriter::emitReturn(RewriterVar* v) { } void JitFragmentWriter::emitSetAttr(AST_expr* node, RewriterVar* obj, BoxedString* s, STOLEN(RewriterVar*) attr) { - auto rtn = emitPPCall((void*)setattr, { obj, imm(s), attr }, 2, 512, node); + auto rtn = emitPPCall((void*)setattr, { obj, imm(s), attr }, 2, 256, node); attr->refConsumed(rtn.second); } @@ -647,8 +647,13 @@ void JitFragmentWriter::emitSetExcInfo(RewriterVar* type, RewriterVar* value, Re traceback->refConsumed(); } -void JitFragmentWriter::emitSetGlobal(Box* global, BoxedString* s, STOLEN(RewriterVar*) v) { - auto rtn = emitPPCall((void*)setGlobal, { imm(global), imm(s), v }, 2, 512); +void JitFragmentWriter::emitSetGlobal(BoxedString* s, STOLEN(RewriterVar*) v, bool are_globals_from_module) { + RewriterVar* globals = getInterp()->getAttr(ASTInterpreterJitInterface::getGlobalsOffset()); + std::pair rtn; + if (are_globals_from_module) + rtn = emitPPCall((void*)setattr, { globals, imm(s), v }, 1, 256); + else + rtn = emitPPCall((void*)setGlobal, { globals, imm(s), v }, 1, 256); v->refConsumed(rtn.second); } diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 43bdd9460..1c486e692 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -234,7 +234,7 @@ class JitFragmentWriter : public Rewriter { RewriterVar* emitGetBoxedLocal(BoxedString* s); RewriterVar* emitGetBoxedLocals(); RewriterVar* emitGetClsAttr(RewriterVar* obj, BoxedString* s); - RewriterVar* emitGetGlobal(Box* global, BoxedString* s); + RewriterVar* emitGetGlobal(BoxedString* s); RewriterVar* emitGetItem(AST_expr* node, RewriterVar* value, RewriterVar* slice); RewriterVar* emitGetLocal(InternedString s, int vreg); RewriterVar* emitGetPystonIter(RewriterVar* v); @@ -268,7 +268,7 @@ class JitFragmentWriter : public Rewriter { void emitSetBlockLocal(InternedString s, STOLEN(RewriterVar*) v); void emitSetCurrentInst(AST_stmt* node); void emitSetExcInfo(RewriterVar* type, RewriterVar* value, RewriterVar* traceback); - void emitSetGlobal(Box* global, BoxedString* s, STOLEN(RewriterVar*) v); + void emitSetGlobal(BoxedString* s, STOLEN(RewriterVar*) v, bool are_globals_from_module); void emitSetItemName(BoxedString* s, RewriterVar* v); void emitSetItem(RewriterVar* target, RewriterVar* slice, RewriterVar* value); void emitSetLocal(InternedString s, int vreg, bool set_closure, STOLEN(RewriterVar*) v); diff --git a/src/runtime/objmodel.cpp b/src/runtime/objmodel.cpp index 4798d9a13..f08b73a22 100644 --- a/src/runtime/objmodel.cpp +++ b/src/runtime/objmodel.cpp @@ -7302,6 +7302,9 @@ extern "C" Box* getGlobal(Box* globals, BoxedString* name) { } extern "C" void setGlobal(Box* globals, BoxedString* name, STOLEN(Box*) value) { + static StatCounter slowpath_setglobal("slowpath_setglobal"); + slowpath_setglobal.log(); + if (globals->cls == attrwrapper_cls) { globals = unwrapAttrWrapper(globals); RELEASE_ASSERT(globals->cls == module_cls, "%s", globals->cls->tp_name); From 031526a5459199c2bdf8dc2376b0f7a30f4c5df3 Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Thu, 26 May 2016 18:50:52 +0100 Subject: [PATCH 02/10] bjit: let OSR handle multiple loops --- src/codegen/ast_interpreter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen/ast_interpreter.cpp b/src/codegen/ast_interpreter.cpp index 8dcb3bb3d..ac283d2da 100644 --- a/src/codegen/ast_interpreter.cpp +++ b/src/codegen/ast_interpreter.cpp @@ -686,12 +686,12 @@ Value ASTInterpreter::visit_jump(AST_Jump* node) { if (backedge) ++edgecount; - if (ENABLE_BASELINEJIT && backedge && edgecount == OSR_THRESHOLD_INTERPRETER && !jit && !node->target->code) { + if (ENABLE_BASELINEJIT && backedge && edgecount >= OSR_THRESHOLD_INTERPRETER && !jit && !node->target->code) { should_jit = true; startJITing(node->target); } - if (backedge && edgecount == OSR_THRESHOLD_BASELINE) { + if (backedge && edgecount >= OSR_THRESHOLD_BASELINE) { Box* rtn = doOSR(node); if (rtn) return Value(rtn, NULL); From 4253de66d419d495cc0fe5bf48957fcd3a8f35f2 Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Thu, 26 May 2016 20:36:37 +0100 Subject: [PATCH 03/10] bjit: directly emit 'is' and 'is not' comparisons also fix a build error when ICs are disabled --- src/asm_writing/rewriter.cpp | 18 ++++++++++++++---- src/asm_writing/rewriter.h | 2 ++ src/codegen/baseline_jit.cpp | 7 +++++-- src/runtime/objmodel.cpp | 4 ++-- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/asm_writing/rewriter.cpp b/src/asm_writing/rewriter.cpp index c08bd26f9..e9b0dbe1a 100644 --- a/src/asm_writing/rewriter.cpp +++ b/src/asm_writing/rewriter.cpp @@ -632,14 +632,18 @@ void Rewriter::_cmp(RewriterVar* result, RewriterVar* v1, AST_TYPE::AST_TYPE cmp if (LOG_IC_ASSEMBLY) assembler->comment("_cmp"); - assembler::Register v1_reg = v1->getInReg(); - assembler::Register v2_reg = v2->getInReg(); + assembler::Register v1_reg = v1->getInReg(Location::any(), false, dest); + assembler::Register v2_reg = v2->getInReg(Location::any(), false, dest); assert(v1_reg != v2_reg); // TODO how do we ensure this? v1->bumpUseEarlyIfPossible(); v2->bumpUseEarlyIfPossible(); - assembler::Register newvar_reg = allocReg(dest); + // sete and setne has special register requirements (can't use r8-r15) + const assembler::Register valid_registers[] = { + assembler::RAX, assembler::RCX, assembler::RDX, assembler::RSI, assembler::RDI, + }; + assembler::Register newvar_reg = allocReg(dest, Location::any(), valid_registers); result->initializeInReg(newvar_reg); assembler->cmp(v1_reg, v2_reg); switch (cmp_type) { @@ -2004,6 +2008,11 @@ void Rewriter::spillRegister(assembler::XMMRegister reg) { } assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { + return allocReg(dest, otherThan, allocatable_regs); +} + +assembler::Register Rewriter::allocReg(Location dest, Location otherThan, + llvm::ArrayRef valid_registers) { assertPhaseEmitting(); if (dest.type == Location::AnyReg) { @@ -2012,7 +2021,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { assembler::Register best_reg(0); // TODO prioritize spilling a constant register? - for (assembler::Register reg : allocatable_regs) { + for (assembler::Register reg : valid_registers) { if (Location(reg) != otherThan) { if (vars_by_location.count(reg) == 0) { return reg; @@ -2042,6 +2051,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { assert(failed || vars_by_location.count(best_reg) == 0); return best_reg; } else if (dest.type == Location::Register) { + assert(std::find(valid_registers.begin(), valid_registers.end(), dest.asRegister()) != valid_registers.end()); assembler::Register reg(dest.regnum); if (vars_by_location.count(reg)) { diff --git a/src/asm_writing/rewriter.h b/src/asm_writing/rewriter.h index 385c05ec7..c91190010 100644 --- a/src/asm_writing/rewriter.h +++ b/src/asm_writing/rewriter.h @@ -483,6 +483,8 @@ class Rewriter : public ICSlotRewrite::CommitHook { // Allocates a register. dest must be of type Register or AnyReg // If otherThan is a register, guaranteed to not use that register. assembler::Register allocReg(Location dest, Location otherThan = Location::any()); + assembler::Register allocReg(Location dest, Location otherThan, + llvm::ArrayRef valid_registers); assembler::XMMRegister allocXMMReg(Location dest, Location otherThan = Location::any()); // Allocates an 8-byte region in the scratch space Location allocScratch(); diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index c7850de20..fc26ad1c3 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -223,7 +223,7 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B RewriterVar* args_array = nullptr; if (args.size()) - args_array = allocArgs(args); + args_array = allocArgs(args, RewriterVar::SetattrType::REF_USED); else RELEASE_ASSERT(!keyword_names_var, "0 args but keyword names are set"); @@ -244,7 +244,10 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B } RewriterVar* JitFragmentWriter::emitCompare(AST_expr* node, RewriterVar* lhs, RewriterVar* rhs, int op_type) { - // TODO: can directly emit the assembly for Is/IsNot + if (op_type == AST_TYPE::Is || op_type == AST_TYPE::IsNot) { + RewriterVar* cmp_result = lhs->cmp(op_type == AST_TYPE::IsNot ? AST_TYPE::NotEq : AST_TYPE::Eq, rhs); + return call(false, (void*)boxBool, cmp_result)->setType(RefType::OWNED); + } return emitPPCall((void*)compare, { lhs, rhs, imm(op_type) }, 2, 240, node).first->setType(RefType::OWNED); } diff --git a/src/runtime/objmodel.cpp b/src/runtime/objmodel.cpp index f08b73a22..39bbe96a8 100644 --- a/src/runtime/objmodel.cpp +++ b/src/runtime/objmodel.cpp @@ -5732,8 +5732,8 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit bool neg = (op_type == AST_TYPE::IsNot); if (rewrite_args) { - RewriterVar* cmpres = rewrite_args->lhs->cmp(neg ? AST_TYPE::NotEq : AST_TYPE::Eq, rewrite_args->rhs, - rewrite_args->destination); + RewriterVar* cmpres + = rewrite_args->lhs->cmp(neg ? AST_TYPE::NotEq : AST_TYPE::Eq, rewrite_args->rhs, assembler::RDI); rewrite_args->out_rtn = rewrite_args->rewriter->call(false, (void*)boxBool, cmpres)->setType(RefType::OWNED); rewrite_args->out_success = true; From dc85d0017ecdf83cf4f2a1c79bb1401968c655d4 Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Sun, 29 May 2016 17:59:44 +0100 Subject: [PATCH 04/10] bjit: make nonzeroHelper return a borrowed reference --- src/codegen/baseline_jit.cpp | 22 +++++++++++----------- src/codegen/baseline_jit.h | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index fc26ad1c3..3faf26c9a 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -454,11 +454,11 @@ RewriterVar* JitFragmentWriter::emitLandingpad() { RewriterVar* JitFragmentWriter::emitNonzero(RewriterVar* v) { // nonzeroHelper returns bool - return call(false, (void*)nonzeroHelper, v)->setType(RefType::OWNED); + return call(false, (void*)nonzeroHelper, v)->setType(RefType::BORROWED); } RewriterVar* JitFragmentWriter::emitNotNonzero(RewriterVar* v) { - return call(false, (void*)notHelper, v)->setType(RefType::OWNED); + return call(false, (void*)notHelper, v)->setType(RefType::BORROWED); } RewriterVar* JitFragmentWriter::emitRepr(RewriterVar* v) { @@ -960,12 +960,12 @@ Box* JitFragmentWriter::hasnextHelper(Box* b) { return boxBool(pyston::hasnext(b)); } -Box* JitFragmentWriter::nonzeroHelper(Box* b) { - return boxBool(b->nonzeroIC()); +BORROWED(Box*) JitFragmentWriter::nonzeroHelper(Box* b) { + return b->nonzeroIC() ? True : False; } -Box* JitFragmentWriter::notHelper(Box* b) { - return boxBool(!b->nonzeroIC()); +BORROWED(Box*) JitFragmentWriter::notHelper(Box* b) { + return b->nonzeroIC() ? False : True; } Box* JitFragmentWriter::runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args, @@ -1164,12 +1164,12 @@ void JitFragmentWriter::_emitSideExit(STOLEN(RewriterVar*) var, RewriterVar* val // Really, we should probably do a decref on either side post-jump. // But the automatic refcounter doesn't support that, and since the value is either True or False, // we can get away with doing the decref early. - // TODO: better solution is to just make NONZERO return a borrowed ref, so we don't have to decref at all. - _decref(var); - // Hax: override the automatic refcount system - assert(var->reftype == RefType::OWNED); + if (var->reftype == RefType::OWNED) { + _decref(var); + // Hax: override the automatic refcount system + var->reftype = RefType::BORROWED; + } assert(var->num_refs_consumed == 0); - var->reftype = RefType::BORROWED; assembler::Register var_reg = var->getInReg(); if (isLargeConstant(val)) { diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 1c486e692..88fc8f9ea 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -308,8 +308,8 @@ class JitFragmentWriter : public Rewriter { static Box* createTupleHelper(uint64_t num, Box** data); static Box* exceptionMatchesHelper(Box* obj, Box* cls); static Box* hasnextHelper(Box* b); - static Box* nonzeroHelper(Box* b); - static Box* notHelper(Box* b); + static BORROWED(Box*) nonzeroHelper(Box* b); + static BORROWED(Box*) notHelper(Box* b); static Box* runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args, std::vector* keyword_names); From a9a9c6c286747560044c8769ea1d54301edf610a Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Mon, 30 May 2016 17:48:09 +0100 Subject: [PATCH 05/10] bjit: use r12 and r15 --- src/asm_writing/assembler.cpp | 5 +++++ src/asm_writing/rewriter.cpp | 39 +++++++++++++++++++++++------------ src/asm_writing/rewriter.h | 2 ++ src/codegen/baseline_jit.cpp | 29 +++++++++++++++++++++----- src/codegen/baseline_jit.h | 11 ++++++++-- src/codegen/unwinding.cpp | 6 +----- 6 files changed, 67 insertions(+), 25 deletions(-) diff --git a/src/asm_writing/assembler.cpp b/src/asm_writing/assembler.cpp index c286b0f44..556eb4d1c 100644 --- a/src/asm_writing/assembler.cpp +++ b/src/asm_writing/assembler.cpp @@ -755,6 +755,7 @@ void Assembler::incq(Indirect mem) { } assert(src_idx >= 0 && src_idx < 8); + bool needssib = (src_idx == 0b100); if (rex) emitRex(rex); @@ -763,8 +764,12 @@ void Assembler::incq(Indirect mem) { assert(-0x80 <= mem.offset && mem.offset < 0x80); if (mem.offset == 0) { emitModRM(0b00, 0, src_idx); + if (needssib) + emitSIB(0b00, 0b100, src_idx); } else { emitModRM(0b01, 0, src_idx); + if (needssib) + emitSIB(0b00, 0b100, src_idx); emitByte(mem.offset); } } diff --git a/src/asm_writing/rewriter.cpp b/src/asm_writing/rewriter.cpp index e9b0dbe1a..5bc614261 100644 --- a/src/asm_writing/rewriter.cpp +++ b/src/asm_writing/rewriter.cpp @@ -24,7 +24,7 @@ namespace pyston { -static const assembler::Register allocatable_regs[] = { +static const assembler::Register std_allocatable_regs[] = { assembler::RAX, assembler::RCX, assembler::RDX, // no RSP // no RBP @@ -1234,17 +1234,29 @@ std::vector Rewriter::getDecrefLocations() { std::vector decref_infos; for (RewriterVar& var : vars) { if (var.locations.size() && var.needsDecref()) { - // TODO: add code to handle other location types and choose best location if there are several - Location l = *var.locations.begin(); - if (l.type == Location::Scratch) { - // convert to stack based location because later on we may not know the offset of the scratch area from - // the SP. - decref_infos.emplace_back(Location::Stack, indirectFor(l).offset); - } else if (l.type == Location::Register) { - // CSRs shouldn't be getting allocated, and we should only be calling this at a callsite: - RELEASE_ASSERT(0, "we shouldn't be trying to decref anything in a register"); - } else - RELEASE_ASSERT(0, "not implemented"); + bool found_location = false; + + for (Location l : var.locations) { + if (l.type == Location::Scratch) { + // convert to stack based location because later on we may not know the offset of the scratch area + // from the SP. + decref_infos.emplace_back(Location::Stack, indirectFor(l).offset); + found_location = true; + break; + } else if (l.type == Location::Register) { + // we only allow registers which are not clobbered by a call + if (l.isClobberedByCall()) + continue; + decref_infos.emplace_back(l); + found_location = true; + break; + } else + RELEASE_ASSERT(0, "not implemented"); + } + if (!found_location) { + // this is very rare. just fail the rewrite for now + failed = true; + } } } @@ -2208,7 +2220,8 @@ Rewriter::Rewriter(std::unique_ptr rewrite, int num_args, const L done_guarding(false), last_guard_action(-1), offset_eq_jmp_slowpath(-1), - offset_ne_jmp_slowpath(-1) { + offset_ne_jmp_slowpath(-1), + allocatable_regs(std_allocatable_regs) { initPhaseCollecting(); finished = false; diff --git a/src/asm_writing/rewriter.h b/src/asm_writing/rewriter.h index c91190010..e1f28a571 100644 --- a/src/asm_writing/rewriter.h +++ b/src/asm_writing/rewriter.h @@ -567,6 +567,8 @@ class Rewriter : public ICSlotRewrite::CommitHook { #endif } + llvm::ArrayRef allocatable_regs; + public: // This should be called exactly once for each argument RewriterVar* getArg(int argnum); diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index 3faf26c9a..a56c87e68 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -38,19 +38,22 @@ static llvm::DenseMap> block_patch_locations; // // long foo(char* c); // void bjit() { +// asm volatile ("" ::: "r15"); // asm volatile ("" ::: "r14"); // asm volatile ("" ::: "r13"); +// asm volatile ("" ::: "r12"); // char scratch[256+16]; // foo(scratch); // } // -// It omits the frame pointer but saves r13 and r14 +// It omits the frame pointer but saves r12, r13, r14 and r15 // use 'objdump -s -j .eh_frame ' to dump it const unsigned char eh_info[] - = { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, - 0x01, 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42, - 0x0e, 0x18, 0x47, 0x0e, 0xb0, 0x02, 0x8d, 0x03, 0x8e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; + = { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10, 0x01, + 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42, 0x0e, 0x18, 0x42, + 0x0e, 0x20, 0x42, 0x0e, 0x28, 0x47, 0x0e, 0xc0, 0x02, 0x8c, 0x05, 0x8d, 0x04, 0x8e, 0x03, 0x8f, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!"); static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!"); @@ -70,8 +73,10 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name) uint8_t* code = a.curInstPointer(); // emit prolog + a.push(assembler::R15); a.push(assembler::R14); a.push(assembler::R13); + a.push(assembler::R12); static_assert(sp_adjustment % 16 == 8, "stack isn't aligned"); a.sub(assembler::Immediate(sp_adjustment), assembler::RSP); a.mov(assembler::RDI, assembler::R13); // interpreter pointer @@ -136,6 +141,12 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping ic_info.appendDecrefInfosTo(decref_infos); } +static const assembler::Register bjit_allocatable_regs[] + = { assembler::RAX, assembler::RCX, assembler::RDX, + // no RSP + // no RBP + assembler::RDI, assembler::RSI, assembler::R8, assembler::R9, + assembler::R10, assembler::R11, assembler::R12, assembler::R15 }; JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr ic_info, std::unique_ptr rewrite, int code_offset, int num_bytes_overlapping, @@ -149,6 +160,8 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr ic code_block(code_block), interp(0), ic_info(std::move(ic_info)) { + allocatable_regs = bjit_allocatable_regs; + if (LOG_BJIT_ASSEMBLY) comment("BJIT: JitFragmentWriter() start"); interp = createNewVar(); @@ -1009,8 +1022,10 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo exit_info.exit_start = assembler->curInstPointer(); block_next->getInReg(assembler::RAX, true); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); + assembler->pop(assembler::R12); assembler->pop(assembler::R13); assembler->pop(assembler::R14); + assembler->pop(assembler::R15); assembler->retq(); // make sure we have at least 'min_patch_size' of bytes available. @@ -1042,8 +1057,10 @@ void JitFragmentWriter::_emitOSRPoint() { assembler->clear_reg(assembler::RAX); // = next block to execute assembler->mov(assembler::Immediate(ASTInterpreterJitInterface::osr_dummy_value), assembler::RDX); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); + assembler->pop(assembler::R12); assembler->pop(assembler::R13); assembler->pop(assembler::R14); + assembler->pop(assembler::R15); assembler->retq(); } interp->bumpUse(); @@ -1145,8 +1162,10 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) { return_val->getInReg(assembler::RDX, true); assembler->clear_reg(assembler::RAX); assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP); + assembler->pop(assembler::R12); assembler->pop(assembler::R13); assembler->pop(assembler::R14); + assembler->pop(assembler::R15); assembler->retq(); return_val->bumpUse(); } diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 88fc8f9ea..75188ef23 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -70,8 +70,9 @@ class JitFragmentWriter; // register or stack slot but we aren't if it outlives the block - we have to store it in the interpreter instance. // // We use the following callee-save regs to speed up the generated code: -// r13: pointer to ASTInterpreter instance -// r14: pointer to the vregs array +// r12, r15: temporary values +// r13: pointer to ASTInterpreter instance +// r14: pointer to the vregs array // // To execute a specific CFGBlock one has to call: // CFGBlock* block; @@ -90,8 +91,10 @@ class JitFragmentWriter; // // Basic layout of generated code block is: // entry_code: +// push %r15 ; save r15 // push %r14 ; save r14 // push %r13 ; save r13 +// push %r12 ; save r12 // sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 = space for two func args passed on the // stack + 8 byte for stack alignment // mov %rdi,%r13 ; copy the pointer to ASTInterpreter instance into r13 @@ -107,8 +110,10 @@ class JitFragmentWriter; // jne end_side_exit // movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg) // add $0x118,%rsp ; restore stack pointer +// pop %r12 ; restore r12 // pop %r13 ; restore r13 // pop %r14 ; restore r14 +// pop %r15 ; restore r15 // ret ; exit to the interpreter which will interpret the specified CFGBLock* // end_side_exit: // .... @@ -120,8 +125,10 @@ class JitFragmentWriter; // in this case 0 which means we are finished // movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return // add $0x118,%rsp ; restore stack pointer +// pop %r12 ; restore r12 // pop %r13 ; restore r13 // pop %r14 ; restore r14 +// pop %r15 ; restore r15 // ret // // nth_JitFragment: diff --git a/src/codegen/unwinding.cpp b/src/codegen/unwinding.cpp index 8d44aacb4..b604ba0b6 100644 --- a/src/codegen/unwinding.cpp +++ b/src/codegen/unwinding.cpp @@ -593,11 +593,7 @@ class PythonUnwindSession { assert(l.stack_second_offset % 8 == 0); b = b_ptr[l.stack_second_offset / 8]; } else if (l.type == Location::Register) { - RELEASE_ASSERT(0, "untested"); - // This branch should never get hit since we shouldn't generate Register locations, - // since we don't allow allocating callee-save registers. - // If we did, this code might be right: - // b = (Box*)get_cursor_reg(cursor, l.regnum); + b = (Box*)get_cursor_reg(cursor, l.regnum); } else { RELEASE_ASSERT(0, "not implemented"); } From b022674ccec4031a6b6a3de287709cc23340120f Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Fri, 27 May 2016 14:34:07 +0100 Subject: [PATCH 06/10] bjit: reenable local block var optimization --- src/asm_writing/rewriter.cpp | 23 +++++++++++++++++++---- src/asm_writing/rewriter.h | 4 +++- src/codegen/ast_interpreter.cpp | 11 +++++++---- src/codegen/ast_interpreter.h | 1 + src/codegen/baseline_jit.cpp | 33 ++++++++++++++++++++++++++------- src/codegen/baseline_jit.h | 2 +- test/tests/iter_del_time.py | 3 ++- 7 files changed, 59 insertions(+), 18 deletions(-) diff --git a/src/asm_writing/rewriter.cpp b/src/asm_writing/rewriter.cpp index 5bc614261..dc9c8dd2a 100644 --- a/src/asm_writing/rewriter.cpp +++ b/src/asm_writing/rewriter.cpp @@ -1233,9 +1233,8 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr std::vector Rewriter::getDecrefLocations() { std::vector decref_infos; for (RewriterVar& var : vars) { - if (var.locations.size() && var.needsDecref()) { + if (var.locations.size() && var.needsDecref(current_action_idx)) { bool found_location = false; - for (Location l : var.locations) { if (l.type == Location::Scratch) { // convert to stack based location because later on we may not know the offset of the scratch area @@ -1359,8 +1358,22 @@ void RewriterVar::refUsed() { rewriter->addAction([=]() { this->bumpUse(); }, { this }, ActionType::NORMAL); } -bool RewriterVar::needsDecref() { - return reftype == RefType::OWNED && !this->refHandedOff(); +bool RewriterVar::needsDecref(int current_action_index) { + rewriter->assertPhaseEmitting(); + + if (reftype != RefType::OWNED) + return false; + + // if nothing consumes this reference we need to create a decref entry + if (num_refs_consumed == 0) + return true; + + // don't create decref entry if the currenty action hands off the ownership + int reference_handed_off_action_index = uses[last_refconsumed_numuses - 1]; + if (reference_handed_off_action_index == current_action_index) + return false; + + return true; } void RewriterVar::registerOwnedAttr(int byte_offset) { @@ -1513,6 +1526,7 @@ void Rewriter::commit() { _incref(var, 1); } + current_action_idx = i; actions[i].action(); if (failed) { @@ -2215,6 +2229,7 @@ Rewriter::Rewriter(std::unique_ptr rewrite, int num_args, const L return_location(this->rewrite->returnRegister()), failed(false), needs_invalidation_support(needs_invalidation_support), + current_action_idx(-1), added_changing_action(false), marked_inside_ic(false), done_guarding(false), diff --git a/src/asm_writing/rewriter.h b/src/asm_writing/rewriter.h index e1f28a571..f1c79f3b7 100644 --- a/src/asm_writing/rewriter.h +++ b/src/asm_writing/rewriter.h @@ -254,7 +254,7 @@ class RewriterVar { bool isDoneUsing() { return next_use == uses.size(); } bool hasScratchAllocation() const { return scratch_allocation.second > 0; } void resetHasScratchAllocation() { scratch_allocation = std::make_pair(0, 0); } - bool needsDecref(); + bool needsDecref(int current_action_index); // Indicates if this variable is an arg, and if so, what location the arg is from. bool is_arg; @@ -432,6 +432,8 @@ class Rewriter : public ICSlotRewrite::CommitHook { bool needs_invalidation_support = true); std::deque actions; + int current_action_idx; // in the emitting phase get's set to index of currently executed action + template RewriterAction* addAction(F&& action, llvm::ArrayRef vars, ActionType type) { assertPhaseCollecting(); for (RewriterVar* var : vars) { diff --git a/src/codegen/ast_interpreter.cpp b/src/codegen/ast_interpreter.cpp index ac283d2da..c00c90e57 100644 --- a/src/codegen/ast_interpreter.cpp +++ b/src/codegen/ast_interpreter.cpp @@ -471,13 +471,12 @@ void ASTInterpreter::doStore(AST_Name* node, STOLEN(Value) value) { bool closure = vst == ScopeInfo::VarScopeType::CLOSURE; if (jit) { bool is_live = true; - // TODO: turn this optimization back on. - // if (!closure) - // is_live = source_info->getLiveness()->isLiveAtEnd(name, current_block); + if (!closure) + is_live = source_info->getLiveness()->isLiveAtEnd(name, current_block); if (is_live) jit->emitSetLocal(name, node->vreg, closure, value); else - jit->emitSetBlockLocal(name, value); + jit->emitSetBlockLocal(name, node->vreg, value); } if (closure) { @@ -1781,6 +1780,10 @@ int ASTInterpreterJitInterface::getBoxedLocalsOffset() { return offsetof(ASTInterpreter, frame_info.boxedLocals); } +int ASTInterpreterJitInterface::getCreatedClosureOffset() { + return offsetof(ASTInterpreter, created_closure); +} + int ASTInterpreterJitInterface::getCurrentBlockOffset() { return offsetof(ASTInterpreter, current_block); } diff --git a/src/codegen/ast_interpreter.h b/src/codegen/ast_interpreter.h index 0e0ba5da1..983fd5890 100644 --- a/src/codegen/ast_interpreter.h +++ b/src/codegen/ast_interpreter.h @@ -39,6 +39,7 @@ struct ASTInterpreterJitInterface { static constexpr uint64_t osr_dummy_value = -1; static int getBoxedLocalsOffset(); + static int getCreatedClosureOffset(); static int getCurrentBlockOffset(); static int getCurrentInstOffset(); static int getEdgeCountOffset(); diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index a56c87e68..866dee88f 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -385,17 +385,16 @@ RewriterVar* JitFragmentWriter::emitGetBlockLocal(InternedString s, int vreg) { auto it = local_syms.find(s); if (it == local_syms.end()) { auto r = emitGetLocal(s, vreg); - // TODO: clear out the vreg? - // assert(r->reftype == RefType::OWNED); - // emitSetLocal(s, vreg, false, imm(nullptr)); - // emitSetBlockLocal(s, r); + assert(r->reftype == RefType::OWNED); + emitSetBlockLocal(s, vreg, r); return r; } return it->second; } void JitFragmentWriter::emitKillTemporary(InternedString s, int vreg) { - emitSetLocal(s, vreg, false, imm(nullptr)); + if (!local_syms.count(s)) + emitSetLocal(s, vreg, false, imm(nullptr)); } RewriterVar* JitFragmentWriter::emitGetBoxedLocal(BoxedString* s) { @@ -549,8 +548,26 @@ std::vector JitFragmentWriter::emitUnpackIntoArray(RewriterVar* v, } RewriterVar* JitFragmentWriter::emitYield(RewriterVar* v) { - auto rtn = call(false, (void*)ASTInterpreterJitInterface::yieldHelper, getInterp(), v)->setType(RefType::OWNED); + llvm::SmallVector local_args; + local_args.push_back(interp->getAttr(ASTInterpreterJitInterface::getCreatedClosureOffset())); + // we have to pass all owned references which are not stored in the vregs to yield() so that the GC can traverse it + for (auto&& sym : local_syms) { + if (sym.second == v) + continue; + if (sym.second->reftype == RefType::OWNED) + local_args.push_back(sym.second); + } + // erase duplicate entries + std::sort(local_args.begin(), local_args.end()); + local_args.erase(std::unique(local_args.begin(), local_args.end()), local_args.end()); + + auto&& args = allocArgs(local_args, RewriterVar::SetattrType::REF_USED); + RewriterVar* generator = interp->getAttr(ASTInterpreterJitInterface::getGeneratorOffset()); + auto rtn = call(false, (void*)yield, generator, v, args, imm(local_args.size()))->setType(RefType::OWNED); v->refConsumed(); + for (auto&& var : local_args) { + var->refUsed(); + } return rtn; } @@ -643,10 +660,12 @@ void JitFragmentWriter::emitSetAttr(AST_expr* node, RewriterVar* obj, BoxedStrin attr->refConsumed(rtn.second); } -void JitFragmentWriter::emitSetBlockLocal(InternedString s, STOLEN(RewriterVar*) v) { +void JitFragmentWriter::emitSetBlockLocal(InternedString s, int vreg, STOLEN(RewriterVar*) v) { if (LOG_BJIT_ASSEMBLY) comment("BJIT: emitSetBlockLocal() start"); RewriterVar* prev = local_syms[s]; + if (!prev) + emitSetLocal(s, vreg, false, imm(nullptr)); // clear out the vreg local_syms[s] = v; if (LOG_BJIT_ASSEMBLY) comment("BJIT: emitSetBlockLocal() end"); diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 75188ef23..8de46b1a2 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -272,7 +272,7 @@ class JitFragmentWriter : public Rewriter { void emitRaise3(RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2); void emitReturn(RewriterVar* v); void emitSetAttr(AST_expr* node, RewriterVar* obj, BoxedString* s, STOLEN(RewriterVar*) attr); - void emitSetBlockLocal(InternedString s, STOLEN(RewriterVar*) v); + void emitSetBlockLocal(InternedString s, int vreg, STOLEN(RewriterVar*) v); void emitSetCurrentInst(AST_stmt* node); void emitSetExcInfo(RewriterVar* type, RewriterVar* value, RewriterVar* traceback); void emitSetGlobal(BoxedString* s, STOLEN(RewriterVar*) v, bool are_globals_from_module); diff --git a/test/tests/iter_del_time.py b/test/tests/iter_del_time.py index f69733917..1ce16aef6 100644 --- a/test/tests/iter_del_time.py +++ b/test/tests/iter_del_time.py @@ -1,4 +1,5 @@ -# fail-if: '-n' in EXTRA_JIT_ARGS or '-O' in EXTRA_JIT_ARGS +# expected: fail +# this only works in the interpreter and not in the bjit and llvm jit class C(object): def next(self): From 75b794ad52b0cc2bd30c2da23610008daf04aa21 Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Thu, 26 May 2016 21:02:50 +0100 Subject: [PATCH 07/10] bjit: allocate code block using mmap also add a option to pass MAP_32BIT to mmap --- src/codegen/baseline_jit.cpp | 21 ++++++++++++++++----- src/codegen/baseline_jit.h | 15 ++++++++++++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index 866dee88f..6544af392 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -16,6 +16,7 @@ #include #include +#include #include "codegen/irgen/hooks.h" #include "codegen/memmgr.h" @@ -59,12 +60,22 @@ static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!"); constexpr int code_size = JitCodeBlock::memory_size - sizeof(eh_info); +JitCodeBlock::MemoryManager::MemoryManager() { + int protection = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#if ENABLE_BASELINEJIT_MAP_32BIT + flags |= MAP_32BIT; +#endif + addr = (uint8_t*)mmap(NULL, JitCodeBlock::memory_size, protection, flags, -1, 0); +} + +JitCodeBlock::MemoryManager::~MemoryManager() { + munmap(addr, JitCodeBlock::memory_size); + addr = NULL; +} + JitCodeBlock::JitCodeBlock(llvm::StringRef name) - : memory(new uint8_t[memory_size]), - entry_offset(0), - a(memory.get() + sizeof(eh_info), code_size), - is_currently_writing(false), - asm_failed(false) { + : entry_offset(0), a(memory.get() + sizeof(eh_info), code_size), is_currently_writing(false), asm_failed(false) { static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks"); num_jit_code_blocks.log(); static StatCounter num_jit_total_bytes("num_baselinejit_total_bytes"); diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 8de46b1a2..41e69c8a2 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -23,6 +23,9 @@ namespace pyston { +// passes MAP_32BIT to mmap when allocating the memory for the bjit code. +// it's nice for inspecting the generated asm because the debugger is able to show the name of called C/C++ functions +#define ENABLE_BASELINEJIT_MAP_32BIT 0 #define ENABLE_BASELINEJIT_ICS 1 class AST_stmt; @@ -147,8 +150,18 @@ class JitCodeBlock { static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */; private: + struct MemoryManager { + private: + uint8_t* addr; + + public: + MemoryManager(); + ~MemoryManager(); + uint8_t* get() { return addr; } + }; + // the memory block contains the EH frame directly followed by the generated machine code. - std::unique_ptr memory; + MemoryManager memory; int entry_offset; assembler::Assembler a; bool is_currently_writing; From ce2d965d17dc1a4eb654524d4731798e437c25b0 Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Thu, 2 Jun 2016 13:50:34 +0100 Subject: [PATCH 08/10] rewriter: cleanup call() using a template and add a can_throw argument to _call() --- src/asm_writing/rewriter.cpp | 96 ++++++------------------------------ src/asm_writing/rewriter.h | 21 ++++---- src/codegen/baseline_jit.cpp | 6 ++- 3 files changed, 29 insertions(+), 94 deletions(-) diff --git a/src/asm_writing/rewriter.cpp b/src/asm_writing/rewriter.cpp index dc9c8dd2a..fd2d5da7f 100644 --- a/src/asm_writing/rewriter.cpp +++ b/src/asm_writing/rewriter.cpp @@ -540,8 +540,8 @@ void Rewriter::_incref(RewriterVar* var, int num_refs) { // this->_trap(); -// this->_call(NULL, true, (void*)Helper::incref, llvm::ArrayRef(&var, 1), -// llvm::ArrayRef()); +// this->_call(NULL, true, false /* can't throw */, (void*)Helper::incref, { var }); + #ifdef Py_REF_DEBUG // assembler->trap(); for (int i = 0; i < num_refs; ++i) @@ -569,16 +569,13 @@ void Rewriter::_decref(RewriterVar* var) { assert(!var->nullable); // assembler->trap(); -// this->_call(NULL, true, (void*)Helper::decref, llvm::ArrayRef(&var, 1), -// llvm::ArrayRef(NULL, (int)0)); +// this->_call(NULL, true, false /* can't throw */, (void*)Helper::decref, { var }); #ifdef Py_REF_DEBUG // assembler->trap(); assembler->decq(assembler::Immediate(&_Py_RefTotal)); #endif - - _setupCall(true, llvm::ArrayRef(&var, 1), llvm::ArrayRef(NULL, (size_t)0), - assembler::RAX); + _setupCall(true, { var }, {}, assembler::RAX); #ifdef Py_REF_DEBUG @@ -612,8 +609,7 @@ void Rewriter::_xdecref(RewriterVar* var) { assert(var->nullable); // assembler->trap(); - this->_call(NULL, true, (void*)Helper::xdecref, llvm::ArrayRef(&var, 1), - llvm::ArrayRef(NULL, (size_t)0)); + this->_call(NULL, true, false /* can't throw */, (void*)Helper::xdecref, { var }); // Doesn't call bumpUse, since this function is designed to be callable from other emitting functions. // (ie the caller should call bumpUse) @@ -920,72 +916,9 @@ RewriterVar* Rewriter::loadConst(int64_t val, Location dest) { return const_loader_var; } -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr) { - STAT_TIMER(t0, "us_timer_rewriter", 10); - - RewriterVar::SmallVector args; - RewriterVar::SmallVector args_xmm; - return call(has_side_effects, func_addr, args, args_xmm); -} - -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0) { +RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, llvm::ArrayRef args, + llvm::ArrayRef args_xmm) { STAT_TIMER(t0, "us_timer_rewriter", 10); - - RewriterVar::SmallVector args; - RewriterVar::SmallVector args_xmm; - args.push_back(arg0); - return call(has_side_effects, func_addr, args, args_xmm); -} - -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1) { - STAT_TIMER(t0, "us_timer_rewriter", 10); - - RewriterVar::SmallVector args; - RewriterVar::SmallVector args_xmm; - args.push_back(arg0); - args.push_back(arg1); - return call(has_side_effects, func_addr, args, args_xmm); -} - -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, - RewriterVar* arg2) { - STAT_TIMER(t0, "us_timer_rewriter", 10); - - RewriterVar::SmallVector args; - RewriterVar::SmallVector args_xmm; - args.push_back(arg0); - args.push_back(arg1); - args.push_back(arg2); - return call(has_side_effects, func_addr, args, args_xmm); -} - -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, - RewriterVar* arg2, RewriterVar* arg3) { - STAT_TIMER(t0, "us_timer_rewriter", 10); - - RewriterVar::SmallVector args; - RewriterVar::SmallVector args_xmm; - args.push_back(arg0); - args.push_back(arg1); - args.push_back(arg2); - args.push_back(arg3); - return call(has_side_effects, func_addr, args, args_xmm); -} - -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, - RewriterVar* arg2, RewriterVar* arg3, RewriterVar* arg4) { - RewriterVar::SmallVector args; - RewriterVar::SmallVector args_xmm; - args.push_back(arg0); - args.push_back(arg1); - args.push_back(arg2); - args.push_back(arg3); - args.push_back(arg4); - return call(has_side_effects, func_addr, args, args_xmm); -} - -RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, const RewriterVar::SmallVector& args, - const RewriterVar::SmallVector& args_xmm) { RewriterVar* result = createNewVar(); RewriterVar::SmallVector uses; for (RewriterVar* v : args) { @@ -1015,9 +948,12 @@ RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, const Rewrit // Hack: pack this into a short to make sure it fits in the closure short xmm_args_size = args_xmm.size(); + // TODO: we don't need to generate the decref info for calls which can't throw + bool can_throw = true; + // Hack: explicitly order the closure arguments so they pad nicer - addAction([args_size, xmm_args_size, has_side_effects, this, result, func_addr, _args, _args_xmm]() { - this->_call(result, has_side_effects, func_addr, llvm::ArrayRef(_args, args_size), + addAction([args_size, xmm_args_size, has_side_effects, can_throw, this, result, func_addr, _args, _args_xmm]() { + this->_call(result, has_side_effects, can_throw, func_addr, llvm::ArrayRef(_args, args_size), llvm::ArrayRef(_args_xmm, xmm_args_size)); for (int i = 0; i < args_size; i++) _args[i]->bumpUse(); @@ -1177,8 +1113,8 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef ar #endif } -void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr, llvm::ArrayRef args, - llvm::ArrayRef args_xmm) { +void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr, + llvm::ArrayRef args, llvm::ArrayRef args_xmm) { if (LOG_IC_ASSEMBLY) assembler->comment("_call"); @@ -1215,8 +1151,8 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, void* func_addr assert(assembler->hasFailed() || asm_address == (uint64_t)assembler->curInstPointer()); } - // TODO: we don't need to generate the decref info for calls which can't throw - registerDecrefInfoHere(); + if (can_throw) + registerDecrefInfoHere(); if (!failed) { assert(vars_by_location.count(assembler::RAX) == 0); diff --git a/src/asm_writing/rewriter.h b/src/asm_writing/rewriter.h index f1c79f3b7..4496134c8 100644 --- a/src/asm_writing/rewriter.h +++ b/src/asm_writing/rewriter.h @@ -514,8 +514,8 @@ class Rewriter : public ICSlotRewrite::CommitHook { void _setupCall(bool has_side_effects, llvm::ArrayRef args, llvm::ArrayRef args_xmm, Location preserve = Location::any()); // _call does not call bumpUse on its arguments: - void _call(RewriterVar* result, bool has_side_effects, void* func_addr, llvm::ArrayRef args, - llvm::ArrayRef args_xmm); + void _call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr, + llvm::ArrayRef args, llvm::ArrayRef args_xmm = {}); void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest); int _allocate(RewriterVar* result, int n); void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n); @@ -612,16 +612,13 @@ class Rewriter : public ICSlotRewrite::CommitHook { // 2) does not have any side-effects that would be user-visible if we bailed out from the middle of the // inline cache. (Extra allocations don't count even though they're potentially visible if you look // hard enough.) - RewriterVar* call(bool has_side_effects, void* func_addr, const RewriterVar::SmallVector& args, - const RewriterVar::SmallVector& args_xmm = RewriterVar::SmallVector()); - RewriterVar* call(bool has_side_effects, void* func_addr); - RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0); - RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1); - RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2); - RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2, - RewriterVar* arg3); - RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg0, RewriterVar* arg1, RewriterVar* arg2, - RewriterVar* arg3, RewriterVar* arg4); + RewriterVar* call(bool has_side_effects, void* func_addr, llvm::ArrayRef args = {}, + llvm::ArrayRef args_xmm = {}); + template + RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg1, Args... args) { + return call(has_side_effects, func_addr, llvm::ArrayRef({ arg1, args... }), {}); + } + RewriterVar* add(RewriterVar* a, int64_t b, Location dest); // Allocates n pointer-sized stack slots: RewriterVar* allocate(int n); diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index 6544af392..dca93282a 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -311,9 +311,11 @@ RewriterVar* JitFragmentWriter::emitCallWithAllocatedArgs(void* func_addr, const // Hack: pack this into a short to make sure it fits in the closure short args_additional_size = additional_uses.size(); + bool can_throw = true; + // Hack: explicitly order the closure arguments so they pad nicer - addAction([args_size, args_additional_size, func_addr, this, result, _args, _args_additional]() { - this->_call(result, false, func_addr, llvm::ArrayRef(_args, args_size), + addAction([args_size, args_additional_size, can_throw, func_addr, this, result, _args, _args_additional]() { + this->_call(result, false, can_throw, func_addr, llvm::ArrayRef(_args, args_size), llvm::ArrayRef()); for (int i = 0; i < args_size; i++) _args[i]->bumpUse(); From 229fb46572b62cce15bcec36f6208b19d1ba339c Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Thu, 2 Jun 2016 14:17:17 +0100 Subject: [PATCH 09/10] rewriter: directly embed additional ref uses and bump uses early when emitting calls --- src/asm_writing/rewriter.cpp | 112 ++++++++++++++++------------- src/asm_writing/rewriter.h | 50 ++++++++++--- src/codegen/ast_interpreter.cpp | 9 +-- src/codegen/baseline_jit.cpp | 124 +++++++++++--------------------- src/codegen/baseline_jit.h | 7 +- src/runtime/objmodel.cpp | 34 ++++----- 6 files changed, 166 insertions(+), 170 deletions(-) diff --git a/src/asm_writing/rewriter.cpp b/src/asm_writing/rewriter.cpp index fd2d5da7f..308f23b38 100644 --- a/src/asm_writing/rewriter.cpp +++ b/src/asm_writing/rewriter.cpp @@ -917,18 +917,9 @@ RewriterVar* Rewriter::loadConst(int64_t val, Location dest) { } RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, llvm::ArrayRef args, - llvm::ArrayRef args_xmm) { + llvm::ArrayRef args_xmm, llvm::ArrayRef additional_uses) { STAT_TIMER(t0, "us_timer_rewriter", 10); RewriterVar* result = createNewVar(); - RewriterVar::SmallVector uses; - for (RewriterVar* v : args) { - assert(v != NULL); - uses.push_back(v); - } - for (RewriterVar* v : args_xmm) { - assert(v != NULL); - uses.push_back(v); - } ActionType type; if (has_side_effects) @@ -936,36 +927,65 @@ RewriterVar* Rewriter::call(bool has_side_effects, void* func_addr, llvm::ArrayR else type = ActionType::NORMAL; - // It's not nice to pass llvm::SmallVectors through a closure, especially with our SmallFunction - // optimization, so just regionAlloc them and copy the data in: - RewriterVar** _args = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args.size()); - memcpy(_args, args.begin(), sizeof(RewriterVar*) * args.size()); - RewriterVar** _args_xmm = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args_xmm.size()); - memcpy(_args_xmm, args_xmm.begin(), sizeof(RewriterVar*) * args_xmm.size()); - - int args_size = args.size(); - assert(args_xmm.size() <= 0x7fff); - // Hack: pack this into a short to make sure it fits in the closure - short xmm_args_size = args_xmm.size(); // TODO: we don't need to generate the decref info for calls which can't throw bool can_throw = true; + auto args_array_ref = regionAllocArgs(args, args_xmm, additional_uses); // Hack: explicitly order the closure arguments so they pad nicer - addAction([args_size, xmm_args_size, has_side_effects, can_throw, this, result, func_addr, _args, _args_xmm]() { - this->_call(result, has_side_effects, can_throw, func_addr, llvm::ArrayRef(_args, args_size), - llvm::ArrayRef(_args_xmm, xmm_args_size)); - for (int i = 0; i < args_size; i++) - _args[i]->bumpUse(); - for (int i = 0; i < xmm_args_size; i++) - _args_xmm[i]->bumpUse(); - }, uses, type); + struct LambdaClosure { + RewriterVar** args_array; + + struct { + unsigned int has_side_effects : 1; + unsigned int can_throw : 1; + unsigned int num_args : 16; + unsigned int num_args_xmm : 16; + unsigned int num_additional_uses : 16; + }; + + llvm::ArrayRef allArgs() const { + return llvm::makeArrayRef(args_array, num_args + num_args_xmm + num_additional_uses); + } + + llvm::ArrayRef args() const { return allArgs().slice(0, num_args); } + + llvm::ArrayRef argsXmm() const { return allArgs().slice(num_args, num_args_xmm); } + + llvm::ArrayRef additionalUses() const { + return allArgs().slice((int)num_args + (int)num_args_xmm, num_additional_uses); + } + + LambdaClosure(llvm::MutableArrayRef args_array_ref, llvm::ArrayRef _args, + llvm::ArrayRef _args_xmm, llvm::ArrayRef _addition_uses, + bool has_side_effects, bool can_throw) + : args_array(args_array_ref.data()), + has_side_effects(has_side_effects), + can_throw(can_throw), + num_args(_args.size()), + num_args_xmm(_args_xmm.size()), + num_additional_uses(_addition_uses.size()) { + assert(_args.size() < 1 << 16); + assert(_args_xmm.size() < 1 << 16); + assert(_addition_uses.size() < 1 << 16); + } + + } lambda_closure(args_array_ref, args, args_xmm, additional_uses, has_side_effects, can_throw); + assert(lambda_closure.args().size() == args.size()); + assert(lambda_closure.argsXmm().size() == args_xmm.size()); + assert(lambda_closure.additionalUses().size() == additional_uses.size()); + + addAction([this, result, func_addr, lambda_closure]() { + this->_call(result, lambda_closure.has_side_effects, lambda_closure.can_throw, func_addr, lambda_closure.args(), + lambda_closure.argsXmm(), lambda_closure.allArgs()); + }, lambda_closure.allArgs(), type); return result; } void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef args, - llvm::ArrayRef args_xmm, Location preserve) { + llvm::ArrayRef args_xmm, Location preserve, + llvm::ArrayRef bump_if_possible) { if (has_side_effects) assert(done_guarding); @@ -1052,6 +1072,10 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef ar } #endif + for (auto&& use : bump_if_possible) { + use->bumpUseEarlyIfPossible(); + } + // Spill caller-saved registers: for (auto check_reg : caller_save_registers) { // check_reg.dump(); @@ -1114,7 +1138,8 @@ void Rewriter::_setupCall(bool has_side_effects, llvm::ArrayRef ar } void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr, - llvm::ArrayRef args, llvm::ArrayRef args_xmm) { + llvm::ArrayRef args, llvm::ArrayRef args_xmm, + llvm::ArrayRef vars_to_bump) { if (LOG_IC_ASSEMBLY) assembler->comment("_call"); @@ -1123,17 +1148,7 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, if (failed) return; - _setupCall(has_side_effects, args, args_xmm, assembler::R11); - - // This duty is now on the caller: - /* - for (RewriterVar* arg : args) { - arg->bumpUse(); - } - for (RewriterVar* arg_xmm : args_xmm) { - arg_xmm->bumpUse(); - } - */ + _setupCall(has_side_effects, args, args_xmm, assembler::R11, vars_to_bump); assertConsistent(); @@ -1164,6 +1179,10 @@ void Rewriter::_call(RewriterVar* result, bool has_side_effects, bool can_throw, if (result) result->releaseIfNoUses(); + + for (RewriterVar* var : vars_to_bump) { + var->bumpUseLateIfNecessary(); + } } std::vector Rewriter::getDecrefLocations() { @@ -1286,12 +1305,7 @@ void RewriterVar::refConsumed(RewriterAction* action) { last_refconsumed_numuses = uses.size(); if (!action) action = rewriter->getLastAction(); - action->consumed_refs.emplace_back(this); -} - -void RewriterVar::refUsed() { - // TODO: This is a pretty silly implementation that might prevent other optimizations? - rewriter->addAction([=]() { this->bumpUse(); }, { this }, ActionType::NORMAL); + action->consumed_refs.push_front(this); } bool RewriterVar::needsDecref(int current_action_index) { @@ -1892,7 +1906,7 @@ void Rewriter::_checkAndThrowCAPIException(RewriterVar* r, int64_t exc_val, asse } else assembler->cmp(var_reg, assembler::Immediate(exc_val), type); - _setupCall(false, RewriterVar::SmallVector(), RewriterVar::SmallVector()); + _setupCall(false, {}); { assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO); assembler->mov(assembler::Immediate((void*)throwCAPIException), assembler::R11); diff --git a/src/asm_writing/rewriter.h b/src/asm_writing/rewriter.h index 4496134c8..1ee965bde 100644 --- a/src/asm_writing/rewriter.h +++ b/src/asm_writing/rewriter.h @@ -16,6 +16,7 @@ #define PYSTON_ASMWRITING_REWRITER_H #include +#include #include #include #include @@ -192,8 +193,6 @@ class RewriterVar { // if no action is specified it will assume the last action consumed the reference void refConsumed(RewriterAction* action = NULL); - void refUsed(); - // registerOwnedAttr tells the refcounter that a certain memory location holds a pointer // to an owned reference. This must be paired with a call to deregisterOwnedAttr // Call these right before emitting the store (for register) or decref (for deregister). @@ -237,11 +236,11 @@ class RewriterVar { // /* some code */ // bumpUseLateIfNecessary(); void bumpUseEarlyIfPossible() { - if (reftype != RefType::OWNED) + if (reftype != RefType::OWNED && !hasScratchAllocation()) bumpUse(); } void bumpUseLateIfNecessary() { - if (reftype == RefType::OWNED) + if (reftype == RefType::OWNED || hasScratchAllocation()) bumpUse(); } @@ -339,8 +338,9 @@ template class SmallFunction { class RewriterAction { public: - SmallFunction<56> action; - std::vector consumed_refs; + SmallFunction<48> action; + std::forward_list consumed_refs; + template RewriterAction(F&& action) : action(std::forward(action)) {} @@ -367,7 +367,33 @@ class Rewriter : public ICSlotRewrite::CommitHook { protected: // Allocates `bytes` bytes of data. The allocation will get freed when the rewriter gets freed. - void* regionAlloc(size_t bytes) { return allocator.Allocate(bytes, 16 /* alignment */); } + void* regionAlloc(size_t bytes, int alignment = 16) { return allocator.Allocate(bytes, alignment); } + template llvm::MutableArrayRef regionAlloc(size_t num_elements) { + return llvm::MutableArrayRef(allocator.Allocate(num_elements), num_elements); + } + + // This takes a variable number of llvm::ArrayRef and copies in all elements into a single contiguous + // memory location. + template + llvm::MutableArrayRef regionAllocArgs(llvm::ArrayRef arg1, Args... args) { + size_t num_total_args = 0; + for (auto&& array : { arg1, args... }) { + num_total_args += array.size(); + } + if (num_total_args == 0) + return llvm::MutableArrayRef(); + + auto args_array_ref = regionAlloc(num_total_args); + auto insert_point = args_array_ref; + for (auto&& array : { arg1, args... }) { + if (!array.empty()) { + memcpy(insert_point.data(), array.data(), array.size() * sizeof(RewriterVar*)); + insert_point = insert_point.slice(array.size()); + } + } + assert(insert_point.size() == 0); + return args_array_ref; + } // Helps generating the best code for loading a const integer value. // By keeping track of the last known value of every register and reusing it. @@ -511,11 +537,13 @@ class Rewriter : public ICSlotRewrite::CommitHook { void _slowpathJump(bool condition_eq); void _trap(); void _loadConst(RewriterVar* result, int64_t val); - void _setupCall(bool has_side_effects, llvm::ArrayRef args, llvm::ArrayRef args_xmm, - Location preserve = Location::any()); + void _setupCall(bool has_side_effects, llvm::ArrayRef args = {}, + llvm::ArrayRef args_xmm = {}, Location preserve = Location::any(), + llvm::ArrayRef bump_if_possible = {}); // _call does not call bumpUse on its arguments: void _call(RewriterVar* result, bool has_side_effects, bool can_throw, void* func_addr, - llvm::ArrayRef args, llvm::ArrayRef args_xmm = {}); + llvm::ArrayRef args, llvm::ArrayRef args_xmm = {}, + llvm::ArrayRef vars_to_bump = {}); void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest); int _allocate(RewriterVar* result, int n); void _allocateAndCopy(RewriterVar* result, RewriterVar* array, int n); @@ -613,7 +641,7 @@ class Rewriter : public ICSlotRewrite::CommitHook { // inline cache. (Extra allocations don't count even though they're potentially visible if you look // hard enough.) RewriterVar* call(bool has_side_effects, void* func_addr, llvm::ArrayRef args = {}, - llvm::ArrayRef args_xmm = {}); + llvm::ArrayRef args_xmm = {}, llvm::ArrayRef additional_uses = {}); template RewriterVar* call(bool has_side_effects, void* func_addr, RewriterVar* arg1, Args... args) { return call(has_side_effects, func_addr, llvm::ArrayRef({ arg1, args... }), {}); diff --git a/src/codegen/ast_interpreter.cpp b/src/codegen/ast_interpreter.cpp index c00c90e57..1cd01e28f 100644 --- a/src/codegen/ast_interpreter.cpp +++ b/src/codegen/ast_interpreter.cpp @@ -1172,12 +1172,9 @@ Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std:: closure_var = jit->imm(0ul); if (!passed_globals_var) passed_globals_var = jit->imm(0ul); - rtn.var = jit->call(false, (void*)createFunctionFromMetadata, jit->imm(md), closure_var, passed_globals_var, - defaults_var, jit->imm(args->defaults.size()))->setType(RefType::OWNED); - - for (auto d_var : defaults_vars) { - d_var->refUsed(); - } + rtn.var = jit->call(false, (void*)createFunctionFromMetadata, { jit->imm(md), closure_var, passed_globals_var, + defaults_var, jit->imm(args->defaults.size()) }, + {}, defaults_vars)->setType(RefType::OWNED); } rtn.o = createFunctionFromMetadata(md, closure, passed_globals, u.il); diff --git a/src/codegen/baseline_jit.cpp b/src/codegen/baseline_jit.cpp index dca93282a..439758973 100644 --- a/src/codegen/baseline_jit.cpp +++ b/src/codegen/baseline_jit.cpp @@ -222,10 +222,10 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B call_args.push_back(args.size() > 1 ? args[1] : imm(0ul)); call_args.push_back(args.size() > 2 ? args[2] : imm(0ul)); + llvm::ArrayRef additional_uses; if (args.size() > 3) { - RewriterVar* scratch = allocate(args.size() - 3); - for (int i = 0; i < args.size() - 3; ++i) - scratch->setAttr(i * sizeof(void*), args[i + 3], RewriterVar::SetattrType::REF_USED); + additional_uses = args.slice(3); + RewriterVar* scratch = allocArgs(additional_uses, RewriterVar::SetattrType::REF_USED); call_args.push_back(scratch); } else if (keyword_names) { call_args.push_back(imm(0ul)); @@ -234,11 +234,8 @@ RewriterVar* JitFragmentWriter::emitCallattr(AST_expr* node, RewriterVar* obj, B if (keyword_names) call_args.push_back(imm(keyword_names)); - auto r = emitPPCall((void*)callattr, call_args, 2, 640, node, type_recorder).first->setType(RefType::OWNED); - for (int i = 3; i < args.size(); i++) { - args[i]->refUsed(); - } - return r; + return emitPPCall((void*)callattr, call_args, 2, 640, node, type_recorder, additional_uses) + .first->setType(RefType::OWNED); #else // We could make this faster but for now: keep it simple, stupid... RewriterVar* attr_var = imm(attr); @@ -285,44 +282,9 @@ void JitFragmentWriter::emitDictSet(RewriterVar* dict, RewriterVar* k, RewriterV v->refConsumed(); } -// TODO: merge this function's functionality with refUsed RewriterVar* JitFragmentWriter::emitCallWithAllocatedArgs(void* func_addr, const llvm::ArrayRef args, const llvm::ArrayRef additional_uses) { - RewriterVar* result = createNewVar(); - RewriterVar::SmallVector uses; - for (RewriterVar* v : args) { - assert(v != NULL); - uses.push_back(v); - } - for (RewriterVar* v : additional_uses) { - assert(v != NULL); - uses.push_back(v); - } - - // It's not nice to pass llvm::SmallVectors through a closure, especially with our SmallFunction - // optimization, so just regionAlloc them and copy the data in: - RewriterVar** _args = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * args.size()); - memcpy(_args, args.begin(), sizeof(RewriterVar*) * args.size()); - RewriterVar** _args_additional = (RewriterVar**)this->regionAlloc(sizeof(RewriterVar*) * additional_uses.size()); - memcpy(_args_additional, additional_uses.begin(), sizeof(RewriterVar*) * additional_uses.size()); - - int args_size = args.size(); - assert(additional_uses.size() <= 0x7fff); - // Hack: pack this into a short to make sure it fits in the closure - short args_additional_size = additional_uses.size(); - - bool can_throw = true; - - // Hack: explicitly order the closure arguments so they pad nicer - addAction([args_size, args_additional_size, can_throw, func_addr, this, result, _args, _args_additional]() { - this->_call(result, false, can_throw, func_addr, llvm::ArrayRef(_args, args_size), - llvm::ArrayRef()); - for (int i = 0; i < args_size; i++) - _args[i]->bumpUse(); - for (int i = 0; i < args_additional_size; i++) - _args_additional[i]->bumpUse(); - }, uses, ActionType::NORMAL); - return result; + return call(false, func_addr, args, {}, additional_uses); } RewriterVar* JitFragmentWriter::emitCreateList(const llvm::ArrayRef values) { @@ -504,21 +466,18 @@ RewriterVar* JitFragmentWriter::emitRuntimeCall(AST_expr* node, RewriterVar* obj call_args.push_back(args.size() > 1 ? args[1] : imm(0ul)); call_args.push_back(args.size() > 2 ? args[2] : imm(0ul)); + llvm::ArrayRef additional_uses; if (args.size() > 3) { - RewriterVar* scratch = allocate(args.size() - 3); - for (int i = 0; i < args.size() - 3; ++i) - scratch->setAttr(i * sizeof(void*), args[i + 3], RewriterVar::SetattrType::REF_USED); + additional_uses = args.slice(3); + RewriterVar* scratch = allocArgs(additional_uses, RewriterVar::SetattrType::REF_USED); call_args.push_back(scratch); } else call_args.push_back(imm(0ul)); if (keyword_names) call_args.push_back(imm(keyword_names)); - auto r = emitPPCall((void*)runtimeCall, call_args, 2, 640, node, type_recorder).first->setType(RefType::OWNED); - for (int i = 3; i < args.size(); i++) { - args[i]->refUsed(); - } - return r; + return emitPPCall((void*)runtimeCall, call_args, 2, 640, node, type_recorder, additional_uses) + .first->setType(RefType::OWNED); #else RewriterVar* argspec_var = imm(argspec.asInt()); RewriterVar* keyword_names_var = keyword_names ? imm(keyword_names) : nullptr; @@ -576,11 +535,9 @@ RewriterVar* JitFragmentWriter::emitYield(RewriterVar* v) { auto&& args = allocArgs(local_args, RewriterVar::SetattrType::REF_USED); RewriterVar* generator = interp->getAttr(ASTInterpreterJitInterface::getGeneratorOffset()); - auto rtn = call(false, (void*)yield, generator, v, args, imm(local_args.size()))->setType(RefType::OWNED); + auto rtn = call(false, (void*)yield, { generator, v, args, imm(local_args.size()) }, {}, local_args) + ->setType(RefType::OWNED); v->refConsumed(); - for (auto&& var : local_args) { - var->refUsed(); - } return rtn; } @@ -906,31 +863,38 @@ uint64_t JitFragmentWriter::asUInt(InternedString s) { } #endif -std::pair JitFragmentWriter::emitPPCall(void* func_addr, - llvm::ArrayRef args, int num_slots, - int slot_size, AST* ast_node, - TypeRecorder* type_recorder) { +std::pair +JitFragmentWriter::emitPPCall(void* func_addr, llvm::ArrayRef args, unsigned char num_slots, + unsigned short slot_size, AST* ast_node, TypeRecorder* type_recorder, + llvm::ArrayRef additional_uses) { if (LOG_BJIT_ASSEMBLY) comment("BJIT: emitPPCall() start"); RewriterVar::SmallVector args_vec(args.begin(), args.end()); #if ENABLE_BASELINEJIT_ICS RewriterVar* result = createNewVar(); + auto args_array_ref = regionAllocArgs(args, additional_uses); + + assert(args.size() < 1ul << 32); int args_size = args.size(); - RewriterVar** _args = (RewriterVar**)regionAlloc(sizeof(RewriterVar*) * args_size); - memcpy(_args, args.begin(), sizeof(RewriterVar*) * args_size); + assert(additional_uses.size() < 1 << 8); + unsigned char num_additional = additional_uses.size(); + RewriterVar** args_array = args_array_ref.data(); - RewriterAction* call_action = addAction([=]() { - this->_emitPPCall(result, func_addr, llvm::ArrayRef(_args, args_size), num_slots, slot_size, - ast_node); - }, args, ActionType::NORMAL); + RewriterAction* call_action + = addAction([this, result, func_addr, ast_node, args_array, args_size, slot_size, num_slots, num_additional]() { + auto all_args = llvm::makeArrayRef(args_array, args_size + num_additional); + auto args = all_args.slice(0, args_size); + this->_emitPPCall(result, func_addr, args, num_slots, slot_size, ast_node, all_args); + }, args_array_ref, ActionType::NORMAL); if (type_recorder) { RewriterVar* type_recorder_var = imm(type_recorder); RewriterVar* obj_cls_var = result->getAttr(offsetof(Box, cls)); - addAction([=]() { _emitRecordType(type_recorder_var, obj_cls_var); }, { type_recorder_var, obj_cls_var }, - ActionType::NORMAL); - result->refUsed(); + addAction([=]() { + _emitRecordType(type_recorder_var, obj_cls_var); + result->bumpUse(); + }, { type_recorder_var, obj_cls_var, result }, ActionType::NORMAL); emitPendingCallsCheck(); return std::make_pair(result, call_action); @@ -1025,7 +989,7 @@ void JitFragmentWriter::_emitGetLocal(RewriterVar* val_var, const char* name) { assembler::Register var_reg = val_var->getInReg(); assembler->test(var_reg, var_reg); - _setupCall(false, RewriterVar::SmallVector(), RewriterVar::SmallVector()); + _setupCall(false, {}); { assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO); assembler->mov(assembler::Immediate((uint64_t)name), assembler::RDI); @@ -1100,7 +1064,8 @@ void JitFragmentWriter::_emitOSRPoint() { } void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::ArrayRef args, - int num_slots, int slot_size, AST* ast_node) { + int num_slots, int slot_size, AST* ast_node, + llvm::ArrayRef vars_to_bump) { assembler::Register r = allocReg(assembler::R11); if (args.size() > 6) { // only 6 args can get passed in registers. @@ -1109,11 +1074,9 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm:: assembler::Register reg = args[i]->getInReg(Location::any(), true); assembler->mov(reg, assembler::Indirect(assembler::RSP, sizeof(void*) * (i - 6))); } - RewriterVar::SmallVector reg_args(args.begin(), args.begin() + 6); - assert(reg_args.size() == 6); - _setupCall(false, reg_args, RewriterVar::SmallVector()); + _setupCall(false, args.slice(0, 6), {}, Location::any(), vars_to_bump); } else - _setupCall(false, args, RewriterVar::SmallVector()); + _setupCall(false, args, {}, Location::any(), vars_to_bump); if (failed) return; @@ -1158,15 +1121,8 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm:: result->releaseIfNoUses(); - // TODO: it would be nice to be able to bumpUse on these earlier so that we could potentially avoid spilling - // the args across the call if we don't need to. - // This had to get moved to the very end of this function due to the fact that bumpUse can cause refcounting - // operations to happen. - // I'm not sure though that just moving this earlier is good enough though -- we also might need to teach setupCall - // that the args might not be needed afterwards? - // Anyway this feels like micro-optimizations at the moment and we can figure it out later. - for (RewriterVar* arg : args) { - arg->bumpUse(); + for (RewriterVar* use : vars_to_bump) { + use->bumpUseLateIfNecessary(); } } diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 41e69c8a2..3f551e6e1 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -316,8 +316,9 @@ class JitFragmentWriter : public Rewriter { RewriterVar* emitCallWithAllocatedArgs(void* func_addr, const llvm::ArrayRef args, const llvm::ArrayRef additional_uses); std::pair emitPPCall(void* func_addr, llvm::ArrayRef args, - int num_slots, int slot_size, AST* ast_node = NULL, - TypeRecorder* type_recorder = NULL); + unsigned char num_slots, unsigned short slot_size, + AST* ast_node = NULL, TypeRecorder* type_recorder = NULL, + llvm::ArrayRef additional_uses = {}); static void assertNameDefinedHelper(const char* id); static Box* callattrHelper(Box* obj, BoxedString* attr, CallattrFlags flags, TypeRecorder* type_recorder, @@ -337,7 +338,7 @@ class JitFragmentWriter : public Rewriter { void _emitJump(CFGBlock* b, RewriterVar* block_next, ExitInfo& exit_info); void _emitOSRPoint(); void _emitPPCall(RewriterVar* result, void* func_addr, llvm::ArrayRef args, int num_slots, - int slot_size, AST* ast_node); + int slot_size, AST* ast_node, llvm::ArrayRef vars_to_bump); void _emitRecordType(RewriterVar* type_recorder_var, RewriterVar* obj_cls_var); void _emitReturn(RewriterVar* v); void _emitSideExit(STOLEN(RewriterVar*) var, RewriterVar* val_constant, CFGBlock* next_block, diff --git a/src/runtime/objmodel.cpp b/src/runtime/objmodel.cpp index 39bbe96a8..f10d08db9 100644 --- a/src/runtime/objmodel.cpp +++ b/src/runtime/objmodel.cpp @@ -4870,32 +4870,32 @@ Box* callCLFunc(FunctionMetadata* md, CallRewriteArgs* rewrite_args, int num_out } else { // Hacky workaround: the rewriter can only pass arguments in registers, so use this helper function // to unpack some of the additional arguments: + llvm::SmallVector additional_uses; RewriterVar* arg_array = rewrite_args->rewriter->allocate(4); arg_vec.push_back(arg_array); - if (num_output_args >= 1) + if (num_output_args >= 1) { arg_array->setAttr(0, rewrite_args->arg1, RewriterVar::SetattrType::REF_USED); - if (num_output_args >= 2) + additional_uses.push_back(rewrite_args->arg1); + } + if (num_output_args >= 2) { arg_array->setAttr(8, rewrite_args->arg2, RewriterVar::SetattrType::REF_USED); - if (num_output_args >= 3) + additional_uses.push_back(rewrite_args->arg2); + } + if (num_output_args >= 3) { arg_array->setAttr(16, rewrite_args->arg3, RewriterVar::SetattrType::REF_USED); - if (num_output_args >= 4) + additional_uses.push_back(rewrite_args->arg3); + } + if (num_output_args >= 4) { arg_array->setAttr(24, rewrite_args->args, RewriterVar::SetattrType::REF_USED); + additional_uses.push_back(rewrite_args->args); + } if (S == CXX) - rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelper, arg_vec) - ->setType(RefType::OWNED); + rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelper, arg_vec, {}, + additional_uses)->setType(RefType::OWNED); else - rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelperCapi, arg_vec) - ->setType(RefType::OWNED); - - if (num_output_args >= 1) - rewrite_args->arg1->refUsed(); - if (num_output_args >= 2) - rewrite_args->arg2->refUsed(); - if (num_output_args >= 3) - rewrite_args->arg3->refUsed(); - if (num_output_args >= 4) - rewrite_args->args->refUsed(); + rewrite_args->out_rtn = rewrite_args->rewriter->call(true, (void*)astInterpretHelperCapi, arg_vec, + {}, additional_uses)->setType(RefType::OWNED); } rewrite_args->out_success = true; From 9fd4924fef35fb9c5af4f84ca57b4f240bfb2fab Mon Sep 17 00:00:00 2001 From: Marius Wachtler Date: Sat, 4 Jun 2016 19:08:00 +0100 Subject: [PATCH 10/10] bjit: enable MAP_32BIT --- src/codegen/baseline_jit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen/baseline_jit.h b/src/codegen/baseline_jit.h index 3f551e6e1..f1744486a 100644 --- a/src/codegen/baseline_jit.h +++ b/src/codegen/baseline_jit.h @@ -25,7 +25,7 @@ namespace pyston { // passes MAP_32BIT to mmap when allocating the memory for the bjit code. // it's nice for inspecting the generated asm because the debugger is able to show the name of called C/C++ functions -#define ENABLE_BASELINEJIT_MAP_32BIT 0 +#define ENABLE_BASELINEJIT_MAP_32BIT 1 #define ENABLE_BASELINEJIT_ICS 1 class AST_stmt;