@@ -3358,6 +3358,58 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
33583358 return phi;
33593359}
33603360
// Describes one byte range to be compared when emitting a padding-aware
// `===` for a bits struct. A range is `nrepeats` back-to-back copies of
// "`data_bytes` bytes that are compared, then `padding_bytes` bytes that are
// skipped", i.e. consecutive repetitions are `data_bytes + padding_bytes`
// bytes apart.
struct egal_desc {
    // Byte offset of the first repetition, applied to both objects' data pointers.
    size_t offset;
    // Number of consecutive repetitions; a value other than 1 is emitted as a loop.
    size_t nrepeats;
    // Number of bytes handed to memcmp in each repetition.
    size_t data_bytes;
    // Number of bytes skipped after the compared bytes of each repetition.
    size_t padding_bytes;
};
3367+
3368+ template <typename callback>
3369+ static size_t emit_masked_bits_compare (callback &emit_desc, jl_datatype_t *aty, egal_desc ¤t_desc)
3370+ {
3371+ // Memcmp, but with masked padding
3372+ size_t data_bytes = 0 ;
3373+ size_t padding_bytes = 0 ;
3374+ size_t nfields = jl_datatype_nfields (aty);
3375+ size_t total_size = jl_datatype_size (aty);
3376+ for (size_t i = 0 ; i < nfields; ++i) {
3377+ size_t offset = jl_field_offset (aty, i);
3378+ size_t fend = i == nfields - 1 ? total_size : jl_field_offset (aty, i + 1 );
3379+ size_t fsz = jl_field_size (aty, i);
3380+ jl_datatype_t *fty = (jl_datatype_t *)jl_field_type (aty, i);
3381+ if (jl_field_isptr (aty, i) || !fty->layout ->flags .haspadding ) {
3382+ // The field has no internal padding
3383+ data_bytes += fsz;
3384+ if (offset + fsz == fend) {
3385+ // The field has no padding after. Merge this into the current
3386+ // comparison range and go to next field.
3387+ } else {
3388+ padding_bytes = fend - offset - fsz;
3389+ // Found padding. Either merge this into the current comparison
3390+ // range, or emit the old one and start a new one.
3391+ if (current_desc.data_bytes == data_bytes &&
3392+ current_desc.padding_bytes == padding_bytes) {
3393+ // Same as the previous range, just note that down, so we
3394+ // emit this as a loop.
3395+ current_desc.nrepeats += 1 ;
3396+ } else {
3397+ if (current_desc.nrepeats != 0 )
3398+ emit_desc (current_desc);
3399+ current_desc.nrepeats = 1 ;
3400+ current_desc.data_bytes = data_bytes;
3401+ current_desc.padding_bytes = padding_bytes;
3402+ }
3403+ data_bytes = 0 ;
3404+ }
3405+ } else {
3406+ // The field may have internal padding. Recurse this.
3407+ data_bytes += emit_masked_bits_compare (emit_desc, fty, current_desc);
3408+ }
3409+ }
3410+ return data_bytes;
3411+ }
3412+
33613413static Value *emit_bits_compare (jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
33623414{
33633415 ++EmittedBitsCompares;
@@ -3396,7 +3448,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
33963448 if (at->isAggregateType ()) { // Struct or Array
33973449 jl_datatype_t *sty = (jl_datatype_t *)arg1.typ ;
33983450 size_t sz = jl_datatype_size (sty);
3399- if (sz > 512 && !sty->layout ->flags .haspadding ) {
3451+ if (sz > 512 && !sty->layout ->flags .haspadding && sty-> layout -> flags . isbitsegal ) {
34003452 Value *varg1 = arg1.ispointer () ? data_pointer (ctx, arg1) :
34013453 value_to_pointer (ctx, arg1).V ;
34023454 Value *varg2 = arg2.ispointer () ? data_pointer (ctx, arg2) :
@@ -3433,6 +3485,89 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
34333485 }
34343486 return ctx.builder .CreateICmpEQ (answer, ConstantInt::get (getInt32Ty (ctx.builder .getContext ()), 0 ));
34353487 }
3488+ else if (sz > 512 && jl_struct_try_layout (sty) && sty->layout ->flags .isbitsegal ) {
3489+ Type *TInt8 = getInt8Ty (ctx.builder .getContext ());
3490+ Type *TpInt8 = getInt8PtrTy (ctx.builder .getContext ());
3491+ Type *TInt1 = getInt1Ty (ctx.builder .getContext ());
3492+ Value *varg1 = arg1.ispointer () ? data_pointer (ctx, arg1) :
3493+ value_to_pointer (ctx, arg1).V ;
3494+ Value *varg2 = arg2.ispointer () ? data_pointer (ctx, arg2) :
3495+ value_to_pointer (ctx, arg2).V ;
3496+ varg1 = emit_pointer_from_objref (ctx, varg1);
3497+ varg2 = emit_pointer_from_objref (ctx, varg2);
3498+ varg1 = emit_bitcast (ctx, varg1, TpInt8);
3499+ varg2 = emit_bitcast (ctx, varg2, TpInt8);
3500+
3501+ Value *answer = nullptr ;
3502+ auto emit_desc = [&](egal_desc desc) {
3503+ Value *ptr1 = varg1;
3504+ Value *ptr2 = varg2;
3505+ if (desc.offset != 0 ) {
3506+ ptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr1, desc.offset );
3507+ ptr2 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr2, desc.offset );
3508+ }
3509+
3510+ Value *new_ptr1 = ptr1;
3511+ Value *endptr1 = nullptr ;
3512+ BasicBlock *postBB = nullptr ;
3513+ BasicBlock *loopBB = nullptr ;
3514+ PHINode *answerphi = nullptr ;
3515+ if (desc.nrepeats != 1 ) {
3516+ // Set up loop
3517+ endptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes ));;
3518+
3519+ BasicBlock *currBB = ctx.builder .GetInsertBlock ();
3520+ loopBB = BasicBlock::Create (ctx.builder .getContext (), " egal_loop" , ctx.f );
3521+ postBB = BasicBlock::Create (ctx.builder .getContext (), " post" , ctx.f );
3522+ ctx.builder .CreateBr (loopBB);
3523+
3524+ ctx.builder .SetInsertPoint (loopBB);
3525+ answerphi = ctx.builder .CreatePHI (TInt1, 2 );
3526+ answerphi->addIncoming (answer ? answer : ConstantInt::get (TInt1, 1 ), currBB);
3527+ answer = answerphi;
3528+
3529+ PHINode *itr1 = ctx.builder .CreatePHI (ptr1->getType (), 2 );
3530+ PHINode *itr2 = ctx.builder .CreatePHI (ptr2->getType (), 2 );
3531+
3532+ new_ptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, itr1, desc.data_bytes + desc.padding_bytes );
3533+ itr1->addIncoming (ptr1, currBB);
3534+ itr1->addIncoming (new_ptr1, loopBB);
3535+
3536+ Value *new_ptr2 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, itr2, desc.data_bytes + desc.padding_bytes );
3537+ itr2->addIncoming (ptr2, currBB);
3538+ itr2->addIncoming (new_ptr2, loopBB);
3539+
3540+ ptr1 = itr1;
3541+ ptr2 = itr2;
3542+ }
3543+
3544+ // Emit memcmp. TODO: LLVM has a pass to expand this for additional
3545+ // performance.
3546+ Value *this_answer = ctx.builder .CreateCall (prepare_call (memcmp_func),
3547+ { ptr1,
3548+ ptr2,
3549+ ConstantInt::get (ctx.types ().T_size , desc.data_bytes ) });
3550+ this_answer = ctx.builder .CreateICmpEQ (this_answer, ConstantInt::get (getInt32Ty (ctx.builder .getContext ()), 0 ));
3551+ answer = answer ? ctx.builder .CreateAnd (answer, this_answer) : this_answer;
3552+ if (endptr1) {
3553+ answerphi->addIncoming (answer, loopBB);
3554+ Value *loopend = ctx.builder .CreateICmpEQ (new_ptr1, endptr1);
3555+ ctx.builder .CreateCondBr (loopend, postBB, loopBB);
3556+ ctx.builder .SetInsertPoint (postBB);
3557+ }
3558+ };
3559+ egal_desc current_desc = {0 };
3560+ size_t trailing_data_bytes = emit_masked_bits_compare (emit_desc, sty, current_desc);
3561+ assert (current_desc.nrepeats != 0 );
3562+ emit_desc (current_desc);
3563+ if (trailing_data_bytes != 0 ) {
3564+ current_desc.nrepeats = 1 ;
3565+ current_desc.data_bytes = trailing_data_bytes;
3566+ current_desc.padding_bytes = 0 ;
3567+ emit_desc (current_desc);
3568+ }
3569+ return answer;
3570+ }
34363571 else {
34373572 jl_svec_t *types = sty->types ;
34383573 Value *answer = ConstantInt::get (getInt1Ty (ctx.builder .getContext ()), 1 );
0 commit comments