@@ -3352,6 +3352,58 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
33523352    return  phi;
33533353}
33543354
// Describes one run of byte chunks to compare when testing two padded bits
// values for egality. A run is `nrepeats` back-to-back chunks, the first one
// starting `offset` bytes from the start of the object; each chunk is
// `data_bytes` bytes that are compared followed by `padding_bytes` bytes that
// are skipped.
//
// All fields default to zero so that a default-constructed descriptor is the
// well-defined "nothing recorded yet" state (`nrepeats == 0`). The type stays an
// aggregate, so `egal_desc d = {0};` keeps working.
struct egal_desc {
    size_t offset = 0;        // byte offset of the first chunk from the object start
    size_t nrepeats = 0;      // number of consecutive chunks; 0 means no run recorded yet
    size_t data_bytes = 0;    // bytes compared in each chunk
    size_t padding_bytes = 0; // bytes skipped after the data of each chunk
};
3361+ 
3362+ template  <typename  callback>
3363+ static  size_t  emit_masked_bits_compare (callback &emit_desc, jl_datatype_t  *aty, egal_desc ¤t_desc)
3364+ {
3365+     //  Memcmp, but with masked padding
3366+     size_t  data_bytes = 0 ;
3367+     size_t  padding_bytes = 0 ;
3368+     size_t  nfields = jl_datatype_nfields (aty);
3369+     size_t  total_size = jl_datatype_size (aty);
3370+     for  (size_t  i = 0 ; i < nfields; ++i) {
3371+         size_t  offset = jl_field_offset (aty, i);
3372+         size_t  fend = i == nfields - 1  ? total_size : jl_field_offset (aty, i + 1 );
3373+         size_t  fsz = jl_field_size (aty, i);
3374+         jl_datatype_t  *fty = (jl_datatype_t *)jl_field_type (aty, i);
3375+         if  (jl_field_isptr (aty, i) || !fty->layout ->flags .haspadding ) {
3376+             //  The field has no internal padding
3377+             data_bytes += fsz;
3378+             if  (offset + fsz == fend) {
3379+                 //  The field has no padding after. Merge this into the current
3380+                 //  comparison range and go to next field.
3381+             } else  {
3382+                 padding_bytes = fend - offset - fsz;
3383+                 //  Found padding. Either merge this into the current comparison
3384+                 //  range, or emit the old one and start a new one.
3385+                 if  (current_desc.data_bytes  == data_bytes &&
3386+                         current_desc.padding_bytes  == padding_bytes) {
3387+                     //  Same as the previous range, just note that down, so we
3388+                     //  emit this as a loop.
3389+                     current_desc.nrepeats  += 1 ;
3390+                 } else  {
3391+                     if  (current_desc.nrepeats  != 0 )
3392+                         emit_desc (current_desc);
3393+                     current_desc.nrepeats  = 1 ;
3394+                     current_desc.data_bytes  = data_bytes;
3395+                     current_desc.padding_bytes  = padding_bytes;
3396+                 }
3397+                 data_bytes = 0 ;
3398+             }
3399+         } else  {
3400+             //  The field may have internal padding. Recurse this.
3401+             data_bytes += emit_masked_bits_compare (emit_desc, fty, current_desc);
3402+         }
3403+     }
3404+     return  data_bytes;
3405+ }
3406+ 
33553407static  Value *emit_bits_compare (jl_codectx_t  &ctx, jl_cgval_t  arg1, jl_cgval_t  arg2)
33563408{
33573409    ++EmittedBitsCompares;
@@ -3390,7 +3442,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
33903442    if  (at->isAggregateType ()) { //  Struct or Array
33913443        jl_datatype_t  *sty = (jl_datatype_t *)arg1.typ ;
33923444        size_t  sz = jl_datatype_size (sty);
3393-         if  (sz > 512  && !sty->layout ->flags .haspadding ) {
3445+         if  (sz > 512  && !sty->layout ->flags .haspadding  && sty-> layout -> flags . isbitsegal ) {
33943446            Value *varg1 = arg1.ispointer () ? data_pointer (ctx, arg1) :
33953447                value_to_pointer (ctx, arg1).V ;
33963448            Value *varg2 = arg2.ispointer () ? data_pointer (ctx, arg2) :
@@ -3427,6 +3479,89 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
34273479            }
34283480            return  ctx.builder .CreateICmpEQ (answer, ConstantInt::get (getInt32Ty (ctx.builder .getContext ()), 0 ));
34293481        }
3482+         else  if  (sz > 512  && jl_struct_try_layout (sty) && sty->layout ->flags .isbitsegal ) {
3483+             Type *TInt8 = getInt8Ty (ctx.builder .getContext ());
3484+             Type *TpInt8 = getInt8PtrTy (ctx.builder .getContext ());
3485+             Type *TInt1 = getInt1Ty (ctx.builder .getContext ());
3486+             Value *varg1 = arg1.ispointer () ? data_pointer (ctx, arg1) :
3487+                 value_to_pointer (ctx, arg1).V ;
3488+             Value *varg2 = arg2.ispointer () ? data_pointer (ctx, arg2) :
3489+                 value_to_pointer (ctx, arg2).V ;
3490+             varg1 = emit_pointer_from_objref (ctx, varg1);
3491+             varg2 = emit_pointer_from_objref (ctx, varg2);
3492+             varg1 = emit_bitcast (ctx, varg1, TpInt8);
3493+             varg2 = emit_bitcast (ctx, varg2, TpInt8);
3494+ 
3495+             Value *answer = nullptr ;
3496+             auto  emit_desc = [&](egal_desc desc) {
3497+                 Value *ptr1 = varg1;
3498+                 Value *ptr2 = varg2;
3499+                 if  (desc.offset  != 0 ) {
3500+                     ptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr1, desc.offset );
3501+                     ptr2 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr2, desc.offset );
3502+                 }
3503+ 
3504+                 Value *new_ptr1 = ptr1;
3505+                 Value *endptr1 = nullptr ;
3506+                 BasicBlock *postBB = nullptr ;
3507+                 BasicBlock *loopBB = nullptr ;
3508+                 PHINode *answerphi = nullptr ;
3509+                 if  (desc.nrepeats  != 1 ) {
3510+                     //  Set up loop
3511+                     endptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, ptr1, desc.nrepeats  * (desc.data_bytes  + desc.padding_bytes ));;
3512+ 
3513+                     BasicBlock *currBB = ctx.builder .GetInsertBlock ();
3514+                     loopBB = BasicBlock::Create (ctx.builder .getContext (), " egal_loop" f );
3515+                     postBB = BasicBlock::Create (ctx.builder .getContext (), " post" f );
3516+                     ctx.builder .CreateBr (loopBB);
3517+ 
3518+                     ctx.builder .SetInsertPoint (loopBB);
3519+                     answerphi = ctx.builder .CreatePHI (TInt1, 2 );
3520+                     answerphi->addIncoming (answer ? answer : ConstantInt::get (TInt1, 1 ), currBB);
3521+                     answer = answerphi;
3522+ 
3523+                     PHINode *itr1 = ctx.builder .CreatePHI (ptr1->getType (), 2 );
3524+                     PHINode *itr2 = ctx.builder .CreatePHI (ptr2->getType (), 2 );
3525+ 
3526+                     new_ptr1 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, itr1, desc.data_bytes  + desc.padding_bytes );
3527+                     itr1->addIncoming (ptr1, currBB);
3528+                     itr1->addIncoming (new_ptr1, loopBB);
3529+ 
3530+                     Value *new_ptr2 = ctx.builder .CreateConstInBoundsGEP1_32 (TInt8, itr2, desc.data_bytes  + desc.padding_bytes );
3531+                     itr2->addIncoming (ptr2, currBB);
3532+                     itr2->addIncoming (new_ptr2, loopBB);
3533+ 
3534+                     ptr1 = itr1;
3535+                     ptr2 = itr2;
3536+                 }
3537+ 
3538+                 //  Emit memcmp. TODO: LLVM has a pass to expand this for additional
3539+                 //  performance.
3540+                 Value *this_answer = ctx.builder .CreateCall (prepare_call (memcmp_func),
3541+                     { ptr1,
3542+                       ptr2,
3543+                       ConstantInt::get (ctx.types ().T_size , desc.data_bytes ) });
3544+                 this_answer = ctx.builder .CreateICmpEQ (this_answer, ConstantInt::get (getInt32Ty (ctx.builder .getContext ()), 0 ));
3545+                 answer = answer ? ctx.builder .CreateAnd (answer, this_answer) : this_answer;
3546+                 if  (endptr1) {
3547+                     answerphi->addIncoming (answer, loopBB);
3548+                     Value *loopend = ctx.builder .CreateICmpEQ (new_ptr1, endptr1);
3549+                     ctx.builder .CreateCondBr (loopend, postBB, loopBB);
3550+                     ctx.builder .SetInsertPoint (postBB);
3551+                 }
3552+             };
3553+             egal_desc current_desc = {0 };
3554+             size_t  trailing_data_bytes = emit_masked_bits_compare (emit_desc, sty, current_desc);
3555+             assert (current_desc.nrepeats  != 0 );
3556+             emit_desc (current_desc);
3557+             if  (trailing_data_bytes != 0 ) {
3558+                 current_desc.nrepeats  = 1 ;
3559+                 current_desc.data_bytes  = trailing_data_bytes;
3560+                 current_desc.padding_bytes  = 0 ;
3561+                 emit_desc (current_desc);
3562+             }
3563+             return  answer;
3564+         }
34303565        else  {
34313566            jl_svec_t  *types = sty->types ;
34323567            Value *answer = ConstantInt::get (getInt1Ty (ctx.builder .getContext ()), 1 );
0 commit comments