From 307ff6b1414682bca12068e1b04df1dd6d461a6f Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 1 May 2017 11:06:09 -0400 Subject: [PATCH 1/6] Don't overflow the internal datatype count. Change the type of the count to be a size_t (it does not alter the total size of the internal structures, so has no impact on the ABI). Signed-off-by: George Bosilca --- opal/datatype/opal_convertor.c | 4 ++-- opal/datatype/opal_datatype_clone.c | 3 +++ opal/datatype/opal_datatype_internal.h | 19 +++++++++++++------ opal/datatype/opal_datatype_optimize.c | 9 +++++---- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index e555e4df837..e4d939fa3f2 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -714,8 +714,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos, opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index, (int)pStack[stack_pos].count, (long)pStack[stack_pos].disp ); if( pStack->index != -1 ) - opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n", - pDesc[pStack[stack_pos].index].elem.count, + opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n", + (unsigned long)pDesc[pStack[stack_pos].index].elem.count, (long)pDesc[pStack[stack_pos].index].elem.disp, (long)pDesc[pStack[stack_pos].index].elem.extent ); else diff --git a/opal/datatype/opal_datatype_clone.c b/opal/datatype/opal_datatype_clone.c index 05f57c88cd8..056c7f3f5c0 100644 --- a/opal/datatype/opal_datatype_clone.c +++ b/opal/datatype/opal_datatype_clone.c @@ -61,6 +61,9 @@ int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type->opt_desc.used = src_type->opt_desc.used; memcpy( dest_type->opt_desc.desc, src_type->opt_desc.desc, desc_length * sizeof(dt_elem_desc_t) ); } + } else { + assert( NULL == dest_type->opt_desc.desc ); + assert( 0 == dest_type->opt_desc.length ); } } dest_type->id = src_type->id; 
/* preserve the default id. This allow us to diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index ab4d1b2bc6b..963c9036d3f 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -155,10 +155,10 @@ typedef struct ddt_elem_id_description ddt_elem_id_description; */ struct ddt_elem_desc { ddt_elem_id_description common; /**< basic data description and flags */ - uint32_t count; /**< number of blocks */ + size_t count; /**< number of blocks */ uint32_t blocklen; /**< number of elements on each block */ - ptrdiff_t extent; /**< extent of each block (in bytes) */ - ptrdiff_t disp; /**< displacement of the first block */ + ptrdiff_t extent; /**< extent of each block (in bytes) */ + ptrdiff_t disp; /**< displacement of the first block */ }; typedef struct ddt_elem_desc ddt_elem_desc_t; @@ -175,7 +175,7 @@ struct ddt_loop_desc { uint32_t loops; /**< number of elements */ uint32_t items; /**< number of items in the loop */ size_t unused; /**< not used right now */ - ptrdiff_t extent; /**< extent of the whole loop */ + ptrdiff_t extent; /**< extent of the whole loop */ }; typedef struct ddt_loop_desc ddt_loop_desc_t; @@ -184,7 +184,7 @@ struct ddt_endloop_desc { uint32_t items; /**< number of elements */ uint32_t unused; /**< not used right now */ size_t size; /**< real size of the data in the loop */ - ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */ + ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */ }; typedef struct ddt_endloop_desc ddt_endloop_desc_t; @@ -214,13 +214,20 @@ union dt_elem_desc { (_place)->end_loop.unused = -1; \ } while(0) + +/** + * Create one or more elements depending on the value of _count. If the value + * is too large for the type of elem.count then use both the elem.count and + * elem.blocklen to create it. If the number is prime then create a second + * element to account for the difference. 
+ */ #define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \ do { \ (_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \ (_place)->elem.common.type = (_type); \ - (_place)->elem.count = (_count); \ (_place)->elem.disp = (_disp); \ (_place)->elem.extent = (_extent); \ + (_place)->elem.count = (_count); \ (_place)->elem.blocklen = 1; \ } while(0) /* diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index d5313a8b8eb..357689e1d8a 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -50,11 +50,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData, dt_stack_t* pOrigStack; dt_stack_t* pStack; /* pointer to the position on the stack */ int32_t pos_desc = 0; /* actual position in the description of the derived datatype */ - int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0; + int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1; int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity; ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0; uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */ uint32_t i; + size_t last_length = 0; pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) ); SAVE_STACK( pStack, -1, 0, count, 0 ); @@ -98,8 +99,8 @@ opal_datatype_optimize_short( opal_datatype_t* pData, int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) ); ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp; - continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) - == (total_disp + loop_disp)); + continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) + == (total_disp + loop_disp)); if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) { /* the loop is contiguous or composed by contiguous elements with a gap */ if( loop->extent == 
(ptrdiff_t)end_loop->size ) { @@ -206,7 +207,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */ /* now here we have a basic datatype */ type = pData->desc.desc[pos_desc].elem.common.type; - continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) + continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size) == (total_disp + pData->desc.desc[pos_desc].elem.disp)); if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity && From b6190800c8aaa5e4c8459cc3456d65c1886e7dcb Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Tue, 2 May 2017 01:56:03 -0400 Subject: [PATCH 2/6] Optimize the datatype creation. The internal array of counts of predefined types is now only created when needed, which is either in a heterogeneous environment, or when one calls get_elements. It saves space and makes the convertor creation a little faster in some cases. Rearrange the fields in the datatype description structs. The macro OPAL_DATATYPE_INIT_PTYPES_ARRAY had a bug, and the static array was only partially created. All predefined types should have the ptypes array created and initialized. 
Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype_get_elements.c | 6 +- ompi/datatype/ompi_datatype_internal.h | 2 +- ompi/datatype/ompi_datatype_module.c | 19 ++--- ompi/include/ompi/memchecker.h | 3 +- opal/datatype/opal_convertor.c | 82 ++++++++++++++-------- opal/datatype/opal_convertor.h | 29 ++++++-- opal/datatype/opal_datatype.h | 27 ++++--- opal/datatype/opal_datatype_add.c | 19 ++--- opal/datatype/opal_datatype_copy.h | 2 +- opal/datatype/opal_datatype_create.c | 36 +++++----- opal/datatype/opal_datatype_dump.c | 12 +++- opal/datatype/opal_datatype_fake_stack.c | 23 ++---- opal/datatype/opal_datatype_get_count.c | 63 +++++++++++++++-- opal/datatype/opal_datatype_internal.h | 21 +++--- opal/datatype/opal_datatype_optimize.c | 8 +-- test/datatype/position_noncontig.c | 2 +- 16 files changed, 226 insertions(+), 128 deletions(-) diff --git a/ompi/datatype/ompi_datatype_get_elements.c b/ompi/datatype/ompi_datatype_get_elements.c index 0c1f8a7b842..57f51ea9260 100644 --- a/ompi/datatype/ompi_datatype_get_elements.c +++ b/ompi/datatype/ompi_datatype_get_elements.c @@ -25,6 +25,7 @@ #include "ompi/runtime/params.h" #include "ompi/datatype/ompi_datatype.h" +#include "opal/datatype/opal_datatype_internal.h" int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t *count) { @@ -48,9 +49,10 @@ int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t there are no leftover bytes */ if (!ompi_datatype_is_predefined(datatype)) { if (0 != internal_count) { + opal_datatype_compute_ptypes(&datatype->super); /* count the basic elements in the datatype */ - for (i = 4, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) { - total += datatype->super.btypes[i]; + for (i = OPAL_DATATYPE_FIRST_TYPE, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) { + total += datatype->super.ptypes[i]; } internal_count = total * internal_count; } diff --git a/ompi/datatype/ompi_datatype_internal.h 
b/ompi/datatype/ompi_datatype_internal.h index f7863622c62..5bc068da12b 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -467,7 +467,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX .name = OPAL_DATATYPE_INIT_NAME(TYPE ## SIZE), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(TYPE ## SIZE) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY(TYPE ## SIZE) \ } #define OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN( TYPE, NAME, SIZE, ALIGN, FLAGS ) \ diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 9de36f92240..b602d92d3ea 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -384,8 +384,9 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; (PDST)->super.opt_desc = (PSRC)->super.opt_desc; \ (PDST)->packed_description = (PSRC)->packed_description; \ (PSRC)->packed_description = NULL; \ - memcpy( (PDST)->super.btypes, (PSRC)->super.btypes, \ - OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t) ); \ + /* transfer the ptypes */ \ + (PDST)->super.ptypes = (PSRC)->super.ptypes; \ + (PSRC)->super.ptypes = NULL; \ } while(0) #define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2, FLAGS) \ @@ -393,20 +394,20 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}}; struct { type1 v1; type2 v2; } s[2]; \ ompi_datatype_t *types[2], *ptype; \ int bLength[2] = {1, 1}; \ - ptrdiff_t base, displ[2]; \ + ptrdiff_t base, displ[2]; \ \ types[0] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType1]; \ types[1] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType2]; \ - base = (ptrdiff_t)(&(s[0])); \ - displ[0] = (ptrdiff_t)(&(s[0].v1)); \ + base = (ptrdiff_t)(&(s[0])); \ + displ[0] = (ptrdiff_t)(&(s[0].v1)); \ displ[0] -= base; \ - 
displ[1] = (ptrdiff_t)(&(s[0].v2)); \ + displ[1] = (ptrdiff_t)(&(s[0].v2)); \ displ[1] -= base; \ \ ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \ - displ[0] = (ptrdiff_t)(&(s[1])); \ + displ[0] = (ptrdiff_t)(&(s[1])); \ displ[0] -= base; \ - if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \ + if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \ ptype->super.ub = displ[0]; /* force a new extent for the datatype */ \ ptype->super.flags |= (FLAGS); \ ptype->id = MPIDDT; \ @@ -736,7 +737,7 @@ void ompi_datatype_dump( const ompi_datatype_t* pData ) (long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used, (long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb), (long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb), - (int)pData->super.nbElems, (int)pData->super.btypes[OPAL_DATATYPE_LOOP], (int)pData->super.flags ); + (int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags ); /* dump the flags */ if( ompi_datatype_is_predefined(pData) ) { index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/ompi/include/ompi/memchecker.h b/ompi/include/ompi/memchecker.h index 4d47ed0d3a1..a56f065c364 100644 --- a/ompi/include/ompi/memchecker.h +++ b/ompi/include/ompi/memchecker.h @@ -366,7 +366,8 @@ static inline int memchecker_datatype(MPI_Datatype type) opal_memchecker_base_isdefined (&type->super.opt_desc.length, sizeof(opal_datatype_count_t)); opal_memchecker_base_isdefined (&type->super.opt_desc.used, sizeof(opal_datatype_count_t)); opal_memchecker_base_isdefined (&type->super.opt_desc.desc, sizeof(dt_elem_desc_t *)); - opal_memchecker_base_isdefined (&type->super.btypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t)); + if( NULL != type->super.ptypes ) + opal_memchecker_base_isdefined (&type->super.ptypes, OPAL_DATATYPE_MAX_PREDEFINED * 
sizeof(size_t)); opal_memchecker_base_isdefined (&type->id, sizeof(int32_t)); opal_memchecker_base_isdefined (&type->d_f_to_c_index, sizeof(int32_t)); diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index e4d939fa3f2..62e201996b2 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -43,9 +43,6 @@ CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) ) #endif -extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor, - int starting_point, const int* sizes ); - static void opal_convertor_construct( opal_convertor_t* convertor ) { convertor->pStack = convertor->static_stack; @@ -447,32 +444,62 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, return rc; } +static size_t +opal_datatype_compute_remote_size( const opal_datatype_t* pData, + const size_t* sizes ) +{ + uint32_t typeMask = pData->bdt_used; + size_t length = 0; + + if( OPAL_UNLIKELY(NULL == pData->ptypes) ) { + /* Allocate and fill the array of types used in the datatype description */ + opal_datatype_compute_ptypes( (opal_datatype_t*)pData ); + } + + for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { + if( typeMask & ((uint32_t)1 << i) ) { + length += (pData->ptypes[i] * sizes[i]); + typeMask ^= ((uint32_t)1 << i); + } + } + return length; +} + +/** + * Compute the remote size based on the datatype and count. Assume that the sizes + * are the sizes corresponding to the remote architecture. 
+ */ +size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) +{ + if( pConvertor->flags & CONVERTOR_HOMOGENEOUS ) { + pConvertor->remote_size = pConvertor->local_size; + } else { + if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) { + /* This is for a single datatype, we must update it with the count */ + pConvertor->remote_size = opal_datatype_compute_remote_size(pConvertor->pDesc, + pConvertor->master->remote_sizes); + pConvertor->remote_size *= pConvertor->count; + pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE; + } + } + pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE; + return pConvertor->remote_size; +} + /** * Compute the remote size. If necessary remove the homogeneous flag * and redirect the convertor description toward the non-optimized * datatype representation. */ -#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \ -{ \ - if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \ - opal_convertor_master_t* master; \ - int i; \ - uint32_t mask = datatype->bdt_used; \ - convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \ - master = convertor->master; \ - convertor->remote_size = 0; \ - for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \ - if( mask & ((uint32_t)1 << i) ) { \ - convertor->remote_size += (datatype->btypes[i] * \ - master->remote_sizes[i]); \ - mask ^= ((uint32_t)1 << i); \ - } \ - } \ - convertor->remote_size *= convertor->count; \ - convertor->use_desc = &(datatype->desc); \ - } \ -} +#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype) \ + do { \ + if( datatype->bdt_used & convertor->master->hetero_mask ) { \ + convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \ + convertor->use_desc = &(datatype->desc); \ + } \ + opal_convertor_compute_remote_size( (convertor) ); \ + } while(0) /** * This macro will initialize a convertor based on a previously created @@ -483,8 +510,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, */ #define 
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \ { \ - uint32_t bdt_mask; \ - \ /* If the data is empty we just mark the convertor as \ * completed. With this flag set the pack and unpack functions \ * will not do anything. \ @@ -516,9 +541,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } \ } \ \ - bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \ - OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \ - bdt_mask ); \ + assert( (convertor)->pDesc == (datatype) ); \ + OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype ); \ assert( NULL != convertor->use_desc->desc ); \ /* For predefined datatypes (contiguous) do nothing more */ \ /* if checksum is enabled then always continue */ \ @@ -530,7 +554,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, } \ convertor->flags &= ~CONVERTOR_NO_OP; \ { \ - uint32_t required_stack_length = datatype->btypes[OPAL_DATATYPE_LOOP] + 1; \ + uint32_t required_stack_length = datatype->loops + 1; \ \ if( required_stack_length > convertor->stack_size ) { \ assert(convertor->pStack == convertor->static_stack); \ diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 716c336622d..1a532821bc0 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -54,6 +54,7 @@ BEGIN_C_DECLS #define CONVERTOR_STATE_ALLOC 0x04000000 #define CONVERTOR_COMPLETED 0x08000000 #define CONVERTOR_CUDA_UNIFIED 0x10000000 +#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000 union dt_elem_desc; typedef struct opal_convertor_t opal_convertor_t; @@ -72,7 +73,7 @@ struct dt_stack_t { int32_t index; /**< index in the element description */ int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */ size_t count; /**< number of times we still have to do it */ - ptrdiff_t disp; /**< actual displacement depending on the count field */ + ptrdiff_t disp; /**< actual displacement depending on the 
count field */ }; typedef struct dt_stack_t dt_stack_t; @@ -186,9 +187,16 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv return 1; } +/** + * Update the size of the remote datatype representation. The size will + * depend on the configuration of the master convertor. In homogeneous + * environments, the local and remote sizes are identical. + */ +size_t +opal_convertor_compute_remote_size( opal_convertor_t* pConv ); -/* - * +/** + * Return the local size of the convertor (count times the size of the datatype). */ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv, size_t* pSize ) @@ -197,16 +205,24 @@ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv } -/* - * +/** + * Return the remote size of the convertor (count times the remote size of the + * datatype). On homogeneous environments the local and remote sizes are + * identical. */ static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv, size_t* pSize ) { + if( pConv->flags & CONVERTOR_HOMOGENEOUS ) { + *pSize = pConv->local_size; + return; + } + if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) { + opal_convertor_compute_remote_size( (opal_convertor_t*)pConv); + } *pSize = pConv->remote_size; } - /** * Return the current absolute position of the next pack/unpack. This function is * mostly useful for contiguous datatypes, when we need to get the pointer to the @@ -279,6 +295,7 @@ opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */ uint32_t* iov_count, /* [IN/OUT] */ size_t* length ); /* [OUT] */ + /* * Upper level does not need to call the _nocheck function directly. 
*/ diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 74349b61463..53aac7e6675 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -53,9 +53,10 @@ BEGIN_C_DECLS #endif /* * No more than this number of _Basic_ datatypes in C/CPP or Fortran - * are supported (in order to not change setup and usage of btypes). + * are supported (in order to not change setup and usage of the predefined + * datatypes). * - * XXX TODO Adapt to whatever the OMPI-layer needs + * BEWARE: This constant should reflect whatever the OMPI-layer needs. */ #define OPAL_DATATYPE_MAX_SUPPORTED 47 @@ -108,13 +109,14 @@ struct opal_datatype_t { uint32_t bdt_used; /**< bitset of which basic datatypes are used in the data description */ size_t size; /**< total size in bytes of the memory used by the data if the data is put on a contiguous buffer */ - ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */ - ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */ - ptrdiff_t lb; /**< lower bound in memory */ - ptrdiff_t ub; /**< upper bound in memory */ + ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */ + ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */ + ptrdiff_t lb; /**< lower bound in memory */ + ptrdiff_t ub; /**< upper bound in memory */ /* --- cacheline 1 boundary (64 bytes) --- */ size_t nbElems; /**< total number of elements inside the datatype */ uint32_t align; /**< data should be aligned to */ + uint32_t loops; /**< number of loops on the internal type stack */ /* Attribute fields */ char name[OPAL_MAX_OBJECT_NAME]; /**< name of the datatype */ @@ -123,11 +125,12 @@ struct opal_datatype_t { dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless or in the send case (without conversion) */ - uint32_t btypes[OPAL_DATATYPE_MAX_SUPPORTED]; - /**< basic elements count used to compute the 
size of the - datatype for remote nodes. The length of the array is dependent on - the maximum number of datatypes of all top layers. - Reason being is that Fortran is not at the OPAL layer. */ + size_t *ptypes; /**< array of basic predefined types that facilitate the computing + of the remote size in heterogeneous environments. The length of the + array is dependent on the maximum number of predefined datatypes of + all language interfaces (because Fortran is not known at the OPAL + layer). This field should never be initialized in homogeneous + environments */ /* --- cacheline 5 boundary (320 bytes) was 32-36 bytes ago --- */ /* size: 352, cachelines: 6, members: 15 */ @@ -281,6 +284,8 @@ OPAL_DECLSPEC int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* pData, int32_t count, char* pDestBuf, char* pSrcBuf ); +OPAL_DECLSPEC int opal_datatype_compute_ptypes( opal_datatype_t* datatype ); + OPAL_DECLSPEC const opal_datatype_t* opal_datatype_match_size( int size, uint16_t datakind, uint16_t datalang ); diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 18a90d322da..2aaefa5d1d8 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -279,7 +279,8 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA * predefined non contiguous datatypes (like MPI_SHORT_INT). 
*/ if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) { - pdtBase->btypes[pdtAdd->id] += count; + if( NULL != pdtBase->ptypes ) + pdtBase->ptypes[pdtAdd->id] += count; pLast->elem.common.type = pdtAdd->id; pLast->elem.count = count; pLast->elem.disp = disp; @@ -291,13 +292,13 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA } } else { /* keep trace of the total number of basic datatypes in the datatype definition */ - pdtBase->btypes[OPAL_DATATYPE_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_LOOP]; - pdtBase->btypes[OPAL_DATATYPE_END_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_END_LOOP]; - pdtBase->btypes[OPAL_DATATYPE_LB] |= pdtAdd->btypes[OPAL_DATATYPE_LB]; - pdtBase->btypes[OPAL_DATATYPE_UB] |= pdtAdd->btypes[OPAL_DATATYPE_UB]; - for( i = 4; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) - if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]); - + pdtBase->loops += pdtAdd->loops; + pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB); + pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB); + if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) { + for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) + if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]); + } if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) && (extent == pdtAdd->desc.desc[0].elem.extent) ){ pLast->elem = pdtAdd->desc.desc[0].elem; @@ -312,7 +313,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA pLoop = pLast; CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent, (pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) ); - pdtBase->btypes[OPAL_DATATYPE_LOOP] += 2; + pdtBase->loops += 2; pdtBase->desc.used += 2; pLast++; } diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index 
d2e6a9b5199..e4c006cb14b 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -179,7 +179,7 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i return 0; /* completed */ } - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 1) ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 1) ); pStack->count = count; pStack->index = -1; pStack->disp = 0; diff --git a/opal/datatype/opal_datatype_create.c b/opal/datatype/opal_datatype_create.c index e64e1f04190..b009c27d6ec 100644 --- a/opal/datatype/opal_datatype_create.c +++ b/opal/datatype/opal_datatype_create.c @@ -30,8 +30,6 @@ static void opal_datatype_construct( opal_datatype_t* pData ) { - int i; - pData->size = 0; pData->flags = OPAL_DATATYPE_FLAG_CONTIGUOUS; pData->id = 0; @@ -53,32 +51,36 @@ static void opal_datatype_construct( opal_datatype_t* pData ) pData->opt_desc.length = 0; pData->opt_desc.used = 0; - for( i = 0; i < OPAL_DATATYPE_MAX_SUPPORTED; i++ ) - pData->btypes[i] = 0; + pData->ptypes = NULL; + pData->loops = 0; } static void opal_datatype_destruct( opal_datatype_t* datatype ) { + /** + * As the default description and the optimized description might point to the + * same data description we should start by cleaning the optimized description. 
+ */ + if( NULL != datatype->opt_desc.desc ) { + if( datatype->opt_desc.desc != datatype->desc.desc ) + free( datatype->opt_desc.desc ); + datatype->opt_desc.length = 0; + datatype->opt_desc.used = 0; + datatype->opt_desc.desc = NULL; + } if (!opal_datatype_is_predefined(datatype)) { - if( datatype->desc.desc != NULL ) { + if( NULL != datatype->desc.desc ) { free( datatype->desc.desc ); datatype->desc.length = 0; datatype->desc.used = 0; + datatype->desc.desc = NULL; } } - if( datatype->opt_desc.desc != NULL ) { - if( datatype->opt_desc.desc != datatype->desc.desc ) - free( datatype->opt_desc.desc ); - datatype->opt_desc.length = 0; - datatype->opt_desc.used = 0; - datatype->opt_desc.desc = NULL; + /* don't free the ptypes of predefined types (they were not dynamically allocated) */ + if( (NULL != datatype->ptypes) && (datatype->id >= OPAL_DATATYPE_MAX_PREDEFINED) ) { + free(datatype->ptypes); + datatype->ptypes = NULL; } - /** - * As the default description and the optimized description can point to the - * same memory location we should keep the default location pointer until we - * know what we should do with the optimized description. 
- */ - datatype->desc.desc = NULL; /* make sure the name is set to empty */ datatype->name[0] = '\0'; diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index 30575674196..d60721a06bf 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -42,8 +42,14 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p if( pData->flags & OPAL_DATATYPE_FLAG_USER_LB ) index += snprintf( ptr, length - index, "lb " ); if( pData->flags & OPAL_DATATYPE_FLAG_USER_UB ) index += snprintf( ptr + index, length - index, "ub " ); for( i = 0; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) { - if( pData->bdt_used & mask ) - index += snprintf( ptr + index, length - index, "%s ", opal_datatype_basicDatatypes[i]->name ); + if( pData->bdt_used & mask ) { + if( NULL == pData->ptypes ) { + index += snprintf( ptr + index, length - index, "%s:* ", opal_datatype_basicDatatypes[i]->name ); + } else { + index += snprintf( ptr + index, length - index, "%s:%lu ", opal_datatype_basicDatatypes[i]->name, + pData->ptypes[i]); + } + } mask <<= 1; if( length <= (size_t)index ) break; } @@ -115,7 +121,7 @@ void opal_datatype_dump( const opal_datatype_t* pData ) (void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used, (long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb), (long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb), - (int)pData->nbElems, (int)pData->btypes[OPAL_DATATYPE_LOOP], (int)pData->flags ); + (int)pData->nbElems, (int)pData->loops, (int)pData->flags ); /* dump the flags */ if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED ) index += snprintf( buffer + index, length - index, "predefined " ); diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 8259f3d0fa5..389804cd647 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ 
b/opal/datatype/opal_datatype_fake_stack.c @@ -6,7 +6,7 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2017 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. @@ -34,21 +34,8 @@ #include "opal/datatype/opal_datatype_internal.h" -int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, - size_t starting_point, - const size_t* sizes ); - -static inline size_t -opal_convertor_compute_remote_size( const opal_datatype_t* pData, const size_t* sizes ) -{ - uint32_t i; - size_t length = 0; - - for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) { - length += (pData->btypes[i] * sizes[i]); - } - return length; -} +extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor, + size_t starting_point, const size_t* sizes ); int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, size_t starting_point, const size_t* sizes ) @@ -104,7 +91,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, } /* remove from the main loop all the complete datatypes */ - remote_size = opal_convertor_compute_remote_size( pData, sizes ); + remote_size = opal_convertor_compute_remote_size( pConvertor ); count = (int32_t)(starting_point / remote_size); resting_place -= (remote_size * count); pStack->count = pConvertor->count - count; @@ -114,7 +101,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor, pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp; pos_desc = 0; - remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->btypes[OPAL_DATATYPE_LOOP] + 1)); + remoteLength = (size_t*)alloca( 
sizeof(size_t) * (pConvertor->pDesc->loops + 1)); remoteLength[0] = 0; /* initial value set to ZERO */ loop_length = 0; diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index 7b539fbec81..a860d5fb41e 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -39,9 +39,9 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t /* Normally the size should be less or equal to the size of the datatype. * This function does not support a iSize bigger than the size of the datatype. */ - assert( (uint32_t)iSize <= datatype->size ); - DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize ); - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) ); + assert( iSize <= datatype->size ); + DUMP( "dt_count_elements( %p, %lu )\n", (void*)datatype, (unsigned long)iSize ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; @@ -93,9 +93,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t /** * Handle all complete multiple of the datatype. 
*/ - for( pos_desc = 4; pos_desc < OPAL_DATATYPE_MAX_PREDEFINED; pos_desc++ ) { - local_length += datatype->btypes[pos_desc]; - } + local_length = datatype->nbElems; pos_desc = count / local_length; count = count % local_length; *length = datatype->size * pos_desc; @@ -104,7 +102,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t } DUMP( "dt_set_element_count( %p, %d )\n", (void*)datatype, count ); - pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); pStack->count = 1; pStack->index = -1; pStack->disp = 0; @@ -143,3 +141,54 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t } } +/** + * Compute the array of counts of the predefined datatypes contained in + * the datatype. We have no simple way to create this array, as we only + * sporadically need it (when we deal with heterogeneous environments or + * when we use get_element_count). Thus, we will pay the cost once per + * datatype, but we will only update this array if/when needed. 
+ */ +int opal_datatype_compute_ptypes( opal_datatype_t* datatype ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + uint32_t pos_desc; /* actual position in the description of the derived datatype */ + ssize_t nbElems = 0, stack_pos = 0; + dt_elem_desc_t* pElems; + + if( NULL != datatype->ptypes ) return 0; + datatype->ptypes = (size_t*)calloc(OPAL_DATATYPE_MAX_SUPPORTED, sizeof(size_t)); + + DUMP( "opal_datatype_compute_ptypes( %p )\n", (void*)datatype ); + pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) ); + pStack->count = 1; + pStack->index = -1; + pStack->disp = 0; + pElems = datatype->desc.desc; + pos_desc = 0; + + while( 1 ) { /* loop forever the exit condition is on the last OPAL_DATATYPE_END_LOOP */ + if( OPAL_DATATYPE_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + stack_pos--; pStack--; + if( stack_pos == -1 ) return 0; /* completed */ + } + pos_desc = pStack->index + 1; + continue; + } + if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { + ddt_loop_desc_t* loop = &(pElems[pos_desc].loop); + do { + PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, loop->loops, 0 ); + pos_desc++; + } while( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */ + DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" ); + } + while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { + /* now here we have a basic datatype */ + datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count; + nbElems += pElems[pos_desc].elem.count; + + pos_desc++; /* advance to the next data */ + } + } +} diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index 963c9036d3f..f749388b4e8 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -155,8 +155,8 @@ typedef struct 
ddt_elem_id_description ddt_elem_id_description; */ struct ddt_elem_desc { ddt_elem_id_description common; /**< basic data description and flags */ - size_t count; /**< number of blocks */ uint32_t blocklen; /**< number of elements on each block */ + size_t count; /**< number of blocks */ ptrdiff_t extent; /**< extent of each block (in bytes) */ ptrdiff_t disp; /**< displacement of the first block */ }; @@ -172,8 +172,8 @@ typedef struct ddt_elem_desc ddt_elem_desc_t; */ struct ddt_loop_desc { ddt_elem_id_description common; /**< basic data description and flags */ - uint32_t loops; /**< number of elements */ uint32_t items; /**< number of items in the loop */ + uint32_t loops; /**< number of elements */ size_t unused; /**< not used right now */ ptrdiff_t extent; /**< extent of the whole loop */ }; @@ -245,8 +245,8 @@ struct opal_datatype_t; * OPAL_DATATYPE_INIT_BTYPES_ARRAY_[0-21], then order and naming would _not_ matter.... */ -#define OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE { 0 } -#define OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) { [OPAL_DATATYPE_ ## NAME] = 1 } +#define OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE NULL +#define OPAL_DATATYPE_INIT_PTYPES_ARRAY(NAME) (size_t[OPAL_DATATYPE_MAX_PREDEFINED]){ [OPAL_DATATYPE_ ## NAME] = 1, [OPAL_DATATYPE_MAX_PREDEFINED-1] = 0 } #define OPAL_DATATYPE_INIT_NAME(NAME) "OPAL_" #NAME @@ -275,7 +275,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INITIALIZER_UNAVAILABLE( FLAGS ) \ @@ -294,7 +294,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(EMPTY), \ .desc = OPAL_DATATYPE_INIT_DESC_NULL, \ .opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \ + .ptypes = 
OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INIT_BASIC_TYPE( TYPE, NAME, FLAGS ) \ @@ -310,7 +310,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_NULL, \ .opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INIT_BASIC_DATATYPE( TYPE, ALIGN, NAME, FLAGS ) \ @@ -326,7 +326,7 @@ struct opal_datatype_t; .name = OPAL_DATATYPE_INIT_NAME(NAME), \ .desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \ .opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \ - .btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \ + .ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \ } #define OPAL_DATATYPE_INITIALIZER_LOOP(FLAGS) OPAL_DATATYPE_INIT_BASIC_TYPE( OPAL_DATATYPE_LOOP, LOOP, FLAGS ) @@ -483,7 +483,10 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem ) #define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \ do { \ (ELEMENT) = &((DESCRIPTION)[(POSITION)]); \ - (COUNTER) = (ELEMENT)->elem.count; \ + if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \ + (COUNTER) = (ELEMENT)->loop.loops; \ + else \ + (COUNTER) = (ELEMENT)->elem.count; \ } while (0) OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length ); diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index 357689e1d8a..8c15777cf6a 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -42,8 +42,8 @@ static int32_t opal_datatype_optimize_short( opal_datatype_t* pData, - int32_t count, - dt_type_desc_t* pTypeDesc ) + int32_t count, + dt_type_desc_t* pTypeDesc ) { dt_elem_desc_t* pElemDesc; ddt_elem_desc_t opt_elem; @@ -57,7 +57,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, uint32_t i; size_t last_length = 0; - pOrigStack 
= pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) ); + pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) ); SAVE_STACK( pStack, -1, 0, count, 0 ); pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */; @@ -86,7 +86,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData, pElemDesc++; nbElems++; if( --stack_pos >= 0 ) { /* still something to do ? */ ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop); - pStartLoop->items = (pElemDesc - 1)->elem.count; + pStartLoop->items = end_loop->items; total_disp = pStack->disp; /* update the displacement position */ } pStack--; /* go down one position on the stack */ diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 12a15fa47a7..01779d186d5 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -23,7 +23,7 @@ /** * The purpose of this test is to simulate the multi-network packing and * unpacking process. The pack operation will happens in-order while the - * will be done randomly. Therefore, before each unpack the correct + * unpack will be done randomly. Therefore, before each unpack the correct * position in the user buffer has to be set. */ From d9462680b2bf90dfb5408fe8e6bc39a13e99a662 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Mon, 8 May 2017 23:26:49 -0400 Subject: [PATCH 3/6] Fix the boundary computation. 
Signed-off-by: George Bosilca --- ompi/datatype/ompi_datatype_get_elements.c | 2 +- ompi/datatype/ompi_datatype_internal.h | 2 +- opal/datatype/opal_convertor.c | 2 +- opal/datatype/opal_convertor.h | 2 +- opal/datatype/opal_datatype.h | 2 +- opal/datatype/opal_datatype_add.c | 2 +- opal/datatype/opal_datatype_clone.c | 2 +- opal/datatype/opal_datatype_copy.h | 2 +- opal/datatype/opal_datatype_create.c | 2 +- opal/datatype/opal_datatype_dump.c | 2 +- opal/datatype/opal_datatype_fake_stack.c | 2 +- opal/datatype/opal_datatype_get_count.c | 14 ++++++++++---- opal/datatype/opal_datatype_internal.h | 2 +- opal/datatype/opal_datatype_optimize.c | 2 +- test/datatype/position_noncontig.c | 2 +- 15 files changed, 24 insertions(+), 18 deletions(-) diff --git a/ompi/datatype/ompi_datatype_get_elements.c b/ompi/datatype/ompi_datatype_get_elements.c index 57f51ea9260..72ac87d6df7 100644 --- a/ompi/datatype/ompi_datatype_get_elements.c +++ b/ompi/datatype/ompi_datatype_get_elements.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 5bc068da12b..4323f0c31e9 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2009-2013 The University of Tennessee and The University + * Copyright (c) 2009-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. 
All rights reserved. diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 62e201996b2..82d1794bfc0 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_convertor.h b/opal/datatype/opal_convertor.h index 1a532821bc0..85956af88d7 100644 --- a/opal/datatype/opal_convertor.h +++ b/opal/datatype/opal_convertor.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index 53aac7e6675..519d370aac3 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2015 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_add.c b/opal/datatype/opal_datatype_add.c index 2aaefa5d1d8..8876b74e1c7 100644 --- a/opal/datatype/opal_datatype_add.c +++ b/opal/datatype/opal_datatype_add.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_clone.c b/opal/datatype/opal_datatype_clone.c index 056c7f3f5c0..fa4479982d0 100644 --- a/opal/datatype/opal_datatype_clone.c +++ b/opal/datatype/opal_datatype_clone.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_copy.h b/opal/datatype/opal_datatype_copy.h index e4c006cb14b..5dcfe2ec5d3 100644 --- a/opal/datatype/opal_datatype_copy.h +++ b/opal/datatype/opal_datatype_copy.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. 
diff --git a/opal/datatype/opal_datatype_create.c b/opal/datatype/opal_datatype_create.c index b009c27d6ec..0e6d49b9bd7 100644 --- a/opal/datatype/opal_datatype_create.c +++ b/opal/datatype/opal_datatype_create.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2013 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_dump.c b/opal/datatype/opal_datatype_dump.c index d60721a06bf..8ec86ee63a8 100644 --- a/opal/datatype/opal_datatype_dump.c +++ b/opal/datatype/opal_datatype_dump.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_fake_stack.c b/opal/datatype/opal_datatype_fake_stack.c index 389804cd647..d336f6cf76d 100644 --- a/opal/datatype/opal_datatype_fake_stack.c +++ b/opal/datatype/opal_datatype_fake_stack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2017 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_get_count.c b/opal/datatype/opal_datatype_get_count.c index a860d5fb41e..9f1b0ecf8e5 100644 --- a/opal/datatype/opal_datatype_get_count.c +++ b/opal/datatype/opal_datatype_get_count.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. @@ -53,8 +53,10 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return nbElems; /* completed */ + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the beginning of the loop */ } - pos_desc = pStack->index + 1; continue; } if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { @@ -114,8 +116,10 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return 0; + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the beginning of the loop */ } - pos_desc = pStack->index + 1; continue; } if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { @@ -171,8 +175,10 @@ int opal_datatype_compute_ptypes( opal_datatype_t* datatype ) if( --(pStack->count) == 0 ) { /* end of loop */ stack_pos--; pStack--; if( stack_pos == -1 ) return 0; /* completed */ + pos_desc++; /* advance to the next element after the end loop */ + } else { + pos_desc = pStack->index + 1; /* go back to the beginning of the loop */ } - pos_desc = pStack->index + 1; continue; } if( 
OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) { diff --git a/opal/datatype/opal_datatype_internal.h b/opal/datatype/opal_datatype_internal.h index f749388b4e8..9ff34921495 100644 --- a/opal/datatype/opal_datatype_internal.h +++ b/opal/datatype/opal_datatype_internal.h @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/opal/datatype/opal_datatype_optimize.c b/opal/datatype/opal_datatype_optimize.c index 8c15777cf6a..882e3a8d979 100644 --- a/opal/datatype/opal_datatype_optimize.c +++ b/opal/datatype/opal_datatype_optimize.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, diff --git a/test/datatype/position_noncontig.c b/test/datatype/position_noncontig.c index 01779d186d5..0fb94c224ab 100644 --- a/test/datatype/position_noncontig.c +++ b/test/datatype/position_noncontig.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; -*- */ /* - * Copyright (c) 2004-2007 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved. 
From a5e08f55252f012a98676a3dfcd570ad1a3dbb14 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 4 May 2017 01:18:29 -0400 Subject: [PATCH 4/6] test/datatype: add test for short unpack on heterogeneous cluster Signed-off-by: Gilles Gouaillardet Signed-off-by: George Bosilca --- test/datatype/Makefile.am | 7 ++- test/datatype/unpack_hetero.c | 99 +++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 test/datatype/unpack_hetero.c diff --git a/test/datatype/Makefile.am b/test/datatype/Makefile.am index 9c9aaa4a1a0..cd867134a4f 100644 --- a/test/datatype/Makefile.am +++ b/test/datatype/Makefile.am @@ -18,7 +18,7 @@ if PROJECT_OMPI MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32 MPI_CHECKS = to_self endif -TESTS = opal_datatype_test $(MPI_TESTS) +TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS) check_PROGRAMS = $(TESTS) $(MPI_CHECKS) @@ -79,5 +79,10 @@ external32_LDADD = \ $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \ $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la +unpack_hetero_SOURCES = unpack_hetero.c +unpack_hetero_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS) +unpack_hetero_LDADD = \ + $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la + distclean: rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile diff --git a/test/datatype/unpack_hetero.c b/test/datatype/unpack_hetero.c new file mode 100644 index 00000000000..48c9c1c2746 --- /dev/null +++ b/test/datatype/unpack_hetero.c @@ -0,0 +1,99 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2014-2016 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/runtime/opal.h" +#include "opal/datatype/opal_datatype.h" +#include "opal/datatype/opal_datatype_internal.h" +#include "opal/datatype/opal_convertor.h" +#include "opal/datatype/opal_datatype_prototypes.h" +#include "opal/util/arch.h" +#include +#include +#ifdef HAVE_SYS_TIME_H +#include +#endif +#include +#include + +/* Compile with: +gcc -DHAVE_CONFIG_H -I. -I../../include -I../.. -I../../include -I../../../ompi-trunk/opal -I../../../ompi-trunk/orte -g opal_datatype_test.c -o opal_datatype_test +*/ + +uint32_t remote_arch = 0xffffffff; + +/** + * Main function. Call several tests and print-out the results. It tries to stress the convertor + * using difficult data-type constructions as well as strange segment sizes for the conversion. + * Usually, it is able to detect most of the data-type and convertor problems. Any modifications + * on the data-type engine should first pass all the tests from this file, before going into other + * tests. + */ +int main( int argc, char* argv[] ) +{ + opal_datatype_init(); + + /** + * Simulate a heterogeneous peer: same architecture as the local one but with the opposite endianness. 
+ */ + remote_arch = opal_local_arch ^ OPAL_ARCH_ISBIGENDIAN; + + opal_convertor_t * pConv; + int sbuf[2], rbuf[2]; + size_t max_data; + struct iovec a; + uint32_t iov_count; + + sbuf[0] = 0x01000000; sbuf[1] = 0x02000000; + + printf( "\n\n#\n * TEST UNPACKING 1 int out of 1\n#\n\n" ); + + pConv = opal_convertor_create( remote_arch, 0 ); + rbuf[0] = -1; rbuf[1] = -1; + if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 1, rbuf ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OPAL_ERROR; + } + + a.iov_base = sbuf; + a.iov_len = 4; + iov_count = 1; + max_data = 4; + opal_unpack_general( pConv, &a, &iov_count, &max_data ); + + assert(1 == rbuf[0]); + assert(-1 == rbuf[1]); + OBJ_RELEASE(pConv); + + printf( "\n\n#\n * TEST UNPACKING 1 int out of 2\n#\n\n" ); + pConv = opal_convertor_create( remote_arch, 0 ); + rbuf[0] = -1; rbuf[1] = -1; + if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 2, rbuf ) ) { + printf( "Cannot attach the datatype to a convertor\n" ); + return OPAL_ERROR; + } + + + a.iov_base = sbuf; + a.iov_len = 4; + iov_count = 1; + max_data = 4; + opal_unpack_general( pConv, &a, &iov_count, &max_data ); + + assert(1 == rbuf[0]); + assert(-1 == rbuf[1]); + OBJ_RELEASE(pConv); + + /* clean-ups all data allocations */ + opal_datatype_finalize(); + opal_finalize(); + return OPAL_SUCCESS; +} From 10228175accdc623d54b8aa4adad0195d5f657a0 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 4 May 2017 23:11:35 -0400 Subject: [PATCH 5/6] Trying to reduce the cost of creating a convertor. 
Signed-off-by: George Bosilca --- opal/datatype/opal_convertor.c | 51 +++++++++++++--------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index 82d1794bfc0..3ceab70a8a4 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -223,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv, if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) { /** * We are doing conversion on a contiguous datatype on a homogeneous - * environment. The convertor contain minimal informations, we only + * environment. The convertor contain minimal information, we only * use the bConverted to manage the conversion. */ uint32_t i; @@ -466,41 +466,29 @@ opal_datatype_compute_remote_size( const opal_datatype_t* pData, } /** - * Compute the remote size based on the datatype and count. Assume that the sizes - * are the sizes corresponding to the remote architecture. + * Compute the remote size. If necessary remove the homogeneous flag + * and redirect the convertor description toward the non-optimized + * datatype representation. 
*/ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) { - if( pConvertor->flags & CONVERTOR_HOMOGENEOUS ) { - pConvertor->remote_size = pConvertor->local_size; - } else { + opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc; + + pConvertor->remote_size = pConvertor->local_size; + if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) { + pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS); + pConvertor->use_desc = &(datatype->desc); if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) { /* This is for a single datatype, we must update it with the count */ - pConvertor->remote_size = opal_datatype_compute_remote_size(pConvertor->pDesc, + pConvertor->remote_size = opal_datatype_compute_remote_size(datatype, pConvertor->master->remote_sizes); pConvertor->remote_size *= pConvertor->count; - pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE; } } pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE; return pConvertor->remote_size; } - -/** - * Compute the remote size. If necessary remove the homogeneous flag - * and redirect the convertor description toward the non-optimized - * datatype representation. - */ -#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype) \ - do { \ - if( datatype->bdt_used & convertor->master->hetero_mask ) { \ - convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \ - convertor->use_desc = &(datatype->desc); \ - } \ - opal_convertor_compute_remote_size( (convertor) ); \ - } while(0) - /** * This macro will initialize a convertor based on a previously created * convertor. 
The idea is the move outside these function the heavy @@ -510,27 +498,26 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) */ #define OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \ { \ + convertor->local_size = count * datatype->size; \ + convertor->pBaseBuf = (unsigned char*)pUserBuf; \ + convertor->count = count; \ + convertor->pDesc = (opal_datatype_t*)datatype; \ + convertor->bConverted = 0; \ + convertor->use_desc = &(datatype->opt_desc); \ /* If the data is empty we just mark the convertor as \ * completed. With this flag set the pack and unpack functions \ * will not do anything. \ */ \ if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \ - convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \ + convertor->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE); \ convertor->local_size = convertor->remote_size = 0; \ return OPAL_SUCCESS; \ } \ - /* Compute the local in advance */ \ - convertor->local_size = count * datatype->size; \ - convertor->pBaseBuf = (unsigned char*)pUserBuf; \ - convertor->count = count; \ \ /* Grab the datatype part of the flags */ \ convertor->flags &= CONVERTOR_TYPE_MASK; \ convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \ convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \ - convertor->pDesc = (opal_datatype_t*)datatype; \ - convertor->bConverted = 0; \ - convertor->use_desc = &(datatype->opt_desc); \ \ convertor->remote_size = convertor->local_size; \ if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \ @@ -542,7 +529,7 @@ size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor ) } \ \ assert( (convertor)->pDesc == (datatype) ); \ - OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype ); \ + opal_convertor_compute_remote_size( convertor ); \ assert( NULL != convertor->use_desc->desc ); \ /* For predefined datatypes (contiguous) do nothing more */ \ /* if checksum is 
enabled then always continue */ \ From b3cc530852bafd3cc0eacef47938cc73b6ab1ed3 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 4 May 2017 23:24:16 -0400 Subject: [PATCH 6/6] Respect the unpack boundaries. As Gilles suggested on #2535 the opal_unpack_general_function was unpacking based on the requested count and not on the amount of packed data provided. Fixes #2535. Signed-off-by: George Bosilca --- opal/datatype/opal_datatype_unpack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 093610b897a..ec046a99001 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -502,6 +502,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor, conv_ptr = pConvertor->pBaseBuf + pStack->disp; pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); + if( 0 == iov_len_local ) goto complete_loop; /* escape if we're done */ continue; } conv_ptr += rc * description[pos_desc].elem.extent;