diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 72677bff67..a7707237a6 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -721,7 +721,10 @@ ur_result_t AsanInterceptor::prepareLaunch( ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, ValidateResult, PtrPair.second); - exitWithErrors(); + if (ValidateResult.Type != + ValidateUSMResult::MAYBE_HOST_POINTER) { + exitWithErrors(); + } } } } @@ -864,13 +867,15 @@ AsanInterceptor::findAllocInfoByAddress(uptr Address) { std::shared_lock Guard(m_AllocationMapMutex); auto It = m_AllocationMap.upper_bound(Address); if (It == m_AllocationMap.begin()) { - return std::optional{}; + return std::nullopt; } --It; - // Make sure we got the right AllocInfo - assert(Address >= It->second->AllocBegin && - Address < It->second->AllocBegin + It->second->AllocSize && - "Wrong AllocInfo for the address"); + + // Maybe it's a host pointer + if (Address < It->second->AllocBegin || + Address >= It->second->AllocBegin + It->second->AllocSize) { + return std::nullopt; + } return It; } diff --git a/source/loader/layers/sanitizer/msan/msan_buffer.cpp b/source/loader/layers/sanitizer/msan/msan_buffer.cpp index 66ebb10326..8c2080b3ac 100644 --- a/source/loader/layers/sanitizer/msan/msan_buffer.cpp +++ b/source/loader/layers/sanitizer/msan/msan_buffer.cpp @@ -48,22 +48,67 @@ ur_result_t EnqueueMemCopyRectHelper( char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y + DstSlicePitch * DstOffset.z; + const bool IsDstDeviceUSM = getMsanInterceptor() + ->findAllocInfoByAddress((uptr)DstOrigin) + .has_value(); + const bool IsSrcDeviceUSM = getMsanInterceptor() + ->findAllocInfoByAddress((uptr)SrcOrigin) + .has_value(); + + ur_device_handle_t Device = GetDevice(Queue); + std::shared_ptr DeviceInfo = + getMsanInterceptor()->getDeviceInfo(Device); std::vector Events; - Events.reserve(Region.depth); + // For now, USM doesn't support 3D memory copy operation, so we can only // loop call 2D memory copy function to implement it. for (size_t i = 0; i < Region.depth; i++) { ur_event_handle_t NewEvent{}; UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D( - Queue, Blocking, DstOrigin + (i * DstSlicePitch), DstRowPitch, + Queue, false, DstOrigin + (i * DstSlicePitch), DstRowPitch, SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width, Region.height, NumEventsInWaitList, EventWaitList, &NewEvent)); - Events.push_back(NewEvent); + + // Update shadow memory + if (IsDstDeviceUSM && IsSrcDeviceUSM) { + NewEvent = nullptr; + uptr DstShadowAddr = DeviceInfo->Shadow->MemToShadow( + (uptr)DstOrigin + (i * DstSlicePitch)); + uptr SrcShadowAddr = DeviceInfo->Shadow->MemToShadow( + (uptr)SrcOrigin + (i * SrcSlicePitch)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D( + Queue, false, (void *)DstShadowAddr, DstRowPitch, + (void *)SrcShadowAddr, SrcRowPitch, Region.width, Region.height, + NumEventsInWaitList, EventWaitList, &NewEvent)); + Events.push_back(NewEvent); + } else if (IsDstDeviceUSM && !IsSrcDeviceUSM) { + uptr DstShadowAddr = DeviceInfo->Shadow->MemToShadow( + (uptr)DstOrigin + (i * DstSlicePitch)); + const char Val = 0; + // opencl & l0 adapter doesn't implement urEnqueueUSMFill2D, so + // emulate the operation with urEnqueueUSMFill. + for (size_t HeightIndex = 0; HeightIndex < Region.height; + HeightIndex++) { + NewEvent = nullptr; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( + Queue, (void *)(DstShadowAddr + HeightIndex * DstRowPitch), + 1, &Val, Region.width, NumEventsInWaitList, EventWaitList, + &NewEvent)); + Events.push_back(NewEvent); + } + } } - UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - Queue, Events.size(), Events.data(), Event)); + if (Blocking) { + UR_CALL( + getContext()->urDdiTable.Event.pfnWait(Events.size(), &Events[0])); + } + + if (Event) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + Queue, Events.size(), &Events[0], Event)); + } return UR_RESULT_SUCCESS; } @@ -93,7 +138,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; URes = getMsanInterceptor()->allocateMemory( - Context, Device, &USMDesc, Pool, Size, + Context, Device, &USMDesc, Pool, Size, AllocType::DEVICE_USM, ur_cast(&Allocation)); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error( @@ -112,6 +157,12 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { Size, HostPtr, this); return URes; } + + // Update shadow memory + std::shared_ptr DeviceInfo = + getMsanInterceptor()->getDeviceInfo(Device); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + Queue, (uptr)Allocation, Size, 0)); } } @@ -130,8 +181,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { ur_usm_desc_t USMDesc{}; USMDesc.align = getAlignment(); ur_usm_pool_handle_t Pool{}; - URes = getMsanInterceptor()->allocateMemory( - Context, nullptr, &USMDesc, Pool, Size, + URes = getContext()->urDdiTable.USM.pfnHostAlloc( + Context, &USMDesc, Pool, Size, ur_cast(&HostAllocation)); if (URes != UR_RESULT_SUCCESS) { getContext()->logger.error("Failed to allocate {} bytes host " diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 2dfeadc358..a0fac2da75 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -99,13 +99,50 @@ ur_result_t urUSMDeviceAlloc( ) { getContext()->logger.debug("==== urUSMDeviceAlloc"); - return getMsanInterceptor()->allocateMemory(hContext, hDevice, pUSMDesc, - pool, size, ppMem); + return getMsanInterceptor()->allocateMemory( + hContext, hDevice, pUSMDesc, pool, size, AllocType::DEVICE_USM, ppMem); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMHostAlloc +ur_result_t UR_APICALL urUSMHostAlloc( + ur_context_handle_t hContext, ///< [in] handle of the context object + const ur_usm_desc_t + *pUSMDesc, ///< [in][optional] USM memory allocation descriptor + ur_usm_pool_handle_t + pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate + size_t + size, ///< [in] size in bytes of the USM memory object to be allocated + void **ppMem ///< [out] pointer to USM host memory object +) { + getContext()->logger.debug("==== urUSMHostAlloc"); + + return getMsanInterceptor()->allocateMemory( + hContext, nullptr, pUSMDesc, pool, size, AllocType::HOST_USM, ppMem); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMSharedAlloc +ur_result_t UR_APICALL urUSMSharedAlloc( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + const ur_usm_desc_t * + pUSMDesc, ///< [in][optional] Pointer to USM memory allocation descriptor. + ur_usm_pool_handle_t + pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate + size_t + size, ///< [in] size in bytes of the USM memory object to be allocated + void **ppMem ///< [out] pointer to USM shared memory object +) { + getContext()->logger.debug("==== urUSMSharedAlloc"); + + return getMsanInterceptor()->allocateMemory( + hContext, hDevice, pUSMDesc, pool, size, AllocType::SHARED_USM, ppMem); } /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMFree -__urdlllocal ur_result_t UR_APICALL urUSMFree( +ur_result_t UR_APICALL urUSMFree( ur_context_handle_t hContext, ///< [in] handle of the context object void *pMem ///< [in] pointer to USM memory object ) { @@ -515,6 +552,12 @@ ur_result_t urMemBufferCreate( UR_CALL(pMemBuffer->getHandle(hDevice, Handle)); UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( InternalQueue, true, Handle, Host, size, 0, nullptr, nullptr)); + + // Update shadow memory + std::shared_ptr DeviceInfo = + getMsanInterceptor()->getDeviceInfo(hDevice); + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow( + InternalQueue, (uptr)Handle, size, 0)); } } @@ -730,10 +773,29 @@ ur_result_t urEnqueueMemBufferWrite( if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { ur_device_handle_t Device = GetDevice(hQueue); char *pDst = nullptr; + std::vector Events; + ur_event_handle_t Event{}; UR_CALL(MemBuffer->getHandle(Device, pDst)); UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( hQueue, blockingWrite, pDst + offset, pSrc, size, - numEventsInWaitList, phEventWaitList, phEvent)); + numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); + + // Update shadow memory + std::shared_ptr DeviceInfo = + getMsanInterceptor()->getDeviceInfo(Device); + const char Val = 0; + uptr ShadowAddr = DeviceInfo->Shadow->MemToShadow((uptr)pDst + offset); + Event = nullptr; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( + hQueue, (void *)ShadowAddr, 1, &Val, size, numEventsInWaitList, + phEventWaitList, &Event)); + Events.push_back(Event); + + if (phEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + } } else { UR_CALL(pfnMemBufferWrite(hQueue, hBuffer, blockingWrite, offset, size, pSrc, numEventsInWaitList, phEventWaitList, @@ -893,15 +955,36 @@ ur_result_t urEnqueueMemBufferCopy( if (SrcBuffer && DstBuffer) { ur_device_handle_t Device = GetDevice(hQueue); + std::shared_ptr DeviceInfo = + getMsanInterceptor()->getDeviceInfo(Device); char *SrcHandle = nullptr; UR_CALL(SrcBuffer->getHandle(Device, SrcHandle)); char *DstHandle = nullptr; UR_CALL(DstBuffer->getHandle(Device, DstHandle)); + std::vector Events; + ur_event_handle_t Event{}; UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( hQueue, false, DstHandle + dstOffset, SrcHandle + srcOffset, size, - numEventsInWaitList, phEventWaitList, phEvent)); + numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); + + // Update shadow memory + uptr DstShadowAddr = + DeviceInfo->Shadow->MemToShadow((uptr)DstHandle + dstOffset); + uptr SrcShadowAddr = + DeviceInfo->Shadow->MemToShadow((uptr)SrcHandle + srcOffset); + Event = nullptr; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + hQueue, false, (void *)DstShadowAddr, (void *)SrcShadowAddr, size, + numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); + + if (phEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + } } else { UR_CALL(pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, dstOffset, size, numEventsInWaitList, @@ -1000,11 +1083,31 @@ ur_result_t urEnqueueMemBufferFill( if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { char *Handle = nullptr; + std::vector Events; + ur_event_handle_t Event{}; ur_device_handle_t Device = GetDevice(hQueue); UR_CALL(MemBuffer->getHandle(Device, Handle)); UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( hQueue, Handle + offset, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, phEvent)); + numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); + + // Update shadow memory + std::shared_ptr DeviceInfo = + getMsanInterceptor()->getDeviceInfo(Device); + const char Val = 0; + uptr ShadowAddr = + DeviceInfo->Shadow->MemToShadow((uptr)Handle + offset); + Event = nullptr; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( + hQueue, (void *)ShadowAddr, 1, &Val, size, numEventsInWaitList, + phEventWaitList, &Event)); + Events.push_back(Event); + + if (phEvent) { + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + hQueue, Events.size(), Events.data(), phEvent)); + } } else { UR_CALL(pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, size, numEventsInWaitList, phEventWaitList, @@ -1270,9 +1373,11 @@ ur_result_t UR_APICALL urEnqueueUSMFill( auto pfnUSMFill = getContext()->urDdiTable.Enqueue.pfnUSMFill; getContext()->logger.debug("==== urEnqueueUSMFill"); - ur_event_handle_t hEvents[2] = {}; + std::vector Events; + ur_event_handle_t Event{}; UR_CALL(pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, - numEventsInWaitList, phEventWaitList, &hEvents[0])); + numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); @@ -1283,13 +1388,15 @@ ur_result_t UR_APICALL urEnqueueUSMFill( getMsanInterceptor()->getDeviceInfo(MemInfo->Device); const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); + Event = nullptr; UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)MemShadow, 0, size, 0, - nullptr, &hEvents[1])); + nullptr, &Event)); + Events.push_back(Event); } if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, 2, hEvents, phEvent)); + hQueue, Events.size(), Events.data(), phEvent)); } return UR_RESULT_SUCCESS; @@ -1319,9 +1426,11 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( auto pfnUSMMemcpy = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy; getContext()->logger.debug("==== pfnUSMMemcpy"); - ur_event_handle_t hEvents[2] = {}; + std::vector Events; + ur_event_handle_t Event{}; UR_CALL(pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, - numEventsInWaitList, phEventWaitList, &hEvents[0])); + numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); @@ -1336,8 +1445,10 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + Event = nullptr; UR_CALL(pfnUSMMemcpy(hQueue, blocking, (void *)DstShadow, - (void *)SrcShadow, size, 0, nullptr, &hEvents[1])); + (void *)SrcShadow, size, 0, nullptr, &Event)); + Events.push_back(Event); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1345,13 +1456,15 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( getMsanInterceptor()->getDeviceInfo(DstInfo->Device); auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + Event = nullptr; UR_CALL(EnqueueUSMBlockingSet(hQueue, (void *)DstShadow, 0, size, 0, - nullptr, &hEvents[1])); + nullptr, &Event)); + Events.push_back(Event); } if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, 2, hEvents, phEvent)); + hQueue, Events.size(), Events.data(), phEvent)); } return UR_RESULT_SUCCESS; @@ -1387,10 +1500,11 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( auto pfnUSMFill2D = getContext()->urDdiTable.Enqueue.pfnUSMFill2D; getContext()->logger.debug("==== urEnqueueUSMFill2D"); - ur_event_handle_t hEvents[2] = {}; + std::vector Events; + ur_event_handle_t Event{}; UR_CALL(pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, - height, numEventsInWaitList, phEventWaitList, - &hEvents[0])); + height, numEventsInWaitList, phEventWaitList, &Event)); + Events.push_back(Event); const auto Mem = (uptr)pMem; auto MemInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Mem); @@ -1402,13 +1516,15 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( const auto MemShadow = DeviceInfo->Shadow->MemToShadow(Mem); const char Pattern = 0; + Event = nullptr; UR_CALL(pfnUSMFill2D(hQueue, (void *)MemShadow, pitch, 1, &Pattern, - width, height, 0, nullptr, &hEvents[1])); + width, height, 0, nullptr, &Event)); + Events.push_back(Event); } if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, 2, hEvents, phEvent)); + hQueue, Events.size(), Events.data(), phEvent)); } return UR_RESULT_SUCCESS; @@ -1443,10 +1559,12 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( auto pfnUSMMemcpy2D = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D; getContext()->logger.debug("==== pfnUSMMemcpy2D"); - ur_event_handle_t hEvents[2] = {}; + std::vector Events; + ur_event_handle_t Event{}; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, width, height, numEventsInWaitList, phEventWaitList, - &hEvents[0])); + &Event)); + Events.push_back(Event); const auto Src = (uptr)pSrc, Dst = (uptr)pDst; auto SrcInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Src); @@ -1461,9 +1579,11 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto SrcShadow = DeviceInfo->Shadow->MemToShadow(Src); const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); + Event = nullptr; UR_CALL(pfnUSMMemcpy2D(hQueue, blocking, (void *)DstShadow, dstPitch, (void *)SrcShadow, srcPitch, width, height, 0, - nullptr, &hEvents[1])); + nullptr, &Event)); + Events.push_back(Event); } else if (DstInfoItOp) { auto DstInfo = (*DstInfoItOp)->second; @@ -1472,14 +1592,16 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( const auto DstShadow = DeviceInfo->Shadow->MemToShadow(Dst); const char Pattern = 0; + Event = nullptr; UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill2D( hQueue, (void *)DstShadow, dstPitch, 1, &Pattern, width, height, 0, - nullptr, &hEvents[1])); + nullptr, &Event)); + Events.push_back(Event); } if (phEvent) { UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( - hQueue, 2, hEvents, phEvent)); + hQueue, Events.size(), Events.data(), phEvent)); } return UR_RESULT_SUCCESS; @@ -1663,6 +1785,8 @@ ur_result_t urGetUSMProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::msan::urUSMDeviceAlloc; + pDdiTable->pfnHostAlloc = ur_sanitizer_layer::msan::urUSMHostAlloc; + pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::msan::urUSMSharedAlloc; pDdiTable->pfnFree = ur_sanitizer_layer::msan::urUSMFree; return result; diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index cdaa088297..21a19e11f3 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -46,18 +46,36 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, ur_device_handle_t Device, const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, - size_t Size, void **ResultPtr) { + size_t Size, AllocType Type, + void **ResultPtr) { auto ContextInfo = getContextInfo(Context); - std::shared_ptr DeviceInfo = getDeviceInfo(Device); + std::shared_ptr DeviceInfo = + Device ? getDeviceInfo(Device) : nullptr; void *Allocated = nullptr; - UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( - Context, Device, Properties, Pool, Size, &Allocated)); + if (Type == AllocType::DEVICE_USM) { + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, Properties, Pool, Size, &Allocated)); + } else if (Type == AllocType::HOST_USM) { + UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc( + Context, Properties, Pool, Size, &Allocated)); + } else if (Type == AllocType::SHARED_USM) { + UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( + Context, Device, Properties, Pool, Size, &Allocated)); + } *ResultPtr = Allocated; + ContextInfo->MaxAllocatedSize = + std::max(ContextInfo->MaxAllocatedSize, Size); + + // For host/shared usm, we only record the alloc size. + if (Type != AllocType::DEVICE_USM) { + return UR_RESULT_SUCCESS; + } + auto AI = std::make_shared(MsanAllocInfo{(uptr)Allocated, Size, @@ -75,6 +93,7 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, m_AllocationMap.emplace(AI->AllocBegin, AI); } + // Update shadow memory ManagedQueue Queue(Context, Device); DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, AI->AllocSize, 0xff); @@ -144,6 +163,12 @@ ur_result_t MsanInterceptor::registerProgram(ur_program_handle_t Program) { return Result; } + getContext()->logger.info("registerDeviceGlobals"); + Result = registerDeviceGlobals(Program); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + return Result; } @@ -212,6 +237,56 @@ ur_result_t MsanInterceptor::registerSpirKernels(ur_program_handle_t Program) { return UR_RESULT_SUCCESS; } +ur_result_t +MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { + std::vector Devices = GetDevices(Program); + assert(Devices.size() != 0 && "No devices in registerDeviceGlobals"); + auto Context = GetContext(Program); + auto ContextInfo = getContextInfo(Context); + auto ProgramInfo = getProgramInfo(Program); + assert(ProgramInfo != nullptr && "unregistered program!"); + + for (auto Device : Devices) { + ManagedQueue Queue(Context, Device); + + size_t MetadataSize; + void *MetadataPtr; + auto Result = + getContext()->urDdiTable.Program.pfnGetGlobalVariablePointer( + Device, Program, kSPIR_MsanDeviceGlobalMetadata, &MetadataSize, + &MetadataPtr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.info("No device globals"); + continue; + } + + const uint64_t NumOfDeviceGlobal = + MetadataSize / sizeof(DeviceGlobalInfo); + assert((MetadataSize % sizeof(DeviceGlobalInfo) == 0) && + "DeviceGlobal metadata size is not correct"); + std::vector GVInfos(NumOfDeviceGlobal); + Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, &GVInfos[0], MetadataPtr, + sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error("Device Global[{}] Read Failed: {}", + kSPIR_MsanDeviceGlobalMetadata, Result); + return Result; + } + + auto DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device); + for (size_t i = 0; i < NumOfDeviceGlobal; i++) { + const auto &GVInfo = GVInfos[i]; + UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr, + GVInfo.Size, 0)); + ContextInfo->MaxAllocatedSize = + std::max(ContextInfo->MaxAllocatedSize, GVInfo.Size); + } + } + + return UR_RESULT_SUCCESS; +} + ur_result_t MsanInterceptor::insertContext(ur_context_handle_t Context, std::shared_ptr &CI) { std::scoped_lock Guard(m_ContextMapMutex); @@ -379,10 +454,14 @@ ur_result_t MsanInterceptor::prepareLaunch( } // Set LaunchInfo + auto ContextInfo = getContextInfo(LaunchInfo.Context); LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; LaunchInfo.Data->DeviceTy = DeviceInfo->Type; LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0; + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr, + ContextInfo->MaxAllocatedSize, &LaunchInfo.Data->CleanShadow)); getContext()->logger.info( "launch_info {} (GlobalShadow={}, Device={}, Debug={})", @@ -465,6 +544,11 @@ ur_result_t USMLaunchInfo::initialize() { USMLaunchInfo::~USMLaunchInfo() { [[maybe_unused]] ur_result_t Result; if (Data) { + if (Data->CleanShadow) { + Result = getContext()->urDdiTable.USM.pfnFree(Context, + Data->CleanShadow); + assert(Result == UR_RESULT_SUCCESS); + } Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data); assert(Result == UR_RESULT_SUCCESS); } diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp index 81c237380f..fea52741f3 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -121,6 +121,7 @@ struct ProgramInfo { struct ContextInfo { ur_context_handle_t Handle; + size_t MaxAllocatedSize = 1024; std::atomic RefCount = 1; std::vector DeviceList; @@ -159,6 +160,11 @@ struct USMLaunchInfo { ur_result_t initialize(); }; +struct DeviceGlobalInfo { + uptr Size; + uptr Addr; +}; + struct SpirKernelInfo { uptr KernelName; uptr Size; @@ -174,7 +180,7 @@ class MsanInterceptor { ur_device_handle_t Device, const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size, - void **ResultPtr); + AllocType Type, void **ResultPtr); ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr); ur_result_t registerProgram(ur_program_handle_t Program); @@ -261,6 +267,7 @@ class MsanInterceptor { std::shared_ptr &DeviceInfo); ur_result_t registerSpirKernels(ur_program_handle_t Program); + ur_result_t registerDeviceGlobals(ur_program_handle_t Program); private: std::unordered_map> diff --git a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index 32e8f36552..0888c9dc75 100644 --- a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -53,7 +53,7 @@ struct MsanLaunchInfo { MsanErrorReport Report; - uint8_t CleanShadow[128] = {}; + void *CleanShadow = nullptr; }; // Based on the observation, only the last 24 bits of the address of the private diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 2cdf8600d2..2573b4caa5 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -227,15 +227,10 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow( VirtualMemMaps[MappedPtr].first = PhysicalMem; } - // We don't need to record virtual memory map for null pointer, - // since it doesn't have an alloc info. - if (Ptr == 0) { - continue; + auto AllocInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Ptr); + if (AllocInfoItOp) { + VirtualMemMaps[MappedPtr].second.insert((*AllocInfoItOp)->second); } - - auto AllocInfoIt = getMsanInterceptor()->findAllocInfoByAddress(Ptr); - assert(AllocInfoIt); - VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second); } return UR_RESULT_SUCCESS;