diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h index b9f5c16582931..77fc90711abce 100644 --- a/offload/include/OpenMP/Mapping.h +++ b/offload/include/OpenMP/Mapping.h @@ -386,6 +386,12 @@ struct LookupResult { LookupResult() : Flags({0, 0, 0}), TPR() {} TargetPointerResultTy TPR; + + bool isEmpty() const { + bool IsEmpty = Flags.IsContained == 0 && Flags.ExtendsBefore == 0 && + Flags.ExtendsAfter == 0; + return IsEmpty; + } }; // This structure stores information of a mapped memory region. diff --git a/offload/include/Shared/Debug.h b/offload/include/Shared/Debug.h index 7c3db8dbf119f..ec8f4bbedaaeb 100644 --- a/offload/include/Shared/Debug.h +++ b/offload/include/Shared/Debug.h @@ -58,6 +58,8 @@ enum OpenMPInfoType : uint32_t { OMP_INFOTYPE_DATA_TRANSFER = 0x0020, // Print whenever data does not have a viable device counterpart. OMP_INFOTYPE_EMPTY_MAPPING = 0x0040, + // Print whenever data does not need to be transferred + OMP_INFOTYPE_REDUNDANT_TRANSFER = 0x0080, // Enable every flag. OMP_INFOTYPE_ALL = 0xffffffff, }; diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 5b25d955dd320..4a09d1b09ef27 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -1197,6 +1197,97 @@ class PrivateArgumentManagerTy { } }; +/// Try to determine if kernel argument is unused. This method +/// takes a conservative approach, i.e. it may return false +/// negatives but it should never return a false positive. +static bool isArgUnused(tgt_map_type ArgType) { + bool IsArgUnused = ArgType == OMP_TGT_MAPTYPE_NONE || + ArgType == OMP_TGT_MAPTYPE_FROM || + ArgType == OMP_TGT_MAPTYPE_TO || + ArgType == (OMP_TGT_MAPTYPE_FROM | OMP_TGT_MAPTYPE_TO); + return IsArgUnused; +} + +/// Try to find redundant mappings associated with a kernel launch, +/// and provide a masked version of the kernel argument types that +/// avoid redundant data transfers between the host and the device. +static std::unique_ptr maskRedundantTransfers(DeviceTy &Device, int32_t ArgNum, + int64_t *ArgTypes, int64_t *ArgSizes, + map_var_info_t *ArgNames, void **ArgPtrs, + void **ArgMappers) { + std::unique_ptr ArgTypesOverride = std::make_unique(ArgNum); + + bool AllArgsUnused = true; + + for (int32_t I = 0; I < ArgNum; ++I) { + bool IsCustomMapped = ArgMappers && ArgMappers[I]; + + if (IsCustomMapped) { + ArgTypesOverride[I] = ArgTypes[I]; + AllArgsUnused = false; + continue; + } + + tgt_map_type ArgType = (tgt_map_type) ArgTypes[I]; + + bool IsArgUnused = false; + + // Check for unused `map(buf[0:size])` mappings + IsArgUnused |= isArgUnused(ArgType); + + bool IsArgMemberPtr = ArgType & OMP_TGT_MAPTYPE_MEMBER_OF && + ArgType & OMP_TGT_MAPTYPE_PTR_AND_OBJ; + + tgt_map_type ArgTypeMemberPtrMasked = + (tgt_map_type)(ArgType & ~(OMP_TGT_MAPTYPE_MEMBER_OF | + OMP_TGT_MAPTYPE_PTR_AND_OBJ)); + + // Check for unused `map(wrapper.buf[0:size])` mappings + IsArgUnused |= AllArgsUnused && IsArgMemberPtr && + isArgUnused(ArgTypeMemberPtrMasked); + + if (!IsArgUnused) { + ArgTypesOverride[I] = ArgTypes[I]; + AllArgsUnused = false; + continue; + } + + MappingInfoTy &MappingInfo = Device.getMappingInfo(); + MappingInfoTy::HDTTMapAccessorTy HDTTMap = + MappingInfo.HostDataToTargetMap.getExclusiveAccessor(); + + bool IsExistingMapping = + !MappingInfo.lookupMapping(HDTTMap, ArgPtrs[I], ArgSizes[I]) + .isEmpty(); + + if (IsExistingMapping) { + ArgTypesOverride[I] = ArgTypes[I]; + AllArgsUnused = false; + continue; + } + + [[maybe_unused]] const std::string Name = + ArgNames && ArgNames[I] ? getNameFromMapping(ArgNames[I]) + : std::string("unknown"); + + bool IsArgFrom = ArgType & OMP_TGT_MAPTYPE_FROM; + bool IsArgTo = ArgType & OMP_TGT_MAPTYPE_TO; + + [[maybe_unused]] const char *Type = IsArgFrom && IsArgTo ? "tofrom" + : IsArgFrom ? "from" + : IsArgTo ? "to" + : "unknown"; + + INFO(OMP_INFOTYPE_REDUNDANT_TRANSFER, Device.DeviceID, "%s(%s)[%" PRId64 "] %s\n", Type, + Name.c_str(), ArgSizes[I], "is not used and will not be copied"); + + ArgTypesOverride[I] = + ArgType & ~(OMP_TGT_MAPTYPE_TO | OMP_TGT_MAPTYPE_FROM); + } + + return ArgTypesOverride; +} + /// Process data before launching the kernel, including calling targetDataBegin /// to map and transfer data to target device, transferring (first-)private /// variables. @@ -1417,11 +1508,16 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int NumClangLaunchArgs = KernelArgs.NumArgs; int Ret = OFFLOAD_SUCCESS; + + std::unique_ptr ArgTypesOverride = + maskRedundantTransfers(Device, NumClangLaunchArgs, KernelArgs.ArgTypes, + KernelArgs.ArgSizes, KernelArgs.ArgNames, KernelArgs.ArgPtrs, KernelArgs.ArgMappers); + if (NumClangLaunchArgs) { // Process data, such as data mapping, before launching the kernel Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs, KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs, - KernelArgs.ArgSizes, KernelArgs.ArgTypes, + KernelArgs.ArgSizes, ArgTypesOverride.get(), KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs, TgtOffsets, PrivateArgumentManager, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { @@ -1473,7 +1569,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, // variables Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs, KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs, - KernelArgs.ArgSizes, KernelArgs.ArgTypes, + KernelArgs.ArgSizes, ArgTypesOverride.get(), KernelArgs.ArgNames, KernelArgs.ArgMappers, PrivateArgumentManager, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { diff --git a/offload/test/mapping/skip_transfers.cpp b/offload/test/mapping/skip_transfers.cpp new file mode 100644 index 0000000000000..6e5bc605613bc --- /dev/null +++ b/offload/test/mapping/skip_transfers.cpp @@ -0,0 +1,88 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic +// RUN: env LIBOMPTARGET_INFO=160 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// REQUIRES: gpu +// clang-format on + +int main() { + float DataStack = 0; + +// CHECK: omptarget device 0 info: Copying data from device to host, +// TgtPtr=0x{{.*}}, HstPtr=0x{{.*}}, Size=4, Name=unknown +#pragma omp target map(from : DataStack) + { + DataStack = 1; + } + +// CHECK: omptarget device 0 info: Copying data from host to device, +// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=4, Name=unknown +#pragma omp target map(always to : DataStack) + ; + +// CHECK: omptarget device 0 info: tofrom(unknown)[4] is not used and will not +// be copied +#pragma omp target map(tofrom : DataStack) + ; + + int Size = 16; + double *Data = new double[Size]; + +// CHECK: omptarget device 0 info: Copying data from host to device, +// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=8, Name=unknown CHECK: omptarget +// device 0 info: Copying data from device to host, TgtPtr=0x{{.*}}, +// HstPtr=0x{{.*}}, Size=8, Name=unknown +#pragma omp target map(tofrom : Data[0 : 1]) + { + Data[0] = 1; + } + +// CHECK: omptarget device 0 info: Copying data from host to device, +// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=16, Name=unknown CHECK: omptarget +// device 0 info: Copying data from device to host, TgtPtr=0x{{.*}}, +// HstPtr=0x{{.*}}, Size=16, Name=unknown +#pragma omp target map(always tofrom : Data[0 : 2]) + ; + +// CHECK: omptarget device 0 info: from(unknown)[24] is not used and will not be +// copied +#pragma omp target map(from : Data[0 : 3]) + ; + +// CHECK: omptarget device 0 info: to(unknown)[24] is not used and will not be +// copied +#pragma omp target map(to : Data[0 : 3]) + ; + +// CHECK: omptarget device 0 info: tofrom(unknown)[32] is not used and will not +// be copied +#pragma omp target map(tofrom : Data[0 : 4]) + ; + +// CHECK: omptarget device 0 info: Copying data from host to device, +// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=40, Name=unknown +#pragma omp target map(to : Data[0 : 5]) + { +#pragma omp teams + Data[0] = 1; + } + + struct { + double *Data; + } Wrapper{.Data = Data}; + +// CHECK: omptarget device 0 info: Copying data from host to device, +// HstPtr=0x{{.*}}, TgtPtr=0x{{.*}}, Size=48, Name=unknown CHECK: omptarget +// device 0 info: Copying data from device to host, TgtPtr=0x{{.*}}, +// HstPtr=0x{{.*}}, Size=48, Name=unknown +#pragma omp target map(tofrom : Wrapper.Data[0 : 6]) + { + Wrapper.Data[0] = 1; + } + + // CHECK: omptarget device 0 info: unknown(unknown)[8] is not used and will not be copied + // CHECK: omptarget device 0 info: tofrom(unknown)[56] is not used and will not be copied + #pragma omp target map(tofrom: Wrapper.Data[0:7]) + ; +}