Skip to content

Revert "[UR][CUDA] Add tensor map APIs" #2610

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
299 changes: 0 additions & 299 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,6 @@ typedef enum ur_function_t {
UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245,
/// Enumerator for ::urEnqueueEventsWaitWithBarrierExt
UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246,
/// Enumerator for ::urTensorMapEncodeIm2ColExp
UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247,
/// Enumerator for ::urTensorMapEncodeTiledExp
UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248,
/// Enumerator for ::urPhysicalMemGetInfo
UR_FUNCTION_PHYSICAL_MEM_GET_INFO = 249,
/// @cond
Expand Down Expand Up @@ -12131,258 +12127,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp(
/// array.
ur_event_handle_t *phEvent);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects
#if !defined(__GNUC__)
#pragma region tensor_map_(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of tensor map object
///
/// @details
/// - Declared as a pointer to `struct ur_exp_tensor_map_handle_t_`; that
///   struct is only named here, not defined, so the handle is opaque to
///   code that sees just this declaration.
typedef struct ur_exp_tensor_map_handle_t_ *ur_exp_tensor_map_handle_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map data type
///
/// @details
/// - ::ur_exp_tensor_map_data_type_flags_t is the 32-bit unsigned integer
///   type used to carry values of this enumeration.
/// - Enumerators are defined with UR_BIT(0) through UR_BIT(12), one per
///   data type. The byte count on each enumerator is the size noted for
///   that data type.
typedef uint32_t ur_exp_tensor_map_data_type_flags_t;
typedef enum ur_exp_tensor_map_data_type_flag_t {
/// 1 byte
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8 = UR_BIT(0),
/// 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16 = UR_BIT(1),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32 = UR_BIT(2),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32 = UR_BIT(3),
/// 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64 = UR_BIT(4),
/// 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64 = UR_BIT(5),
/// 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16 = UR_BIT(6),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32 = UR_BIT(7),
/// 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64 = UR_BIT(8),
/// 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16 = UR_BIT(9),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ = UR_BIT(10),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32 = UR_BIT(11),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ = UR_BIT(12),
/// @cond
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_data_type_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_data_type_flags_t
/// @details Has bits 13 through 31 set, i.e. the bits above the highest
///          UR_BIT index used by the enumerators. The encode functions
///          document ::UR_RESULT_ERROR_INVALID_ENUMERATION when
///          `mask & value` is non-zero.
#define UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK 0xffffe000

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map interleave
///
/// @details
/// - ::ur_exp_tensor_map_interleave_flags_t is the 32-bit unsigned integer
///   type used to carry values of this enumeration.
/// - Note that the "no interleave" choice is itself a flag value
///   (UR_BIT(0)), not zero.
typedef uint32_t ur_exp_tensor_map_interleave_flags_t;
typedef enum ur_exp_tensor_map_interleave_flag_t {
/// No interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE = UR_BIT(0),
/// 16B interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B = UR_BIT(1),
/// 32B interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B = UR_BIT(2),
/// @cond
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_interleave_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_interleave_flags_t
/// @details Has bits 3 through 31 set, i.e. the bits above the highest
///          UR_BIT index used by the enumerators. The encode functions
///          document ::UR_RESULT_ERROR_INVALID_ENUMERATION when
///          `mask & value` is non-zero.
#define UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK 0xfffffff8

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map l2 promotion
///
/// @details
/// - ::ur_exp_tensor_map_l2_promotion_flags_t is the 32-bit unsigned integer
///   type used to carry values of this enumeration.
/// - Note that the "no promotion" choice is itself a flag value
///   (UR_BIT(0)), not zero.
typedef uint32_t ur_exp_tensor_map_l2_promotion_flags_t;
typedef enum ur_exp_tensor_map_l2_promotion_flag_t {
/// No promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE = UR_BIT(0),
/// 64B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B = UR_BIT(1),
/// 128B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B = UR_BIT(2),
/// 256B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B = UR_BIT(3),
/// @cond
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_l2_promotion_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_l2_promotion_flags_t
/// @details Has bits 4 through 31 set, i.e. the bits above the highest
///          UR_BIT index used by the enumerators. The encode functions
///          document ::UR_RESULT_ERROR_INVALID_ENUMERATION when
///          `mask & value` is non-zero.
#define UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK 0xfffffff0

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map swizzle
///
/// @details
/// - ::ur_exp_tensor_map_swizzle_flags_t is the 32-bit unsigned integer
///   type used to carry values of this enumeration.
/// - Note that the "no swizzle" choice is itself a flag value (UR_BIT(0)),
///   not zero.
typedef uint32_t ur_exp_tensor_map_swizzle_flags_t;
typedef enum ur_exp_tensor_map_swizzle_flag_t {
/// No swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE = UR_BIT(0),
/// 32B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B = UR_BIT(1),
/// 64B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B = UR_BIT(2),
/// 128B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B = UR_BIT(3),
/// @cond
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_swizzle_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_swizzle_flags_t
/// @details Has bits 4 through 31 set, i.e. the bits above the highest
///          UR_BIT index used by the enumerators. The encode functions
///          document ::UR_RESULT_ERROR_INVALID_ENUMERATION when
///          `mask & value` is non-zero.
#define UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK 0xfffffff0

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map OOB (out-of-bounds) fill
///
/// @details
/// - ::ur_exp_tensor_map_oob_fill_flags_t is the 32-bit unsigned integer
///   type used to carry values of this enumeration.
/// - Note that the "no OOB fill" choice is itself a flag value (UR_BIT(0)),
///   not zero.
typedef uint32_t ur_exp_tensor_map_oob_fill_flags_t;
typedef enum ur_exp_tensor_map_oob_fill_flag_t {
/// No OOB fill
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE = UR_BIT(0),
/// Refer to NVIDIA docs
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA = UR_BIT(1),
/// @cond
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_oob_fill_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_oob_fill_flags_t
/// @details Has bits 2 through 31 set, i.e. the bits above the highest
///          UR_BIT index used by the enumerators. The encode functions
///          document ::UR_RESULT_ERROR_INVALID_ENUMERATION when
///          `mask & value` is non-zero.
#define UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK 0xfffffffc

///////////////////////////////////////////////////////////////////////////////
/// @brief Encode tensor map with image data
///
/// @details
/// - Map encode using im2col.
/// - On success the resulting tensor map handle is written through
///   `hTensorMap`.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`
/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`
/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`
/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`
/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == GlobalAddress`
/// + `NULL == GlobalDim`
/// + `NULL == GlobalStrides`
/// + `NULL == PixelBoxLowerCorner`
/// + `NULL == PixelBoxUpperCorner`
/// + `NULL == ElementStrides`
/// + `NULL == hTensorMap`
/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT
/// + `TensorRank < 3`
UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp(
/// [in] Handle of the device object; must not be NULL.
ur_device_handle_t hDevice,
/// [in] Data type of the tensor object.
ur_exp_tensor_map_data_type_flags_t TensorMapType,
/// [in] Dimensionality of tensor; must be at least 3.
uint32_t TensorRank,
/// [in] Starting address of memory region described by tensor; must not
/// be NULL.
void *GlobalAddress,
/// [in] Array containing tensor size (number of elements) along each of
/// the TensorRank dimensions.
const uint64_t *GlobalDim,
/// [in] Array containing stride size (in bytes) along each of the
/// TensorRank - 1 dimensions.
const uint64_t *GlobalStrides,
/// [in] Array containing DHW dimensions of lower box corner.
const int *PixelBoxLowerCorner,
/// [in] Array containing DHW dimensions of upper box corner.
const int *PixelBoxUpperCorner,
/// [in] Number of channels per pixel.
uint32_t ChannelsPerPixel,
/// [in] Number of pixels per column.
uint32_t PixelsPerColumn,
/// [in] Array containing traversal stride in each of the TensorRank
/// dimensions.
const uint32_t *ElementStrides,
/// [in] Type of interleaved layout the tensor addresses.
ur_exp_tensor_map_interleave_flags_t Interleave,
/// [in] Bank swizzling pattern inside shared memory.
ur_exp_tensor_map_swizzle_flags_t Swizzle,
/// [in] L2 promotion size.
ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
/// [in] Indicates whether zero or special NaN constant will be used to
/// fill out-of-bounds elements.
ur_exp_tensor_map_oob_fill_flags_t OobFill,
/// [out] Pointer to the location that receives the handle of the tensor
/// map object; must not be NULL.
ur_exp_tensor_map_handle_t *hTensorMap);

///////////////////////////////////////////////////////////////////////////////
/// @brief Encode tensor map with tiled data
///
/// @details
/// - Tiled map encode.
/// - On success the resulting tensor map handle is written through
///   `hTensorMap`.
/// - NOTE(review): this entry point appears to correspond to CUDA's
///   cuTensorMapEncodeTiled, whose documentation requires a rank of at
///   least 3 only when interleave is not "none". Confirm that rejecting
///   `TensorRank < 3` unconditionally is intended.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`
/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`
/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`
/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`
/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == GlobalAddress`
/// + `NULL == GlobalDim`
/// + `NULL == GlobalStrides`
/// + `NULL == BoxDim`
/// + `NULL == ElementStrides`
/// + `NULL == hTensorMap`
/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT
/// + `TensorRank < 3`
UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp(
/// [in] Handle of the device object; must not be NULL.
ur_device_handle_t hDevice,
/// [in] Data type of the tensor object.
ur_exp_tensor_map_data_type_flags_t TensorMapType,
/// [in] Dimensionality of tensor; must be at least 3.
uint32_t TensorRank,
/// [in] Starting address of memory region described by tensor; must not
/// be NULL.
void *GlobalAddress,
/// [in] Array containing tensor size (number of elements) along each of
/// the TensorRank dimensions.
const uint64_t *GlobalDim,
/// [in] Array containing stride size (in bytes) along each of the
/// TensorRank - 1 dimensions.
const uint64_t *GlobalStrides,
/// [in] Array containing traversal box size (number of elements) along
/// each of the TensorRank dimensions. Specifies how many elements are to
/// be traversed along each tensor dimension.
const uint32_t *BoxDim,
/// [in] Array containing traversal stride in each of the TensorRank
/// dimensions.
const uint32_t *ElementStrides,
/// [in] Type of interleaved layout the tensor addresses.
ur_exp_tensor_map_interleave_flags_t Interleave,
/// [in] Bank swizzling pattern inside shared memory.
ur_exp_tensor_map_swizzle_flags_t Swizzle,
/// [in] L2 promotion size.
ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
/// [in] Indicates whether zero or special NaN constant will be used to
/// fill out-of-bounds elements.
ur_exp_tensor_map_oob_fill_flags_t OobFill,
/// [out] Pointer to the location that receives the handle of the tensor
/// map object; must not be NULL.
ur_exp_tensor_map_handle_t *hTensorMap);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -14572,49 +14316,6 @@ typedef struct ur_command_buffer_command_get_info_exp_params_t {
size_t **ppPropSizeRet;
} ur_command_buffer_command_get_info_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urTensorMapEncodeIm2ColExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the parameters of
/// urTensorMapEncodeIm2ColExp; each field's type is the corresponding
/// parameter type with one additional level of indirection (so the
/// pointer-typed output parameter becomes a pointer-to-pointer).
typedef struct ur_tensor_map_encode_im_2_col_exp_params_t {
ur_device_handle_t *phDevice;
ur_exp_tensor_map_data_type_flags_t *pTensorMapType;
uint32_t *pTensorRank;
void **pGlobalAddress;
const uint64_t **pGlobalDim;
const uint64_t **pGlobalStrides;
const int **pPixelBoxLowerCorner;
const int **pPixelBoxUpperCorner;
uint32_t *pChannelsPerPixel;
uint32_t *pPixelsPerColumn;
const uint32_t **pElementStrides;
ur_exp_tensor_map_interleave_flags_t *pInterleave;
ur_exp_tensor_map_swizzle_flags_t *pSwizzle;
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion;
ur_exp_tensor_map_oob_fill_flags_t *pOobFill;
ur_exp_tensor_map_handle_t **phTensorMap;
} ur_tensor_map_encode_im_2_col_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urTensorMapEncodeTiledExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the parameters of
/// urTensorMapEncodeTiledExp; each field's type is the corresponding
/// parameter type with one additional level of indirection (so the
/// pointer-typed output parameter becomes a pointer-to-pointer).
typedef struct ur_tensor_map_encode_tiled_exp_params_t {
ur_device_handle_t *phDevice;
ur_exp_tensor_map_data_type_flags_t *pTensorMapType;
uint32_t *pTensorRank;
void **pGlobalAddress;
const uint64_t **pGlobalDim;
const uint64_t **pGlobalStrides;
const uint32_t **pBoxDim;
const uint32_t **pElementStrides;
ur_exp_tensor_map_interleave_flags_t *pInterleave;
ur_exp_tensor_map_swizzle_flags_t *pSwizzle;
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion;
ur_exp_tensor_map_oob_fill_flags_t *pOobFill;
ur_exp_tensor_map_handle_t **phTensorMap;
} ur_tensor_map_encode_tiled_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urUsmP2PEnablePeerAccessExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
2 changes: 0 additions & 2 deletions include/ur_api_funcs.def
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,6 @@ _UR_API(urCommandBufferUpdateSignalEventExp)
_UR_API(urCommandBufferUpdateWaitEventsExp)
_UR_API(urCommandBufferGetInfoExp)
_UR_API(urCommandBufferCommandGetInfoExp)
_UR_API(urTensorMapEncodeIm2ColExp)
_UR_API(urTensorMapEncodeTiledExp)
_UR_API(urUsmP2PEnablePeerAccessExp)
_UR_API(urUsmP2PDisablePeerAccessExp)
_UR_API(urUsmP2PPeerAccessGetInfoExp)
Expand Down
46 changes: 0 additions & 46 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1685,51 +1685,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)(
ur_api_version_t, ur_command_buffer_exp_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urTensorMapEncodeIm2ColExp
/// @details Pointer to a UR_APICALL function returning ::ur_result_t and
/// taking 16 arguments: device handle, data type flags, tensor rank, global
/// address, two `const uint64_t *` arrays, two `const int *` arrays, two
/// `uint32_t` counts, a `const uint32_t *` array, the interleave / swizzle /
/// L2 promotion / OOB fill flags, and a pointer to the output tensor map
/// handle.
typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeIm2ColExp_t)(
ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *,
const uint64_t *, const uint64_t *, const int *, const int *, uint32_t,
uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t,
ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t,
ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urTensorMapEncodeTiledExp
/// @details Pointer to a UR_APICALL function returning ::ur_result_t and
/// taking 13 arguments: device handle, data type flags, tensor rank, global
/// address, two `const uint64_t *` arrays, two `const uint32_t *` arrays,
/// the interleave / swizzle / L2 promotion / OOB fill flags, and a pointer
/// to the output tensor map handle.
typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeTiledExp_t)(
ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *,
const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *,
ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t,
ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t,
ur_exp_tensor_map_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of TensorMapExp function pointers
/// @details Holds one function pointer per tensor map entry point: one of
/// type ::ur_pfnTensorMapEncodeIm2ColExp_t and one of type
/// ::ur_pfnTensorMapEncodeTiledExp_t.
typedef struct ur_tensor_map_exp_dditable_t {
ur_pfnTensorMapEncodeIm2ColExp_t pfnEncodeIm2ColExp;
ur_pfnTensorMapEncodeTiledExp_t pfnEncodeTiledExp;
} ur_tensor_map_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Exported function for filling application's TensorMapExp table
/// with current process' addresses
///
/// @details
/// - The table to fill is a ::ur_tensor_map_exp_dditable_t supplied by the
///   caller through `pDdiTable`.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable(
/// [in] API version requested
ur_api_version_t version,
/// [in,out] pointer to table of DDI function pointers
ur_tensor_map_exp_dditable_t *pDdiTable);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urGetTensorMapExpProcAddrTable
/// @details Pointer to a UR_APICALL function returning ::ur_result_t and
/// taking the requested API version plus a pointer to the
/// ::ur_tensor_map_exp_dditable_t to fill.
typedef ur_result_t(UR_APICALL *ur_pfnGetTensorMapExpProcAddrTable_t)(
ur_api_version_t, ur_tensor_map_exp_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urUsmP2PEnablePeerAccessExp
typedef ur_result_t(UR_APICALL *ur_pfnUsmP2PEnablePeerAccessExp_t)(
Expand Down Expand Up @@ -1948,7 +1903,6 @@ typedef struct ur_dditable_t {
ur_usm_dditable_t USM;
ur_usm_exp_dditable_t USMExp;
ur_command_buffer_exp_dditable_t CommandBufferExp;
ur_tensor_map_exp_dditable_t TensorMapExp;
ur_usm_p2p_exp_dditable_t UsmP2PExp;
ur_virtual_mem_dditable_t VirtualMem;
ur_device_dditable_t Device;
Expand Down
Loading
Loading