Skip to content

Commit 407b51b

Browse files
committed
Replace loader handles with field at start of handle data
This changes the loader's handle logic from wrapped pointers to a ddi table stored at the start of the handle struct itself.
1 parent aa10700 commit 407b51b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

82 files changed

+1320
-4434
lines changed

unified-runtime/scripts/generate_code.py

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -258,25 +258,6 @@ def _mako_loader_cpp(path, namespace, tags, version, specs, meta):
258258
"make_loader_cpp path %s namespace %s version %s\n" % (path, namespace, version)
259259
)
260260
loc = 0
261-
template = "ldrddi.hpp.mako"
262-
fin = os.path.join(templates_dir, template)
263-
264-
name = "%s_ldrddi" % (namespace)
265-
filename = "%s.hpp" % (name)
266-
fout = os.path.join(path, filename)
267-
268-
print("Generating %s..." % fout)
269-
loc += util.makoWrite(
270-
fin,
271-
fout,
272-
name=name,
273-
ver=version,
274-
namespace=namespace,
275-
tags=tags,
276-
specs=specs,
277-
meta=meta,
278-
)
279-
280261
template = "ldrddi.cpp.mako"
281262
fin = os.path.join(templates_dir, template)
282263

unified-runtime/scripts/templates/helper.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,20 +1510,6 @@ def get_initial_null_set(obj):
15101510
return ""
15111511

15121512

1513-
"""
1514-
Public:
1515-
returns true if the function always wraps output pointers in loader handles
1516-
"""
1517-
1518-
1519-
def always_wrap_outputs(obj):
1520-
cname = obj_traits.class_name(obj)
1521-
return (cname, obj["name"]) in [
1522-
("$xProgram", "Link"),
1523-
("$xProgram", "LinkExp"),
1524-
]
1525-
1526-
15271513
"""
15281514
Private:
15291515
returns the list of parameters, filtering based on desc tags

unified-runtime/scripts/templates/ldrddi.cpp.mako

Lines changed: 22 additions & 254 deletions
Large diffs are not rendered by default.

unified-runtime/scripts/templates/ldrddi.hpp.mako

Lines changed: 0 additions & 57 deletions
This file was deleted.

unified-runtime/scripts/templates/ur_interface_loader.cpp.mako

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ from templates import helper as th
2020
//===----------------------------------------------------------------------===//
2121
#include <${n}_api.h>
2222
#include <${n}_ddi.h>
23+
#include <mutex>
2324

2425
#include "ur_interface_loader.hpp"
2526

@@ -68,22 +69,45 @@ ${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}(
6869
} // extern "C"
6970
#endif
7071

71-
#ifdef UR_STATIC_ADAPTER_${Adapter}
72-
namespace ur::${adapter} {
73-
ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) {
74-
if (ddi == nullptr) {
72+
namespace {
73+
ur_result_t populateDdiTable(ur_dditable_t *ddi) {
74+
if (ddi == nullptr) {
7575
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
7676
}
7777

7878
ur_result_t result;
7979

80+
#ifdef UR_STATIC_ADAPTER_${Adapter}
81+
#define NAMESPACE_ ::ur::${adapter}
82+
#else
83+
#define NAMESPACE_
84+
#endif
85+
8086
%for tbl in th.get_pfntables(specs, meta, n, tags):
81-
result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} );
87+
result = NAMESPACE_::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} );
8288
if (result != UR_RESULT_SUCCESS)
8389
return result;
8490
%endfor
8591

92+
#undef NAMESPACE_
93+
8694
return result;
8795
}
8896
}
97+
98+
99+
namespace ur::${adapter} {
100+
const ${x}_dditable_t *ddi_getter::value() {
101+
static std::once_flag flag;
102+
static ${x}_dditable_t table;
103+
104+
std::call_once(flag, []() { populateDdiTable(&table); });
105+
return &table;
106+
}
107+
108+
#ifdef UR_STATIC_ADAPTER_${Adapter}
109+
ur_result_t urAdapterGetDdiTables(${x}_dditable_t *ddi) {
110+
return populateDdiTable(ddi);
111+
}
89112
#endif
113+
}

unified-runtime/scripts/templates/ur_interface_loader.hpp.mako

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ from templates import helper as th
1818
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
1919
//
2020
//===----------------------------------------------------------------------===//
21+
#pragma once
22+
2123
#include <${n}_api.h>
2224
#include <${n}_ddi.h>
2325

@@ -36,4 +38,8 @@ ${x}_result_t ${th.make_func_name(n, tags, obj)}(
3638
#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
3739
ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi);
3840
#endif
41+
42+
struct ddi_getter {
43+
const static ${x}_dditable_t *value();
44+
};
3945
}

unified-runtime/source/adapters/cuda/adapter.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "logger/ur_logger.hpp"
1515
#include "tracing.hpp"
1616

17-
struct ur_adapter_handle_t_ {
17+
struct ur_adapter_handle_t_ : ur_handle_t_ {
1818
std::atomic<uint32_t> RefCount = 0;
1919
std::mutex Mutex;
2020
struct cuda_tracing_context_t_ *TracingCtx = nullptr;
@@ -41,7 +41,8 @@ class ur_legacy_sink : public logger::Sink {
4141
// through UR entry points.
4242
// https://github.com/oneapi-src/unified-runtime/issues/1330
4343
ur_adapter_handle_t_::ur_adapter_handle_t_()
44-
: logger(logger::get_logger("cuda",
44+
: ur_handle_t_(),
45+
logger(logger::get_logger("cuda",
4546
/*default_log_level*/ logger::Level::ERR)) {
4647

4748
if (std::getenv("UR_LOG_CUDA") != nullptr)

unified-runtime/source/adapters/cuda/common.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,8 @@ inline umf_result_t setCUMemoryProviderParams(
9393
}
9494

9595
} // namespace umf
96+
97+
struct cuda_ddi_getter {
98+
const static ur_dditable_t *value();
99+
};
100+
using ur_handle_t_ = ur_handle_base_t_<cuda_ddi_getter>;

unified-runtime/source/adapters/cuda/context.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ CreateHostMemoryProviderPool(ur_device_handle_t_ *DeviceHandle,
108108
return UR_RESULT_SUCCESS;
109109
}
110110

111-
struct ur_context_handle_t_ {
111+
struct ur_context_handle_t_ : ur_handle_t_ {
112112

113113
struct deleter_data {
114114
ur_context_extended_deleter_t Function;
@@ -126,7 +126,7 @@ struct ur_context_handle_t_ {
126126
umf_memory_pool_handle_t MemoryPoolHost = nullptr;
127127

128128
ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
129-
: Devices{Devs, Devs + NumDevices}, RefCount{1} {
129+
: ur_handle_t_(), Devices{Devs, Devs + NumDevices}, RefCount{1} {
130130
for (auto &Dev : Devices) {
131131
urDeviceRetain(Dev);
132132
}

unified-runtime/source/adapters/cuda/device.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#include "common.hpp"
1818

19-
struct ur_device_handle_t_ {
19+
struct ur_device_handle_t_ : ur_handle_t_ {
2020
private:
2121
using native_type = CUdevice;
2222

@@ -40,8 +40,8 @@ struct ur_device_handle_t_ {
4040
public:
4141
ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase,
4242
ur_platform_handle_t platform, uint32_t DevIndex)
43-
: CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), RefCount{1},
44-
Platform(platform), DeviceIndex{DevIndex} {
43+
: ur_handle_t_(), CuDevice(cuDevice), CuContext(cuContext),
44+
EvBase(evBase), RefCount{1}, Platform(platform), DeviceIndex{DevIndex} {
4545

4646
UR_CHECK_ERROR(cuDeviceGetAttribute(
4747
&MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,

unified-runtime/source/adapters/cuda/event.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,
2424
native_type EvEnd, native_type EvQueued,
2525
native_type EvStart, CUstream Stream,
2626
uint32_t StreamToken)
27-
: CommandType{Type}, RefCount{1}, HasOwnership{true},
27+
: ur_handle_t_(), CommandType{Type}, RefCount{1}, HasOwnership{true},
2828
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
2929
StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart},
3030
EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} {
@@ -34,11 +34,12 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,
3434

3535
ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context,
3636
CUevent EventNative)
37-
: CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false},
38-
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
39-
IsInterop{true}, StreamToken{std::numeric_limits<uint32_t>::max()},
40-
EventID{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr},
41-
Queue{nullptr}, Stream{nullptr}, Context{Context} {
37+
: ur_handle_t_(), CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1},
38+
HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false},
39+
IsStarted{false}, IsInterop{true},
40+
StreamToken{std::numeric_limits<uint32_t>::max()}, EventID{0},
41+
EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr},
42+
Stream{nullptr}, Context{Context} {
4243
urContextRetain(Context);
4344
}
4445

unified-runtime/source/adapters/cuda/event.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
/// UR Event mapping to CUevent
1919
///
20-
struct ur_event_handle_t_ {
20+
struct ur_event_handle_t_ : ur_handle_t_ {
2121
public:
2222
using native_type = CUevent;
2323

unified-runtime/source/adapters/cuda/kernel.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
/// A compiler pass converts the UR API local memory model into the
3535
/// CUDA shared model. This object simply calculates the total of
3636
/// shared memory, and the initial offsets of each parameter.
37-
struct ur_kernel_handle_t_ {
37+
struct ur_kernel_handle_t_ : ur_handle_t_ {
3838
using native_type = CUfunction;
3939

4040
native_type Function;
@@ -250,8 +250,9 @@ struct ur_kernel_handle_t_ {
250250
ur_kernel_handle_t_(CUfunction Func, CUfunction FuncWithOffsetParam,
251251
const char *Name, ur_program_handle_t Program,
252252
ur_context_handle_t Context)
253-
: Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam},
254-
Name{Name}, Context{Context}, Program{Program}, RefCount{1} {
253+
: ur_handle_t_(), Function{Func},
254+
FunctionWithOffsetParam{FuncWithOffsetParam}, Name{Name},
255+
Context{Context}, Program{Program}, RefCount{1} {
255256
urProgramRetain(Program);
256257
urContextRetain(Context);
257258
/// Note: this code assumes that there is only one device per context

unified-runtime/source/adapters/cuda/memory.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ struct SurfaceMem {
310310
/// is on a different device, marked by
311311
/// LastQueueWritingToMemObj->getDevice()
312312
///
313-
struct ur_mem_handle_t_ {
313+
struct ur_mem_handle_t_ : ur_handle_t_ {
314314
// Context where the memory object is accessible
315315
ur_context_handle_t Context;
316316

@@ -345,17 +345,17 @@ struct ur_mem_handle_t_ {
345345
/// Constructs the UR mem handler for a non-typed allocation ("buffer")
346346
ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags,
347347
BufferMem::AllocMode Mode, void *HostPtr, size_t Size)
348-
: Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
348+
: ur_handle_t_(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
349349
HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false),
350350
Mem{std::in_place_type<BufferMem>, Ctxt, this, Mode, HostPtr, Size} {
351351
urContextRetain(Context);
352352
};
353353

354354
// Subbuffer constructor
355355
ur_mem_handle_t_(ur_mem_handle_t Parent, size_t SubBufferOffset)
356-
: Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags},
357-
HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(),
358-
false),
356+
: ur_handle_t_(), Context{Parent->Context}, RefCount{1},
357+
MemFlags{Parent->MemFlags}, HaveMigratedToDeviceSinceLastWrite(
358+
Parent->Context->Devices.size(), false),
359359
Mem{BufferMem{std::get<BufferMem>(Parent->Mem)}} {
360360
auto &SubBuffer = std::get<BufferMem>(Mem);
361361
SubBuffer.Parent = Parent;
@@ -376,7 +376,7 @@ struct ur_mem_handle_t_ {
376376
ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags,
377377
ur_image_format_t ImageFormat, ur_image_desc_t ImageDesc,
378378
void *HostPtr)
379-
: Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
379+
: ur_handle_t_(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
380380
HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false),
381381
Mem{std::in_place_type<SurfaceMem>,
382382
Ctxt,

unified-runtime/source/adapters/cuda/physical_mem.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
/// UR queue mapping on physical memory allocations used in virtual memory
2121
/// management.
2222
///
23-
struct ur_physical_mem_handle_t_ {
23+
struct ur_physical_mem_handle_t_ : ur_handle_t_ {
2424
using native_type = CUmemGenericAllocationHandle;
2525

2626
std::atomic_uint32_t RefCount;
@@ -33,8 +33,8 @@ struct ur_physical_mem_handle_t_ {
3333
ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx,
3434
ur_device_handle_t Device, size_t Size,
3535
ur_physical_mem_properties_t Properties)
36-
: RefCount(1), PhysicalMem(PhysMem), Context(Ctx), Device(Device),
37-
Size(Size), Properties(Properties) {
36+
: ur_handle_t_(), RefCount(1), PhysicalMem(PhysMem), Context(Ctx),
37+
Device(Device), Size(Size), Properties(Properties) {
3838
urContextRetain(Context);
3939
urDeviceRetain(Device);
4040
}

unified-runtime/source/adapters/cuda/platform.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12+
#include "common.hpp"
1213
#include <ur/ur.hpp>
1314
#include <vector>
1415

15-
struct ur_platform_handle_t_ {
16+
struct ur_platform_handle_t_ : ur_handle_t_ {
1617
std::vector<std::unique_ptr<ur_device_handle_t_>> Devices;
1718
};

0 commit comments

Comments
 (0)