-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathxpu_mock.cpp
258 lines (201 loc) · 7.3 KB
/
xpu_mock.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#include "xpu_mock.h"
#include <Python.h>
#include <dlfcn.h> // dladdr
#include <execinfo.h>
#include <frameobject.h>
#include <cstdint>
#include <cstring>
#include <functional>
#include <map>
#include <mutex>
#include <vector>
#include "backtrace.h"
#include "elf_parser.h"
#include "hook.h"
#include "hooks/print_hook.h"
#include "logger/StringRef.h"
#include "logger/logger.h"
#include "statistic.h"
#include "support.h"
// NB: do not give a hook the same name as the original function; doing so
// results in the replacement not taking effect.
//
// DEF_FUNCTION_IMPL(name, RetT, args...) expands, in this order, to:
//   1. `name_t`      - alias for a pointer to a function `RetT(args...)`;
//   2. `origin_name` - a variable of that pointer type, initialised to
//                      nullptr;
//   3. the header `RetT name(args...)`, to be followed by a body (or `;`).
#define DEF_FUNCTION_IMPL(name, RetT, ...)  \
  using name##_t = RetT (*)(__VA_ARGS__);   \
  name##_t origin_##name = nullptr;         \
  RetT name(__VA_ARGS__)

// Shorthands for the two return types used by the hooks in this file.
#define DEF_FUNCTION_VOID(name, ...) DEF_FUNCTION_IMPL(name, void, __VA_ARGS__)
#define DEF_FUNCTION_INT(name, ...) DEF_FUNCTION_IMPL(name, int, __VA_ARGS__)
namespace {
//-------------------------- xpu api --------------------------//
// Hook for xpu_current_device: hands `devid` to the original implementation
// and returns its result unchanged.
DEF_FUNCTION_INT(xpu_current_device, int* devid) {
  const int status = origin_xpu_current_device(devid);
  return status;
}
// Hook for xpu_malloc.
//
// Steps:
//   1. Query the current device id through origin_xpu_current_device; a
//      non-zero result is returned immediately and nothing is allocated.
//   2. Forward the request to origin_xpu_malloc. On a non-zero result a
//      warning is logged together with the memory statistics collection and
//      the result is returned.
//   3. On success, record the allocation (current library name, device id,
//      returned pointer, size, kind) in MemoryStatisticCollection.
//
// Returns the result code of whichever original call ran last.
DEF_FUNCTION_INT(xpu_malloc, void** pdevptr, uint64_t size, int kind) {
  CHECK(origin_xpu_current_device, "xpu_current_device not binded");

  int deviceId = 0;
  const int queryStatus = origin_xpu_current_device(&deviceId);
  if (queryStatus != 0) {
    return queryStatus;
  }

  const int allocStatus = origin_xpu_malloc(pdevptr, size, kind);
  if (allocStatus != 0) {
    LOG(WARN) << "xpu malloc device memory failed!\n"
              << hook::MemoryStatisticCollection::instance();
    return allocStatus;
  }

  hook::MemoryStatisticCollection::instance().record_alloc(
      hook::HookRuntimeContext::instance().curLibName(), deviceId, *pdevptr,
      size, kind);
  return allocStatus;
}
// Hook for xpu_free.
//
// Queries the current device id, forwards the release to origin_xpu_free and,
// only when the release succeeded, removes the pointer from the memory
// statistics. This mirrors xpu_malloc, which records an allocation only on
// success.
//
// Returns the result code of the last original call that ran:
//   - the device query's code if it failed (nothing is freed in that case);
//   - otherwise the code returned by origin_xpu_free.
DEF_FUNCTION_INT(xpu_free, void* devptr) {
  int r = 0;
  int devId = 0;
  CHECK(origin_xpu_current_device, "xpu_current_device not binded");
  r = origin_xpu_current_device(&devId);
  if (r != 0) {
    return r;
  }
  r = origin_xpu_free(devptr);
  if (r != 0) {
    // The runtime rejected the free, so do not record the pointer as
    // released: doing so would make the statistics under-report usage.
    LOG(WARN) << "xpu free device memory failed with result:" << r;
    return r;
  }
  hook::MemoryStatisticCollection::instance().record_free(
      hook::HookRuntimeContext::instance().curLibName(), devId, devptr);
  return r;
}
// Hook for xpu_wait: forwards `stream` to the original implementation and
// returns its result unchanged.
DEF_FUNCTION_INT(xpu_wait, void* stream) {
  const int status = origin_xpu_wait(stream);
  return status;
}
// Hook for xpu_memcpy: forwards all four arguments, in order, to the original
// implementation and returns its result unchanged.
DEF_FUNCTION_INT(xpu_memcpy, void* dst, const void* src, uint64_t size,
                 int kind) {
  const int status = origin_xpu_memcpy(dst, src, size, kind);
  return status;
}
// Hook for xpu_set_device: forwards `devid` to the original implementation
// and returns its result unchanged.
DEF_FUNCTION_INT(xpu_set_device, int devid) {
  const int status = origin_xpu_set_device(devid);
  return status;
}
// Hook for xpu_launch_async: forwards `func` to the original implementation
// and returns its result unchanged.
DEF_FUNCTION_INT(xpu_launch_async, void* func) {
  const int status = origin_xpu_launch_async(func);
  return status;
}
// Args parser registered on the xpu_launch_async hook feature.
//
// Fetches the symbol table associated with the library name reported by
// HookRuntimeContext::curLibName() and returns what that table's
// lookUpSymbol() yields for `func`.
std::string launch_args_parser(void* func) {
  auto currentLib = hook::HookRuntimeContext::instance().curLibName();
  auto&& symbolTable = hook::getSymbolTable(currentLib);
  return symbolTable->lookUpSymbol(func);
}
// Hook for xpu_stream_create: forwards `pstream` to the original
// implementation and returns its result unchanged.
DEF_FUNCTION_INT(xpu_stream_create, void** pstream) {
  const int status = origin_xpu_stream_create(pstream);
  return status;
}
// Hook for xpu_stream_destroy: forwards `stream` to the original
// implementation and returns its result unchanged.
DEF_FUNCTION_INT(xpu_stream_destroy, void* stream) {
  const int status = origin_xpu_stream_destroy(stream);
  return status;
}
//-------------------------- cuda api --------------------------//
// Hook for cudaSetDevice: forwards `device` to the original implementation
// and returns its result unchanged.
DEF_FUNCTION_INT(cudaSetDevice, int device) {
  const int status = origin_cudaSetDevice(device);
  return status;
}
// Hook for cudaGetDevice: forwards `device` to the original implementation
// and returns its result unchanged.
DEF_FUNCTION_INT(cudaGetDevice, int* device) {
  const int status = origin_cudaGetDevice(device);
  return status;
}
// Hook for cudaMalloc.
//
// Steps:
//   1. Query the current device id through origin_cudaGetDevice; a non-zero
//      result is returned immediately and nothing is allocated.
//   2. Forward the request to origin_cudaMalloc. On a non-zero result a
//      warning is logged together with the memory statistics collection and
//      the result is returned.
//   3. On success, record the allocation (current library name, device id,
//      returned pointer, size) in MemoryStatisticCollection with kind 0.
//
// Returns the result code of whichever original call ran last.
DEF_FUNCTION_INT(cudaMalloc, void** devPtr, size_t size) {
  CHECK(origin_cudaGetDevice, "cudaGetDevice not binded");

  int deviceId = 0;
  const int queryStatus = origin_cudaGetDevice(&deviceId);
  if (queryStatus != 0) {
    return queryStatus;
  }

  const int allocStatus = origin_cudaMalloc(devPtr, size);
  if (allocStatus != 0) {
    LOG(WARN) << "xpu cudaMalloc device memory failed!\n"
              << hook::MemoryStatisticCollection::instance();
    return allocStatus;
  }

  hook::MemoryStatisticCollection::instance().record_alloc(
      hook::HookRuntimeContext::instance().curLibName(), deviceId, *devPtr,
      size,
      /*kind=GLOBAL_MEM*/ 0);
  return allocStatus;
}
// Hook for cudaFree.
//
// Queries the current device id, forwards the release to origin_cudaFree and,
// only when the release succeeded, removes the pointer from the memory
// statistics. This mirrors cudaMalloc, which records an allocation only on
// success.
//
// Returns the result code of the last original call that ran:
//   - the device query's code if it failed (nothing is freed in that case);
//   - otherwise the code returned by origin_cudaFree.
DEF_FUNCTION_INT(cudaFree, void* devPtr) {
  int r = 0;
  int devId = 0;
  CHECK(origin_cudaGetDevice, "cudaGetDevice not binded");
  r = origin_cudaGetDevice(&devId);
  if (r != 0) {
    return r;
  }
  r = origin_cudaFree(devPtr);
  if (r != 0) {
    // The runtime rejected the free, so do not record the pointer as
    // released: doing so would make the statistics under-report usage.
    LOG(WARN) << "xpu cudaFree device memory failed with result:" << r;
    return r;
  }
  hook::MemoryStatisticCollection::instance().record_free(
      hook::HookRuntimeContext::instance().curLibName(), devId, devPtr);
  return r;
}
// Hook for cudaMemcpy: forwards all four arguments, in order, to the original
// implementation and returns its result unchanged.
DEF_FUNCTION_INT(cudaMemcpy, void* dst, const void* src, size_t count,
                 int kind) {
  const int status = origin_cudaMemcpy(dst, src, count, kind);
  return status;
}
/// Disabled alternative definition; per the original note it needs
/// gcc version > 9.0:
/// #define BUILD_FEATURE(name) hook::FHookFeature(STR_TO_TYPE(#name), &name,
/// &origin_##name)
///
/// Active definition: builds a hook::HookFeature for `name` from three
/// arguments - the stringized symbol name, the address of the function
/// `name`, and the address of the `origin_<name>` pointer variable.
#define BUILD_FEATURE(name) hook::HookFeature(#name, &name, &origin_##name)
// Hook installer describing the XPU and CUDA runtime functions to intercept.
// It derives from hook::HookInstallerWrap parameterised on itself and exposes
// targetLib(), the `symbols` table and onSuccess().
class XpuRuntimeApiHook : public hook::HookInstallerWrap<XpuRuntimeApiHook> {
 public:
  // Returns true only when `name` contains neither "libxpurt.so" nor
  // "libcudart.so".
  bool targetLib(const char* name) {
    adt::StringRef libName(name);
    if (libName.contain("libxpurt.so")) {
      return false;
    }
    return !libName.contain("libcudart.so");
  }

  // One entry per hooked function. The xpu_launch_async entry additionally
  // creates a symbol table for the library it was found in and uses
  // launch_args_parser as its args parser.
  //
  // need gcc version > 9.0
  // hook::FHookFeature symbols[14] = {
  hook::HookFeature symbols[14] = {
      BUILD_FEATURE(xpu_malloc),
      BUILD_FEATURE(xpu_free),
      BUILD_FEATURE(xpu_current_device),
      BUILD_FEATURE(xpu_set_device),
      BUILD_FEATURE(xpu_wait),
      BUILD_FEATURE(xpu_memcpy),
      BUILD_FEATURE(xpu_launch_async)
          .setGetNewCallback([](const hook::OriginalInfo& origin) {
            hook::createSymbolTable(origin.libName, origin.baseHeadPtr);
          })
          .setArgsParser(&launch_args_parser),
      BUILD_FEATURE(xpu_stream_create),
      BUILD_FEATURE(xpu_stream_destroy),
      BUILD_FEATURE(cudaMalloc),
      BUILD_FEATURE(cudaFree),
      BUILD_FEATURE(cudaMemcpy),
      BUILD_FEATURE(cudaSetDevice),
      BUILD_FEATURE(cudaGetDevice),
  };

  // Logs the name of the symbol reported by curSymName() as installed.
  void onSuccess() {
    LOG(INFO) << "install " << curSymName() << " success";
  }
};
// Hook installer that replaces both "cudaSetDevice" and "xpu_set_device" with
// unifySetDevice(), which calls the saved xpu function first and the saved
// cuda function second with the same device id.
struct PatchRuntimeHook : public hook::HookInstallerWrap<PatchRuntimeHook> {
  // Replacement for both set-device entry points.
  //
  // Logs `devId`, calls the saved xpu_set_device, checks that it returned 0
  // via CHECK_EQ, then returns the result of the saved cudaSetDevice.
  //
  // NOTE(review): both saved pointers start as nullptr and are only filled in
  // by newFuncPtr(); nothing here checks them before the call - confirm that
  // both symbols are always patched before this function can run.
  static int unifySetDevice(int devId) {
    LOG(INFO) << "devId:" << devId;
    PatchRuntimeHook* self = PatchRuntimeHook::instance();
    auto ret = self->xpu_set_device_(devId);
    CHECK_EQ(ret, 0, "xpu_set_device fail with result:{}", ret);
    return self->cuda_set_device_(devId);
  }

  using SetDevFuncType_t = decltype(&unifySetDevice);

  // Returns true only when `name` contains neither "libcudart.so" nor
  // "libxpurt.so".
  bool targetLib(const char* name) {
    adt::StringRef libName(name);
    if (libName.contain("libcudart.so")) {
      return false;
    }
    return !libName.contain("libxpurt.so");
  }

  // Returns true for exactly the two symbols "cudaSetDevice" and
  // "xpu_set_device".
  bool targetSym(const char* name) {
    adt::StringRef symbol(name);
    if (symbol == "cudaSetDevice") {
      return true;
    }
    return symbol == "xpu_set_device";
  }

  // Saves `info.oldFuncPtr` into the slot matching the current symbol name
  // (xpu slot for "xpu_set_device", cuda slot otherwise) and returns the
  // address of unifySetDevice as the replacement.
  void* newFuncPtr(const hook::OriginalInfo& info) {
    auto original = reinterpret_cast<SetDevFuncType_t>(info.oldFuncPtr);
    if (adt::StringRef(curSymName()) == "xpu_set_device") {
      xpu_set_device_ = original;
    } else {
      cuda_set_device_ = original;
    }
    return reinterpret_cast<void*>(&unifySetDevice);
  }

  // Nothing to do after a successful install.
  void onSuccess() {}

  // Returns the single PatchRuntimeHook object, created on first use and held
  // in a function-local static shared_ptr.
  static PatchRuntimeHook* instance() {
    static auto singleton = std::make_shared<PatchRuntimeHook>();
    return singleton.get();
  }

  // Saved original entry points; nullptr until newFuncPtr() fills them in.
  SetDevFuncType_t cuda_set_device_{nullptr};
  SetDevFuncType_t xpu_set_device_{nullptr};
};
} // namespace
// Creates the XpuRuntimeApiHook installer on the first call (it is kept in a
// function-local static) and invokes install() on it on every call.
void __runtimeapi_hook_initialize() {
  static auto installer = std::make_shared<XpuRuntimeApiHook>();
  installer->install();
}
void dh_patch_runtime() { PatchRuntimeHook::instance()->install(); }