-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathZenTimer.hh
439 lines (378 loc) · 14 KB
/
ZenTimer.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
#ifndef TIMER_H
#define TIMER_H
// zentimer.h
//
// Written by: Mark Johnstone ([email protected])
// Modified by: Scott F. Kaplan ([email protected])
//
// Last Updated: July 21, 1997
//
// Inspired by: "Zen of Code Optimization" by Michael Abrash
//
// For use on: Intel (tm) Pentium(tm) and P6(tm) processor machines ONLY
//
// Purpose: Allows timing of the number of cycles that a particular
// piece of code took to execute on a pentium machine. Usable
// in user mode with no special privileges
//
// Copyright (c) 1995 Mark Johnstone
//
// Restrictions: Please feel free to use this timer in any way you choose,
// and to make any changes that you find useful. All I ask
// is that you keep the references to my name and that of
// Michael Abrash (who inspired this work) in the resulting
// code.
//
// Warranty: NONE AT ALL!!!
// This code implements a simple class library that makes use of an
// undocumented feature of the Intel Pentium (and P6) processors.
// This feature is a 64 bit register that increments once for every
// clock cycle. On a 100 MHZ machine this register can count for
// about 5,800 years.
// To use this file, you need a modified version of gas (the GNU assembler)
// The patch file has been successfully used with:
// GNU assembler version cygnus/linux-2.5.2l.15 (i586-unknown-linux),
// using BFD version cygnus/linux-2.5.2l.11
// HOW TO USE THE ZEN TIMER:
//
// The zen timer can be used by simply creating an instance of the class
// zen_timer and then calling the methods "timer_on", "timer_off", and
// "timer_print".
//
// To create a timer, you simply create an instance of the class zen_timer.
// If an optional file name is passed to the constructor of the zen_timer,
// then a record will be written to that file for every interval timed by
// that instance of the timer. There are a couple of utilities that are
// provided with this timer to make use of this file (time2ascii and
// time2histo).
//
// timer_on:
//
// The method "void timer_on(void)" turns on the timer for this particular
// instance of the zen timer (note: you *can* have several timers running
// at the same or different times.)
//
// timer_off:
//
// The method "void timer_off(void)" turns off the timer for this particular
// instance of the zen timer. The timer must have been turned on prior to
// calling this method. The number of clock cycles between the call to this
// method and the last call to timer_on for this instance of the timer are
// computed, and if a time file is being created then this interval is written
// out to the time file. In either case, this interval is processed to provide
// the average interval across all calls to this timer instance.
//
// timer_pause:
// timer_resume:
//
// The method "void timer_pause(void)" pauses the recording of time for a
// particular timer. The parallel routine "void timer_resume(void)" turns this
// timer back on. This is useful if you want to time a particular routine,
// but not any of the routines that this routine calls. You do this by
// calling timer_pause just before any calls that you don't want to time,
// and calling timer_resume just after the call.
//
//
// timer_print:
//
// the method "void timer_print(void)" prints out some statistical information
// about this instance of the zen timer. Currently this information goes to
// standard error. This method prints out the number of intervals timed by
// this instance of the timer, as well as the minimum interval, maximum
// interval, and average interval length.
//
// timer_reset:
//
// the method "void timer_reset(void)" resets the timer to the initial state.
// Only use this method if for some reason you want to throw away the
// information that you have previously recorded.
// NOTE: it is not necessary to call this method when you first instantiate
// an instance of the object.
//
// getTime:
//
// the method "unsigned long long getTime()" returns the timing result from
// a single timing instance. The method ensures that this timer has
// only performed one measurement, and then returns the number of
// cycles for that measurement.
// The zen timer requires a patched version of the gnu assembler to work.
// Here is the patch file. Simply cut it out of this file, remove the
// comment characters in the left two columns, and run it through patch.
//
// Note that while three undocumented opcodes are defined in this patch file
// "wrmsr", "rdmsr", and "rdtsc", only rdtsc is used in this file. The two
// other opcodes are usable in kernel mode only.
//
// RCS file: /r/cvs/tools/binutils2/include/opcode/i386.h,v
// retrieving revision 1.1.1.1
// diff -c -r1.1.1.1 i386.h
// *** /tmp/T0a28802 Thu Sep 7 22:44:33 1995
// --- i386.h Thu Sep 7 22:14:39 1995
// ***************
// *** 429,435 ****
// {"int3", 0, 0xcc, _, NoModrm, { 0, 0, 0} },
// {"into", 0, 0xce, _, NoModrm, { 0, 0, 0} },
// {"iret", 0, 0xcf, _, NoModrm, { 0, 0, 0} },
// ! /* i386sl (and i486sl?) only */
// {"rsm", 0, 0x0faa, _, NoModrm,{ 0, 0, 0} },
//
// {"boundl", 2, 0x62, _, Modrm, { Reg32, Mem, 0} },
// --- 429,435 ----
// {"int3", 0, 0xcc, _, NoModrm, { 0, 0, 0} },
// {"into", 0, 0xce, _, NoModrm, { 0, 0, 0} },
// {"iret", 0, 0xcf, _, NoModrm, { 0, 0, 0} },
// ! /* i386sl, i486sl(?) and later 486's, and Pentium only */
// {"rsm", 0, 0x0faa, _, NoModrm,{ 0, 0, 0} },
//
// {"boundl", 2, 0x62, _, Modrm, { Reg32, Mem, 0} },
// ***************
// *** 729,734 ****
// --- 729,746 ----
// {"wbinvd", 0, 0x0f09, _, NoModrm, { 0, 0, 0} },
// {"invlpg", 1, 0x0f01, 7, Modrm, { Mem, 0, 0} },
//
// + /* late 486 and Pentium extension */
// +
// + {"cpuid", 0, 0x0fa2, _, NoModrm, { 0, 0, 0 } },
// +
// + /* Pentium extensions (no CMPXCHG8B, I haven't yet figured out whether
// + there's some arcane way to specify its operand or whether the
// + rest of the assembler needs to be modified to support it) */
// +
// + {"wrmsr", 0, 0x0f30, _, NoModrm, { 0, 0, 0 } },
// + {"rdmsr", 0, 0x0f31, _, NoModrm, { 0, 0, 0 } },
// + {"rdtsc", 0, 0x0f32, _, NoModrm, { 0, 0, 0 } },
// +
// {"", 0, 0, 0, 0, { 0, 0, 0} } /* sentinel */
// };
// #undef _
// ***************
// *** 754,759 ****
// --- 766,772 ----
// {"ds", SReg2, 3}, {"fs", SReg3, 4}, {"gs", SReg3, 5},
// /* control registers */
// {"cr0", Control, 0}, {"cr2", Control, 2}, {"cr3", Control, 3},
// + {"cr4", Control, 4},
// /* debug registers */
// {"db0", Debug, 0}, {"db1", Debug, 1}, {"db2", Debug, 2},
// {"db3", Debug, 3}, {"db6", Debug, 6}, {"db7", Debug, 7},
//
// For more information about undocumented features of Intel processors,
// see the WEB page: http://x86.metronet.com/
// ---------- Start of Zen Timer Code -------------
#include <assert.h>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
// zen_timer: measures intervals in CPU clock cycles using the x86
// time-stamp counter, and keeps min / max / total statistics across all
// intervals timed by one instance.
//
// Typical use: timer_on(); <code under test>; timer_off(); timer_print();
//
// No copy operations are declared, so a copied timer carries the same file
// descriptor as the original and both destructors will close it.
class zen_timer {
  // Descriptor of the optional per-interval output file.  The value 0 is
  // used throughout this file to mean "no output file".
  int output_file;

  // True between timer_on() and timer_off().
  bool timer_in_use;

  // Statistics over all completed intervals.  These are raw cycle counts;
  // OVERHEAD is only subtracted when values are reported.
  unsigned long long int min_time;
  unsigned long long int max_time;
  unsigned long long int total_time;
  int number_of_calls;

  // Each 64-bit counter sample is overlaid with two 32-bit halves so that
  // the EDX:EAX register pair produced by rdtsc can be stored one half at a
  // time.  The low half comes first, matching x86's little-endian layout.
  //
  // The halves must be exactly 32 bits wide for the overlay to line up with
  // the 64-bit value.  "unsigned long int" is 64 bits on LP64 systems
  // (64-bit Linux / macOS), so "unsigned int" is used instead.

  // Counter value captured by timer_on().
  union {
    volatile unsigned long long int last_timer_value;
    struct {
      volatile unsigned int last_timer_value_low;
      volatile unsigned int last_timer_value_high;
    } on;
  };

  // Counter value captured by timer_off().
  union {
    volatile unsigned long long int current_timer_value;
    struct {
      volatile unsigned int current_timer_value_low;
      volatile unsigned int current_timer_value_high;
    } off;
  };

  // Cycles spent paused during the current interval, and how many
  // pause/resume pairs contributed to that total.
  unsigned long long int total_pause_interval;
  unsigned int number_of_pauses;

  // Counter value captured by timer_pause().
  union {
    volatile unsigned long long int start_pause_interval;
    struct {
      volatile unsigned int start_pause_interval_low;
      volatile unsigned int start_pause_interval_high;
    } start_pause;
  };

  // Counter value captured by timer_resume().
  union {
    volatile unsigned long long int end_pause_interval;
    struct {
      volatile unsigned int end_pause_interval_low;
      volatile unsigned int end_pause_interval_high;
    } end_pause;
  };

public:
  // Creates an idle timer.  If file_name is non-NULL, every completed
  // interval is also written to that file.
  inline zen_timer(char *file_name = NULL);

  // Closes the output file, if one was opened.
  inline ~zen_timer();

  // Starts timing an interval.
  inline void timer_on(void);

  // Stops timing and folds the interval into the statistics.
  inline void timer_off(void);

  // Suspends the recording of time for the current interval.
  inline void timer_pause(void);

  // Resumes the recording of time after timer_pause().
  inline void timer_resume(void);

  // Discards previously recorded information.
  inline void timer_reset(void);

  // Prints the call count and the average / minimum / maximum interval to
  // standard error.
  inline void timer_print(void);

  // Returns the cycle count of the single interval timed by this instance;
  // terminates the process if the timer was not used exactly once.
  inline unsigned long long getTime ();
};
// Cycles subtracted from each reported interval to cancel the cost of the
// timer's own instructions when run in user mode.
// NOTE(review): the original comment said "14 cycles of overhead" although
// the value is 13 -- confirm which figure is intended.
#define OVERHEAD (13)

// Extra cycles charged to the paused time for every timer_pause() /
// timer_resume() pair: 31 cycles of overhead when the timer is paused and
// resumed.
// USE THIS OVERHEAD IF YOU HAVE A FUNCTION CALL
#define PAUSE_OVERHEAD (31)

// It appears that if the interval being timed contains a function call then
// different code for the timer is generated, and a different overhead
// should be used.
// USE THIS OVERHEAD IF YOU DON'T HAVE A FUNCTION CALL.
//#define PAUSE_OVERHEAD (27)
// Constructs an idle timer with empty statistics.
//
// file_name: optional path of a file that receives one binary record per
//            timed interval.  The file is created or truncated.  When NULL,
//            no file is written.
//
// If the file cannot be opened, a message is printed to standard error and
// the process exits with status -1.
inline zen_timer::zen_timer(char *file_name)
{
  timer_in_use = false;
  min_time = ~0ULL;   // larger than any sample, so the first one becomes the minimum
  max_time = 0;
  total_time = 0;
  number_of_calls = 0;
  current_timer_value = 0;
  last_timer_value = 0;

  // The pause bookkeeping was previously left uninitialised until the first
  // timer_on(); give it a defined starting state as well.
  total_pause_interval = 0;
  number_of_pauses = 0;
  start_pause_interval = 0;
  end_pause_interval = 0;

  // If this timer is created with a file name argument, then
  // open the dribble file.
  if (file_name != NULL) {
    output_file = open(file_name, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (output_file == -1) {
      // std:: qualification is required: this header has no using-directive.
      std::cerr << "couldn't open output file " << file_name << std::endl;
      exit(-1);
    }
  } else {
    // 0 is the "no output file" marker tested by the destructor and by
    // timer_off().
    output_file = 0;
  }
}
// Releases the dribble file descriptor when one is held.
inline zen_timer::~zen_timer()
{
  // A descriptor value of 0 is the constructor's "no output file" marker,
  // so there is nothing to release in that case.
  if (output_file == 0) {
    return;
  }
  close(output_file);
}
inline void zen_timer::timer_on(void)
{
assert(timer_in_use == false);
timer_in_use = true;
total_pause_interval = 0;
number_of_pauses = 0;
// This inline assembly code calls the undocumented opcode "rdtsc" to
// read the value of the tsc counter. Then the value of this register
// pair is saved.
asm volatile ("rdtsc;
movl %%edx, %0;
movl %%eax, %1"
: "=r" (on.last_timer_value_high),
"=r" (on.last_timer_value_low)
: /* no inputs */
: "%edx", "%eax");
}
inline void zen_timer::timer_off(void) {
assert(timer_in_use == true);
asm volatile ("rdtsc;
movl %%edx, %0;
movl %%eax, %1"
: "=r" (off.current_timer_value_high),
"=r" (off.current_timer_value_low)
: /* no inputs */
: "%edx", "%eax");
timer_in_use = false;
unsigned long long int time;
unsigned long long int output_time;
time = current_timer_value - last_timer_value;
time -= total_pause_interval;
total_time += time;
if (time > max_time) {
max_time = time;
}
if (time < min_time) {
min_time = time;
}
number_of_calls++;
if (output_file) {
output_time = time - OVERHEAD;
write(output_file,(char *) &output_time, sizeof(unsigned long long int));
}
}
inline void zen_timer::timer_pause(void) {
assert(timer_in_use == true);
asm volatile ("rdtsc;
movl %%edx, %0;
movl %%eax, %1"
: "=r" (start_pause.start_pause_interval_high),
"=r" (start_pause.start_pause_interval_low)
: /* no inputs */
: "%edx", "%eax");
}
// Resumes the recording of time after timer_pause().
//
// The timer must be running (asserted).  The cycles elapsed since the
// matching timer_pause() -- plus PAUSE_OVERHEAD when that macro is defined
// and non-zero -- are added to the paused total that timer_off() subtracts
// from the interval.
inline void zen_timer::timer_resume(void) {
  assert(timer_in_use == true);
  number_of_pauses++;

  // rdtsc returns the 64-bit counter in EDX:EAX; the "a"/"d" constraints
  // bind the outputs to those registers, and the halves are combined
  // explicitly into the 64-bit sample.
  unsigned int tsc_low;
  unsigned int tsc_high;
  asm volatile ("rdtsc" : "=a" (tsc_low), "=d" (tsc_high));
  end_pause_interval =
      (static_cast<unsigned long long>(tsc_high) << 32) | tsc_low;

  const unsigned long long int paused =
      end_pause_interval - start_pause_interval;
#if (defined (PAUSE_OVERHEAD) && (PAUSE_OVERHEAD != 0))
  total_pause_interval += (paused + PAUSE_OVERHEAD);
#else
  total_pause_interval += paused;
#endif
}
inline void zen_timer::timer_reset(void) {
timer_in_use = false;
total_time = 0;
number_of_calls = 0;
total_pause_interval = 0;
number_of_pauses++;
}
// Prints a summary of this timer to standard error: the number of intervals
// timed and their average, minimum and maximum length in cycles, each with
// OVERHEAD subtracted.
//
// If the timer is still running, an error line is printed first and the
// summary follows anyway.  The values are unsigned, so an interval shorter
// than OVERHEAD is reported as a very large number.
inline void zen_timer::timer_print(void) {
  // std:: qualification is required: this header has no using-directive.
  if (timer_in_use == true) {
    std::cerr << "Error: Tried to print Zen Timer information while timer was running" << std::endl;
  }

  if (number_of_calls == 0) {
    std::cerr << "timer was not called." << std::endl;
    return;
  }

  std::cerr << std::endl;
  std::cerr << "timer was called " << number_of_calls << " times" << std::endl;
  std::cerr << std::endl;
  std::cerr << "for an average of "
            << (total_time - (OVERHEAD * number_of_calls)) / number_of_calls
            << " cycles per call" << std::endl;
  std::cerr << "the minimum time was: " << (min_time - OVERHEAD) << std::endl;
  std::cerr << "the maximum time was: " << (max_time - OVERHEAD) << std::endl;
  std::cerr << std::endl;
}
// Returns the length, in cycles less OVERHEAD, of the single interval timed
// by this instance.
//
// If the timer has not recorded exactly one interval, a message is printed
// to standard error and the process exits with status -1.
inline unsigned long long zen_timer::getTime () {
  if (number_of_calls != 1) {
    // std:: qualification is required: this header has no using-directive.
    std::cerr << "getTime() can only be used on a timer called once."
              << std::endl;
    exit(-1);
  }

  // With exactly one sample, the minimum, maximum and total all equal it.
  assert ((min_time == max_time) && (max_time == total_time));
  return (total_time - OVERHEAD);
}
#endif