Skip to content

Commit cfaa096

Browse files
committed
Time::HiRes Win32 remove a few layers of jump fn calls from QPC()
1 parent 35df764 commit cfaa096

File tree

1 file changed

+36
-5
lines changed

1 file changed

+36
-5
lines changed

dist/Time-HiRes/HiRes.xs

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,12 +156,20 @@ typedef struct {
156156
unsigned long run_count;
157157
} my_cxt_t;
158158

159+
/* Function pointer type for a QueryPerformanceCounter()-shaped routine:
 * WINAPI calling convention, one LARGE_INTEGER* argument, returns BOOL. */
typedef BOOL (WINAPI *pfnQueryPerformanceCounter_T)(LARGE_INTEGER*);
160+
159161
static unsigned __int64 tick_frequency = 0;
160162
static unsigned __int64 qpc_res_ns = 0;
161163
static unsigned __int64 qpc_res_ns_realtime = 0;
164+
/* Cached performance-counter entry point. Starts out NULL; the BOOT section
 * stores either ntdll's RtlQueryPerformanceCounter or, if that lookup fails,
 * the public QueryPerformanceCounter symbol here. */
static pfnQueryPerformanceCounter_T pfnQueryPerformanceCounter = NULL;
162165

163166
#define S_InterlockedExchange64(_d,_s) \
164167
InterlockedExchange64((LONG64 volatile *)(_d),(LONG64)(_s))
168+
#define S_InterlockedExchangePointer(_d,_s) \
169+
InterlockedExchangePointer((PVOID volatile *)(_d),(PVOID)(_s))
170+
171+
#undef QueryPerformanceCounter
172+
/* From here on, QueryPerformanceCounter(...) in this file expands to a call
 * through the pfnQueryPerformanceCounter function pointer. */
#define QueryPerformanceCounter pfnQueryPerformanceCounter
165173

166174
/* Visual C++ 2013 and older don't have the timespec structure.
167175
* Neither do mingw.org compilers with MinGW runtimes older than 3.22. */
@@ -259,17 +267,19 @@ _GetSystemTimePreciseAsFileTime(pTHX)
259267
{
260268
#define MY_CXTX (*MY_CXT_x)
261269
unsigned __int64 ticks;
262-
unsigned __int64 ticks_mem;
270+
263271
unsigned __int64 timesys;
264-
__int64 diff;
265272
/* If no threads, CC will probably optimize away all MY_CXT_x references
266273
so they directly access the C static global struct. */
267274
my_cxt_t * MY_CXT_x;
268275

269-
QueryPerformanceCounter((LARGE_INTEGER*)&ticks_mem);
276+
{
277+
unsigned __int64 ticks_mem;
278+
QueryPerformanceCounter((LARGE_INTEGER*)&ticks_mem);
270279
/* Inform the CC nothing external or in this fn (ptr aliasing) can ever
271280
rewrite the value in ticks. Increases chance of CC using registers. */
272-
ticks = ticks_mem;
281+
ticks = ticks_mem;
282+
}
273283
{
274284
dMY_CXT;
275285
MY_CXT_x = &(MY_CXT);
@@ -282,6 +292,7 @@ _GetSystemTimePreciseAsFileTime(pTHX)
282292
MY_CXTX.reset_time = timesys + MAX_PERF_COUNTER_TICKS;
283293
}
284294
else {
295+
__int64 diff;
285296
ticks -= MY_CXTX.base_ticks;
286297
timesys = MY_CXTX.base_systime_as_filetime.ft_i64
287298
+ Const64(IV_1E7) * (ticks / tick_frequency)
@@ -292,6 +303,7 @@ _GetSystemTimePreciseAsFileTime(pTHX)
292303
GetSystemTimeAsFileTime(&MY_CXTX.base_systime_as_filetime.ft_val);
293304
timesys = MY_CXTX.base_systime_as_filetime.ft_i64;
294305
}
306+
/* Note this invisible else {} branch, SKIPS calling GetSystemTimeAsFileTime() */
295307
}
296308
#undef MY_CXTX
297309
{
@@ -1002,7 +1014,7 @@ S_croak_xs_unimplemented(const CV *const cv)
10021014
SV* sv = cv_name(cv, NULL, 0);
10031015
Perl_croak_nocontext(
10041016
"%s::%s(): unimplemented in this platform" + (sizeof("%s::")-1), SvPVX(sv));
1005-
#if 0
1017+
#if 0 /* former implementation, retired because of machine code bloat */
10061018
char buf[sizeof("CODE(0x%" UVxf ")") + (sizeof(UV)*8)];
10071019
const char * pv1;
10081020
const GV *const gv = CvGV(cv);
@@ -1066,6 +1078,25 @@ BOOT:
10661078
l_qpc_res_ns_realtime = l_qpc_res_ns > 100 ? l_qpc_res_ns : 100;
10671079
S_InterlockedExchange64(&qpc_res_ns_realtime, l_qpc_res_ns_realtime);
10681080
}
1081+
{/* Remove a couple jump stub funcs between kernel32->kernelbase->ntdll
1082+
for perf reasons. RtlQueryPerformanceCounter() was added in NT 6.1,
1083+
so a fallback path is still required to QPC()@K32.dll. */
1084+
pfnQueryPerformanceCounter_T QPCfn = pfnQueryPerformanceCounter;
1085+
if (!QPCfn) {
1086+
HMODULE hmod = GetModuleHandleW(L"NTDLL.DLL");
1087+
if (hmod) {
1088+
QPCfn = (pfnQueryPerformanceCounter_T)GetProcAddress(hmod,"RtlQueryPerformanceCounter");
1089+
if (QPCfn)
1090+
goto QPC_done;
1091+
}
1092+
#undef QueryPerformanceCounter
1093+
QPCfn = QueryPerformanceCounter; /* Get the public API fallback sym. */
1094+
#undef QueryPerformanceCounter
1095+
/* Restore the redirect so that any QueryPerformanceCounter() call after this
 * point goes through the cached pfnQueryPerformanceCounter pointer again. */
#define QueryPerformanceCounter pfnQueryPerformanceCounter
1096+
QPC_done:
1097+
S_InterlockedExchangePointer(&pfnQueryPerformanceCounter, QPCfn);
1098+
}
1099+
}
10691100
}
10701101
#endif
10711102
#ifdef HAS_GETTIMEOFDAY

0 commit comments

Comments
 (0)