Skip to content

Commit e2062a2

Browse files
committed
Time::HiRes::bootstrap() use more local vars in registers vs global derefs
- Each reference to a global var like qpc_res_ns or tick_frequency is 7 bytes in machine code, or a couple more bytes than 7. BOOT:{} runs only once, the chance of 2 parallel BOOT:{} XSUBs running in 2 different my_perls is almost zero, and even if there are 2 parallel OS threads executing, 1 OS thread isn't going to help shave time off the 2nd OS thread. So, to reduce the number of 7-byte opcodes that read from the global vars, use C auto vars as much as possible. QueryPerformanceFrequency() internally on Win7 is around 1-3 ptr derefs into NT's "VDSO" aka KUSER_SHARED_DATA. On Win2k, QPF() is a ring 0 call. - Slide the indent level to the left b/c the Win32 code block is nested too deep and almost every statement would exceed 80 chars. - Cache PL_modglobal in a register; PL_modglobal is at a big U32 offset 0x698 into the my_perl struct: "48 8B 9F 98 06 00 00 mov rbx, [rdi+698h]".
1 parent 7ab39d0 commit e2062a2

File tree

1 file changed

+26
-21
lines changed

1 file changed

+26
-21
lines changed

dist/Time-HiRes/HiRes.xs

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -972,33 +972,38 @@ BOOT:
972972
MY_CXT_INIT;
973973
#endif
974974
#if defined(WIN32) || defined(CYGWIN_WITH_W32API)
975-
if (tick_frequency == 0) { /* no DllMain() in very rare static Perls */
976-
unsigned __int64 l_tick_frequency;
975+
{
976+
unsigned __int64 l_qpc_res_ns;
977+
unsigned __int64 l_qpc_res_ns_realtime;
978+
unsigned __int64 l_tick_frequency = tick_frequency;
979+
if (l_tick_frequency == 0) { /* no DllMain() in very rare static Perls */
977980
/* from MSDN: >= WinXP, function will always succeed and never return zero */
978-
if (!QueryPerformanceFrequency((LARGE_INTEGER*)&l_tick_frequency))
981+
unsigned __int64 l_tick_frequency_mem;
982+
if (!QueryPerformanceFrequency((LARGE_INTEGER*)&l_tick_frequency_mem))
979983
croak("%s(): unimplemented in this platform", "QueryPerformanceFrequency");
984+
l_tick_frequency = l_tick_frequency_mem;
980985
/* 32-bit CPU anti-sharding paranoia */
981-
S_InterlockedExchange64(&tick_frequency, l_tick_frequency);
982-
}
983-
if (qpc_res_ns == 0) {
984-
unsigned __int64 l_qpc_res_ns =
985-
IV_1E9 > tick_frequency ? IV_1E9 / tick_frequency : 1;
986-
S_InterlockedExchange64(&qpc_res_ns, l_qpc_res_ns);
987-
}
988-
if (qpc_res_ns_realtime == 0) {
989-
/* the resolution can't be smaller than 100ns because our implementation
990-
* of CLOCK_REALTIME is using FILETIME internally */
991-
unsigned __int64 l_qpc_res_ns_realtime =
992-
qpc_res_ns > 100 ? qpc_res_ns : 100;
993-
S_InterlockedExchange64(&qpc_res_ns_realtime, l_qpc_res_ns_realtime);
994-
}
986+
S_InterlockedExchange64(&tick_frequency, l_tick_frequency);
987+
}
988+
l_qpc_res_ns = qpc_res_ns;
989+
if (l_qpc_res_ns == 0) {
990+
l_qpc_res_ns = IV_1E9 > l_tick_frequency ? IV_1E9 / l_tick_frequency : 1;
991+
S_InterlockedExchange64(&qpc_res_ns, l_qpc_res_ns);
992+
}
993+
l_qpc_res_ns_realtime = qpc_res_ns_realtime;
994+
if (l_qpc_res_ns_realtime == 0) {
995+
/* the resolution can't be smaller than 100ns because our implementation
996+
* of CLOCK_REALTIME is using FILETIME internally */
997+
l_qpc_res_ns_realtime = l_qpc_res_ns > 100 ? l_qpc_res_ns : 100;
998+
S_InterlockedExchange64(&qpc_res_ns_realtime, l_qpc_res_ns_realtime);
999+
}
1000+
}
9951001
#endif
9961002
#ifdef HAS_GETTIMEOFDAY
9971003
{
998-
(void) hv_store(PL_modglobal, "Time::NVtime", 12,
999-
newSViv(PTR2IV(myNVtime)), 0);
1000-
(void) hv_store(PL_modglobal, "Time::U2time", 12,
1001-
newSViv(PTR2IV(myU2time)), 0);
1004+
HV* const modglobal = PL_modglobal;
1005+
(void)hv_stores(modglobal, "Time::NVtime", newSViv(PTR2IV(myNVtime)));
1006+
(void)hv_stores(modglobal, "Time::U2time", newSViv(PTR2IV(myU2time)));
10021007
}
10031008
#endif
10041009
#if defined(PERL_DARWIN)

0 commit comments

Comments
 (0)