157 lines
5.6 KiB
Diff
157 lines
5.6 KiB
Diff
From 9c739800a8915d5f2a73c840190920e95ffa1c5c Mon Sep 17 00:00:00 2001
|
|
From: Reini Urban <rurban@cpan.org>
|
|
Date: Fri, 18 Feb 2022 09:46:45 +0100
|
|
Subject: [PATCH] fix armv7 asm inline error GH #115
|
|
|
|
Some armv7 buildroot variants fail to compile the inline asm.
|
|
The build already probes for inline asm support (ASM_INLINE), so use that.
|
|
Fixes GH #115
|
|
|
|
[Retrieved from:
|
|
https://github.com/rurban/safeclib/commit/9c739800a8915d5f2a73c840190920e95ffa1c5c]
|
|
Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com>
|
|
---
|
|
tests/perf_private.h | 49 +++++++++++++++++++++++++-------------------
|
|
1 file changed, 28 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/tests/perf_private.h b/tests/perf_private.h
|
|
index 3296cb3d..843674d3 100644
|
|
--- a/tests/perf_private.h
|
|
+++ b/tests/perf_private.h
|
|
@@ -1,9 +1,9 @@
|
|
/*------------------------------------------------------------------
|
|
* perf_private.h - Internal benchmarking tools
|
|
*
|
|
- * 2020 Reini Urban
|
|
+ * 2020,2022 Reini Urban
|
|
*
|
|
- * Copyright (c) 2017, 2020 Reini Urban
|
|
+ * Copyright (c) 2017, 2020, 2022 Reini Urban
|
|
* All rights reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
@@ -55,13 +55,16 @@ static inline uint64_t timer_start();
|
|
static inline uint64_t timer_end();
|
|
|
|
static inline clock_t rdtsc() {
|
|
-#ifdef __x86_64__
|
|
+#ifndef ASM_INLINE
|
|
+#define NO_CYCLE_COUNTER
|
|
+ return clock();
|
|
+#elif defined __x86_64__
|
|
uint64_t a, d;
|
|
- __asm__ volatile("rdtsc" : "=a"(a), "=d"(d));
|
|
+ ASM_INLINE volatile("rdtsc" : "=a"(a), "=d"(d));
|
|
return (clock_t)(a | (d << 32));
|
|
#elif defined(__i386__)
|
|
clock_t x;
|
|
- __asm__ volatile("rdtsc" : "=A"(x));
|
|
+ ASM_INLINE volatile("rdtsc" : "=A"(x));
|
|
return x;
|
|
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && (SIZEOF_SIZE_T == 4)
|
|
// V7 is the earliest arch that has a standard cyclecount (some say 6)
|
|
@@ -69,11 +72,11 @@ static inline clock_t rdtsc() {
|
|
uint32_t pmuseren;
|
|
uint32_t pmcntenset;
|
|
// Read the user mode perf monitor counter access permissions.
|
|
- asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
|
|
+ ASM_INLINE volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
|
|
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
|
|
- asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
|
|
+ ASM_INLINE volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
|
|
if (pmcntenset & 0x80000000ul) { // Is it counting?
|
|
- asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
|
|
+ ASM_INLINE volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
|
|
// The counter is set up to count every 64th cycle
|
|
return (int64_t)(pmccntr) * 64; // Should optimize to << 6
|
|
}
|
|
@@ -83,22 +86,22 @@ static inline clock_t rdtsc() {
|
|
uint64_t pmccntr;
|
|
uint64_t pmuseren = 1UL;
|
|
// Read the user mode perf monitor counter access permissions.
|
|
- //asm volatile("mrs cntv_ctl_el0, %0" : "=r" (pmuseren));
|
|
+ //ASM_INLINE volatile("mrs cntv_ctl_el0, %0" : "=r" (pmuseren));
|
|
if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
|
|
- asm volatile("mrs %0, cntvct_el0" : "=r" (pmccntr));
|
|
+ ASM_INLINE volatile("mrs %0, cntvct_el0" : "=r" (pmccntr));
|
|
return (uint64_t)(pmccntr) * 64; // Should optimize to << 6
|
|
}
|
|
return (uint64_t)rdtsc();
|
|
#elif defined(__powerpc64__) || defined(__ppc64__)
|
|
uint64_t tb;
|
|
- __asm__ volatile (\
|
|
+ ASM_INLINE volatile (\
|
|
"mfspr %0, 268"
|
|
: "=r" (tb));
|
|
return tb;
|
|
#elif defined(__powerpc__) || defined(__ppc__)
|
|
// This returns a time-base, which is not always precisely a cycle-count.
|
|
uint32_t tbu, tbl, tmp;
|
|
- __asm__ volatile (\
|
|
+ ASM_INLINE volatile (\
|
|
"0:\n"
|
|
"mftbu %0\n"
|
|
"mftbl %1\n"
|
|
@@ -109,12 +112,12 @@ static inline clock_t rdtsc() {
|
|
return (((uint64_t) tbu << 32) | tbl);
|
|
#elif defined(__sparc__)
|
|
uint64_t tick;
|
|
- asm(".byte 0x83, 0x41, 0x00, 0x00");
|
|
- asm("mov %%g1, %0" : "=r" (tick));
|
|
+ ASM_INLINE(".byte 0x83, 0x41, 0x00, 0x00");
|
|
+ ASM_INLINE("mov %%g1, %0" : "=r" (tick));
|
|
return tick;
|
|
#elif defined(__ia64__)
|
|
uint64_t itc;
|
|
- asm("mov %0 = ar.itc" : "=r" (itc));
|
|
+ ASM_INLINE("mov %0 = ar.itc" : "=r" (itc));
|
|
return itc;
|
|
#else
|
|
#define NO_CYCLE_COUNTER
|
|
@@ -126,9 +129,11 @@ static inline clock_t rdtsc() {
|
|
// 3.2.1 The Improved Benchmarking Method
|
|
static inline uint64_t timer_start()
|
|
{
|
|
-#if defined (__i386__) || (defined(__x86_64__) && SIZEOF_SIZE_T == 4)
|
|
+#ifndef ASM_INLINE
|
|
+ return (uint64_t)rdtsc();
|
|
+#elif defined (__i386__) || (defined(__x86_64__) && SIZEOF_SIZE_T == 4)
|
|
uint32_t cycles_high, cycles_low;
|
|
- __asm__ volatile
|
|
+ ASM_INLINE volatile
|
|
("cpuid\n\t"
|
|
"rdtsc\n\t"
|
|
"mov %%edx, %0\n\t"
|
|
@@ -137,7 +142,7 @@ static inline uint64_t timer_start()
|
|
return ((uint64_t)cycles_high << 32) | cycles_low;
|
|
#elif defined __x86_64__
|
|
uint32_t cycles_high, cycles_low;
|
|
- __asm__ volatile
|
|
+ ASM_INLINE volatile
|
|
("cpuid\n\t"
|
|
"rdtsc\n\t"
|
|
"mov %%edx, %0\n\t"
|
|
@@ -151,9 +156,11 @@ static inline uint64_t timer_start()
|
|
|
|
static inline uint64_t timer_end()
|
|
{
|
|
-#if defined (__i386__) || (defined(__x86_64__) && defined (HAVE_BIT32))
|
|
+#ifndef ASM_INLINE
|
|
+ return (uint64_t)rdtsc();
|
|
+#elif defined (__i386__) || (defined(__x86_64__) && defined (HAVE_BIT32))
|
|
uint32_t cycles_high, cycles_low;
|
|
- __asm__ volatile
|
|
+ ASM_INLINE volatile
|
|
("rdtscp\n\t"
|
|
"mov %%edx, %0\n\t"
|
|
"mov %%eax, %1\n\t"
|
|
@@ -162,7 +169,7 @@ static inline uint64_t timer_end()
|
|
return ((uint64_t)cycles_high << 32) | cycles_low;
|
|
#elif defined __x86_64__
|
|
uint32_t cycles_high, cycles_low;
|
|
- __asm__ volatile
|
|
+ ASM_INLINE volatile
|
|
("rdtscp\n\t"
|
|
"mov %%edx, %0\n\t"
|
|
"mov %%eax, %1\n\t"
|