mirror of
https://github.com/Mercury-Language/mercury.git
synced 2025-12-09 10:52:24 +00:00
Discussion of these changes can be found on the Mercury developers
mailing list archives from June 2018.
COPYING.LIB:
Add a special linking exception to the LGPL.
*:
Update references to COPYING.LIB.
Clean up some minor errors that have accumulated in copyright
messages.
581 lines
17 KiB
C
581 lines
17 KiB
C
// vim: ts=4 sw=4 expandtab ft=c
|
|
|
|
// Copyright (C) 2007, 2009-2011 The University of Melbourne.
|
|
// Copyright (C) 2014-2016, 2018 The Mercury team.
|
|
// This file is distributed under the terms specified in COPYING.LIB.
|
|
|
|
// mercury_atomic_ops.c
|
|
|
|
#include "mercury_imp.h"
|
|
#include "mercury_atomic_ops.h"
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(MR_THREAD_SAFE)
|
|
|
|
// Definitions for the atomic functions declared `extern inline'.
|
|
|
|
MR_OUTLINE_DEFN(
|
|
MR_bool
|
|
MR_compare_and_swap_int(volatile MR_Integer *addr, MR_Integer old,
|
|
MR_Integer new_val)
|
|
,
|
|
{
|
|
MR_COMPARE_AND_SWAP_WORD_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
MR_bool
|
|
MR_compare_and_swap_uint(volatile MR_Unsigned *addr, MR_Unsigned old,
|
|
MR_Unsigned new_val)
|
|
,
|
|
{
|
|
MR_COMPARE_AND_SWAP_WORD_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
MR_Integer
|
|
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
|
|
,
|
|
{
|
|
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
MR_Unsigned
|
|
MR_atomic_add_and_fetch_uint(volatile MR_Unsigned *addr, MR_Unsigned addend)
|
|
,
|
|
{
|
|
MR_ATOMIC_ADD_AND_FETCH_UINT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
void
|
|
MR_atomic_add_int(volatile MR_Integer *addr, MR_Integer addend)
|
|
,
|
|
{
|
|
MR_ATOMIC_ADD_INT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
void
|
|
MR_atomic_add_uint(volatile MR_Unsigned *addr, MR_Unsigned addend)
|
|
,
|
|
{
|
|
MR_ATOMIC_ADD_UINT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
void
|
|
MR_atomic_sub_int(volatile MR_Integer *addr, MR_Integer x)
|
|
,
|
|
{
|
|
MR_ATOMIC_SUB_INT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
void
|
|
MR_atomic_inc_int(volatile MR_Integer *addr)
|
|
,
|
|
{
|
|
MR_ATOMIC_INC_INT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
void
|
|
MR_atomic_inc_uint(volatile MR_Unsigned *addr)
|
|
,
|
|
{
|
|
MR_ATOMIC_INC_UINT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
void
|
|
MR_atomic_dec_int(volatile MR_Integer *addr)
|
|
,
|
|
{
|
|
MR_ATOMIC_DEC_INT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
MR_bool
|
|
MR_atomic_dec_and_is_zero_int(volatile MR_Integer *addr)
|
|
,
|
|
{
|
|
MR_ATOMIC_DEC_AND_IS_ZERO_INT_BODY;
|
|
}
|
|
)
|
|
|
|
MR_OUTLINE_DEFN(
|
|
MR_bool
|
|
MR_atomic_dec_and_is_zero_uint(volatile MR_Unsigned *addr)
|
|
,
|
|
{
|
|
MR_ATOMIC_DEC_AND_IS_ZERO_UINT_BODY;
|
|
}
|
|
)
|
|
|
|
#endif // MR_THREAD_SAFE
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
|
|
|
|
// Profiling of the parallel runtime.
|
|
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
// True if the RDTSCP and RDTSC instructions are available respectively.
|
|
|
|
static MR_bool MR_rdtscp_is_available = MR_FALSE;
|
|
static MR_bool MR_rdtsc_is_available = MR_FALSE;
|
|
#endif
|
|
|
|
MR_uint_least64_t MR_cpu_cycles_per_sec = 0;
|
|
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
|
|
// Set this to 1 to enable some printfs below
|
|
#define MR_DEBUG_CPU_FEATURE_DETECTION 0
|
|
|
|
// cpuid, rdtscp and rdtsc are i386/x86_64 instructions.
|
|
|
|
static __inline__ void MR_cpuid(MR_Unsigned code, MR_Unsigned sub_code,
|
|
MR_Unsigned *a, MR_Unsigned *b,
|
|
MR_Unsigned *c, MR_Unsigned *d);
|
|
|
|
static __inline__ void MR_rdtscp(MR_uint_least64_t *tsc,
|
|
MR_Unsigned *processor_id);
|
|
|
|
static __inline__ void MR_rdtsc(MR_uint_least64_t *tsc);
|
|
|
|
// Return zero if parsing failed, otherwise return the number of cycles per
|
|
// second.
|
|
|
|
static MR_uint_least64_t parse_freq_from_x86_brand_string(char *string);
|
|
|
|
#endif // MR_GNUC && (__i386__ || __x86_64__)
|
|
|
|
// Probe the CPU and record what we find in MR_rdtsc_is_available,
// MR_rdtscp_is_available and MR_cpu_cycles_per_sec.
//
// This does something only when compiled with GCC for i386 or x86_64;
// otherwise the function body is empty. Every early return leaves the
// values that have not been probed yet at their initial settings.
void
MR_do_cpu_feature_detection(void)
{
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
    MR_Unsigned     a, b, c, d;
#if defined(__i386__)
    MR_Unsigned     eflags, old_eflags;
#endif
    MR_Unsigned     maximum_extended_page;
    MR_Unsigned     extended_family, basic_family, family;
    MR_Unsigned     extended_model, model;

#if defined(__i386__)
    // Check for the CPUID instruction. CPUID is supported if we can flip bit
    // 21 in the CPU's EFLAGS register. To read and write EFLAGS we have
    // to go via the C stack.
    //
    // We do this only on i386. Every x86_64 processor implements CPUID, so
    // the probe is redundant there, and pushing onto the stack from inline
    // assembly on x86_64 can overwrite data that the compiler has placed in
    // the red zone below the stack pointer.

    __asm__ ("pushf; pop %0"
        :"=r"(eflags));
    old_eflags = eflags;
    // Flip bit 21
    eflags ^= (1 << 21);
    __asm__ ("push %0; popf; pushf; pop %0;"
        :"=r"(eflags)
        :"0"(eflags));

    // Test to see if our change held. We don't restore eflags, a change to
    // the ID bit has no effect.

    if (eflags == old_eflags) {
#if MR_DEBUG_CPU_FEATURE_DETECTION
        fprintf(stderr, "This CPU doesn't support the CPUID instruction.\n");
#endif
        return;
    }
#endif // __i386__

    // CPUID 0 gives the maximum basic CPUID page in EAX. Basic pages go up to
    // but not including 0x40000000.

    MR_cpuid(0, 0, &a, &b, &c, &d);
    if (a < 1) {
        return;
    }

    // CPUID 1 gives type, family, model and stepping information in EAX.
    MR_cpuid(1, 0, &a, &b, &c, &d);

    // Bit 4 in EDX is high if RDTSC is available
    if (d & (1 << 4)) {
        MR_rdtsc_is_available = MR_TRUE;
    }

    // BTW: Intel can't count:
    //
    // http://www.pagetable.com/?p=18
    // http://www.codinghorror.com/blog/archives/000364.html
    //
    // 486 (1989): family 4
    // Pentium (1993): family 5
    // Pentium Pro (1995): family 6, models 0 and 1
    // Pentium 2 (1997): family 6, models 3, 5 and 6
    // Pentium 3 (2000): family 6, models 7, 8, 10, 11
    // Itanium (2001): family 7
    // Pentium 4 (2000): family 15/0
    // Itanium 2 (2002): family 15/1 and 15/2
    // Pentium D: family 15/4
    // Pentium M (2003): family 6, models 9 and 13
    // Core (2006): family 6, model 14
    // Core 2 (2006): family 6, model 15
    // i7: family 6, model 26
    // Atom: family 6, model 28
    //
    // This list is incomplete, it doesn't cover AMD or any other brand of x86
    // processor, and it probably doesn't cover all post-pentium Intel
    // processors.

    // bits 8-11 (first bit (LSB) is bit 0)
    basic_family = (a & 0x00000F00) >> 8;
    if (basic_family == 0x0F) {
        // bits 20-27
        extended_family = (a & 0x0FF00000) >> 20;
        family = basic_family + extended_family;
    } else {
        family = basic_family;
    }

    // The model value is used only by the debugging output, but the code
    // is kept in case we have a reason to use it in the future.

    // bits 4-7
    model = (a & 0x000000F0) >> 4;
    if ((basic_family == 0x0F) || (basic_family == 0x06)) {
        // bits 16-19
        extended_model = (a & 0x000F0000) >> 16;
        model += (extended_model << 4);
    }
#if MR_DEBUG_CPU_FEATURE_DETECTION
    // MR_Unsigned need not have the same size as unsigned int, so cast to
    // a type that has a standard printf conversion.
    fprintf(stderr, "This is family %lu and model %lu\n",
        (unsigned long) family, (unsigned long) model);
#endif

    // Now check for P3 or higher since they have the extended pages.
    if (family < 6) {
        // This is a 486 or Pentium.
        return;
    }
    // We could bail out here for a pentium 3, but that would require knowing
    // the model numbers of all family 6 processors and whether they honour
    // extended CPUID. The check below is more reliable and should work on
    // AMD chips as well.

    // Extended CPUID 0x80000000.
    //
    // EAX contains the maximum extended CPUID node.

    MR_cpuid(0x80000000, 0, &a, &b, &c, &d);
    if ((a & 0x80000000) == 0) {
        // Extended CPUID is not supported.
        // Note that this check is still not as reliable as we would like.
        // If it succeeds, that does not guarantee that the processor
        // definitely implements extended CPUID.

        return;
    }
    maximum_extended_page = a;
#if MR_DEBUG_CPU_FEATURE_DETECTION
    fprintf(stderr, "Maximum extended CPUID page: 0x%lx\n",
        (unsigned long) maximum_extended_page);
#endif

    // Extended CPUID 0x80000001
    //
    // If EDX bit 27 is set the RDTSCP instruction is available.

    if (maximum_extended_page >= 0x80000001) {
        MR_cpuid(0x80000001, 0, &a, &b, &c, &d);
#if MR_DEBUG_CPU_FEATURE_DETECTION
        fprintf(stderr, "CPUID 0x80000001 EDX: 0x%lx\n", (unsigned long) d);
#endif
        if ((d & (1 << 27))) {
            // This processor supports RDTSCP.

#if MR_DEBUG_CPU_FEATURE_DETECTION
            fprintf(stderr, "RDTSCP is available\n");
#endif
            MR_rdtscp_is_available = MR_TRUE;
        }
    }

    if (maximum_extended_page >= 0x80000004) {
        // 3 CPUID pages, 4 return registers each, containing 4 bytes each,
        // plus a null byte. Intel says they include their own null byte, but
        // for the cost of a single byte it is safer to add our own.

#define CPUID_BRAND_STRING_SIZE (3*4*4 + 1)
        char            buff[CPUID_BRAND_STRING_SIZE];
        unsigned int    page;
        unsigned int    byte;
        unsigned int    shift;

        // This processor supports the brand string from which we can
        // try to extract the clock speed. This algorithm is described
        // in the Intel Instruction Set Reference, Volume 2B, Chapter 3,
        // Pages 207-208, In particular the flow chart in figure 3-10.
        // This does not work on AMD processors since they don't include
        // the clock speed in the brand string.

        for (page = 0; page < 3; page++) {
            MR_cpuid(page + 0x80000002, 0, &a, &b, &c, &d);
#if MR_DEBUG_CPU_FEATURE_DETECTION
            fprintf(stderr, "CPUID page: 0x%.8lx, eax: 0x%.8lx, ebx: 0x%.8lx, ecx: 0x%.8lx, edx: 0x%.8lx\n",
                (unsigned long) (page + 0x80000002), (unsigned long) a,
                (unsigned long) b, (unsigned long) c, (unsigned long) d);
#endif
            // Each register holds four characters, least significant
            // byte first; the registers are in the order EAX, EBX, ECX, EDX.
            for (byte = 0; byte < 4; byte++) {
                shift = byte * 8;
                buff[page*4*4 + 0 + byte] = (char)(0xFF & (a >> shift));
                buff[page*4*4 + 4 + byte] = (char)(0xFF & (b >> shift));
                buff[page*4*4 + 8 + byte] = (char)(0xFF & (c >> shift));
                buff[page*4*4 + 12 + byte] = (char)(0xFF & (d >> shift));
            }
        }
        // Add a null byte.
        buff[CPUID_BRAND_STRING_SIZE - 1] = 0;
#if MR_DEBUG_CPU_FEATURE_DETECTION
        fprintf(stderr, "CPUID Brand string: %s\n", buff);
#endif

        MR_cpu_cycles_per_sec = parse_freq_from_x86_brand_string(buff);
#if MR_DEBUG_CPU_FEATURE_DETECTION
        if (MR_cpu_cycles_per_sec == 0) {
            fprintf(stderr, "Failed to detect cycles per second "
                "you can probably blame AMD for this.\n");
        } else {
            fprintf(stderr, "Cycles per second: %llu\n",
                (unsigned long long) MR_cpu_cycles_per_sec);
        }
#endif
    }
#endif // MR_GNUC && (__i386__ || __x86_64__)
}
|
|
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
static MR_uint_least64_t
|
|
parse_freq_from_x86_brand_string(char *string)
|
|
{
|
|
unsigned int brand_string_len;
|
|
unsigned int i;
|
|
double multiplier;
|
|
int freq_index = -1;
|
|
|
|
brand_string_len = strlen(string);
|
|
|
|
// There will be at least five characters if we can parse this, three
|
|
// for the '?Hz' suffix, at least one for the units, plus a space at
|
|
// the beginning of the number.
|
|
|
|
if (!(brand_string_len > 5))
|
|
return 0;
|
|
|
|
if (!((string[brand_string_len - 1] == 'z') &&
|
|
(string[brand_string_len - 2] == 'H'))) {
|
|
return 0;
|
|
}
|
|
|
|
switch (string[brand_string_len - 3]) {
|
|
case 'M':
|
|
multiplier = 1000000.0;
|
|
break;
|
|
case 'G':
|
|
multiplier = 1000000000.0;
|
|
break;
|
|
case 'T':
|
|
// Yes, this is defined in the specification, Intel have some
|
|
// strong ambitions regarding Moore's law. :-)
|
|
// We include it here to conform with the standard.
|
|
|
|
multiplier = 1000000000000.0;
|
|
break;
|
|
default:
|
|
return 0;
|
|
}
|
|
|
|
// Search for the beginning of the digits.
|
|
for (i = brand_string_len - 4; i >= 0; i--) {
|
|
if (string[i] == ' ') {
|
|
freq_index = i+1;
|
|
break;
|
|
}
|
|
}
|
|
if (freq_index == -1) {
|
|
// We didn't find the beginning of the frequency.
|
|
return 0;
|
|
}
|
|
|
|
// If strtod fails it returns zero, so if we fail to parse a number here,
|
|
// we will return zero, which our caller understands as a parsing failure.
|
|
|
|
return (MR_uint_least64_t)(strtod(&string[freq_index], NULL) * multiplier);
|
|
}
|
|
#endif // MR_GNUC && (__i386__ || __x86_64__)
|
|
|
|
void
|
|
MR_profiling_start_timer(MR_Timer *timer)
|
|
{
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
// If we don't have enough data to fill in all the fields of this structure
|
|
// we leave them alone, we won't check them later without checking
|
|
// MR_rdtsc{p}_is_available first.
|
|
|
|
if (MR_rdtscp_is_available) {
|
|
MR_rdtscp(&(timer->MR_timer_time), &(timer->MR_timer_processor_id));
|
|
} else if (MR_rdtsc_is_available) {
|
|
MR_rdtsc(&(timer->MR_timer_time));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void
|
|
MR_profiling_stop_timer(MR_Timer *timer, MR_Stats *stats)
|
|
{
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
MR_Timer now;
|
|
MR_int_least64_t duration;
|
|
MR_uint_least64_t duration_squared;
|
|
|
|
if (MR_rdtscp_is_available) {
|
|
MR_rdtscp(&(now.MR_timer_time), &(now.MR_timer_processor_id));
|
|
if (timer->MR_timer_processor_id == now.MR_timer_processor_id) {
|
|
duration = now.MR_timer_time - timer->MR_timer_time;
|
|
duration_squared = duration * duration;
|
|
MR_atomic_inc_uint(&(stats->MR_stat_count_recorded));
|
|
#if MR_LOW_TAG_BITS >= 3
|
|
MR_atomic_add_int(&(stats->MR_stat_sum), duration);
|
|
MR_atomic_add_uint(&(stats->MR_stat_sum_squares), duration_squared);
|
|
#else
|
|
MR_US_SPIN_LOCK(&(stats->MR_stat_sums_lock));
|
|
stats->MR_stat_sum += duration;
|
|
stats->MR_stat_sum_squares += duration_squared;
|
|
MR_US_UNLOCK(&(stats->MR_stat_sums_lock));
|
|
#endif
|
|
} else {
|
|
MR_atomic_inc_uint(&(stats->MR_stat_count_not_recorded));
|
|
}
|
|
} else if (MR_rdtsc_is_available) {
|
|
MR_rdtsc(&(now.MR_timer_time));
|
|
duration = now.MR_timer_time - timer->MR_timer_time;
|
|
duration_squared = duration * duration;
|
|
MR_atomic_inc_uint(&(stats->MR_stat_count_recorded));
|
|
#if MR_LOW_TAG_BITS >= 3
|
|
MR_atomic_add_int(&(stats->MR_stat_sum), duration);
|
|
MR_atomic_add_uint(&(stats->MR_stat_sum_squares), duration_squared);
|
|
#else
|
|
MR_US_SPIN_LOCK(&(stats->MR_stat_sums_lock));
|
|
stats->MR_stat_sum += duration;
|
|
stats->MR_stat_sum_squares += duration_squared;
|
|
MR_US_UNLOCK(&(stats->MR_stat_sums_lock));
|
|
#endif
|
|
}
|
|
#else // not MR_GNUC && (__i386__ || __x86_64__)
|
|
// No TSC support on this architecture or with this C compiler.
|
|
MR_atomic_inc_int(&(stats->MR_stat_count_recorded));
|
|
#endif // not MR_GNUC && (__i386__ || __x86_64__)
|
|
}
|
|
|
|
// The TSC works and MR_cpu_cycles_per_sec is nonzero.
|
|
|
|
extern MR_bool
|
|
MR_tsc_is_sensible(void)
|
|
{
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
return ((MR_rdtscp_is_available || MR_rdtsc_is_available) &&
|
|
(MR_cpu_cycles_per_sec != 0));
|
|
#else
|
|
return MR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
MR_uint_least64_t
|
|
MR_read_cpu_tsc(void)
|
|
{
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
MR_uint_least64_t tsc;
|
|
|
|
if (MR_rdtsc_is_available) {
|
|
MR_rdtsc(&tsc);
|
|
} else {
|
|
tsc = 0;
|
|
}
|
|
return tsc;
|
|
#else // not MR_GNUC && (__i386__ || __x86_64__)
|
|
return 0;
|
|
#endif // not MR_GNUC && (__i386__ || __x86_64__)
|
|
}
|
|
|
|
// It is convenient that this instruction is the same on both i386 and x86_64.
|
|
|
|
#if defined(MR_GNUC) && (defined(__i386__) || defined(__x86_64__))
|
|
|
|
static __inline__ void
|
|
MR_cpuid(MR_Unsigned code, MR_Unsigned sub_code,
|
|
MR_Unsigned *a, MR_Unsigned *b, MR_Unsigned *c, MR_Unsigned *d)
|
|
{
|
|
#ifdef __x86_64__
|
|
__asm__("cpuid"
|
|
: "=a"(*a), "=b"(*b), "=c"(*c), "=d"(*d)
|
|
: "0"(code), "2"(sub_code));
|
|
#elif defined(__i386__)
|
|
// i386 is more register staved, in particular we can't use ebx in
|
|
// position independent code. And we can't move ebx into another
|
|
// general purpose register, between register pinning, PIC, the
|
|
// stack and frame pointers and the other registers used by CPUID
|
|
// there are literally no general purpose registers left on i386.
|
|
|
|
__asm__("pushl %%ebx; \
|
|
cpuid; \
|
|
movl %%ebx, %1; \
|
|
popl %%ebx;"
|
|
: "=a"(*a), "=m"(*b), "=c"(*c), "=d"(*d)
|
|
: "0"(code), "2"(sub_code)
|
|
: "memory");
|
|
#endif
|
|
}
|
|
|
|
static __inline__ void
|
|
MR_rdtscp(MR_uint_least64_t *tsc, MR_Unsigned *processor_id)
|
|
{
|
|
MR_Unsigned tsc_low;
|
|
MR_Unsigned tsc_high;
|
|
|
|
// On 64bit systems the high 32 bits of RAX and RDX are 0 filled by
|
|
// rdtsc{p}.
|
|
|
|
__asm__("rdtscp"
|
|
: "=a"(tsc_low), "=d"(tsc_high), "=c"(*processor_id));
|
|
|
|
*tsc = tsc_high;
|
|
*tsc = *tsc << 32;
|
|
*tsc |= tsc_low;
|
|
}
|
|
|
|
static __inline__ void
|
|
MR_rdtsc(MR_uint_least64_t *tsc)
|
|
{
|
|
MR_Unsigned tsc_low;
|
|
MR_Unsigned tsc_high;
|
|
|
|
__asm__("rdtsc"
|
|
: "=a"(tsc_low), "=d"(tsc_high));
|
|
|
|
*tsc = tsc_high;
|
|
*tsc = *tsc << 32;
|
|
*tsc |= tsc_low;
|
|
}
|
|
|
|
#endif // MR_GNUC && (__i386__ || __x86_64__)
|
|
|
|
#endif // MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
|