currently holding it did not allocate a new context ID.  A user looking at
this behaviour from threadscope would see thread 27 (for instance) finish and
then immediately begin executing again.  Therefore we now allocate a new
context ID when a context is reused, making the reused context look distinct
from threadscope's point of view.  New context IDs are already allocated to
contexts taken from the free context lists.

runtime/mercury_context.c:
    As above.
    The next context ID variable is now accessed atomically rather than
    being protected by the free context list lock.

runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
    Implement a new atomic operation, MR_atomic_add_and_fetch_int; it is
    used to allocate context IDs.
    Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int
    when handwritten assembler support is not available.

runtime/mercury_atomic_ops.c:
    Re-order the atomic operations to match the order in the header file.

runtime/mercury_atomic_ops.h:
    Place the definition of the MR_ATOMIC_PAUSE macro before the other
    atomic operations, since MR_atomic_add_and_fetch_int depends on it.
    This also conforms with the coding standard.

runtime/mercury_threadscope.h:
    Make the Context ID type an MR_Integer to match the argument size of
    the available atomic operations.
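
A rough sketch of the allocation scheme described above is given below; the
variable MR_next_context_id and the field c_id are illustrative assumptions,
not necessarily the names used in mercury_context.c.

    /* Hypothetical sketch of lock-free context ID allocation. */
    static volatile MR_Integer MR_next_context_id = 0;

    static void
    allocate_context_id(MR_Context *ctxt)
    {
        /* Atomically claim the next ID without holding the free list lock. */
        ctxt->c_id = MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
    }
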
/*
** vim:ts=4 sw=4 expandtab
*/

/*
** Copyright (C) 2007, 2009-2010 The University of Melbourne.
** This file may only be copied under the terms of the GNU Library General
** Public License - see the file COPYING.LIB in the Mercury distribution.
*/

/*
** mercury_atomic.h - defines atomic operations and other primitives used by
** the parallel runtime.
*/

#ifndef MERCURY_ATOMIC_OPS_H
#define MERCURY_ATOMIC_OPS_H

#include "mercury_std.h"

/*---------------------------------------------------------------------------*/

#if defined(MR_LL_PARALLEL_CONJ)

/*
** Intel and AMD support a pause instruction that is roughly equivalent to a
** no-op.  Intel recommends that it be used in spin-loops to improve
** performance.  Without a pause instruction, multiple simultaneous
** read-requests for the synchronization variable will be in flight from a
** single thread.  Using the pause instruction causes these to be executed in
** sequence, allowing the processor to handle the change in the
** synchronization variable more easily.
**
** On some chips it may cause the spin-loop to use less power.
**
** This instruction was introduced with the Pentium 4 but is backwards
** compatible.  This works because the two-byte instruction for PAUSE is
** equivalent to the NOP instruction prefixed by REPE, so older processors
** simply perform a no-op.
**
** This is not really an atomic instruction but we name it MR_ATOMIC_PAUSE
** for consistency.
**
** References: Intel and AMD documentation for PAUSE, Intel optimisation
** guide.
*/
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
        !defined(MR_DO_NOT_USE_CPU_RELAX)

#define MR_ATOMIC_PAUSE                                                     \
    do {                                                                    \
        __asm__ __volatile__("pause");                                      \
    } while (0)

#else

/* Fall back to a no-op. */
#define MR_ATOMIC_PAUSE                                                     \
    do {                                                                    \
        ;                                                                   \
    } while (0)

#endif
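
/*
** Illustrative sketch only, not part of this interface: a busy-wait loop is
** expected to place MR_ATOMIC_PAUSE in its body, as the MR_US_SPIN_LOCK and
** MR_US_SPIN_COND macros near the end of this file do.  The flag variable
** used here is hypothetical.
**
**     volatile MR_Unsigned flag = 0;
**
**     while (!flag) {
**         MR_ATOMIC_PAUSE;
**     }
*/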

/*---------------------------------------------------------------------------*/

/*
** Declarations for inline atomic operations.
*/

/*
** If the value at addr is equal to old, assign new_val to addr and return
** true.  Otherwise return false.
*/
MR_EXTERN_INLINE MR_bool
MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
        MR_Integer new_val);

/*
** Atomically add to an integer in memory and retrieve the result; in other
** words, an atomic pre-increment operation.
*/
MR_EXTERN_INLINE MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);

/*
** Atomically add the second argument to the memory pointed to by the first
** argument.
*/
MR_EXTERN_INLINE void
MR_atomic_add_int(volatile MR_Integer *addr, MR_Integer addend);

/*
** Atomically subtract the second argument from the memory pointed to by the
** first argument.
*/
MR_EXTERN_INLINE void
MR_atomic_sub_int(volatile MR_Integer *addr, MR_Integer x);

/*
** Increment the word pointed at by the address.
*/
MR_EXTERN_INLINE void
MR_atomic_inc_int(volatile MR_Integer *addr);

/*
** Decrement the word pointed at by the address.
*/
MR_EXTERN_INLINE void
MR_atomic_dec_int(volatile MR_Integer *addr);

/*
** Decrement the integer pointed at by the address and return true iff it is
** zero after the decrement.
*/
MR_EXTERN_INLINE MR_bool
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
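
/*
** Illustrative sketch only, assumed usage rather than part of this interface:
** MR_atomic_dec_int_and_is_zero supports reference-count style cleanup, where
** the last thread to release a shared object is the one that frees it.  The
** structure, field and function names below are hypothetical.
**
**     if (MR_atomic_dec_int_and_is_zero(&res->ref_count)) {
**         destroy_resource(res);
**     }
*/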

/*
** For information about GCC's builtins for atomic operations see:
** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
*/

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
        !defined(MR_AVOID_COMPILER_INTRINSICS)

/*
** gcc 4.1 and above have builtin atomic operations.
*/
#define MR_COMPARE_AND_SWAP_WORD_BODY                                       \
    do {                                                                    \
        return __sync_bool_compare_and_swap(addr, old, new_val);            \
    } while (0)

#elif defined(__GNUC__) && defined(__x86_64__)

#define MR_COMPARE_AND_SWAP_WORD_BODY                                       \
    do {                                                                    \
        char result;                                                        \
                                                                            \
        __asm__ __volatile__(                                               \
            "lock; cmpxchgq %4, %0; setz %1"                                \
            : "=m"(*addr), "=q"(result), "=a"(old)                          \
            : "m"(*addr), "r"(new_val), "a"(old)                            \
            );                                                              \
        return (int) result;                                                \
    } while (0)

#elif defined(__GNUC__) && defined(__i386__)

/* Really 486 or better. */
#define MR_COMPARE_AND_SWAP_WORD_BODY                                       \
    do {                                                                    \
        char result;                                                        \
                                                                            \
        __asm__ __volatile__(                                               \
            "lock; cmpxchgl %4, %0; setz %1"                                \
            : "=m"(*addr), "=q"(result), "=a"(old)                          \
            : "m"(*addr), "r"(new_val), "a"(old)                            \
            );                                                              \
        return (int) result;                                                \
    } while (0)

#endif

#ifdef MR_COMPARE_AND_SWAP_WORD_BODY
MR_EXTERN_INLINE MR_bool
MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
        MR_Integer new_val)
{
    MR_COMPARE_AND_SWAP_WORD_BODY;
}
#endif

/*---------------------------------------------------------------------------*/

#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
        !defined(MR_AVOID_COMPILER_INTRINSICS)

#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY                                    \
    do {                                                                    \
        return __sync_add_and_fetch(addr, addend);                          \
    } while (0)

#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
/*
** If there is no GCC builtin for this, then it can be implemented in terms of
** compare and swap, assuming that compare and swap has been implemented in
** assembler for this architecture.
*/
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY                                    \
    do {                                                                    \
        MR_Integer temp;                                                    \
        temp = *addr;                                                       \
        while (!MR_compare_and_swap_word(addr, temp, temp + addend)) {      \
            MR_ATOMIC_PAUSE;                                                \
            temp = *addr;                                                   \
        }                                                                   \
        return temp + addend;                                               \
    } while (0)

#endif

#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
MR_EXTERN_INLINE MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
{
    MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
}
#endif

/*---------------------------------------------------------------------------*/

#if defined(__GNUC__) && defined(__x86_64__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

#define MR_ATOMIC_ADD_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; addq %2, %0"                                             \
            : "=m"(*addr)                                                   \
            : "m"(*addr), "r"(addend)                                       \
            );                                                              \
    } while (0)

#elif defined(__GNUC__) && defined(__i386__)

#define MR_ATOMIC_ADD_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; addl %2, %0;"                                            \
            : "=m"(*addr)                                                   \
            : "m"(*addr), "r"(addend)                                       \
            );                                                              \
    } while (0)

#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)

#define MR_ATOMIC_ADD_INT_BODY                                              \
    do {                                                                    \
        MR_atomic_add_and_fetch_int(addr, addend);                          \
    } while (0)

#endif

#ifdef MR_ATOMIC_ADD_INT_BODY
MR_EXTERN_INLINE void
MR_atomic_add_int(volatile MR_Integer *addr, MR_Integer addend)
{
    MR_ATOMIC_ADD_INT_BODY;
}
#endif

/*---------------------------------------------------------------------------*/

#if defined(__GNUC__) && defined(__x86_64__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

#define MR_ATOMIC_SUB_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; subq %2, %0;"                                            \
            : "=m"(*addr)                                                   \
            : "m"(*addr), "r"(x)                                            \
            );                                                              \
    } while (0)

#elif defined(__GNUC__) && defined(__i386__)

#define MR_ATOMIC_SUB_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; subl %2, %0;"                                            \
            : "=m"(*addr)                                                   \
            : "m"(*addr), "r"(x)                                            \
            );                                                              \
    } while (0)

#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

#define MR_ATOMIC_SUB_INT_BODY                                              \
    do {                                                                    \
        __sync_sub_and_fetch(addr, x);                                      \
    } while (0)

#endif

#ifdef MR_ATOMIC_SUB_INT_BODY
MR_EXTERN_INLINE void
MR_atomic_sub_int(volatile MR_Integer *addr, MR_Integer x)
{
    MR_ATOMIC_SUB_INT_BODY;
}
#endif

/*---------------------------------------------------------------------------*/

#if defined(__GNUC__) && defined(__x86_64__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

#define MR_ATOMIC_INC_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; incq %0;"                                                \
            : "=m"(*addr)                                                   \
            : "m"(*addr)                                                    \
            );                                                              \
    } while (0)

#elif defined(__GNUC__) && defined(__i386__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

/* Really 486 or better. */
#define MR_ATOMIC_INC_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; incl %0;"                                                \
            : "=m"(*addr)                                                   \
            : "m"(*addr)                                                    \
            );                                                              \
    } while (0)

#else

/*
** Fall back to an atomic add 1 operation.
**
** We could fall back to the built-in GCC intrinsics, but they also fetch the
** value.  I believe this is more efficient.
** - pbone
*/
#define MR_ATOMIC_INC_INT_BODY                                              \
    MR_atomic_add_int(addr, 1)

#endif

#ifdef MR_ATOMIC_INC_INT_BODY
MR_EXTERN_INLINE void
MR_atomic_inc_int(volatile MR_Integer *addr)
{
    MR_ATOMIC_INC_INT_BODY;
}
#endif

/*---------------------------------------------------------------------------*/

#if defined(__GNUC__) && defined(__x86_64__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

#define MR_ATOMIC_DEC_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; decq %0;"                                                \
            : "=m"(*addr)                                                   \
            : "m"(*addr)                                                    \
            );                                                              \
    } while (0)

#elif defined(__GNUC__) && defined(__i386__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

/* Really 486 or better. */
#define MR_ATOMIC_DEC_INT_BODY                                              \
    do {                                                                    \
        __asm__ __volatile__(                                               \
            "lock; decl %0;"                                                \
            : "=m"(*addr)                                                   \
            : "m"(*addr)                                                    \
            );                                                              \
    } while (0)

#else

/*
** Fall back to an atomic subtract 1 operation.
*/
#define MR_ATOMIC_DEC_INT_BODY                                              \
    MR_atomic_sub_int(addr, 1)

#endif

#ifdef MR_ATOMIC_DEC_INT_BODY
MR_EXTERN_INLINE void
MR_atomic_dec_int(volatile MR_Integer *addr)
{
    MR_ATOMIC_DEC_INT_BODY;
}
#endif

/*---------------------------------------------------------------------------*/

/*
** Note that on x86(_64) we have to use the sub instruction rather than the
** dec instruction, because we need it to set the CPU flags.
*/
#if defined(__GNUC__) && defined(__x86_64__) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

/*
** This could be trivially implemented using the __sync_sub_and_fetch compiler
** intrinsic.  However, on x86(_64) that would use a compare and exchange
** loop; we can avoid it because we do not need to retrieve the result of the
** subtraction, only to know whether it reached zero.
*/
#define MR_ATOMIC_DEC_INT_AND_IS_ZERO_BODY                                  \
    do {                                                                    \
        char is_zero;                                                       \
        __asm__(                                                            \
            "lock; subq $1, %0; setz %1"                                    \
            : "=m"(*addr), "=q"(is_zero)                                    \
            : "m"(*addr)                                                    \
            );                                                              \
        return (MR_bool) is_zero;                                           \
    } while (0)

#elif defined(__GNUC__) && defined(__i386__)

#define MR_ATOMIC_DEC_INT_AND_IS_ZERO_BODY                                  \
    do {                                                                    \
        char is_zero;                                                       \
        __asm__(                                                            \
            "lock; subl $1, %0; setz %1"                                    \
            : "=m"(*addr), "=q"(is_zero)                                    \
            : "m"(*addr)                                                    \
            );                                                              \
        return (MR_bool) is_zero;                                           \
    } while (0)

#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

#define MR_ATOMIC_DEC_INT_AND_IS_ZERO_BODY                                  \
    do {                                                                    \
        return (__sync_sub_and_fetch(addr, 1) == 0);                        \
    } while (0)

#endif

#ifdef MR_ATOMIC_DEC_INT_AND_IS_ZERO_BODY
MR_EXTERN_INLINE MR_bool
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)
{
    MR_ATOMIC_DEC_INT_AND_IS_ZERO_BODY;
}
#endif

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

/*
** Memory fence operations.
*/
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
        !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)

/*
** Guarantees that any stores executed before this fence are globally
** visible before those after this fence.
*/
#define MR_CPU_SFENCE                                                       \
    do {                                                                    \
        __asm__ __volatile__("sfence");                                     \
    } while (0)

/*
** Guarantees that any loads executed before this fence are complete before
** any loads after this fence.
*/
#define MR_CPU_LFENCE                                                       \
    do {                                                                    \
        __asm__ __volatile__("lfence");                                     \
    } while (0)

/*
** A combination of the above.
*/
#define MR_CPU_MFENCE                                                       \
    do {                                                                    \
        __asm__ __volatile__("mfence");                                     \
    } while (0)

#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)

/*
** The handwritten fences above are finer-grained than GCC's builtin, which
** only implements a full fence.
*/
#define MR_CPU_MFENCE                                                       \
    do {                                                                    \
        __sync_synchronize();                                               \
    } while (0)
#define MR_CPU_SFENCE MR_CPU_MFENCE
#define MR_CPU_LFENCE MR_CPU_MFENCE

#else

#pragma error "Please implement memory fence operations for this " \
        "compiler/architecture"

#endif
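
/*
** Illustrative sketch only (assumed usage, not part of this interface):
** a store fence lets a producer publish data before setting a flag, and a
** consumer pairs it with a load fence after seeing the flag.  The variable
** names here are hypothetical.
**
** Producer:
**     data = 42;
**     MR_CPU_SFENCE;
**     ready = 1;
**
** Consumer:
**     while (!ready) {
**         MR_ATOMIC_PAUSE;
**     }
**     MR_CPU_LFENCE;
**     use(data);
*/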

/*---------------------------------------------------------------------------*/

/*
** Roll our own cheap user-space mutual exclusion locks.  Blocking without
** spinning is not supported.  Storage for these locks should be volatile.
**
** I expect these to be faster than pthread mutexes when threads are pinned
** and critical sections are short.
*/
typedef MR_Unsigned MR_Us_Lock;

#define MR_US_LOCK_INITIAL_VALUE (0)

#define MR_US_TRY_LOCK(x)                                                   \
    MR_compare_and_swap_word(x, 0, 1)

#define MR_US_SPIN_LOCK(x)                                                  \
    do {                                                                    \
        while (!MR_compare_and_swap_word(x, 0, 1)) {                        \
            MR_ATOMIC_PAUSE;                                                \
        }                                                                   \
    } while (0)

#define MR_US_UNLOCK(x)                                                     \
    do {                                                                    \
        MR_CPU_MFENCE;                                                      \
        *x = 0;                                                             \
    } while (0)
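
/*
** Illustrative sketch only (assumed usage, not part of this interface):
** the lock and counter names here are hypothetical.
**
**     static volatile MR_Us_Lock counter_lock = MR_US_LOCK_INITIAL_VALUE;
**     static MR_Integer          counter = 0;
**
**     MR_US_SPIN_LOCK(&counter_lock);
**     counter++;
**     MR_US_UNLOCK(&counter_lock);
*/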

/*
** Similar support for condition variables.  Again, make sure that storage
** for these is declared as volatile.
**
** XXX: These are not atomic.  A waiting thread will not see a change until
** sometime after the signaling thread has signaled the condition.  The same
** race can occur when clearing a condition.  Order of memory operations is
** not guaranteed either.
*/
typedef MR_Unsigned MR_Us_Cond;

#define MR_US_COND_CLEAR(x)                                                 \
    do {                                                                    \
        MR_CPU_MFENCE;                                                      \
        *x = 0;                                                             \
    } while (0)

#define MR_US_COND_SET(x)                                                   \
    do {                                                                    \
        MR_CPU_MFENCE;                                                      \
        *x = 1;                                                             \
        MR_CPU_MFENCE;                                                      \
    } while (0)

#define MR_US_SPIN_COND(x)                                                  \
    do {                                                                    \
        while (!(*x)) {                                                     \
            MR_ATOMIC_PAUSE;                                                \
        }                                                                   \
        MR_CPU_MFENCE;                                                      \
    } while (0)
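
/*
** Illustrative sketch only (assumed usage, not part of this interface):
** one thread signals that initialisation is done while other threads spin
** until they see the flag.  The variable name here is hypothetical.
**
**     static volatile MR_Us_Cond init_done;
**
**     MR_US_COND_CLEAR(&init_done);
**     ...
**     MR_US_COND_SET(&init_done);     set by the initialising thread
**
**     MR_US_SPIN_COND(&init_done);    waited on by the other threads
*/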

#endif /* MR_LL_PARALLEL_CONJ */

/*
** If we don't have definitions available for this compiler or architecture,
** then we will get a link error in low-level .par grades.  No other grades
** currently require any atomic ops.
*/

/*---------------------------------------------------------------------------*/

#if defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)

/*
** Declarations for profiling the parallel runtime.
*/

typedef struct {
    MR_Unsigned         MR_stat_count_recorded;
    MR_Unsigned         MR_stat_count_not_recorded;
    /*
    ** The total number of times this event occurred is implicitly the
    ** sum of the recorded and not_recorded counts.
    */
    MR_int_least64_t    MR_stat_sum;
    MR_uint_least64_t   MR_stat_sum_squares;
    /*
    ** The sum of squares is used to calculate variance and standard
    ** deviation.
    */
} MR_Stats;

typedef struct {
    MR_uint_least64_t   MR_timer_time;
    MR_Unsigned         MR_timer_processor_id;
} MR_Timer;

/*
** The number of CPU clock cycles per second, i.e. a 1GHz CPU will have a
** value of 10^9; zero if unknown.
** This value is only available after MR_do_cpu_feature_detection() has been
** called.
*/
extern MR_uint_least64_t MR_cpu_cycles_per_sec;

/*
** Do CPU feature detection; this is necessary for profiling parallel code
** execution and the threadscope code.
** On i386 and x86_64 machines this uses CPUID to determine if the RDTSCP
** instruction is available and not prohibited by the OS.
** This function is idempotent.
*/
extern void
MR_do_cpu_feature_detection(void);

/*
** Start and initialize a timer structure.
*/
extern void
MR_profiling_start_timer(MR_Timer *timer);

/*
** Stop the timer and update stats with the results.
*/
extern void
MR_profiling_stop_timer(MR_Timer *timer, MR_Stats *stats);

/*
** Read the CPU's TSC.  This is currently only implemented for i386 and
** x86-64 systems.  It returns 0 when support is not available.
*/
extern MR_uint_least64_t
MR_read_cpu_tsc(void);
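
/*
** Illustrative sketch only (assumed usage, not part of this interface):
** wrap a region of interest with a timer and accumulate the results into an
** MR_Stats structure.  The names work_stats and do_work are hypothetical.
**
**     static MR_Stats work_stats;
**     MR_Timer        timer;
**
**     MR_profiling_start_timer(&timer);
**     do_work();
**     MR_profiling_stop_timer(&timer, &work_stats);
*/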

#endif /* MR_PROFILE_PARALLEL_EXECUTION_SUPPORT */

/*---------------------------------------------------------------------------*/

#endif /* not MERCURY_ATOMIC_OPS_H */