When an engine steals a spark and executes it using the context it is
already holding, it previously did not allocate a new context ID. A user
looking at this behaviour in threadscope would see thread 27 (for instance)
finish and then immediately begin executing again. Therefore we now allocate
a new context ID when a context is reused, making the reused context look
like a new one from threadscope's point of view. New context IDs are already
allocated to contexts taken from the free context lists.
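
As an illustration of the scheme (a minimal sketch in plain C using a GCC
builtin; the names here are made up for the example and are not the
runtime's actual declarations):

    /* Sketch: a lock-free context ID allocator. Each call returns a
    ** distinct value even with concurrent callers, because the increment
    ** and the read happen as one atomic step. */
    static volatile long next_context_id = 0;

    static long
    allocate_id(void)
    {
        /* Atomic pre-increment; no lock required. */
        return __sync_add_and_fetch(&next_context_id, 1);
    }

Assigning a reused context the result of such an allocation is what makes
it appear as fresh work in the threadscope trace.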
runtime/mercury_context.c:
As above.
The variable holding the next context ID is now accessed atomically rather
than being protected by the free context list lock.
runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
Implement a new atomic operation, MR_atomic_add_and_fetch_int; this is
used to allocate context IDs (see the sketch following this log).
Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int when
handwritten assembler support is not available.
runtime/mercury_atomic_ops.c:
Re-order atomic operations to match the order in the header file.
runtime/mercury_atomic_ops.h:
Place the definition of the MR_ATOMIC_PAUSE macro before the other atomic
operations, since MR_atomic_add_and_fetch_int depends on it. This also
conforms to the coding standard.
runtime/mercury_threadscope.h:
Make the context ID type an MR_Integer to match the argument size of the
available atomic operations.
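
The compare-and-swap fallback for the new operation, used when the GCC
builtin is unavailable, can be sketched as follows (illustrative C only;
the function name is invented here, and the real code is the
MR_ATOMIC_ADD_AND_FETCH_INT_BODY macro in the diff below):

    /* Sketch: add-and-fetch built from a compare-and-swap retry loop. */
    static long
    add_and_fetch_via_cas(volatile long *addr, long addend)
    {
        long old = *addr;
        /* If another thread updates *addr between our read and the swap,
        ** the CAS fails and we re-read and retry. */
        while (!__sync_bool_compare_and_swap(addr, old, old + addend)) {
            old = *addr;
        }
        return old + addend;
    }

Pausing between retries (MR_ATOMIC_PAUSE in the real macro) keeps the loop
from flooding the memory system with reads while it waits for the swap to
succeed.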
--- a/runtime/mercury_atomic_ops.c
+++ b/runtime/mercury_atomic_ops.c
@@ -33,20 +33,11 @@ MR_OUTLINE_DEFN(
 )
 
 MR_OUTLINE_DEFN(
-    void
-    MR_atomic_inc_int(volatile MR_Integer *addr)
+    MR_Integer
+    MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
 ,
     {
-        MR_ATOMIC_INC_INT_BODY;
-    }
-)
-
-MR_OUTLINE_DEFN(
-    void
-    MR_atomic_dec_int(volatile MR_Integer *addr)
-,
-    {
-        MR_ATOMIC_DEC_INT_BODY;
+        MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
     }
 )
 
@@ -68,6 +59,24 @@ MR_OUTLINE_DEFN(
     }
 )
 
+MR_OUTLINE_DEFN(
+    void
+    MR_atomic_inc_int(volatile MR_Integer *addr)
+,
+    {
+        MR_ATOMIC_INC_INT_BODY;
+    }
+)
+
+MR_OUTLINE_DEFN(
+    void
+    MR_atomic_dec_int(volatile MR_Integer *addr)
+,
+    {
+        MR_ATOMIC_DEC_INT_BODY;
+    }
+)
+
 MR_OUTLINE_DEFN(
     MR_bool
     MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)
--- a/runtime/mercury_atomic_ops.h
+++ b/runtime/mercury_atomic_ops.h
@@ -21,6 +21,48 @@
 
 #if defined(MR_LL_PARALLEL_CONJ)
 
+/*
+ * Intel and AMD support a pause instruction that is roughly equivalent
+ * to a no-op. Intel recommend that it is used in spin-loops to improve
+ * performance. Without a pause instruction multiple simultaneous
+ * read-requests will be in-flight for the synchronization variable from a
+ * single thread. Giving the pause instruction causes these to be executed
+ * in sequence allowing the processor to handle the change in the
+ * synchronization variable more easily.
+ *
+ * On some chips it may cause the spin-loop to use less power.
+ *
+ * This instruction was introduced with the Pentium 4 but is backwards
+ * compatible, This works because the two byte instruction for PAUSE is
+ * equivalent to the NOP instruction prefixed by REPE. Therefore older
+ * processors perform a no-op.
+ *
+ * This is not really an atomic instruction but we name it
+ * MR_ATOMIC_PAUSE for consistency.
+ *
+ * References: Intel and AMD documentation for PAUSE, Intel optimisation
+ * guide.
+ */
+#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
+        !defined(MR_DO_NOT_USE_CPU_RELAX)
+
+#define MR_ATOMIC_PAUSE \
+    do { \
+        __asm__ __volatile__("pause"); \
+    } while(0)
+
+#else
+
+/* Fall back to a no-op */
+#define MR_ATOMIC_PAUSE \
+    do { \
+        ; \
+    } while(0)
+
+#endif
+
+/*---------------------------------------------------------------------------*/
+
 /*
 ** Declarations for inline atomic operations.
 */
@@ -33,6 +75,13 @@ MR_EXTERN_INLINE MR_bool
 MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
     MR_Integer new_val);
 
+/*
+** Atomically add to an integer in memory and retrieve the result. In other
+** words an atomic pre-increment operation.
+*/
+MR_EXTERN_INLINE MR_Integer
+MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);
+
 /*
 ** Atomically add the second argument to the memory pointed to by the first
 ** argument.
@@ -66,6 +115,11 @@ MR_atomic_dec_int(volatile MR_Integer *addr);
 MR_EXTERN_INLINE MR_bool
 MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 
+/*
+** For information about GCC's builtins for atomic operations see:
+** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
+*/
+
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
 
@@ -122,6 +176,43 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 
 /*---------------------------------------------------------------------------*/
 
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
+        !defined(MR_AVOID_COMPILER_INTRINSICS)
+
+#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
+    do { \
+        return __sync_add_and_fetch(addr, addend); \
+    } while (0)
+
+#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
+/*
+** If there is no GCC builtin for this then it can be implemented in terms
+** of compare and swap, assuming that that has been implemented in
+** assembler for this architecture.
+*/
+#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
+    do { \
+        MR_Integer temp; \
+        temp = *addr; \
+        while (!MR_compare_and_swap_word(addr, temp, temp+addend)) { \
+            MR_ATOMIC_PAUSE; \
+            temp = *addr; \
+        } \
+        return temp+addend; \
+    } while (0)
+
+#endif
+
+#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
+MR_EXTERN_INLINE MR_Integer
+MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
+{
+    MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+
 #if defined(__GNUC__) && defined(__x86_64__) && \
         !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
 
@@ -145,11 +236,11 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
         ); \
     } while (0)
 
-#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)
 
 #define MR_ATOMIC_ADD_INT_BODY \
     do { \
-        __sync_add_and_fetch(addr, addend); \
+        MR_atomic_add_and_fetch_int(addr, addend); \
     } while (0)
 
 #endif
@@ -358,48 +449,6 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
 
-/*
- * Intel and AMD support a pause instruction that is roughly equivalent
- * to a no-op. Intel recommend that it is used in spin-loops to improve
- * performance. Without a pause instruction multiple simultaneous
- * read-requests will be in-flight for the synchronization variable from a
- * single thread. Giving the pause instruction causes these to be executed
- * in sequence allowing the processor to handle the change in the
- * synchronization variable more easily.
- *
- * On some chips it may cause the spin-loop to use less power.
- *
- * This instruction was introduced with the Pentium 4 but is backwards
- * compatible, This works because the two byte instruction for PAUSE is
- * equivalent to the NOP instruction prefixed by REPE. Therefore older
- * processors perform a no-op.
- *
- * This is not really an atomic instruction but we name it
- * MR_ATOMIC_PAUSE for consistency.
- *
- * References: Intel and AMD documentation for PAUSE, Intel optimisation
- * guide.
- */
-#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
-        !defined(MR_DO_NOT_USE_CPU_RELAX)
-
-#define MR_ATOMIC_PAUSE \
-    do { \
-        __asm__ __volatile__("pause"); \
-    } while(0)
-
-#else
-
-/* Fall back to a no-op */
-#define MR_ATOMIC_PAUSE \
-    do { \
-        ; \
-    } while(0)
-
-#endif
-
-/*---------------------------------------------------------------------------*/
-
 /*
 ** Memory fence operations.
 */
--- a/runtime/mercury_context.c
+++ b/runtime/mercury_context.c
@@ -120,10 +120,16 @@ static MR_Integer MR_primordial_thread_cpu = -1;
 
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
 /*
-** This is used to give each context its own unique ID. It is protected by the
-** free_context_list_lock.
+** This is used to give each context its own unique ID. It is accessed with
+** atomic operations.
 */
 static MR_ContextId MR_next_context_id = 0;
 
+/*
+** Allocate a context ID.
+*/
+static MR_ContextId
+allocate_context_id(void);
 #endif
 
 /*
@@ -646,9 +652,6 @@ MR_Context *
 MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
 {
     MR_Context *c;
-#if MR_THREADSCOPE
-    MR_Unsigned context_id;
-#endif
 
     MR_LOCK(&free_context_list_lock, "create_context");
 
@@ -681,9 +684,6 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
     } else {
         c = NULL;
     }
-#if MR_THREADSCOPE
-    context_id = MR_next_context_id++;
-#endif
     MR_UNLOCK(&free_context_list_lock, "create_context i");
 
     if (c == NULL) {
@@ -701,7 +701,7 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
 #endif
     }
 #ifdef MR_THREADSCOPE
-    c->MR_ctxt_num_id = context_id;
+    c->MR_ctxt_num_id = allocate_context_id();
 #endif
 
     MR_init_context_maybe_generator(c, id, gen);
@@ -756,6 +756,13 @@ MR_destroy_context(MR_Context *c)
     MR_UNLOCK(&free_context_list_lock, "destroy_context");
 }
 
+#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
+static MR_ContextId
+allocate_context_id(void) {
+    return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
+}
+#endif
+
 #ifdef MR_LL_PARALLEL_CONJ
 
 static void
@@ -1238,6 +1245,11 @@ MR_define_entry(MR_do_runnext);
         MR_load_context(MR_ENGINE(MR_eng_this_context));
     } else {
 #ifdef MR_THREADSCOPE
+        /*
+        ** Allocate a new context Id so that someone looking at the threadscope
+        ** profile sees this as new work.
+        */
+        MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
         MR_threadscope_post_run_context();
 #endif
     }
--- a/runtime/mercury_threadscope.h
+++ b/runtime/mercury_threadscope.h
@@ -38,7 +38,7 @@ typedef struct MR_threadscope_event_buffer MR_threadscope_event_buffer_t;
 
 typedef MR_uint_least16_t MR_EngineId;
 typedef MR_uint_least16_t MR_ContextStopReason;
-typedef MR_uint_least32_t MR_ContextId;
+typedef MR_Integer MR_ContextId;
 
 /*
 ** This must be called by the primordial thread before starting any other