mirror of
https://github.com/Mercury-Language/mercury.git
synced 2025-12-19 07:45:09 +00:00
When an engine stole a spark and executed it using the context it was
currently holding, it did not allocate a new context ID. A user looking at
this behaviour from threadscope would see thread 27 (for instance) finish, and
then immediately begin executing again. Therefore we now allocate a new
context ID when a context is reused, making the context look different from
threadscope's point of view. New context IDs are already allocated to
contexts that are allocated from the free context lists.
runtime/mercury_context.c:
As above.
The next context id variable is now accessed atomically rather than being
protected by the free context list lock.
runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
Implement a new atomic operation, MR_atomic_add_and_fetch_int, this is
used to allocate context ids.
Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int when
handwritten assembler support is not available.
runtime/mercury_atomic_ops.c:
Re-order atomic operations to match the order in the header file.
runtime/mercury_atomic_ops.h:
Place the definition of the MR_ATOMIC_PAUSE macro before the other atomic
operations since MR_atomic_add_and_fetch_int depends on it. This also
conforms with the coding standard.
runtime/mercury_threadscope.h:
Make the Context ID type a MR_Integer to match the argument size on the
available atomic operations.
This commit is contained in:
@@ -33,20 +33,11 @@ MR_OUTLINE_DEFN(
|
|||||||
)
|
)
|
||||||
|
|
||||||
MR_OUTLINE_DEFN(
|
MR_OUTLINE_DEFN(
|
||||||
void
|
MR_Integer
|
||||||
MR_atomic_inc_int(volatile MR_Integer *addr)
|
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
|
||||||
,
|
,
|
||||||
{
|
{
|
||||||
MR_ATOMIC_INC_INT_BODY;
|
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
MR_OUTLINE_DEFN(
|
|
||||||
void
|
|
||||||
MR_atomic_dec_int(volatile MR_Integer *addr)
|
|
||||||
,
|
|
||||||
{
|
|
||||||
MR_ATOMIC_DEC_INT_BODY;
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -68,6 +59,24 @@ MR_OUTLINE_DEFN(
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
MR_OUTLINE_DEFN(
|
||||||
|
void
|
||||||
|
MR_atomic_inc_int(volatile MR_Integer *addr)
|
||||||
|
,
|
||||||
|
{
|
||||||
|
MR_ATOMIC_INC_INT_BODY;
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
MR_OUTLINE_DEFN(
|
||||||
|
void
|
||||||
|
MR_atomic_dec_int(volatile MR_Integer *addr)
|
||||||
|
,
|
||||||
|
{
|
||||||
|
MR_ATOMIC_DEC_INT_BODY;
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
MR_OUTLINE_DEFN(
|
MR_OUTLINE_DEFN(
|
||||||
MR_bool
|
MR_bool
|
||||||
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)
|
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)
|
||||||
|
|||||||
@@ -21,6 +21,48 @@
|
|||||||
|
|
||||||
#if defined(MR_LL_PARALLEL_CONJ)
|
#if defined(MR_LL_PARALLEL_CONJ)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Intel and AMD support a pause instruction that is roughly equivalent
|
||||||
|
* to a no-op. Intel recommend that it is used in spin-loops to improve
|
||||||
|
* performance. Without a pause instruction multiple simultaneous
|
||||||
|
* read-requests will be in-flight for the synchronization variable from a
|
||||||
|
* single thread. Giving the pause instruction causes these to be executed
|
||||||
|
* in sequence allowing the processor to handle the change in the
|
||||||
|
* synchronization variable more easily.
|
||||||
|
*
|
||||||
|
* On some chips it may cause the spin-loop to use less power.
|
||||||
|
*
|
||||||
|
* This instruction was introduced with the Pentium 4 but is backwards
|
||||||
|
* compatible, This works because the two byte instruction for PAUSE is
|
||||||
|
* equivalent to the NOP instruction prefixed by REPE. Therefore older
|
||||||
|
* processors perform a no-op.
|
||||||
|
*
|
||||||
|
* This is not really an atomic instruction but we name it
|
||||||
|
* MR_ATOMIC_PAUSE for consistency.
|
||||||
|
*
|
||||||
|
* References: Intel and AMD documentation for PAUSE, Intel optimisation
|
||||||
|
* guide.
|
||||||
|
*/
|
||||||
|
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
|
||||||
|
!defined(MR_DO_NOT_USE_CPU_RELAX)
|
||||||
|
|
||||||
|
#define MR_ATOMIC_PAUSE \
|
||||||
|
do { \
|
||||||
|
__asm__ __volatile__("pause"); \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
/* Fall back to a no-op */
|
||||||
|
#define MR_ATOMIC_PAUSE \
|
||||||
|
do { \
|
||||||
|
; \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Declarations for inline atomic operations.
|
** Declarations for inline atomic operations.
|
||||||
*/
|
*/
|
||||||
@@ -33,6 +75,13 @@ MR_EXTERN_INLINE MR_bool
|
|||||||
MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
|
MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
|
||||||
MR_Integer new_val);
|
MR_Integer new_val);
|
||||||
|
|
||||||
|
/*
|
||||||
|
** Atomically add to an integer in memory and retrieve the result. In other
|
||||||
|
** words an atomic pre-increment operation.
|
||||||
|
*/
|
||||||
|
MR_EXTERN_INLINE MR_Integer
|
||||||
|
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Atomically add the second argument to the memory pointed to by the first
|
** Atomically add the second argument to the memory pointed to by the first
|
||||||
** argument.
|
** argument.
|
||||||
@@ -66,6 +115,11 @@ MR_atomic_dec_int(volatile MR_Integer *addr);
|
|||||||
MR_EXTERN_INLINE MR_bool
|
MR_EXTERN_INLINE MR_bool
|
||||||
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
|
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
|
||||||
|
|
||||||
|
/*
|
||||||
|
** For information about GCC's builtins for atomic operations see:
|
||||||
|
** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
|
||||||
|
*/
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
@@ -122,6 +176,43 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
|
|||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
|
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
|
||||||
|
!defined(MR_AVOID_COMPILER_INTRINSICS)
|
||||||
|
|
||||||
|
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
|
||||||
|
do { \
|
||||||
|
return __sync_add_and_fetch(addr, addend); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
|
||||||
|
/*
|
||||||
|
** If there is no GCC builtin for this then it can be implemented in terms
|
||||||
|
** of compare and swap, assuming that that has been implemented in
|
||||||
|
** assembler for this architecture.
|
||||||
|
*/
|
||||||
|
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
|
||||||
|
do { \
|
||||||
|
MR_Integer temp; \
|
||||||
|
temp = *addr; \
|
||||||
|
while (!MR_compare_and_swap_word(addr, temp, temp+addend)) { \
|
||||||
|
MR_ATOMIC_PAUSE; \
|
||||||
|
temp = *addr; \
|
||||||
|
} \
|
||||||
|
return temp+addend; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
|
||||||
|
MR_EXTERN_INLINE MR_Integer
|
||||||
|
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
|
||||||
|
{
|
||||||
|
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
#if defined(__GNUC__) && defined(__x86_64__) && \
|
#if defined(__GNUC__) && defined(__x86_64__) && \
|
||||||
!defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
|
!defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
|
||||||
|
|
||||||
@@ -145,11 +236,11 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
|
|||||||
); \
|
); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
|
#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)
|
||||||
|
|
||||||
#define MR_ATOMIC_ADD_INT_BODY \
|
#define MR_ATOMIC_ADD_INT_BODY \
|
||||||
do { \
|
do { \
|
||||||
__sync_add_and_fetch(addr, addend); \
|
MR_atomic_add_and_fetch_int(addr, addend); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -358,48 +449,6 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
|
|||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
/*
|
|
||||||
* Intel and AMD support a pause instruction that is roughly equivalent
|
|
||||||
* to a no-op. Intel recommend that it is used in spin-loops to improve
|
|
||||||
* performance. Without a pause instruction multiple simultaneous
|
|
||||||
* read-requests will be in-flight for the synchronization variable from a
|
|
||||||
* single thread. Giving the pause instruction causes these to be executed
|
|
||||||
* in sequence allowing the processor to handle the change in the
|
|
||||||
* synchronization variable more easily.
|
|
||||||
*
|
|
||||||
* On some chips it may cause the spin-loop to use less power.
|
|
||||||
*
|
|
||||||
* This instruction was introduced with the Pentium 4 but is backwards
|
|
||||||
* compatible, This works because the two byte instruction for PAUSE is
|
|
||||||
* equivalent to the NOP instruction prefixed by REPE. Therefore older
|
|
||||||
* processors perform a no-op.
|
|
||||||
*
|
|
||||||
* This is not really an atomic instruction but we name it
|
|
||||||
* MR_ATOMIC_PAUSE for consistency.
|
|
||||||
*
|
|
||||||
* References: Intel and AMD documentation for PAUSE, Intel optimisation
|
|
||||||
* guide.
|
|
||||||
*/
|
|
||||||
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
|
|
||||||
!defined(MR_DO_NOT_USE_CPU_RELAX)
|
|
||||||
|
|
||||||
#define MR_ATOMIC_PAUSE \
|
|
||||||
do { \
|
|
||||||
__asm__ __volatile__("pause"); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
/* Fall back to a no-op */
|
|
||||||
#define MR_ATOMIC_PAUSE \
|
|
||||||
do { \
|
|
||||||
; \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Memory fence operations.
|
** Memory fence operations.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -120,10 +120,16 @@ static MR_Integer MR_primordial_thread_cpu = -1;
|
|||||||
|
|
||||||
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
|
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
|
||||||
/*
|
/*
|
||||||
** This is used to give each context its own unique ID. It is protected by the
|
** This is used to give each context its own unique ID. It is accessed with
|
||||||
** free_context_list_lock.
|
** atomic operations.
|
||||||
*/
|
*/
|
||||||
static MR_ContextId MR_next_context_id = 0;
|
static MR_ContextId MR_next_context_id = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
** Allocate a context ID.
|
||||||
|
*/
|
||||||
|
static MR_ContextId
|
||||||
|
allocate_context_id(void);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -646,9 +652,6 @@ MR_Context *
|
|||||||
MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
|
MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
|
||||||
{
|
{
|
||||||
MR_Context *c;
|
MR_Context *c;
|
||||||
#if MR_THREADSCOPE
|
|
||||||
MR_Unsigned context_id;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
MR_LOCK(&free_context_list_lock, "create_context");
|
MR_LOCK(&free_context_list_lock, "create_context");
|
||||||
|
|
||||||
@@ -681,9 +684,6 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
|
|||||||
} else {
|
} else {
|
||||||
c = NULL;
|
c = NULL;
|
||||||
}
|
}
|
||||||
#if MR_THREADSCOPE
|
|
||||||
context_id = MR_next_context_id++;
|
|
||||||
#endif
|
|
||||||
MR_UNLOCK(&free_context_list_lock, "create_context i");
|
MR_UNLOCK(&free_context_list_lock, "create_context i");
|
||||||
|
|
||||||
if (c == NULL) {
|
if (c == NULL) {
|
||||||
@@ -701,7 +701,7 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#ifdef MR_THREADSCOPE
|
#ifdef MR_THREADSCOPE
|
||||||
c->MR_ctxt_num_id = context_id;
|
c->MR_ctxt_num_id = allocate_context_id();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
MR_init_context_maybe_generator(c, id, gen);
|
MR_init_context_maybe_generator(c, id, gen);
|
||||||
@@ -756,6 +756,13 @@ MR_destroy_context(MR_Context *c)
|
|||||||
MR_UNLOCK(&free_context_list_lock, "destroy_context");
|
MR_UNLOCK(&free_context_list_lock, "destroy_context");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
|
||||||
|
static MR_ContextId
|
||||||
|
allocate_context_id(void) {
|
||||||
|
return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef MR_LL_PARALLEL_CONJ
|
#ifdef MR_LL_PARALLEL_CONJ
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -1238,6 +1245,11 @@ MR_define_entry(MR_do_runnext);
|
|||||||
MR_load_context(MR_ENGINE(MR_eng_this_context));
|
MR_load_context(MR_ENGINE(MR_eng_this_context));
|
||||||
} else {
|
} else {
|
||||||
#ifdef MR_THREADSCOPE
|
#ifdef MR_THREADSCOPE
|
||||||
|
/*
|
||||||
|
** Allocate a new context Id so that someone looking at the threadscope
|
||||||
|
** profile sees this as new work.
|
||||||
|
*/
|
||||||
|
MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
|
||||||
MR_threadscope_post_run_context();
|
MR_threadscope_post_run_context();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ typedef struct MR_threadscope_event_buffer MR_threadscope_event_buffer_t;
|
|||||||
|
|
||||||
typedef MR_uint_least16_t MR_EngineId;
|
typedef MR_uint_least16_t MR_EngineId;
|
||||||
typedef MR_uint_least16_t MR_ContextStopReason;
|
typedef MR_uint_least16_t MR_ContextStopReason;
|
||||||
typedef MR_uint_least32_t MR_ContextId;
|
typedef MR_Integer MR_ContextId;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** This must be called by the primordial thread before starting any other
|
** This must be called by the primordial thread before starting any other
|
||||||
|
|||||||
Reference in New Issue
Block a user