Previously, when an engine stole a spark and executed it using the context it
was already holding, it did not allocate a new context ID.  A user looking at
this behaviour in threadscope would see thread 27 (for instance) finish and
then immediately begin executing again.  We now allocate a new context ID
whenever a context is reused, so that the reused context looks like new work
from threadscope's point of view.  Contexts allocated from the free context
lists already receive new context IDs.
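
In outline, the scheduler change (in MR_do_runnext, shown in full in the
diff below) amounts to the following fragment; it uses the
allocate_context_id() helper added by this patch:

    #ifdef MR_THREADSCOPE
        /*
        ** The engine is reusing the context it already holds to run stolen
        ** work, so give that context a fresh ID: threadscope then displays
        ** the stolen work as a new context rather than the old one resuming.
        */
        MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
        MR_threadscope_post_run_context();
    #endif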

runtime/mercury_context.c:
    As above.

    The variable holding the next context ID is now updated with atomic
    operations rather than being protected by the free context list lock.
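
    Schematically, the counter update goes from a lock-protected
    post-increment to a lock-free atomic operation.  A simplified
    before/after sketch of the code in the diff below:

        /* Before: the counter was only updated under free_context_list_lock. */
        MR_LOCK(&free_context_list_lock, "create_context");
        context_id = MR_next_context_id++;
        MR_UNLOCK(&free_context_list_lock, "create_context i");

        /* After: any engine can allocate an ID without taking the lock. */
        static MR_ContextId
        allocate_context_id(void)
        {
            return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
        }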

runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
    Implement a new atomic operation, MR_atomic_add_and_fetch_int; it is
    used to allocate context IDs.

    Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int when
    handwritten assembler support is not available.
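
    When handwritten assembler support is not available, the new operation
    expands to GCC's __sync_add_and_fetch builtin or to a compare-and-swap
    loop, and MR_atomic_add_int simply discards the result.  A condensed
    sketch of the fallbacks in the diff below (the real code picks exactly
    one body with the preprocessor):

        /* Fallback 1: GCC 4.1 or later provides an atomic builtin. */
        #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY                            \
            do {                                                            \
                return __sync_add_and_fetch(addr, addend);                  \
            } while (0)

        /* Fallback 2: retry a compare-and-swap, pausing between attempts. */
        #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY                            \
            do {                                                            \
                MR_Integer temp = *addr;                                    \
                while (!MR_compare_and_swap_word(addr, temp, temp + addend)) { \
                    MR_ATOMIC_PAUSE;                                        \
                    temp = *addr;                                           \
                }                                                           \
                return temp + addend;                                       \
            } while (0)

        /* MR_atomic_add_int becomes add-and-fetch with the result ignored. */
        #define MR_ATOMIC_ADD_INT_BODY                                      \
            do {                                                            \
                MR_atomic_add_and_fetch_int(addr, addend);                  \
            } while (0)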

runtime/mercury_atomic_ops.c:
    Re-order atomic operations to match the order in the header file.

runtime/mercury_atomic_ops.h:
    Place the definition of the MR_ATOMIC_PAUSE macro before the other atomic
    operations since MR_atomic_add_and_fetch_int depends on it.  This also
    conforms with the coding standard.

runtime/mercury_threadscope.h:
    Make the context ID type an MR_Integer to match the argument size of
    the available atomic operations.
Author: Paul Bone
Date:   2010-02-17 02:37:45 +00:00
Parent: 8db94039a6
Commit: 6b2bc6a66a

4 changed files with 136 additions and 66 deletions

--- a/runtime/mercury_atomic_ops.c
+++ b/runtime/mercury_atomic_ops.c
@@ -33,20 +33,11 @@ MR_OUTLINE_DEFN(
 )
 
 MR_OUTLINE_DEFN(
-    void
-    MR_atomic_inc_int(volatile MR_Integer *addr)
+    MR_Integer
+    MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
 ,
 {
-    MR_ATOMIC_INC_INT_BODY;
-}
-)
-
-MR_OUTLINE_DEFN(
-    void
-    MR_atomic_dec_int(volatile MR_Integer *addr)
-,
-{
-    MR_ATOMIC_DEC_INT_BODY;
+    MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
 }
 )
@@ -68,6 +59,24 @@ MR_OUTLINE_DEFN(
 }
 )
 
+MR_OUTLINE_DEFN(
+    void
+    MR_atomic_inc_int(volatile MR_Integer *addr)
+,
+{
+    MR_ATOMIC_INC_INT_BODY;
+}
+)
+
+MR_OUTLINE_DEFN(
+    void
+    MR_atomic_dec_int(volatile MR_Integer *addr)
+,
+{
+    MR_ATOMIC_DEC_INT_BODY;
+}
+)
+
 MR_OUTLINE_DEFN(
     MR_bool
     MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)

--- a/runtime/mercury_atomic_ops.h
+++ b/runtime/mercury_atomic_ops.h
@@ -21,6 +21,48 @@
 #if defined(MR_LL_PARALLEL_CONJ)
 
+/*
+ * Intel and AMD support a pause instruction that is roughly equivalent
+ * to a no-op. Intel recommend that it is used in spin-loops to improve
+ * performance. Without a pause instruction multiple simultaneous
+ * read-requests will be in-flight for the synchronization variable from a
+ * single thread. Giving the pause instruction causes these to be executed
+ * in sequence allowing the processor to handle the change in the
+ * synchronization variable more easily.
+ *
+ * On some chips it may cause the spin-loop to use less power.
+ *
+ * This instruction was introduced with the Pentium 4 but is backwards
+ * compatible, This works because the two byte instruction for PAUSE is
+ * equivalent to the NOP instruction prefixed by REPE. Therefore older
+ * processors perform a no-op.
+ *
+ * This is not really an atomic instruction but we name it
+ * MR_ATOMIC_PAUSE for consistency.
+ *
+ * References: Intel and AMD documentation for PAUSE, Intel optimisation
+ * guide.
+ */
+#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
+        !defined(MR_DO_NOT_USE_CPU_RELAX)
+    #define MR_ATOMIC_PAUSE \
+        do { \
+            __asm__ __volatile__("pause"); \
+        } while(0)
+#else
+    /* Fall back to a no-op */
+    #define MR_ATOMIC_PAUSE \
+        do { \
+            ; \
+        } while(0)
+#endif
+
+/*---------------------------------------------------------------------------*/
+
 /*
 ** Declarations for inline atomic operations.
 */
@@ -33,6 +75,13 @@ MR_EXTERN_INLINE MR_bool
 MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
     MR_Integer new_val);
 
+/*
+** Atomically add to an integer in memory and retrieve the result. In other
+** words an atomic pre-increment operation.
+*/
+MR_EXTERN_INLINE MR_Integer
+MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);
+
 /*
 ** Atomically add the second argument to the memory pointed to by the first
 ** argument.
@@ -66,6 +115,11 @@ MR_atomic_dec_int(volatile MR_Integer *addr);
 MR_EXTERN_INLINE MR_bool
 MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 
+/*
+** For information about GCC's builtins for atomic operations see:
+** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
+*/
+
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
@@ -122,6 +176,43 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 /*---------------------------------------------------------------------------*/
 
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
+        !defined(MR_AVOID_COMPILER_INTRINSICS)
+    #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
+        do { \
+            return __sync_add_and_fetch(addr, addend); \
+        } while (0)
+#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
+    /*
+    ** If there is no GCC builtin for this then it can be implemented in terms
+    ** of compare and swap, assuming that that has been implemented in
+    ** assembler for this architecture.
+    */
+    #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
+        do { \
+            MR_Integer temp; \
+            temp = *addr; \
+            while (!MR_compare_and_swap_word(addr, temp, temp+addend)) { \
+                MR_ATOMIC_PAUSE; \
+                temp = *addr; \
+            } \
+            return temp+addend; \
+        } while (0)
+#endif
+
+#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
+MR_EXTERN_INLINE MR_Integer
+MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
+{
+    MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+
 #if defined(__GNUC__) && defined(__x86_64__) && \
     !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
@@ -145,11 +236,11 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
         ); \
     } while (0)
 
-#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)
     #define MR_ATOMIC_ADD_INT_BODY \
         do { \
-            __sync_add_and_fetch(addr, addend); \
+            MR_atomic_add_and_fetch_int(addr, addend); \
        } while (0)
 #endif
@@ -358,48 +449,6 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
 
-/*
- * Intel and AMD support a pause instruction that is roughly equivalent
- * to a no-op. Intel recommend that it is used in spin-loops to improve
- * performance. Without a pause instruction multiple simultaneous
- * read-requests will be in-flight for the synchronization variable from a
- * single thread. Giving the pause instruction causes these to be executed
- * in sequence allowing the processor to handle the change in the
- * synchronization variable more easily.
- *
- * On some chips it may cause the spin-loop to use less power.
- *
- * This instruction was introduced with the Pentium 4 but is backwards
- * compatible, This works because the two byte instruction for PAUSE is
- * equivalent to the NOP instruction prefixed by REPE. Therefore older
- * processors perform a no-op.
- *
- * This is not really an atomic instruction but we name it
- * MR_ATOMIC_PAUSE for consistency.
- *
- * References: Intel and AMD documentation for PAUSE, Intel optimisation
- * guide.
- */
-#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
-        !defined(MR_DO_NOT_USE_CPU_RELAX)
-    #define MR_ATOMIC_PAUSE \
-        do { \
-            __asm__ __volatile__("pause"); \
-        } while(0)
-#else
-    /* Fall back to a no-op */
-    #define MR_ATOMIC_PAUSE \
-        do { \
-            ; \
-        } while(0)
-#endif
-
-/*---------------------------------------------------------------------------*/
-
 /*
 ** Memory fence operations.
 */

--- a/runtime/mercury_context.c
+++ b/runtime/mercury_context.c
@@ -120,10 +120,16 @@ static MR_Integer MR_primordial_thread_cpu = -1;
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
 /*
-** This is used to give each context its own unique ID. It is protected by the
-** free_context_list_lock.
+** This is used to give each context its own unique ID. It is accessed with
+** atomic operations.
 */
 static MR_ContextId MR_next_context_id = 0;
+
+/*
+** Allocate a context ID.
+*/
+static MR_ContextId
+allocate_context_id(void);
 #endif
 
 /*
@@ -646,9 +652,6 @@ MR_Context *
 MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
 {
     MR_Context *c;
-#if MR_THREADSCOPE
-    MR_Unsigned context_id;
-#endif
 
     MR_LOCK(&free_context_list_lock, "create_context");
@@ -681,9 +684,6 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
     } else {
         c = NULL;
     }
-#if MR_THREADSCOPE
-    context_id = MR_next_context_id++;
-#endif
     MR_UNLOCK(&free_context_list_lock, "create_context i");
 
     if (c == NULL) {
@@ -701,7 +701,7 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
 #endif
     }
 #ifdef MR_THREADSCOPE
-    c->MR_ctxt_num_id = context_id;
+    c->MR_ctxt_num_id = allocate_context_id();
 #endif
 
     MR_init_context_maybe_generator(c, id, gen);
@@ -756,6 +756,13 @@ MR_destroy_context(MR_Context *c)
     MR_UNLOCK(&free_context_list_lock, "destroy_context");
 }
 
+#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
+static MR_ContextId
+allocate_context_id(void) {
+    return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
+}
+#endif
+
 #ifdef MR_LL_PARALLEL_CONJ
 static void
@@ -1238,6 +1245,11 @@ MR_define_entry(MR_do_runnext);
         MR_load_context(MR_ENGINE(MR_eng_this_context));
     } else {
 #ifdef MR_THREADSCOPE
+        /*
+        ** Allocate a new context Id so that someone looking at the threadscope
+        ** profile sees this as new work.
+        */
+        MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
         MR_threadscope_post_run_context();
 #endif
     }

--- a/runtime/mercury_threadscope.h
+++ b/runtime/mercury_threadscope.h
@@ -38,7 +38,7 @@ typedef struct MR_threadscope_event_buffer MR_threadscope_event_buffer_t;
 typedef MR_uint_least16_t MR_EngineId;
 typedef MR_uint_least16_t MR_ContextStopReason;
-typedef MR_uint_least32_t MR_ContextId;
+typedef MR_Integer MR_ContextId;
 
 /*
 ** This must be called by the primordial thread before starting any other