Previously, when an engine stole a spark and executed it using the context it
was already holding, it did not allocate a new context ID.  A user looking at
this behaviour in threadscope would see thread 27 (for instance) finish and
then immediately begin executing again.  We now allocate a new context ID
whenever a context is reused, so that the reused context appears as new work
from threadscope's point of view.  New context IDs are already allocated to
contexts that are taken from the free context lists.
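
The essence of the change can be sketched in plain C (illustrative only: the
counter, the allocate_context_id() helper and the context struct below are
standalone stand-ins, not the Mercury runtime's own definitions, which appear
in the diffs further down):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Shared counter from which every context incarnation draws its ID. */
    static atomic_long next_context_id = 0;

    /* Atomic pre-increment, mirroring MR_atomic_add_and_fetch_int. */
    static long allocate_context_id(void)
    {
        return atomic_fetch_add(&next_context_id, 1) + 1;
    }

    struct context { long num_id; };

    int main(void)
    {
        struct context c = { .num_id = allocate_context_id() };
        printf("first run as context %ld\n", c.num_id);

        /*
        ** Reusing the context for a stolen spark: give it a fresh ID so a
        ** profile viewer treats this as new work, not the old context
        ** restarting.
        */
        c.num_id = allocate_context_id();
        printf("reused, now reported as context %ld\n", c.num_id);
        return 0;
    }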

runtime/mercury_context.c:
    As above.

    The next context ID variable is now accessed atomically rather than being
    protected by the free context list lock.
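
    The pattern of the change, sketched in standalone C (a pthread mutex and
    C11 atomics stand in for the runtime's own lock and atomic macros; all
    names here are illustrative):

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdio.h>

        static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
        static long next_id_locked = 0;          /* old: lock-protected  */
        static atomic_long next_id_atomic = 0;   /* new: atomic access   */

        /* Old scheme: bump the counter while holding the list lock. */
        static long alloc_id_locked(void)
        {
            long id;
            pthread_mutex_lock(&list_lock);
            id = ++next_id_locked;
            pthread_mutex_unlock(&list_lock);
            return id;
        }

        /* New scheme: one atomic read-modify-write, no lock contention. */
        static long alloc_id_atomic(void)
        {
            return atomic_fetch_add(&next_id_atomic, 1) + 1;
        }

        int main(void)
        {
            /* Both hand out 1, 2, 3, ...; only the second is lock-free. */
            printf("%ld %ld\n", alloc_id_locked(), alloc_id_atomic());
            return 0;
        }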

runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
    Implement a new atomic operation, MR_atomic_add_and_fetch_int, which is
    used to allocate context IDs.

    Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int when
    handwritten assembler support is not available.
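
    The semantics of the new operation, and the way a plain atomic add can be
    expressed in terms of it, can be sketched with the GCC builtin that the
    header falls back on (a standalone illustration; add_and_fetch() and add()
    below are stand-ins for the runtime's macros, and __sync_add_and_fetch
    requires GCC 4.1 or later):

        #include <stdio.h>

        static volatile long counter = 41;

        /* Returns the *new* value: an atomic pre-increment. */
        static long add_and_fetch(volatile long *addr, long addend)
        {
            return __sync_add_and_fetch(addr, addend);
        }

        /*
        ** A plain atomic add is the same operation with the result ignored,
        ** which is how MR_ATOMIC_ADD_INT_BODY is now defined when no
        ** handwritten assembler version is available.
        */
        static void add(volatile long *addr, long addend)
        {
            (void) add_and_fetch(addr, addend);
        }

        int main(void)
        {
            printf("%ld\n", add_and_fetch(&counter, 1));  /* prints 42 */
            add(&counter, 8);
            printf("%ld\n", counter);                     /* prints 50 */
            return 0;
        }

    When neither the builtin nor handwritten assembler is available, the
    header instead builds the operation from MR_compare_and_swap_word in a
    retry loop that pauses between attempts, as the diff below shows.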

runtime/mercury_atomic_ops.c:
    Re-order atomic operations to match the order in the header file.

runtime/mercury_atomic_ops.h:
    Place the definition of the MR_ATOMIC_PAUSE macro before the other atomic
    operations since MR_atomic_add_and_fetch_int depends on it.  This also
    conforms with the coding standard.

runtime/mercury_threadscope.h:
    Make the context ID type an MR_Integer to match the argument size of the
    available atomic operations.
Author: Paul Bone
Date:   2010-02-17 02:37:45 +00:00
Parent: 8db94039a6
Commit: 6b2bc6a66a

4 changed files with 136 additions and 66 deletions

runtime/mercury_atomic_ops.c

@@ -33,20 +33,11 @@ MR_OUTLINE_DEFN(
)
MR_OUTLINE_DEFN(
void
MR_atomic_inc_int(volatile MR_Integer *addr)
MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
,
{
MR_ATOMIC_INC_INT_BODY;
}
)
MR_OUTLINE_DEFN(
void
MR_atomic_dec_int(volatile MR_Integer *addr)
,
{
MR_ATOMIC_DEC_INT_BODY;
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
}
)
@@ -68,6 +59,24 @@ MR_OUTLINE_DEFN(
}
)
MR_OUTLINE_DEFN(
void
MR_atomic_inc_int(volatile MR_Integer *addr)
,
{
MR_ATOMIC_INC_INT_BODY;
}
)
MR_OUTLINE_DEFN(
void
MR_atomic_dec_int(volatile MR_Integer *addr)
,
{
MR_ATOMIC_DEC_INT_BODY;
}
)
MR_OUTLINE_DEFN(
MR_bool
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)

runtime/mercury_atomic_ops.h

@@ -21,6 +21,48 @@
#if defined(MR_LL_PARALLEL_CONJ)
/*
* Intel and AMD support a pause instruction that is roughly equivalent
* to a no-op. Intel recommend that it is used in spin-loops to improve
* performance. Without a pause instruction multiple simultaneous
* read-requests will be in-flight for the synchronization variable from a
* single thread. Giving the pause instruction causes these to be executed
* in sequence allowing the processor to handle the change in the
* synchronization variable more easily.
*
* On some chips it may cause the spin-loop to use less power.
*
* This instruction was introduced with the Pentium 4 but is backwards
* compatible, This works because the two byte instruction for PAUSE is
* equivalent to the NOP instruction prefixed by REPE. Therefore older
* processors perform a no-op.
*
* This is not really an atomic instruction but we name it
* MR_ATOMIC_PAUSE for consistency.
*
* References: Intel and AMD documentation for PAUSE, Intel optimisation
* guide.
*/
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
!defined(MR_DO_NOT_USE_CPU_RELAX)
#define MR_ATOMIC_PAUSE \
do { \
__asm__ __volatile__("pause"); \
} while(0)
#else
/* Fall back to a no-op */
#define MR_ATOMIC_PAUSE \
do { \
; \
} while(0)
#endif
/*---------------------------------------------------------------------------*/
/*
** Declarations for inline atomic operations.
*/
@@ -33,6 +75,13 @@ MR_EXTERN_INLINE MR_bool
MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
MR_Integer new_val);
/*
** Atomically add to an integer in memory and retrieve the result. In other
** words an atomic pre-increment operation.
*/
MR_EXTERN_INLINE MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);
/*
** Atomically add the second argument to the memory pointed to by the first
** argument.
@@ -66,6 +115,11 @@ MR_atomic_dec_int(volatile MR_Integer *addr);
MR_EXTERN_INLINE MR_bool
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
/*
** For information about GCC's builtins for atomic operations see:
** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
*/
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
@@ -122,6 +176,43 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
/*---------------------------------------------------------------------------*/
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
!defined(MR_AVOID_COMPILER_INTRINSICS)
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
do { \
return __sync_add_and_fetch(addr, addend); \
} while (0)
#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
/*
** If there is no GCC builtin for this then it can be implemented in terms
** of compare and swap, assuming that that has been implemented in
** assembler for this architecture.
*/
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
do { \
MR_Integer temp; \
temp = *addr; \
while (!MR_compare_and_swap_word(addr, temp, temp+addend)) { \
MR_ATOMIC_PAUSE; \
temp = *addr; \
} \
return temp+addend; \
} while (0)
#endif
#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
MR_EXTERN_INLINE MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
{
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
}
#endif
/*---------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__x86_64__) && \
!defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
@@ -145,11 +236,11 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
); \
} while (0)
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)
#define MR_ATOMIC_ADD_INT_BODY \
do { \
__sync_add_and_fetch(addr, addend); \
MR_atomic_add_and_fetch_int(addr, addend); \
} while (0)
#endif
@@ -358,48 +449,6 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*
* Intel and AMD support a pause instruction that is roughly equivalent
* to a no-op. Intel recommend that it is used in spin-loops to improve
* performance. Without a pause instruction multiple simultaneous
* read-requests will be in-flight for the synchronization variable from a
* single thread. Giving the pause instruction causes these to be executed
* in sequence allowing the processor to handle the change in the
* synchronization variable more easily.
*
* On some chips it may cause the spin-loop to use less power.
*
* This instruction was introduced with the Pentium 4 but is backwards
* compatible, This works because the two byte instruction for PAUSE is
* equivalent to the NOP instruction prefixed by REPE. Therefore older
* processors perform a no-op.
*
* This is not really an atomic instruction but we name it
* MR_ATOMIC_PAUSE for consistency.
*
* References: Intel and AMD documentation for PAUSE, Intel optimisation
* guide.
*/
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
!defined(MR_DO_NOT_USE_CPU_RELAX)
#define MR_ATOMIC_PAUSE \
do { \
__asm__ __volatile__("pause"); \
} while(0)
#else
/* Fall back to a no-op */
#define MR_ATOMIC_PAUSE \
do { \
; \
} while(0)
#endif
/*---------------------------------------------------------------------------*/
/*
** Memory fence operations.
*/

runtime/mercury_context.c

@@ -120,10 +120,16 @@ static MR_Integer MR_primordial_thread_cpu = -1;
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
/*
** This is used to give each context its own unique ID. It is protected by the
** free_context_list_lock.
** This is used to give each context its own unique ID. It is accessed with
** atomic operations.
*/
static MR_ContextId MR_next_context_id = 0;
/*
** Allocate a context ID.
*/
static MR_ContextId
allocate_context_id(void);
#endif
/*
@@ -646,9 +652,6 @@ MR_Context *
MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
{
MR_Context *c;
#if MR_THREADSCOPE
MR_Unsigned context_id;
#endif
MR_LOCK(&free_context_list_lock, "create_context");
@@ -681,9 +684,6 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
} else {
c = NULL;
}
#if MR_THREADSCOPE
context_id = MR_next_context_id++;
#endif
MR_UNLOCK(&free_context_list_lock, "create_context i");
if (c == NULL) {
@@ -701,7 +701,7 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
#endif
}
#ifdef MR_THREADSCOPE
c->MR_ctxt_num_id = context_id;
c->MR_ctxt_num_id = allocate_context_id();
#endif
MR_init_context_maybe_generator(c, id, gen);
@@ -756,6 +756,13 @@ MR_destroy_context(MR_Context *c)
MR_UNLOCK(&free_context_list_lock, "destroy_context");
}
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
static MR_ContextId
allocate_context_id(void) {
return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
}
#endif
#ifdef MR_LL_PARALLEL_CONJ
static void
@@ -1238,6 +1245,11 @@ MR_define_entry(MR_do_runnext);
MR_load_context(MR_ENGINE(MR_eng_this_context));
} else {
#ifdef MR_THREADSCOPE
/*
** Allocate a new context Id so that someone looking at the threadscope
** profile sees this as new work.
*/
MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
MR_threadscope_post_run_context();
#endif
}

runtime/mercury_threadscope.h

@@ -38,7 +38,7 @@ typedef struct MR_threadscope_event_buffer MR_threadscope_event_buffer_t;
typedef MR_uint_least16_t MR_EngineId;
typedef MR_uint_least16_t MR_ContextStopReason;
typedef MR_uint_least32_t MR_ContextId;
typedef MR_Integer MR_ContextId;
/*
** This must be called by the primordial thread before starting any other