Previously, when an engine stole a spark and executed it using the context it
was already holding, it did not allocate a new context ID.  A user looking at
this behaviour in threadscope would see thread 27 (for instance) finish and
then immediately begin executing again.  We now allocate a new context ID
whenever a context is reused, so that the reused context looks like new work
from threadscope's point of view.  Contexts allocated from the free context
lists already receive new context IDs.
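
In outline, the scheduler change (in MR_do_runnext, shown in full in the
diff below) amounts to the following fragment; it uses the
allocate_context_id() helper added by this patch:

    #ifdef MR_THREADSCOPE
        /*
        ** The engine is reusing the context it already holds to run stolen
        ** work, so give that context a fresh ID: threadscope then displays
        ** the stolen work as a new context rather than the old one resuming.
        */
        MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
        MR_threadscope_post_run_context();
    #endif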

runtime/mercury_context.c:
    As above.

    The variable holding the next context ID is now updated with atomic
    operations rather than being protected by the free context list lock.
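
    Schematically, the counter update goes from a lock-protected
    post-increment to a lock-free atomic operation.  A simplified
    before/after sketch of the code in the diff below:

        /* Before: the counter was only updated under free_context_list_lock. */
        MR_LOCK(&free_context_list_lock, "create_context");
        context_id = MR_next_context_id++;
        MR_UNLOCK(&free_context_list_lock, "create_context i");

        /* After: any engine can allocate an ID without taking the lock. */
        static MR_ContextId
        allocate_context_id(void)
        {
            return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
        }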

runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
    Implement a new atomic operation, MR_atomic_add_and_fetch_int; it is
    used to allocate context IDs.

    Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int when
    handwritten assembler support is not available.
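
    When handwritten assembler support is not available, the new operation
    expands to GCC's __sync_add_and_fetch builtin or to a compare-and-swap
    loop, and MR_atomic_add_int simply discards the result.  A condensed
    sketch of the fallbacks in the diff below (the real code picks exactly
    one body with the preprocessor):

        /* Fallback 1: GCC 4.1 or later provides an atomic builtin. */
        #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY                            \
            do {                                                            \
                return __sync_add_and_fetch(addr, addend);                  \
            } while (0)

        /* Fallback 2: retry a compare-and-swap, pausing between attempts. */
        #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY                            \
            do {                                                            \
                MR_Integer temp = *addr;                                    \
                while (!MR_compare_and_swap_word(addr, temp, temp + addend)) { \
                    MR_ATOMIC_PAUSE;                                        \
                    temp = *addr;                                           \
                }                                                           \
                return temp + addend;                                       \
            } while (0)

        /* MR_atomic_add_int becomes add-and-fetch with the result ignored. */
        #define MR_ATOMIC_ADD_INT_BODY                                      \
            do {                                                            \
                MR_atomic_add_and_fetch_int(addr, addend);                  \
            } while (0)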

runtime/mercury_atomic_ops.c:
    Re-order atomic operations to match the order in the header file.

runtime/mercury_atomic_ops.h:
    Place the definition of the MR_ATOMIC_PAUSE macro before the other atomic
    operations since MR_atomic_add_and_fetch_int depends on it.  This also
    conforms with the coding standard.

runtime/mercury_threadscope.h:
    Make the context ID type an MR_Integer to match the argument size of
    the available atomic operations.
Author: Paul Bone
Date:   2010-02-17 02:37:45 +00:00
Parent: 8db94039a6
Commit: 6b2bc6a66a

4 changed files with 136 additions and 66 deletions

--- a/runtime/mercury_atomic_ops.c
+++ b/runtime/mercury_atomic_ops.c
@@ -33,20 +33,11 @@ MR_OUTLINE_DEFN(
 )
 
 MR_OUTLINE_DEFN(
-    void
-    MR_atomic_inc_int(volatile MR_Integer *addr)
+    MR_Integer
+    MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
 ,
 {
-    MR_ATOMIC_INC_INT_BODY;
-}
-)
-
-MR_OUTLINE_DEFN(
-    void
-    MR_atomic_dec_int(volatile MR_Integer *addr)
-,
-{
-    MR_ATOMIC_DEC_INT_BODY;
+    MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
 }
 )
@@ -68,6 +59,24 @@ MR_OUTLINE_DEFN(
 }
 )
 
+MR_OUTLINE_DEFN(
+    void
+    MR_atomic_inc_int(volatile MR_Integer *addr)
+,
+{
+    MR_ATOMIC_INC_INT_BODY;
+}
+)
+
+MR_OUTLINE_DEFN(
+    void
+    MR_atomic_dec_int(volatile MR_Integer *addr)
+,
+{
+    MR_ATOMIC_DEC_INT_BODY;
+}
+)
+
 MR_OUTLINE_DEFN(
     MR_bool
     MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)

--- a/runtime/mercury_atomic_ops.h
+++ b/runtime/mercury_atomic_ops.h
@@ -21,6 +21,48 @@
 #if defined(MR_LL_PARALLEL_CONJ)
 
+/*
+ * Intel and AMD support a pause instruction that is roughly equivalent
+ * to a no-op. Intel recommend that it is used in spin-loops to improve
+ * performance. Without a pause instruction multiple simultaneous
+ * read-requests will be in-flight for the synchronization variable from a
+ * single thread. Giving the pause instruction causes these to be executed
+ * in sequence allowing the processor to handle the change in the
+ * synchronization variable more easily.
+ *
+ * On some chips it may cause the spin-loop to use less power.
+ *
+ * This instruction was introduced with the Pentium 4 but is backwards
+ * compatible, This works because the two byte instruction for PAUSE is
+ * equivalent to the NOP instruction prefixed by REPE. Therefore older
+ * processors perform a no-op.
+ *
+ * This is not really an atomic instruction but we name it
+ * MR_ATOMIC_PAUSE for consistency.
+ *
+ * References: Intel and AMD documentation for PAUSE, Intel optimisation
+ * guide.
+ */
+#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
+        !defined(MR_DO_NOT_USE_CPU_RELAX)
+    #define MR_ATOMIC_PAUSE \
+        do { \
+            __asm__ __volatile__("pause"); \
+        } while(0)
+#else
+    /* Fall back to a no-op */
+    #define MR_ATOMIC_PAUSE \
+        do { \
+            ; \
+        } while(0)
+#endif
+
+/*---------------------------------------------------------------------------*/
+
 /*
 ** Declarations for inline atomic operations.
 */
@@ -33,6 +75,13 @@ MR_EXTERN_INLINE MR_bool
 MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
     MR_Integer new_val);
 
+/*
+** Atomically add to an integer in memory and retrieve the result. In other
+** words an atomic pre-increment operation.
+*/
+MR_EXTERN_INLINE MR_Integer
+MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);
+
 /*
 ** Atomically add the second argument to the memory pointed to by the first
 ** argument.
@@ -66,6 +115,11 @@ MR_atomic_dec_int(volatile MR_Integer *addr);
 MR_EXTERN_INLINE MR_bool
 MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 
+/*
+** For information about GCC's builtins for atomic operations see:
+** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
+*/
+
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
@@ -122,6 +176,43 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 /*---------------------------------------------------------------------------*/
 
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
+        !defined(MR_AVOID_COMPILER_INTRINSICS)
+    #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
+        do { \
+            return __sync_add_and_fetch(addr, addend); \
+        } while (0)
+#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
+    /*
+    ** If there is no GCC builtin for this then it can be implemented in terms
+    ** of compare and swap, assuming that that has been implemented in
+    ** assembler for this architecture.
+    */
+    #define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
+        do { \
+            MR_Integer temp; \
+            temp = *addr; \
+            while (!MR_compare_and_swap_word(addr, temp, temp+addend)) { \
+                MR_ATOMIC_PAUSE; \
+                temp = *addr; \
+            } \
+            return temp+addend; \
+        } while (0)
+#endif
+
+#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
+MR_EXTERN_INLINE MR_Integer
+MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
+{
+    MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
+}
+#endif
+
+/*---------------------------------------------------------------------------*/
+
 #if defined(__GNUC__) && defined(__x86_64__) && \
     !defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
@@ -145,11 +236,11 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
         ); \
     } while (0)
 
-#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)
     #define MR_ATOMIC_ADD_INT_BODY \
         do { \
-            __sync_add_and_fetch(addr, addend); \
+            MR_atomic_add_and_fetch_int(addr, addend); \
        } while (0)
 #endif
@@ -358,48 +449,6 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
 /*---------------------------------------------------------------------------*/
 /*---------------------------------------------------------------------------*/
 
-/*
- * Intel and AMD support a pause instruction that is roughly equivalent
- * to a no-op. Intel recommend that it is used in spin-loops to improve
- * performance. Without a pause instruction multiple simultaneous
- * read-requests will be in-flight for the synchronization variable from a
- * single thread. Giving the pause instruction causes these to be executed
- * in sequence allowing the processor to handle the change in the
- * synchronization variable more easily.
- *
- * On some chips it may cause the spin-loop to use less power.
- *
- * This instruction was introduced with the Pentium 4 but is backwards
- * compatible, This works because the two byte instruction for PAUSE is
- * equivalent to the NOP instruction prefixed by REPE. Therefore older
- * processors perform a no-op.
- *
- * This is not really an atomic instruction but we name it
- * MR_ATOMIC_PAUSE for consistency.
- *
- * References: Intel and AMD documentation for PAUSE, Intel optimisation
- * guide.
- */
-#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
-        !defined(MR_DO_NOT_USE_CPU_RELAX)
-    #define MR_ATOMIC_PAUSE \
-        do { \
-            __asm__ __volatile__("pause"); \
-        } while(0)
-#else
-    /* Fall back to a no-op */
-    #define MR_ATOMIC_PAUSE \
-        do { \
-            ; \
-        } while(0)
-#endif
-
-/*---------------------------------------------------------------------------*/
-
 /*
 ** Memory fence operations.
 */

--- a/runtime/mercury_context.c
+++ b/runtime/mercury_context.c
@@ -120,10 +120,16 @@ static MR_Integer MR_primordial_thread_cpu = -1;
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
 /*
-** This is used to give each context its own unique ID. It is protected by the
-** free_context_list_lock.
+** This is used to give each context its own unique ID. It is accessed with
+** atomic operations.
 */
 static MR_ContextId MR_next_context_id = 0;
+
+/*
+** Allocate a context ID.
+*/
+static MR_ContextId
+allocate_context_id(void);
 #endif
 
 /*
@@ -646,9 +652,6 @@ MR_Context *
 MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
 {
     MR_Context *c;
-#if MR_THREADSCOPE
-    MR_Unsigned context_id;
-#endif
 
     MR_LOCK(&free_context_list_lock, "create_context");
@@ -681,9 +684,6 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
     } else {
         c = NULL;
     }
-#if MR_THREADSCOPE
-    context_id = MR_next_context_id++;
-#endif
     MR_UNLOCK(&free_context_list_lock, "create_context i");
 
     if (c == NULL) {
@@ -701,7 +701,7 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
 #endif
     }
 #ifdef MR_THREADSCOPE
-    c->MR_ctxt_num_id = context_id;
+    c->MR_ctxt_num_id = allocate_context_id();
 #endif
 
     MR_init_context_maybe_generator(c, id, gen);
@@ -756,6 +756,13 @@ MR_destroy_context(MR_Context *c)
     MR_UNLOCK(&free_context_list_lock, "destroy_context");
 }
 
+#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
+static MR_ContextId
+allocate_context_id(void) {
+    return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
+}
+#endif
+
 #ifdef MR_LL_PARALLEL_CONJ
 static void
@@ -1238,6 +1245,11 @@ MR_define_entry(MR_do_runnext);
         MR_load_context(MR_ENGINE(MR_eng_this_context));
     } else {
 #ifdef MR_THREADSCOPE
+        /*
+        ** Allocate a new context Id so that someone looking at the threadscope
+        ** profile sees this as new work.
+        */
+        MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
         MR_threadscope_post_run_context();
 #endif
     }

--- a/runtime/mercury_threadscope.h
+++ b/runtime/mercury_threadscope.h
@@ -38,7 +38,7 @@ typedef struct MR_threadscope_event_buffer MR_threadscope_event_buffer_t;
 typedef MR_uint_least16_t MR_EngineId;
 typedef MR_uint_least16_t MR_ContextStopReason;
-typedef MR_uint_least32_t MR_ContextId;
+typedef MR_Integer MR_ContextId;
 
 /*
 ** This must be called by the primordial thread before starting any other