Previously, when an engine stole a spark and executed it using the context it
was already holding, it did not allocate a new context ID.  A user looking at
this behaviour in threadscope would see thread 27 (for instance) finish and
then immediately begin executing again.  We now allocate a new context ID
whenever a context is reused, so that the reused context appears as new work
from threadscope's point of view.  New context IDs are already allocated to
contexts that are taken from the free context lists.
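
The essence of the change can be sketched in plain C (illustrative only: the
counter, the allocate_context_id() helper and the context struct below are
standalone stand-ins, not the Mercury runtime's own definitions, which appear
in the diffs further down):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Shared counter from which every context incarnation draws its ID. */
    static atomic_long next_context_id = 0;

    /* Atomic pre-increment, mirroring MR_atomic_add_and_fetch_int. */
    static long allocate_context_id(void)
    {
        return atomic_fetch_add(&next_context_id, 1) + 1;
    }

    struct context { long num_id; };

    int main(void)
    {
        struct context c = { .num_id = allocate_context_id() };
        printf("first run as context %ld\n", c.num_id);

        /*
        ** Reusing the context for a stolen spark: give it a fresh ID so a
        ** profile viewer treats this as new work, not the old context
        ** restarting.
        */
        c.num_id = allocate_context_id();
        printf("reused, now reported as context %ld\n", c.num_id);
        return 0;
    }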

runtime/mercury_context.c:
    As above.

    The next context ID variable is now accessed atomically rather than being
    protected by the free context list lock.
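
    The pattern of the change, sketched in standalone C (a pthread mutex and
    C11 atomics stand in for the runtime's own lock and atomic macros; all
    names here are illustrative):

        #include <pthread.h>
        #include <stdatomic.h>
        #include <stdio.h>

        static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
        static long next_id_locked = 0;          /* old: lock-protected  */
        static atomic_long next_id_atomic = 0;   /* new: atomic access   */

        /* Old scheme: bump the counter while holding the list lock. */
        static long alloc_id_locked(void)
        {
            long id;
            pthread_mutex_lock(&list_lock);
            id = ++next_id_locked;
            pthread_mutex_unlock(&list_lock);
            return id;
        }

        /* New scheme: one atomic read-modify-write, no lock contention. */
        static long alloc_id_atomic(void)
        {
            return atomic_fetch_add(&next_id_atomic, 1) + 1;
        }

        int main(void)
        {
            /* Both hand out 1, 2, 3, ...; only the second is lock-free. */
            printf("%ld %ld\n", alloc_id_locked(), alloc_id_atomic());
            return 0;
        }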

runtime/mercury_atomic_ops.h:
runtime/mercury_atomic_ops.c:
    Implement a new atomic operation, MR_atomic_add_and_fetch_int, which is
    used to allocate context IDs.

    Reimplement MR_atomic_add_int in terms of MR_atomic_add_and_fetch_int when
    handwritten assembler support is not available.
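
    The semantics of the new operation, and the way a plain atomic add can be
    expressed in terms of it, can be sketched with the GCC builtin that the
    header falls back on (a standalone illustration; add_and_fetch() and add()
    below are stand-ins for the runtime's macros, and __sync_add_and_fetch
    requires GCC 4.1 or later):

        #include <stdio.h>

        static volatile long counter = 41;

        /* Returns the *new* value: an atomic pre-increment. */
        static long add_and_fetch(volatile long *addr, long addend)
        {
            return __sync_add_and_fetch(addr, addend);
        }

        /*
        ** A plain atomic add is the same operation with the result ignored,
        ** which is how MR_ATOMIC_ADD_INT_BODY is now defined when no
        ** handwritten assembler version is available.
        */
        static void add(volatile long *addr, long addend)
        {
            (void) add_and_fetch(addr, addend);
        }

        int main(void)
        {
            printf("%ld\n", add_and_fetch(&counter, 1));  /* prints 42 */
            add(&counter, 8);
            printf("%ld\n", counter);                     /* prints 50 */
            return 0;
        }

    When neither the builtin nor handwritten assembler is available, the
    header instead builds the operation from MR_compare_and_swap_word in a
    retry loop that pauses between attempts, as the diff below shows.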

runtime/mercury_atomic_ops.c:
    Re-order atomic operations to match the order in the header file.

runtime/mercury_atomic_ops.h:
    Place the definition of the MR_ATOMIC_PAUSE macro before the other atomic
    operations since MR_atomic_add_and_fetch_int depends on it.  This also
    conforms with the coding standard.

runtime/mercury_threadscope.h:
    Make the context ID type an MR_Integer to match the argument size of the
    available atomic operations.
Author: Paul Bone
Date:   2010-02-17 02:37:45 +00:00
Parent: 8db94039a6
Commit: 6b2bc6a66a

4 changed files with 136 additions and 66 deletions

runtime/mercury_atomic_ops.c

@@ -33,20 +33,11 @@ MR_OUTLINE_DEFN(
)
MR_OUTLINE_DEFN(
void
MR_atomic_inc_int(volatile MR_Integer *addr)
MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
,
{
MR_ATOMIC_INC_INT_BODY;
}
)
MR_OUTLINE_DEFN(
void
MR_atomic_dec_int(volatile MR_Integer *addr)
,
{
MR_ATOMIC_DEC_INT_BODY;
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
}
)
@@ -68,6 +59,24 @@ MR_OUTLINE_DEFN(
}
)
MR_OUTLINE_DEFN(
void
MR_atomic_inc_int(volatile MR_Integer *addr)
,
{
MR_ATOMIC_INC_INT_BODY;
}
)
MR_OUTLINE_DEFN(
void
MR_atomic_dec_int(volatile MR_Integer *addr)
,
{
MR_ATOMIC_DEC_INT_BODY;
}
)
MR_OUTLINE_DEFN(
MR_bool
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr)

runtime/mercury_atomic_ops.h

@@ -21,6 +21,48 @@
#if defined(MR_LL_PARALLEL_CONJ)
/*
* Intel and AMD support a pause instruction that is roughly equivalent
* to a no-op. Intel recommend that it is used in spin-loops to improve
* performance. Without a pause instruction multiple simultaneous
* read-requests will be in-flight for the synchronization variable from a
* single thread. Giving the pause instruction causes these to be executed
* in sequence allowing the processor to handle the change in the
* synchronization variable more easily.
*
* On some chips it may cause the spin-loop to use less power.
*
* This instruction was introduced with the Pentium 4 but is backwards
* compatible, This works because the two byte instruction for PAUSE is
* equivalent to the NOP instruction prefixed by REPE. Therefore older
* processors perform a no-op.
*
* This is not really an atomic instruction but we name it
* MR_ATOMIC_PAUSE for consistency.
*
* References: Intel and AMD documentation for PAUSE, Intel optimisation
* guide.
*/
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
!defined(MR_DO_NOT_USE_CPU_RELAX)
#define MR_ATOMIC_PAUSE \
do { \
__asm__ __volatile__("pause"); \
} while(0)
#else
/* Fall back to a no-op */
#define MR_ATOMIC_PAUSE \
do { \
; \
} while(0)
#endif
/*---------------------------------------------------------------------------*/
/*
** Declarations for inline atomic operations.
*/
@@ -33,6 +75,13 @@ MR_EXTERN_INLINE MR_bool
MR_compare_and_swap_word(volatile MR_Integer *addr, MR_Integer old,
MR_Integer new_val);
/*
** Atomically add to an integer in memory and retrieve the result. In other
** words an atomic pre-increment operation.
*/
MR_EXTERN_INLINE MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend);
/*
** Atomically add the second argument to the memory pointed to by the first
** argument.
@@ -66,6 +115,11 @@ MR_atomic_dec_int(volatile MR_Integer *addr);
MR_EXTERN_INLINE MR_bool
MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
/*
** For information about GCC's builtins for atomic operations see:
** http://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcc/Atomic-Builtins.html
*/
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
@@ -122,6 +176,43 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
/*---------------------------------------------------------------------------*/
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
!defined(MR_AVOID_COMPILER_INTRINSICS)
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
do { \
return __sync_add_and_fetch(addr, addend); \
} while (0)
#elif defined(MR_COMPARE_AND_SWAP_WORD_BODY)
/*
** If there is no GCC builtin for this then it can be implemented in terms
** of compare and swap, assuming that that has been implemented in
** assembler for this architecture.
*/
#define MR_ATOMIC_ADD_AND_FETCH_INT_BODY \
do { \
MR_Integer temp; \
temp = *addr; \
while (!MR_compare_and_swap_word(addr, temp, temp+addend)) { \
MR_ATOMIC_PAUSE; \
temp = *addr; \
} \
return temp+addend; \
} while (0)
#endif
#ifdef MR_ATOMIC_ADD_AND_FETCH_INT_BODY
MR_EXTERN_INLINE MR_Integer
MR_atomic_add_and_fetch_int(volatile MR_Integer *addr, MR_Integer addend)
{
MR_ATOMIC_ADD_AND_FETCH_INT_BODY;
}
#endif
/*---------------------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__x86_64__) && \
!defined(MR_AVOID_HANDWRITTEN_ASSEMBLER)
@@ -145,11 +236,11 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
); \
} while (0)
#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
#elif defined(MR_ATOMIC_ADD_AND_FETCH_INT_BODY)
#define MR_ATOMIC_ADD_INT_BODY \
do { \
__sync_add_and_fetch(addr, addend); \
MR_atomic_add_and_fetch_int(addr, addend); \
} while (0)
#endif
@@ -358,48 +449,6 @@ MR_atomic_dec_int_and_is_zero(volatile MR_Integer *addr);
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/*
* Intel and AMD support a pause instruction that is roughly equivalent
* to a no-op. Intel recommend that it is used in spin-loops to improve
* performance. Without a pause instruction multiple simultaneous
* read-requests will be in-flight for the synchronization variable from a
* single thread. Giving the pause instruction causes these to be executed
* in sequence allowing the processor to handle the change in the
* synchronization variable more easily.
*
* On some chips it may cause the spin-loop to use less power.
*
* This instruction was introduced with the Pentium 4 but is backwards
* compatible, This works because the two byte instruction for PAUSE is
* equivalent to the NOP instruction prefixed by REPE. Therefore older
* processors perform a no-op.
*
* This is not really an atomic instruction but we name it
* MR_ATOMIC_PAUSE for consistency.
*
* References: Intel and AMD documentation for PAUSE, Intel optimisation
* guide.
*/
#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) && \
!defined(MR_DO_NOT_USE_CPU_RELAX)
#define MR_ATOMIC_PAUSE \
do { \
__asm__ __volatile__("pause"); \
} while(0)
#else
/* Fall back to a no-op */
#define MR_ATOMIC_PAUSE \
do { \
; \
} while(0)
#endif
/*---------------------------------------------------------------------------*/
/*
** Memory fence operations.
*/

runtime/mercury_context.c

@@ -120,10 +120,16 @@ static MR_Integer MR_primordial_thread_cpu = -1;
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
/*
** This is used to give each context its own unique ID. It is protected by the
** free_context_list_lock.
** This is used to give each context its own unique ID. It is accessed with
** atomic operations.
*/
static MR_ContextId MR_next_context_id = 0;
/*
** Allocate a context ID.
*/
static MR_ContextId
allocate_context_id(void);
#endif
/*
@@ -646,9 +652,6 @@ MR_Context *
MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
{
MR_Context *c;
#if MR_THREADSCOPE
MR_Unsigned context_id;
#endif
MR_LOCK(&free_context_list_lock, "create_context");
@@ -681,9 +684,6 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
} else {
c = NULL;
}
#if MR_THREADSCOPE
context_id = MR_next_context_id++;
#endif
MR_UNLOCK(&free_context_list_lock, "create_context i");
if (c == NULL) {
@@ -701,7 +701,7 @@ MR_create_context(const char *id, MR_ContextSize ctxt_size, MR_Generator *gen)
#endif
}
#ifdef MR_THREADSCOPE
c->MR_ctxt_num_id = context_id;
c->MR_ctxt_num_id = allocate_context_id();
#endif
MR_init_context_maybe_generator(c, id, gen);
@@ -756,6 +756,13 @@ MR_destroy_context(MR_Context *c)
MR_UNLOCK(&free_context_list_lock, "destroy_context");
}
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
static MR_ContextId
allocate_context_id(void) {
return MR_atomic_add_and_fetch_int(&MR_next_context_id, 1);
}
#endif
#ifdef MR_LL_PARALLEL_CONJ
static void
@@ -1238,6 +1245,11 @@ MR_define_entry(MR_do_runnext);
MR_load_context(MR_ENGINE(MR_eng_this_context));
} else {
#ifdef MR_THREADSCOPE
/*
** Allocate a new context Id so that someone looking at the threadscope
** profile sees this as new work.
*/
MR_ENGINE(MR_eng_this_context)->MR_ctxt_num_id = allocate_context_id();
MR_threadscope_post_run_context();
#endif
}

runtime/mercury_threadscope.h

@@ -38,7 +38,7 @@ typedef struct MR_threadscope_event_buffer MR_threadscope_event_buffer_t;
typedef MR_uint_least16_t MR_EngineId;
typedef MR_uint_least16_t MR_ContextStopReason;
typedef MR_uint_least32_t MR_ContextId;
typedef MR_Integer MR_ContextId;
/*
** This must be called by the primordial thread before starting any other