Files
mercury/runtime/mercury_context.h
Julien Fischer b57cfb54a5 Update references to configure.in.
configure.ac:
compiler/notes/overall_design.html:
deep_profiler/conf.m:
runtime/mercury_context.h:
runtime/mercury_goto.h:
runtime/mercury_grade.h:
runtime/mercury_regs.h:
    As above -- the configure template has been named configure.ac
    for a long time now.
2020-10-25 14:45:35 +11:00

931 lines
44 KiB
C

// vim: ts=4 sw=4 expandtab ft=c
// Copyright (C) 1997-2007, 2009-2011 The University of Melbourne.
// Copyright (C) 2014-2016, 2018, 2020 The Mercury team.
// This file is distributed under the terms specified in COPYING.LIB.
// mercury_context.h - defines Mercury multithreading stuff.
//
// A "context" is a Mercury thread. (We use a different term than "thread"
// to avoid confusing Mercury threads and POSIX threads.)
// Each context is represented by a value of type MR_Context,
// which contains a detstack, a nondetstack, a trail (if needed), the various
// pointers that refer to them, a succip, and a thread-resumption continuation.
// Contexts are initially stored in a free-list.
// When one is running, the POSIX thread that is executing it has a pointer
// to its context structure `this_context'. (WARNING: code that manipulates
// contexts must set this_context itself; it cannot rely on the generic
// mechanisms below to set it.) When a context suspends, it calls
// `MR_save_context(context_ptr)' which copies the context from the
// various registers and global variables into the structure referred to
// by `context_ptr'. The context contains no rN or fN registers - all
// registers are "context save" (by analogy to caller-save).
//
// When a new context is created for a parallel conjunction, information is
// passed to and from the new context via the stack frame of the procedure that
// originated the parallel conjunction. The code of a parallel conjunct has
// access to that original stack frame via the `parent_sp' register.
//
// Contexts can migrate transparently between multiple POSIX threads.
//
// Each POSIX thread has its own heap and solutions heap (both allocated
// in shared memory). This makes GC harder, but enables heap allocation
// to be done without locking which is very important for performance.
// Each context has a copy of the heap pointer that is taken when it is
// switched out. If the POSIX thread's heap pointer is the same as the
// copied one when the context is switched back in, then it is safe for
// the context to do heap reclamation on failure.
//
// If MR_THREAD_SAFE is not defined, then everything gets executed within a
// single POSIX thread. No locking is required.
#ifndef MERCURY_CONTEXT_H
#define MERCURY_CONTEXT_H
#include "mercury_regs.h" // for MR_hp, etc.
// Must come before system headers.
#include <stdio.h>
#include "mercury_types.h" // for MR_Word, MR_Code, etc
#include "mercury_trail.h" // for MR_TrailEntry
#include "mercury_memory.h" // for MR_MemoryZone
#include "mercury_thread.h" // for MercuryLock
#include "mercury_goto.h" // for MR_GOTO()
#include "mercury_conf.h" // for MR_CONSERVATIVE_GC
#include "mercury_backjump.h" // for MR_BackJumpHandler, etc
#include "mercury_atomic_ops.h" // for MR_atomic_*
// Conditional-compilation helpers: the argument is expanded in exactly one
// of the two grades (thread-safe or not) and discarded in the other, so
// grade-specific statements can be written inline in shared macros below.
#ifdef MR_THREAD_SAFE
#define MR_IF_THREAD_SAFE(x) x
#define MR_IF_NOT_THREAD_SAFE(x)
#else
#define MR_IF_THREAD_SAFE(x)
#define MR_IF_NOT_THREAD_SAFE(x) x
#endif
// Each engine has one MR_Context structure loaded into it (in the engine field
// named MR_eng_context) from a context which is pointed to by the engine's
// MR_eng_this_context field. Fields which can be expected to be accessed at
// least several times between context switches are accessed via MR_eng_context
// while the rest are accessed via MR_eng_this_context (which requires
// following an extra pointer). Note that some fields are further cached
// in abstract machine registers, and some in fact are only ever accessed
// via these abstract machine registers. The saved copies of some of these
// abstract machine registers are kept not in the named fields below, but in
// the engine's fake reg array.
//
// All fields accessed via MR_eng_context and via abstract machine registers
// should be mentioned in the MR_save_context and MR_load_context macros.
// All fields accessed via MR_eng_this_context should be mentioned in the
// MR_copy_eng_this_context_fields macro. All fields accessed via direct
// specification of the context need explicit code to set them in all places
// where we create new contexts: in the mercury_thread module for parallelism,
// and in the mercury_mm_own_stacks module for minimal model tabling.
//
// The context structure has the following fields. The documentation of each
// field says how it is accessed, but please take this info with a pinch of
// salt; I (zs) don't guarantee its accuracy.
//
// id A string to identify the context for humans who want to
// debug the handling of contexts.
// (Not accessed.)
//
// size Whether this context has regular-sized stacks or smaller
// stacks. Some parallel programs can allocate many contexts
// and most parallel computations should not require very
// large stacks. We allocate contexts with "smaller" stacks
// for parallel computations (although whether they are
// actually smaller is up to the user).
// (Accessed only when directly specifying the context.)
//
// next If this context is in the free-list `next' will point to
// the next free context. If this context is suspended waiting
// for a variable to become bound, `next' will point to the
// next waiting context. If this context is runnable but not
// currently running then `next' points to the next runnable
// context in the runqueue.
// (Accessed only when directly specifying the context.)
//
// exclusive_engine
// Either MR_ENGINE_ID_NONE, or else the exclusive engine
// that this context belongs to. A context with an exclusive
// engine may only be run on that engine. This restriction
// may be relaxed in the future so that it only applies when
// entering some foreign procs.
// (Accessed only when directly specifying the context.)
//
// resume A pointer to the code at which execution should resume
// when this context is next scheduled.
// (Accessed via MR_eng_this_context.)
//
// resume_engine
// When resuming a context this is the engine that it prefers
// or is required to be resumed on. Doing so can avoid cache
// misses as the engine's cache may already be warm.
// (Accessed only when directly specifying the context.)
//
// resume_engine_required
// resume_c_depth
// If resume_engine_required is MR_FALSE then resume_engine is
// simply a preference, and the resume_c_depth field has no
// meaning. If resume_engine_required is MR_TRUE then
// resume_engine and resume_c_depth must match the engine's id
// and c_depth, to ensure that when we enter a Mercury engine
// from C we return to the same engine. See the comments in
// mercury_engine.h.
// (Both accessed only when directly specifying the context.)
//
// resume_stack
// A stack used to record the Mercury engines on which this
// context executed some C calls that called back into
// Mercury. We must execute this context in the correct
// engine when returning to those C calls. See the comments
// in mercury_engine.h.
// (Accessed via MR_eng_this_context.)
//
// succip The succip for this context.
// (Accessed via abstract machine register.)
//
// detstack_zone The current detstack zone for this context.
// prev_detstack_zones
// A list of any previous detstack zones for this context.
// (Both accessed via MR_eng_context.)
// sp The saved sp for this context.
// (Accessed via abstract machine register.)
//
// nondetstack_zone The current nondetstack zone for this context.
// prev_nondetstack_zones
// A list of any previous nondetstack zones for this context.
// (Both accessed via MR_eng_context.)
// curfr The saved curfr for this context.
// maxfr The saved maxfr for this context.
// (Both accessed via abstract machine register.)
//
// genstack_zone The generator stack zone for this context.
// (Accessed via MR_eng_context.)
// gen_next The saved gen_next for this context.
// (Accessed via abstract machine register.)
//
// cutstack_zone The cut stack zone for this context.
// (Accessed via MR_eng_context.)
// cut_next The saved cut_next for this context.
// (Accessed via abstract machine register.)
//
// pnegstack_zone The possibly_negated_context stack zone for this context.
// (Accessed via MR_eng_context.)
// pneg_next The saved pneg_next for this context.
// (Accessed via abstract machine register.)
//
// parent_sp The saved parent_sp for this context.
// (Accessed via abstract machine register.)
//
// trail_zone The trail zone for this context.
// prev_trail_zones A list of any previous trail zones for this context.
// (Accessed via MR_eng_context.)
//
// trail_ptr The saved MR_trail_ptr for this context.
// ticket_counter The saved MR_ticket_counter for this context.
// ticket_highwater The saved MR_ticket_high_water for this context.
// (All accessed via abstract machine register.)
//
// backjump_handler The backjump handler for this context.
// backjump_next_choice_id The next available backjump choice id counter
// for this context.
// (All accessed via MR_eng_context.)
//
// hp The saved hp for this context.
// (Accessed via abstract machine register.)
//
// min_hp_rec This pointer marks the minimum value of MR_hp to which
// we can truncate the heap on backtracking. See comments
// before the macro MR_set_min_heap_reclamation_point below.
// (Accessed via abstract machine register.)
//
// thread_local_mutables
// The array of thread-local mutable values for this context.
// (Accessed via MR_eng_this_context.)
typedef struct MR_Context_Struct MR_Context;

// The stack size class of a context; see the documentation of the `size'
// field above. In stack-segment grades all contexts grow their stacks on
// demand, so only the regular size class exists.
typedef enum {
MR_CONTEXT_SIZE_REGULAR,
// Stack segment grades don't need differently sized contexts.
#ifndef MR_STACK_SEGMENTS
MR_CONTEXT_SIZE_SMALL
#endif
} MR_ContextSize;

// The size class used for contexts created to run sparks and loop control
// workers: "small" where small contexts exist, regular otherwise.
#ifdef MR_STACK_SEGMENTS
#define MR_CONTEXT_SIZE_FOR_SPARK MR_CONTEXT_SIZE_REGULAR
#define MR_CONTEXT_SIZE_FOR_LOOP_CONTROL_WORKER MR_CONTEXT_SIZE_REGULAR
#else
#define MR_CONTEXT_SIZE_FOR_SPARK MR_CONTEXT_SIZE_SMALL
#define MR_CONTEXT_SIZE_FOR_LOOP_CONTROL_WORKER MR_CONTEXT_SIZE_SMALL
#endif
#ifdef MR_THREAD_SAFE
// One entry of the per-context `resume_stack' documented above: a linked
// stack of (engine id, C depth) pairs recording the engines on which this
// context entered Mercury from C. See also the comments in mercury_engine.h.
typedef struct MR_ResumeStack_Struct MR_ResumeStack;
struct MR_ResumeStack_Struct {
MR_EngineId MR_resume_engine; // engine we must return to
MR_Unsigned MR_resume_c_depth; // that engine's C depth at the time
MR_ResumeStack *MR_resume_stack_next; // next (older) entry, or NULL
};
#endif
#ifdef MR_LL_PARALLEL_CONJ
typedef struct MR_SyncTerm_Struct MR_SyncTerm;
typedef struct MR_Spark_Struct MR_Spark;
typedef struct MR_SparkDeque_Struct MR_SparkDeque;
typedef struct MR_SparkArray_Struct MR_SparkArray;
// A spark contains just enough information to begin execution of a parallel
// conjunct. A spark will either be executed in the same context (same
// detstack, etc.) as the code that generated the spark, or it may be stolen
// from its deque and executed by any idle engine in a different context.
struct MR_Spark_Struct {
MR_SyncTerm *MR_spark_sync_term; // barrier for the parallel conjunction
MR_Code *MR_spark_resume; // code address at which the conjunct starts
MR_ThreadLocalMuts *MR_spark_thread_local_mutables; // inherited from creator
#ifdef MR_THREADSCOPE
// XXX this is not wide enough for higher engine ids
// Layout: engine id in the top 8 bits, a per-engine counter in the low
// 24 bits (see MR_fork_new_child below).
MR_uint_least32_t MR_spark_id;
#endif
};
// Assumed size of one cache line, in bytes.
#define CACHE_LINE_SIZE 64
// Number of padding bytes needed after an object of size `s' so that the
// object plus padding fill out a whole cache line; zero when the object is
// already at least one cache line in size.
#define PAD_CACHE_LINE(s) \
((s) < (CACHE_LINE_SIZE) ? (CACHE_LINE_SIZE) - (s) : 0)
// Work-stealing deque of sparks; the operations on it live in
// mercury_wsdeque.h (included near the end of this file).
struct MR_SparkDeque_Struct {
// The top index is modified by thieves; the other fields are modified by
// the owner. Therefore we pad out the structure to reduce false
// sharing.
volatile MR_Integer MR_sd_top;
// Keeps MR_sd_top on its own cache line, away from the owner's fields.
char padding[PAD_CACHE_LINE(sizeof(MR_Integer))];
volatile MR_Integer MR_sd_bottom; // modified only by the owning engine
// Presumably the backing storage for the sparks; see mercury_wsdeque.h.
volatile MR_SparkArray *MR_sd_active_array;
};
#endif // !MR_LL_PARALLEL_CONJ
// The context structure itself. See the long comment above for what each
// field means and how it is accessed; fields not covered there are
// commented inline below.
struct MR_Context_Struct {
const char *MR_ctxt_id;
#ifdef MR_THREADSCOPE
// Numeric context id used by threadscope profiling (not described in
// the field documentation above).
MR_Unsigned MR_ctxt_num_id;
#endif
MR_ContextSize MR_ctxt_size;
MR_Context *MR_ctxt_next;
#ifdef MR_LL_PARALLEL_CONJ
// The value of this field is used for synchronization.
MR_Code * volatile MR_ctxt_resume;
#else
MR_Code *MR_ctxt_resume;
#endif
#ifdef MR_THREAD_SAFE
MR_EngineId MR_ctxt_exclusive_engine;
MR_EngineId MR_ctxt_resume_engine;
MR_bool MR_ctxt_resume_engine_required;
MR_Unsigned MR_ctxt_resume_c_depth;
MR_ResumeStack *MR_ctxt_resume_stack;
#endif
#ifndef MR_HIGHLEVEL_CODE
MR_Code *MR_ctxt_succip;
MR_MemoryZone *MR_ctxt_detstack_zone;
MR_MemoryZones *MR_ctxt_prev_detstack_zones;
MR_Word *MR_ctxt_sp;
MR_MemoryZone *MR_ctxt_nondetstack_zone;
MR_MemoryZones *MR_ctxt_prev_nondetstack_zones;
MR_Word *MR_ctxt_maxfr;
MR_Word *MR_ctxt_curfr;
#ifdef MR_USE_MINIMAL_MODEL_STACK_COPY
MR_MemoryZone *MR_ctxt_genstack_zone;
MR_Integer MR_ctxt_gen_next;
MR_MemoryZone *MR_ctxt_cutstack_zone;
MR_Integer MR_ctxt_cut_next;
MR_MemoryZone *MR_ctxt_pnegstack_zone;
MR_Integer MR_ctxt_pneg_next;
#endif // MR_USE_MINIMAL_MODEL_STACK_COPY
#ifdef MR_USE_MINIMAL_MODEL_OWN_STACKS
// Presumably the generator that owns this context under own-stack
// minimal model tabling; set by mercury_mm_own_stacks (see the comment
// above) -- TODO confirm.
MR_Generator *MR_ctxt_owner_generator;
#endif // MR_USE_MINIMAL_MODEL_OWN_STACKS
#ifdef MR_LL_PARALLEL_CONJ
MR_Word *MR_ctxt_parent_sp;
#endif
#endif // !MR_HIGHLEVEL_CODE
#ifdef MR_USE_TRAIL
MR_MemoryZone *MR_ctxt_trail_zone;
#ifndef MR_USE_FIXED_SIZE_TRAIL
MR_MemoryZones *MR_ctxt_prev_trail_zones;
#endif
MR_TrailEntry *MR_ctxt_trail_ptr;
MR_ChoicepointId MR_ctxt_ticket_counter;
MR_ChoicepointId MR_ctxt_ticket_high_water;
#endif
#ifndef MR_HIGHLEVEL_CODE
MR_BackJumpHandler *MR_ctxt_backjump_handler;
MR_BackJumpChoiceId MR_ctxt_backjump_next_choice_id;
#endif
#ifndef MR_CONSERVATIVE_GC
MR_Word *MR_ctxt_hp;
MR_Word *MR_ctxt_min_hp_rec;
#endif
#ifdef MR_EXEC_TRACE_INFO_IN_CONTEXT
// Saved copies of the debugger's event counters (MR_trace_call_seqno,
// MR_trace_call_depth, MR_trace_event_number); saved and restored by
// MR_save_context / MR_load_context below.
MR_Unsigned MR_ctxt_call_seqno;
MR_Unsigned MR_ctxt_call_depth;
MR_Unsigned MR_ctxt_event_number;
#endif
MR_ThreadLocalMuts *MR_ctxt_thread_local_mutables;
};
// The runqueue is a linked list of contexts that are runnable.
extern MR_Context *MR_runqueue_head;
extern MR_Context *MR_runqueue_tail;
#ifdef MR_THREAD_SAFE
extern MercuryLock MR_runqueue_lock;
extern MercuryCond MR_runqueue_cond;
#endif
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
extern MR_bool MR_thread_pinning;
#endif
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
extern MR_bool MR_profile_parallel_execution;
// XXX: This is currently unused; we plan to use it in the future. -pbone
extern MR_Stats MR_profile_parallel_executed_local_sparks;
#endif
// As well as the runqueue, we maintain a linked list of contexts
// and associated file descriptors that are suspended blocked for
// reads/writes/exceptions. When the runqueue becomes empty, if
// this list is not empty then we call select and block until one
// or more of the file descriptors become ready for I/O, then
// wake the appropriate context.
// In addition, we should periodically check to see if the list of blocked
// contexts is non-empty and if so, poll to wake any contexts that
// can unblock. This, while not yielding true fairness (since this
// requires the current context to perform some yield-like action),
// ensures that it is possible for programmers to write concurrent
// programs with continuous computation and interleaved I/O dependent
// computation in a straight-forward manner. This polling is not
// currently implemented.
// The kinds of file-descriptor readiness a blocked context may be waiting
// for. These are bit flags and may be OR'ed together.
typedef enum {
MR_PENDING_READ = 0x01,
MR_PENDING_WRITE = 0x02,
MR_PENDING_EXEC = 0x04
} MR_WaitingMode;
// One node in the linked list of contexts blocked on I/O (see the comment
// above).
typedef struct MR_PendingContext_Struct {
struct MR_PendingContext_Struct *next; // next blocked context, or NULL
MR_Context *context; // the suspended context
int fd; // the file descriptor it is waiting on
MR_WaitingMode waiting_mode; // which kinds of readiness wake it
} MR_PendingContext;
extern MR_PendingContext *MR_pending_contexts;
#ifdef MR_THREAD_SAFE
extern MercuryLock MR_pending_contexts_lock;
#endif
#ifdef MR_LL_PARALLEL_CONJ
// The number of work-stealing engines waiting for work.
// We don't protect it with a separate lock, but updates to it are made while
// holding the MR_runqueue_lock. Reads are made without the lock.
// XXX We may need to use atomic instructions or memory fences on some
// architectures.
extern volatile MR_Integer MR_num_idle_ws_engines;
// Spark deques for work stealing. These are made visible so that they can
// be initialised by code in mercury_thread.c.
extern MR_SparkDeque **MR_spark_deques;
#endif // !MR_LL_PARALLEL_CONJ
////////////////////////////////////////////////////////////////////////////
#ifdef MR_THREAD_SAFE
// Return the number of processors available to this process or 0 if unknown.
// This function is not directly related to contexts, but shares code with the
// code to count the number of Mercury engines to start.
extern unsigned MR_get_num_processors(void);
#endif
////////////////////////////////////////////////////////////////////////////
// Allocates and initializes a new context structure, and gives it
// the given id. If gen is non-NULL, the context is for the given generator.
// The `MR_ctxt_thread_local_mutables' member must be initialised separately.
extern MR_Context *MR_create_context(const char *id,
MR_ContextSize ctxt_size, MR_Generator *gen);
// MR_release_context(context) returns the pointed-to context structure
// to the free list, and releases resources as necessary.
//
// VERY IMPORTANT: Call MR_save_context() before you call MR_release_context().
// Contexts are cached and calling MR_save_context() saves important
// book-keeping information, like the stack pointer and current stack segment.
// If you do not call these then an old, and since freed (or re-used elsewhere)
// stack segment may still be referenced by the context. If that context
// is reused later, then it will clobber another context's stack!
extern void MR_release_context(MR_Context *context);
// MR_init_context_stuff() initializes the lock structures for the runqueue,
// and detects the number of threads to use on the LLC backend.
extern void MR_init_context_stuff(void);
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
// MR_pin_thread() pins the current thread to the next available processor ID,
// if thread pinning is enabled.
// MR_pin_primordial_thread() is a special case for the primordial thread.
// It should only be executed once, and only by the primordial thread _before_
// the other threads are started.
//
// Both functions return the CPU number that the thread is pinned to or would
// be pinned to if pinning was both enabled and supported. That is a valid
// value is always returned even if the thread is not actually pinned.
extern int MR_pin_primordial_thread(void);
extern int MR_pin_thread(void);
// Free resources no longer required after thread pinning is done.
extern void MR_done_thread_pinning(void);
#endif
#ifdef MR_LL_PARALLEL_CONJ
// Shutdown all the work-stealing engines.
// (Exclusive engines shut down by themselves.)
extern void MR_shutdown_ws_engines(void);
#endif
// MR_finalize_context_stuff() finalizes the lock structures for the runqueue
// among other things setup by MR_init_context_stuff().
extern void MR_finalize_context_stuff(void);
// MR_flounder() aborts with a runtime error message. It is called if
// the runqueue becomes empty and none of the running processes are
// working, which means that the computation has floundered.
extern void MR_flounder(void);
// Relinquish the processor voluntarily without blocking.
extern void MR_sched_yield(void);
// Append the given context onto the end of the run queue.
extern void MR_schedule_context(MR_Context *ctxt);
#ifndef MR_HIGHLEVEL_CODE
// MR_idle() should be called by an engine without a context that is looking
// for more work. It transfers control to the MR_do_idle entry point via
// MR_GOTO and therefore does not return to the caller.
MR_declare_entry(MR_do_idle);
#define MR_idle() \
do { \
MR_GOTO(MR_ENTRY(MR_do_idle)); \
} while (0)
#endif
#ifndef MR_CONSERVATIVE_GC
// To figure out the maximum amount of heap we can reclaim on backtracking,
// we compare MR_hp with the MR_ctxt_hp.
//
// If MR_ctxt_hp == NULL then this is the first time this context has been
// scheduled, so the furthest back down the heap we can reclaim is to the
// current value of MR_hp.
//
// If MR_hp > MR_ctxt_hp, another context has allocated data on the heap
// since we were last scheduled, so the furthest back that we can reclaim is
// to the current value of MR_hp, so we set MR_min_hp_rec and the
// field of the same name in our context structure.
//
// If MR_hp < MR_ctxt_hp, then another context has truncated the heap on
// failure. For this to happen, it must be the case that the last time we
// were scheduled, that other context was the last one to allocate data on
// the heap, and we did not allocate any heap during that period of execution.
// That being the case, the furthest back to which we can reset the heap is
// to the current value of hp. This is a conservative approximation - it is
// possible that the current value of hp is the same as some previous value
// that we held, and we are now contiguous with our older data, so this
// algorithm will lead to holes in the heap, though GC will reclaim these.
//
// If hp == MR_ctxt_hp then no other process has allocated any heap since we
// were last scheduled, so we can proceed as if we had not stopped, and the
// furthest back that we can backtrack is the same as it was last time we
// were executing.
// Implements the policy described in the comment above: unless the heap
// pointer is exactly where this context left it, the furthest we may
// truncate the heap to on backtracking is the current MR_hp.
#define MR_set_min_heap_reclamation_point(ctxt) \
do { \
if (MR_hp != (ctxt)->MR_ctxt_hp || (ctxt)->MR_ctxt_hp == NULL) { \
MR_min_hp_rec = MR_hp; \
(ctxt)->MR_ctxt_min_hp_rec = MR_hp; \
} else { \
MR_min_hp_rec = (ctxt)->MR_ctxt_min_hp_rec; \
} \
} while (0)
// Save the heap pointer and the minimum reclamation point into the context
// when it is switched out; consulted by the macro above when it is next
// switched in.
#define MR_save_hp_in_context(ctxt) \
do { \
(ctxt)->MR_ctxt_hp = MR_hp; \
(ctxt)->MR_ctxt_min_hp_rec = MR_min_hp_rec; \
} while (0)
#else
// With conservative GC there is no heap reclamation on failure, so these
// are no-ops.
#define MR_set_min_heap_reclamation_point(ctxt) do { } while (0)
#define MR_save_hp_in_context(ctxt) do { } while (0)
#endif
// More conditional-compilation helpers, analogous to MR_IF_THREAD_SAFE
// above, used by the MR_load_context and MR_save_context macros below.
#ifdef MR_USE_TRAIL
#define MR_IF_USE_TRAIL(x) x
#else
#define MR_IF_USE_TRAIL(x)
#endif
#ifdef MR_USE_MINIMAL_MODEL_STACK_COPY
#define MR_IF_USE_MINIMAL_MODEL_STACK_COPY(x) x
#else
#define MR_IF_USE_MINIMAL_MODEL_STACK_COPY(x)
#endif
#ifdef MR_EXEC_TRACE_INFO_IN_CONTEXT
#define MR_IF_EXEC_TRACE_INFO_IN_CONTEXT(x) x
#else
#define MR_IF_EXEC_TRACE_INFO_IN_CONTEXT(x)
#endif
#ifndef MR_HIGHLEVEL_CODE
#define MR_IF_NOT_HIGHLEVEL_CODE(x) x
#else
#define MR_IF_NOT_HIGHLEVEL_CODE(x)
#endif
#ifdef MR_THREADSCOPE
#define MR_IF_THREADSCOPE(x) x
#else
#define MR_IF_THREADSCOPE(x)
#endif
#ifdef MR_WORKSTEAL_POLLING
#define MR_IF_NOT_WORKSTEAL_POLLING(x)
#else
#define MR_IF_NOT_WORKSTEAL_POLLING(x) x
#endif
// MR_load_context(cptr) loads the context pointed to by `cptr' into the
// abstract machine registers and the engine's MR_eng_context structure.
// It is the inverse of MR_save_context below; every field mentioned in one
// of these two macros must be mentioned in the other (see the comment near
// the top of this file).
#define MR_load_context(cptr) \
do { \
MR_Context *load_context_c; \
\
load_context_c = (cptr); \
MR_IF_NOT_HIGHLEVEL_CODE( \
MR_succip_word = (MR_Word) load_context_c->MR_ctxt_succip; \
MR_sp_word = (MR_Word) load_context_c->MR_ctxt_sp; \
MR_maxfr_word = (MR_Word) load_context_c->MR_ctxt_maxfr; \
MR_curfr_word = (MR_Word) load_context_c->MR_ctxt_curfr; \
MR_IF_USE_MINIMAL_MODEL_STACK_COPY( \
MR_gen_next = load_context_c->MR_ctxt_gen_next; \
MR_cut_next = load_context_c->MR_ctxt_cut_next; \
MR_pneg_next = load_context_c->MR_ctxt_pneg_next; \
) \
MR_IF_THREAD_SAFE( \
MR_parent_sp = load_context_c->MR_ctxt_parent_sp; \
) \
) \
MR_IF_USE_TRAIL( \
MR_IF_NOT_THREAD_SAFE( \
MR_trail_zone = load_context_c->MR_ctxt_trail_zone; \
) \
MR_IF_THREAD_SAFE( \
MR_ENGINE(MR_eng_context).MR_ctxt_trail_zone = \
load_context_c->MR_ctxt_trail_zone; \
) \
MR_trail_ptr = load_context_c->MR_ctxt_trail_ptr; \
MR_ticket_counter = load_context_c->MR_ctxt_ticket_counter; \
MR_ticket_high_water = load_context_c->MR_ctxt_ticket_high_water; \
) \
MR_IF_NOT_HIGHLEVEL_CODE( \
MR_ENGINE(MR_eng_context).MR_ctxt_detstack_zone = \
load_context_c->MR_ctxt_detstack_zone; \
MR_ENGINE(MR_eng_context).MR_ctxt_prev_detstack_zones = \
load_context_c->MR_ctxt_prev_detstack_zones; \
MR_ENGINE(MR_eng_context).MR_ctxt_nondetstack_zone = \
load_context_c->MR_ctxt_nondetstack_zone; \
MR_ENGINE(MR_eng_context).MR_ctxt_prev_nondetstack_zones = \
load_context_c->MR_ctxt_prev_nondetstack_zones; \
MR_IF_USE_MINIMAL_MODEL_STACK_COPY( \
MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone = \
load_context_c->MR_ctxt_genstack_zone; \
MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone = \
load_context_c->MR_ctxt_cutstack_zone; \
MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone = \
load_context_c->MR_ctxt_pnegstack_zone; \
MR_gen_stack = (MR_GenStackFrame *) \
MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone-> \
MR_zone_min; \
MR_cut_stack = (MR_CutStackFrame *) \
MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone-> \
MR_zone_min; \
MR_pneg_stack = (MR_PNegStackFrame *) \
MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone-> \
MR_zone_min; \
) \
MR_IF_EXEC_TRACE_INFO_IN_CONTEXT( \
MR_trace_call_seqno = load_context_c->MR_ctxt_call_seqno; \
MR_trace_call_depth = load_context_c->MR_ctxt_call_depth; \
MR_trace_event_number = load_context_c->MR_ctxt_event_number; \
) \
) \
MR_set_min_heap_reclamation_point(load_context_c); \
} while (0)
// MR_save_context(cptr) copies the context state from the abstract machine
// registers and the engine's MR_eng_context structure into the context
// pointed to by `cptr'. It is the inverse of MR_load_context above; every
// field mentioned in one of these two macros must be mentioned in the other.
#define MR_save_context(cptr) \
do { \
MR_Context *save_context_c; \
\
save_context_c = (cptr); \
MR_IF_NOT_HIGHLEVEL_CODE( \
save_context_c->MR_ctxt_succip = MR_succip; \
save_context_c->MR_ctxt_sp = MR_sp; \
save_context_c->MR_ctxt_maxfr = MR_maxfr; \
save_context_c->MR_ctxt_curfr = MR_curfr; \
MR_IF_USE_MINIMAL_MODEL_STACK_COPY( \
save_context_c->MR_ctxt_gen_next = MR_gen_next; \
save_context_c->MR_ctxt_cut_next = MR_cut_next; \
save_context_c->MR_ctxt_pneg_next = MR_pneg_next; \
) \
MR_IF_THREAD_SAFE( \
save_context_c->MR_ctxt_parent_sp = MR_parent_sp; \
) \
) \
MR_IF_USE_TRAIL( \
MR_IF_NOT_THREAD_SAFE( \
save_context_c->MR_ctxt_trail_zone = MR_trail_zone; \
) \
MR_IF_THREAD_SAFE( \
save_context_c->MR_ctxt_trail_zone = \
MR_ENGINE(MR_eng_context).MR_ctxt_trail_zone; \
) \
save_context_c->MR_ctxt_trail_ptr = MR_trail_ptr; \
save_context_c->MR_ctxt_ticket_counter = MR_ticket_counter; \
save_context_c->MR_ctxt_ticket_high_water = MR_ticket_high_water; \
) \
MR_IF_NOT_HIGHLEVEL_CODE( \
save_context_c->MR_ctxt_detstack_zone = \
MR_ENGINE(MR_eng_context).MR_ctxt_detstack_zone; \
save_context_c->MR_ctxt_prev_detstack_zones = \
MR_ENGINE(MR_eng_context).MR_ctxt_prev_detstack_zones; \
save_context_c->MR_ctxt_nondetstack_zone = \
MR_ENGINE(MR_eng_context).MR_ctxt_nondetstack_zone; \
save_context_c->MR_ctxt_prev_nondetstack_zones = \
MR_ENGINE(MR_eng_context).MR_ctxt_prev_nondetstack_zones; \
MR_IF_USE_MINIMAL_MODEL_STACK_COPY( \
save_context_c->MR_ctxt_genstack_zone = \
MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone; \
save_context_c->MR_ctxt_cutstack_zone = \
MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone; \
save_context_c->MR_ctxt_pnegstack_zone = \
MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone; \
MR_assert(MR_gen_stack == (MR_GenStackFrame *) \
MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone-> \
MR_zone_min); \
MR_assert(MR_cut_stack == (MR_CutStackFrame *) \
MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone-> \
MR_zone_min); \
MR_assert(MR_pneg_stack == (MR_PNegStackFrame *) \
MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone-> \
MR_zone_min); \
) \
MR_IF_EXEC_TRACE_INFO_IN_CONTEXT( \
save_context_c->MR_ctxt_call_seqno = MR_trace_call_seqno; \
save_context_c->MR_ctxt_call_depth = MR_trace_call_depth; \
save_context_c->MR_ctxt_event_number = MR_trace_event_number; \
) \
) \
MR_save_hp_in_context(save_context_c); \
} while (0)
// Copy the fields that are accessed via MR_eng_this_context (rather than
// via MR_eng_context or the abstract machine registers) from one context
// to another; see the field documentation near the top of this file.
#define MR_copy_eng_this_context_fields(to_cptr, from_cptr) \
do { \
/* It wouldn't be appropriate to copy the resume field. */ \
to_cptr->MR_ctxt_thread_local_mutables = \
from_cptr->MR_ctxt_thread_local_mutables; \
/* It wouldn't be appropriate to copy the spark_deque field. */ \
/* It wouldn't be appropriate to copy the saved_owners field. */ \
} while (0)
////////////////////////////////////////////////////////////////////////////
#ifdef MR_LL_PARALLEL_CONJ
// If you change MR_SyncTerm_Struct you need to update configure.ac.
//
// MR_st_count is manipulated via atomic operations, therefore it is declared
// as volatile.
struct MR_SyncTerm_Struct {
MR_Context *MR_st_orig_context; // context that started the parallel conj
MR_Word *MR_st_parent_sp; // that context's parent_sp at the time
// Initialised to the number of branches; see MR_init_sync_term below.
volatile MR_Unsigned MR_st_count;
};
// Compile-time check that the size the code generator assumes for sync
// terms (MR_SYNC_TERM_SIZE) matches the actual size of the structure.
MR_STATIC_ASSERT(mercury_context,
MR_SYNC_TERM_SIZE == MR_bytes_to_words(sizeof(struct MR_SyncTerm_Struct)));
// MR_init_sync_term(sync_term, nbranches, static_conj_id):
//
// Initialise a synchronisation term for a parallel conjunction with
// `nbranches' conjuncts, recording the current context and MR_parent_sp.
// The threadscope variant additionally posts a start-par-conj event;
// static_conj_id is unused otherwise.
#ifdef MR_THREADSCOPE
#define MR_init_sync_term(sync_term, nbranches, static_conj_id) \
do { \
MR_SyncTerm *init_st = (MR_SyncTerm *) &(sync_term); \
\
init_st->MR_st_orig_context = MR_ENGINE(MR_eng_this_context); \
init_st->MR_st_parent_sp = MR_parent_sp; \
init_st->MR_st_count = (nbranches); \
MR_threadscope_post_start_par_conj(&(sync_term), static_conj_id); \
} while (0)
#else
#define MR_init_sync_term(sync_term, nbranches, static_conj_id) \
do { \
MR_SyncTerm *init_st = (MR_SyncTerm *) &(sync_term); \
\
init_st->MR_st_orig_context = MR_ENGINE(MR_eng_this_context); \
init_st->MR_st_parent_sp = MR_parent_sp; \
init_st->MR_st_count = (nbranches); \
} while (0)
#endif
// MR_fork_new_child(sync_term, child):
//
// Create a new spark to execute the code at `child'. The new spark is put
// on the current engine's spark deque, from where it may later be stolen
// by an idle engine. MR_parent_sp must already be set appropriately before
// this instruction is executed. When work-stealing polling is not in use,
// this may also try to wake an idle work-stealing engine to advise it that
// there is work available (unless this context has an exclusive engine).
#define MR_fork_new_child(sync_term, child) \
do { \
MR_Spark fnc_spark; \
MR_SparkDeque *fnc_deque; \
MR_EngineId engine_id = MR_ENGINE(MR_eng_id); \
MR_IF_THREADSCOPE( \
MR_uint_least32_t id; \
) \
\
fnc_spark.MR_spark_sync_term = (MR_SyncTerm*) &(sync_term); \
fnc_spark.MR_spark_resume = (child); \
fnc_spark.MR_spark_thread_local_mutables = MR_THREAD_LOCAL_MUTABLES; \
MR_IF_THREADSCOPE( \
id = MR_ENGINE(MR_eng_next_spark_id)++; \
fnc_spark.MR_spark_id = (engine_id << 24)|(id & 0xFFFFFF); \
) \
fnc_deque = MR_ENGINE(MR_eng_spark_deque); \
MR_wsdeque_push_bottom(fnc_deque, &fnc_spark); \
MR_IF_THREADSCOPE( \
MR_threadscope_post_sparking(&(sync_term), fnc_spark.MR_spark_id); \
) \
MR_IF_NOT_WORKSTEAL_POLLING( \
if (MR_ENGINE(MR_eng_this_context)->MR_ctxt_exclusive_engine \
== MR_ENGINE_ID_NONE && MR_num_idle_ws_engines > 0) \
{ \
union MR_engine_wake_action_data action_data; \
action_data.MR_ewa_worksteal_engine = MR_ENGINE(MR_eng_id); \
MR_try_wake_ws_engine(MR_ENGINE(MR_eng_id), \
MR_ENGINE_ACTION_WORKSTEAL_ADVICE, \
&action_data, NULL); \
} \
) \
} while (0)
// This macro may be used as a condition for runtime parallelism decisions.
// It returns nonzero when parallelism is recommended (because there are
// enough CPUs to assign work to).
//
// This test calculates the length of a wsdeque each time it is called.
// The test will usually execute more often than the length of the
// queue changes. Therefore, it makes sense to update a protected counter
// each time a spark is pushed, popped or stolen from the queue. However I
// believe that these atomic operations could be more expensive than
// necessary.
//
// The current implementation computes the length of the queue each time this
// macro is evaluated, this requires no atomic operations and contains only
// one extra memory dereference whose cache line is probably already hot in
// the first-level cache.
// Nonzero when the local spark deque is shorter than the granularity
// threshold, i.e. when creating another spark is worthwhile; see the
// comment above.
#define MR_par_cond_local_wsdeque_length \
(MR_wsdeque_length(MR_ENGINE(MR_eng_spark_deque)) < \
MR_granularity_wsdeque_length)
// The barrier executed at the end of each parallel conjunct; returns the
// code address to jump to next.
extern MR_Code*
MR_do_join_and_continue(MR_SyncTerm *sync_term, MR_Code *join_label);
// Run the join_and_continue barrier for `sync_term' and transfer control
// to whatever code MR_do_join_and_continue says should run next.
#define MR_join_and_continue(sync_term, join_label) \
do { \
MR_Code *jump_target; \
jump_target = \
MR_do_join_and_continue((MR_SyncTerm*) &(sync_term), join_label); \
MR_GOTO(jump_target); \
} while (0)
// This needs to come after the definition of MR_SparkDeque_Struct.
#include "mercury_wsdeque.h"
// The following constants, structure and function can be used to wake up a
// sleeping engine; they are exported here for use by the MR_fork_new_child
// macro above.
// The possible actions a woken engine may be asked to perform.
#define MR_ENGINE_ACTION_NONE 0x0000
// ACTION_CONTEXT applies when an engine is being given a context directly
#define MR_ENGINE_ACTION_CONTEXT 0x0001
#define MR_ENGINE_ACTION_SHUTDOWN 0x0002
#define MR_ENGINE_ACTION_WORKSTEAL_ADVICE 0x0004
// ACTION_CONTEXT_ADVICE applies when a context is on the run queue that
// this engine should check.
#define MR_ENGINE_ACTION_CONTEXT_ADVICE 0x0008
// Data accompanying a wake action; which member is meaningful depends on
// the action being delivered.
union MR_engine_wake_action_data {
// This is provided for workstealing actions, to let the engine know
// where to look for work to steal.
MR_EngineId MR_ewa_worksteal_engine;
// This is provided for context actions.
MR_Context *MR_ewa_context;
};
// Try to wake a sleeping work-stealing engine.
//
// preferred_engine - The engine we'd like to wake up, a nearby engine will
// often be chosen so it's okay to name the current engine
// in this field.
//
// action - The action to run, see the macros above.
//
// action_data - Extra data for the action, if not applicable pass NULL.
//
// target_engine - If the call succeeds and this parameter is non-null, the
// ID of the engine that received this message is written to
// this address.
//
// This returns MR_TRUE if successful, MR_FALSE otherwise.
// See the comment above for the meaning of each parameter. The parameter
// name is spelled correctly here ("preferred", not "perferred") to match
// the documentation; prototype parameter names have no effect on callers.
extern MR_bool MR_try_wake_ws_engine(MR_EngineId preferred_engine,
int action,
union MR_engine_wake_action_data *action_data,
MR_EngineId *target_engine);
extern void MR_verify_initial_engine_sleep_sync(MR_EngineId id);
extern void MR_verify_final_engine_sleep_sync(MR_EngineId id,
MR_EngineType engine_type);
#ifdef MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
// These functions can be used to debug the runtime granularity control
// methods implemented above.
// decision is 1 if we choose to parallelise something and 0 if code should
// be run sequentially.
// This is not (yet) thread safe.
extern void MR_record_conditional_parallelism_decision(
MR_Unsigned decision);
// flush and close the log of conditional parallelism decisions
// This is not thread safe.
// This is a no-op if no parallelism decisions have been recorded.
extern void MR_write_out_conditional_parallelism_log(void);
#endif // MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
#endif // MR_LL_PARALLEL_CONJ
#endif // not MERCURY_CONTEXT_H