mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 01:13:30 +00:00
configure.ac:
compiler/notes/overall_design.html:
deep_profiler/conf.m:
runtime/mercury_context.h:
runtime/mercury_goto.h:
runtime/mercury_grade.h:
runtime/mercury_regs.h:
As above -- the configure template has been named configure.ac
for a long time now.
931 lines
44 KiB
C
931 lines
44 KiB
C
// vim: ts=4 sw=4 expandtab ft=c
|
|
|
|
// Copyright (C) 1997-2007, 2009-2011 The University of Melbourne.
|
|
// Copyright (C) 2014-2016, 2018, 2020 The Mercury team.
|
|
// This file is distributed under the terms specified in COPYING.LIB.
|
|
|
|
// mercury_context.h - defines Mercury multithreading stuff.
|
|
//
|
|
// A "context" is a Mercury thread. (We use a different term than "thread"
|
|
// to avoid confusing Mercury threads and POSIX threads.)
|
|
// Each context is represented by a value of type MR_Context,
|
|
// which contains a detstack, a nondetstack, a trail (if needed), the various
|
|
// pointers that refer to them, a succip, and a thread-resumption continuation.
|
|
// Contexts are initially stored in a free-list.
|
|
// When one is running, the POSIX thread that is executing it has a pointer
|
|
// to its context structure `this_context'. (WARNING: code that manipulates
|
|
// contexts must set this_context itself; it cannot rely on the generic
|
|
// mechanisms below to set it.) When a context suspends, it calls
|
|
// `MR_save_context(context_ptr)' which copies the context from the
|
|
// various registers and global variables into the structure referred to
|
|
// by `context_ptr'. The context contains no rN or fN registers - all
|
|
// registers are "context save" (by analogy to caller-save).
|
|
//
|
|
// When a new context is created for a parallel conjunction, information is
|
|
// passed to and from the new context via the stack frame of the procedure that
|
|
// originated the parallel conjunction. The code of a parallel conjunct has
|
|
// access to that original stack frame via the `parent_sp' register.
|
|
//
|
|
// Contexts can migrate transparently between multiple POSIX threads.
|
|
//
|
|
// Each POSIX thread has its own heap and solutions heap (both allocated
|
|
// in shared memory). This makes GC harder, but enables heap allocation
|
|
// to be done without locking which is very important for performance.
|
|
// Each context has a copy of the heap pointer that is taken when it is
|
|
// switched out. If the POSIX thread's heap pointer is the same as the
|
|
// copied one when the context is switched back in, then it is safe for
|
|
// the context to do heap reclamation on failure.
|
|
//
|
|
// If MR_THREAD_SAFE is not defined, then everything gets executed within a
|
|
// single POSIX thread. No locking is required.
|
|
|
|
#ifndef MERCURY_CONTEXT_H
|
|
#define MERCURY_CONTEXT_H
|
|
|
|
#include "mercury_regs.h" // for MR_hp, etc.
|
|
// Must come before system headers.
|
|
|
|
#include <stdio.h>
|
|
|
|
#include "mercury_types.h" // for MR_Word, MR_Code, etc
|
|
#include "mercury_trail.h" // for MR_TrailEntry
|
|
#include "mercury_memory.h" // for MR_MemoryZone
|
|
#include "mercury_thread.h" // for MercuryLock
|
|
#include "mercury_goto.h" // for MR_GOTO()
|
|
#include "mercury_conf.h" // for MR_CONSERVATIVE_GC
|
|
#include "mercury_backjump.h" // for MR_BackJumpHandler, etc
|
|
#include "mercury_atomic_ops.h" // for MR_atomic_*
|
|
|
|
// Conditional-compilation helpers: MR_IF_THREAD_SAFE(x) expands to its
// argument only in thread-safe grades, and MR_IF_NOT_THREAD_SAFE(x) only
// in non-thread-safe grades. They let later macros include or omit
// thread-related code without repeating #ifdef blocks.
#ifdef MR_THREAD_SAFE
  #define MR_IF_THREAD_SAFE(x)      x
  #define MR_IF_NOT_THREAD_SAFE(x)
#else
  #define MR_IF_THREAD_SAFE(x)
  #define MR_IF_NOT_THREAD_SAFE(x)  x
#endif
|
|
|
|
// Each engine has one MR_Context structure loaded into it (in the engine field
|
|
// named MR_eng_context) from a context which is pointed to by the engine's
|
|
// MR_eng_this_context field. Fields which can be expected to be accessed at
|
|
// least several times between context switches are accessed via MR_eng_context
|
|
// while the rest are accessed via MR_eng_this_context (which requires
|
|
// following an extra pointer). Note that some fields are further cached
|
|
// in abstract machine registers, and some in fact are only ever accessed
|
|
// via these abstract machine registers. The saved copies of some of these
|
|
// abstract machine registers are kept not in the named fields below, but in
|
|
// the engine's fake reg array.
|
|
//
|
|
// All fields accessed via MR_eng_context and via abstract machine registers
|
|
// should be mentioned in the MR_save_context and MR_load_context macros.
|
|
// All fields accessed via MR_eng_this_context should be mentioned in the
|
|
// MR_copy_eng_this_context_fields macro. All fields accessed via direct
|
|
// specification of the context need explicit code to set them in all places
|
|
// where we create new contexts: in the mercury_thread module for parallelism,
|
|
// and in the mercury_mm_own_stacks module for minimal model tabling.
|
|
//
|
|
// The context structure has the following fields. The documentation of each
|
|
// field says how it is accessed, but please take this info with a pinch of
|
|
// salt; I (zs) don't guarantee its accuracy.
|
|
//
|
|
// id A string to identify the context for humans who want to
|
|
// debug the handling of contexts.
|
|
// (Not accessed.)
|
|
//
|
|
// size Whether this context has regular-sized stacks or smaller
|
|
// stacks. Some parallel programs can allocate many contexts
|
|
// and most parallel computations should not require very
|
|
// large stacks. We allocate contexts with "smaller" stacks
|
|
// for parallel computations (although whether they are
|
|
// actually smaller is up to the user).
|
|
// (Accessed only when directly specifying the context.)
|
|
//
|
|
// next If this context is in the free-list `next' will point to
|
|
// the next free context. If this context is suspended waiting
|
|
// for a variable to become bound, `next' will point to the
|
|
// next waiting context. If this context is runnable but not
|
|
// currently running then `next' points to the next runnable
|
|
// context in the runqueue.
|
|
// (Accessed only when directly specifying the context.)
|
|
//
|
|
// exclusive_engine
|
|
// Either MR_ENGINE_ID_NONE, or else the exclusive engine
|
|
// that this context belongs to. A context with an exclusive
|
|
// engine may only be run on that engine. This restriction
|
|
// may be relaxed in the future so that it only applies when
|
|
// entering some foreign procs.
|
|
// (Accessed only when directly specifying the context.)
|
|
//
|
|
// resume A pointer to the code at which execution should resume
|
|
// when this context is next scheduled.
|
|
// (Accessed via MR_eng_this_context.)
|
|
//
|
|
// resume_engine
|
|
// When resuming a context this is the engine that it prefers
|
|
// or is required to be resumed on. Doing so can avoid cache
|
|
// misses as the engine's cache may already be warm.
|
|
// (Accessed only when directly specifying the context.)
|
|
//
|
|
// resume_engine_required
|
|
// resume_c_depth
|
|
// If resume_engine_required is MR_FALSE then resume_engine is
|
|
// simply a preference, and the resume_c_depth field has no
|
|
// meaning. If resume_engine_required is MR_TRUE then
|
|
// resume_engine and resume_c_depth must match the engine's id
|
|
// and c_depth, to ensure that when we enter a Mercury engine
|
|
// from C we return to the same engine. See the comments in
|
|
// mercury_engine.h.
|
|
// (Both accessed only when directly specifying the context.)
|
|
//
|
|
// resume_stack
|
|
// A stack used to record the Mercury engines on which this
|
|
// context executed some C calls that called back into
|
|
// Mercury. We must execute this context in the correct
|
|
// engine when returning to those C calls. See the comments
|
|
// in mercury_engine.h.
|
|
// (Accessed via MR_eng_this_context.)
|
|
//
|
|
// succip The succip for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// detstack_zone The current detstack zone for this context.
|
|
// prev_detstack_zones
|
|
// A list of any previous detstack zones for this context.
|
|
// (Both accessed via MR_eng_context.)
|
|
// sp The saved sp for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// nondetstack_zone The current nondetstack zone for this context.
|
|
// prev_nondetstack_zones
|
|
// A list of any previous nondetstack zones for this context.
|
|
// (Both accessed via MR_eng_context.)
|
|
// curfr The saved curfr for this context.
|
|
// maxfr The saved maxfr for this context.
|
|
// (Both accessed via abstract machine register.)
|
|
//
|
|
// genstack_zone The generator stack zone for this context.
|
|
// (Accessed via MR_eng_context.)
|
|
// gen_next The saved gen_next for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// cutstack_zone The cut stack zone for this context.
|
|
// (Accessed via MR_eng_context.)
|
|
// cut_next The saved cut_next for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// pnegstack_zone The possibly_negated_context stack zone for this context.
|
|
// (Accessed via MR_eng_context.)
|
|
// pneg_next The saved pneg_next for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// parent_sp The saved parent_sp for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// trail_zone The trail zone for this context.
|
|
// prev_trail_zones A list of any previous trail zones for this context.
|
|
// (Accessed via MR_eng_context.)
|
|
//
|
|
// trail_ptr The saved MR_trail_ptr for this context.
|
|
// ticket_counter The saved MR_ticket_counter for this context.
|
|
// ticket_highwater The saved MR_ticket_high_water for this context.
|
|
// (All accessed via abstract machine register.)
|
|
//
|
|
// backjump_handler The backjump handler for this context.
|
|
// backjump_next_choice_id The next available backjump choice id counter
|
|
// for this context.
|
|
// (All accessed via MR_eng_context.)
|
|
//
|
|
// hp The saved hp for this context.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// min_hp_rec This pointer marks the minimum value of MR_hp to which
|
|
// we can truncate the heap on backtracking. See comments
|
|
// before the macro MR_set_min_heap_reclamation_point below.
|
|
// (Accessed via abstract machine register.)
|
|
//
|
|
// thread_local_mutables
|
|
// The array of thread-local mutable values for this context.
|
|
// (Accessed via MR_eng_this_context.)
|
|
|
|
typedef struct MR_Context_Struct MR_Context;
|
|
|
|
// The size class of a context's stacks. In grades without stack segments
// we distinguish regular-sized stacks from smaller ones (used for parallel
// computations, as described in the `size' field documentation above).
typedef enum {
    MR_CONTEXT_SIZE_REGULAR,
    // Stack segment grades don't need differently sized contexts.
#ifndef MR_STACK_SEGMENTS
    MR_CONTEXT_SIZE_SMALL
#endif
} MR_ContextSize;

// The context size to use for contexts that run sparks and for loop control
// workers. In stack segment grades only the regular size exists.
#ifdef MR_STACK_SEGMENTS
#define MR_CONTEXT_SIZE_FOR_SPARK                   MR_CONTEXT_SIZE_REGULAR
#define MR_CONTEXT_SIZE_FOR_LOOP_CONTROL_WORKER     MR_CONTEXT_SIZE_REGULAR
#else
#define MR_CONTEXT_SIZE_FOR_SPARK                   MR_CONTEXT_SIZE_SMALL
#define MR_CONTEXT_SIZE_FOR_LOOP_CONTROL_WORKER     MR_CONTEXT_SIZE_SMALL
#endif
|
|
|
|
#ifdef MR_THREAD_SAFE
|
|
typedef struct MR_ResumeStack_Struct MR_ResumeStack;
|
|
|
|
struct MR_ResumeStack_Struct {
|
|
MR_EngineId MR_resume_engine;
|
|
MR_Unsigned MR_resume_c_depth;
|
|
MR_ResumeStack *MR_resume_stack_next;
|
|
};
|
|
#endif
|
|
|
|
#ifdef MR_LL_PARALLEL_CONJ
typedef struct MR_SyncTerm_Struct MR_SyncTerm;
typedef struct MR_Spark_Struct MR_Spark;
typedef struct MR_SparkDeque_Struct MR_SparkDeque;
typedef struct MR_SparkArray_Struct MR_SparkArray;

// A spark contains just enough information to begin execution of a parallel
// conjunct. A spark will either be executed in the same context (same
// detstack, etc.) as the code that generated the spark, or it may be stolen
// from its deque and executed by any idle engine in a different context.

struct MR_Spark_Struct {
    // The sync term of the parallel conjunction this spark belongs to.
    MR_SyncTerm         *MR_spark_sync_term;
    // The code address at which execution of the conjunct begins.
    MR_Code             *MR_spark_resume;
    // The thread-local mutables the conjunct should run with.
    MR_ThreadLocalMuts  *MR_spark_thread_local_mutables;
#ifdef MR_THREADSCOPE
    // Profiling id; high 8 bits are the engine id, low 24 a per-engine
    // counter (see MR_fork_new_child below).
    // XXX this is not wide enough for higher engine ids
    MR_uint_least32_t   MR_spark_id;
#endif
};
|
|
|
|
// Assumed size in bytes of one cache line, and the number of padding bytes
// needed after a structure member of s bytes to fill out the rest of its
// cache line (zero when s already covers a full line).
#define CACHE_LINE_SIZE     64
#define PAD_CACHE_LINE(s)                                               \
    ((s) < (CACHE_LINE_SIZE) ? (CACHE_LINE_SIZE) - (s) : 0)
|
|
|
|
// A work-stealing deque of sparks.
// The top index is modified by thieves; the other fields are modified by
// the owner. Therefore we pad out the structure to reduce false sharing.
struct MR_SparkDeque_Struct {
    volatile MR_Integer     MR_sd_top;
    // Padding so that the owner-written fields below sit on a different
    // cache line from MR_sd_top.
    char                    padding[PAD_CACHE_LINE(sizeof(MR_Integer))];

    volatile MR_Integer     MR_sd_bottom;
    volatile MR_SparkArray  *MR_sd_active_array;
};
#endif // MR_LL_PARALLEL_CONJ
|
|
|
|
// The concrete representation of a context. Each field is documented in
// detail in the long comment preceding this structure; the comment also
// says through which route (MR_eng_context, MR_eng_this_context, abstract
// machine register, or direct specification) each field is accessed.
struct MR_Context_Struct {
    const char          *MR_ctxt_id;
#ifdef MR_THREADSCOPE
    // Numeric identifier used by the threadscope support; only present in
    // threadscope grades.
    MR_Unsigned         MR_ctxt_num_id;
#endif
    MR_ContextSize      MR_ctxt_size;
    MR_Context          *MR_ctxt_next;
#ifdef MR_LL_PARALLEL_CONJ
    // The value of this field is used for synchronization.
    MR_Code             * volatile MR_ctxt_resume;
#else
    MR_Code             *MR_ctxt_resume;
#endif
#ifdef MR_THREAD_SAFE
    MR_EngineId         MR_ctxt_exclusive_engine;
    MR_EngineId         MR_ctxt_resume_engine;
    MR_bool             MR_ctxt_resume_engine_required;
    MR_Unsigned         MR_ctxt_resume_c_depth;
    MR_ResumeStack      *MR_ctxt_resume_stack;
#endif

#ifndef MR_HIGHLEVEL_CODE
    MR_Code             *MR_ctxt_succip;

    MR_MemoryZone       *MR_ctxt_detstack_zone;
    MR_MemoryZones      *MR_ctxt_prev_detstack_zones;
    MR_Word             *MR_ctxt_sp;

    MR_MemoryZone       *MR_ctxt_nondetstack_zone;
    MR_MemoryZones      *MR_ctxt_prev_nondetstack_zones;
    MR_Word             *MR_ctxt_maxfr;
    MR_Word             *MR_ctxt_curfr;

  #ifdef MR_USE_MINIMAL_MODEL_STACK_COPY
    MR_MemoryZone       *MR_ctxt_genstack_zone;
    MR_Integer          MR_ctxt_gen_next;

    MR_MemoryZone       *MR_ctxt_cutstack_zone;
    MR_Integer          MR_ctxt_cut_next;

    MR_MemoryZone       *MR_ctxt_pnegstack_zone;
    MR_Integer          MR_ctxt_pneg_next;

  #endif // MR_USE_MINIMAL_MODEL_STACK_COPY
  #ifdef MR_USE_MINIMAL_MODEL_OWN_STACKS
    MR_Generator        *MR_ctxt_owner_generator;
  #endif // MR_USE_MINIMAL_MODEL_OWN_STACKS

  #ifdef MR_LL_PARALLEL_CONJ
    MR_Word             *MR_ctxt_parent_sp;
  #endif
#endif // !MR_HIGHLEVEL_CODE

#ifdef MR_USE_TRAIL
    MR_MemoryZone       *MR_ctxt_trail_zone;
  #ifndef MR_USE_FIXED_SIZE_TRAIL
    MR_MemoryZones      *MR_ctxt_prev_trail_zones;
  #endif
    MR_TrailEntry       *MR_ctxt_trail_ptr;
    MR_ChoicepointId    MR_ctxt_ticket_counter;
    MR_ChoicepointId    MR_ctxt_ticket_high_water;
#endif

#ifndef MR_HIGHLEVEL_CODE
    MR_BackJumpHandler  *MR_ctxt_backjump_handler;
    MR_BackJumpChoiceId MR_ctxt_backjump_next_choice_id;
#endif

#ifndef MR_CONSERVATIVE_GC
    MR_Word             *MR_ctxt_hp;
    MR_Word             *MR_ctxt_min_hp_rec;
#endif

#ifdef MR_EXEC_TRACE_INFO_IN_CONTEXT
    // Saved copies of the execution-trace counters; see MR_load_context
    // and MR_save_context below.
    MR_Unsigned         MR_ctxt_call_seqno;
    MR_Unsigned         MR_ctxt_call_depth;
    MR_Unsigned         MR_ctxt_event_number;
#endif

    MR_ThreadLocalMuts  *MR_ctxt_thread_local_mutables;
};
|
|
|
|
// The runqueue is a linked list of contexts that are runnable.
// In thread-safe grades, access is coordinated through MR_runqueue_lock
// and MR_runqueue_cond.

extern MR_Context       *MR_runqueue_head;
extern MR_Context       *MR_runqueue_tail;
#ifdef MR_THREAD_SAFE
  extern MercuryLock    MR_runqueue_lock;
  extern MercuryCond    MR_runqueue_cond;
#endif
// Whether threads are being pinned to CPUs; only meaningful when both
// low-level parallel conjunctions and thread pinning are available.
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
extern MR_bool          MR_thread_pinning;
#endif

#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
extern MR_bool          MR_profile_parallel_execution;

// XXX: This is currently unused, we plan to use it in the future. -pbone
extern MR_Stats         MR_profile_parallel_executed_local_sparks;
#endif
|
|
|
|
// As well as the runqueue, we maintain a linked list of contexts
|
|
// and associated file descriptors that are suspended blocked for
|
|
// reads/writes/exceptions. When the runqueue becomes empty, if
|
|
// this list is not empty then we call select and block until one
|
|
// or more of the file descriptors become ready for I/O, then
|
|
// wake the appropriate context.
|
|
// In addition, we should periodically check to see if the list of blocked
|
|
// contexts is non-empty and if so, poll to wake any contexts that
|
|
// can unblock. This, while not yielding true fairness (since this
|
|
// requires the current context to perform some yield-like action),
|
|
// ensures that it is possible for programmers to write concurrent
|
|
// programs with continuous computation and interleaved I/O dependent
|
|
// computation in a straight-forward manner. This polling is not
|
|
// currently implemented.
|
|
|
|
// The kinds of file-descriptor readiness a blocked context may be waiting
// for. The values are distinct bits.
typedef enum {
    MR_PENDING_READ  = 0x01,
    MR_PENDING_WRITE = 0x02,
    MR_PENDING_EXEC  = 0x04
} MR_WaitingMode;

// One node in the list of contexts blocked on I/O (see the comment above):
// which context is blocked, on which file descriptor, and for which kind
// of readiness.
typedef struct MR_PendingContext_Struct {
    struct MR_PendingContext_Struct *next;
    MR_Context                      *context;
    int                             fd;
    MR_WaitingMode                  waiting_mode;
} MR_PendingContext;

// Head of the list of blocked contexts; guarded by MR_pending_contexts_lock
// in thread-safe grades.
extern MR_PendingContext    *MR_pending_contexts;
#ifdef MR_THREAD_SAFE
  extern MercuryLock        MR_pending_contexts_lock;
#endif
|
|
|
|
#ifdef MR_LL_PARALLEL_CONJ
|
|
// The number of work-stealing engines waiting for work.
|
|
// We don't protect it with a separate lock, but updates to it are made while
|
|
// holding the MR_runqueue_lock. Reads are made without the lock.
|
|
// XXX We may need to use atomic instructions or memory fences on some
|
|
// architectures.
|
|
|
|
extern volatile MR_Integer MR_num_idle_ws_engines;
|
|
|
|
// Spark deques for work stealing, These are made visible so that they can
|
|
// be initialised by code in mercury_thread.c.
|
|
|
|
extern MR_SparkDeque **MR_spark_deques;
|
|
#endif // !MR_LL_PARALLEL_CONJ
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef MR_THREAD_SAFE
|
|
// Return the number of processors available to this process or 0 if unknown.
|
|
// This function is not directly related to contexts, but shares code with the
|
|
// code to count the number of Mercury engines to start.
|
|
extern unsigned MR_get_num_processors(void);
|
|
#endif
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Allocates and initializes a new context structure, and gives it
// the given id. If gen is non-NULL, the context is for the given generator.
// The `MR_ctxt_thread_local_mutables' member must be initialised separately.

extern MR_Context   *MR_create_context(const char *id,
                        MR_ContextSize ctxt_size, MR_Generator *gen);

// MR_release_context(context) returns the pointed-to context structure
// to the free list, and releases resources as necessary.
//
// VERY IMPORTANT: Call MR_save_context() before you call MR_release_context().
// Contexts are cached and calling MR_save_context() saves important
// book-keeping information, like the stack pointer and current stack segment.
// If you do not call these then an old, and since freed (or re-used elsewhere)
// stack segment may still be referenced by the context. If that context
// is reused later, then it will clobber another context's stack!

extern void         MR_release_context(MR_Context *context);

// MR_init_context_stuff() initializes the lock structures for the runqueue,
// and detects the number of threads to use on the LLC backend.

extern void         MR_init_context_stuff(void);

#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
// MR_pin_thread() pins the current thread to the next available processor ID,
// if thread pinning is enabled.
// MR_pin_primordial_thread() is a special case for the primordial thread.
// It should only be executed once, and only by the primordial thread _before_
// the other threads are started.
//
// Both functions return the CPU number that the thread is pinned to or would
// be pinned to if pinning was both enabled and supported. That is, a valid
// value is always returned even if the thread is not actually pinned.
extern int          MR_pin_primordial_thread(void);
extern int          MR_pin_thread(void);

// Free resources no longer required after thread pinning is done.
extern void         MR_done_thread_pinning(void);
#endif

#ifdef MR_LL_PARALLEL_CONJ
// Shutdown all the work-stealing engines.
// (Exclusive engines shut down by themselves.)
extern void         MR_shutdown_ws_engines(void);
#endif

// MR_finalize_context_stuff() finalizes the lock structures for the runqueue
// among other things setup by MR_init_context_stuff().

extern void         MR_finalize_context_stuff(void);

// MR_flounder() aborts with a runtime error message. It is called if
// the runqueue becomes empty and none of the running processes are
// working, which means that the computation has floundered.

extern void         MR_flounder(void);

// Relinquish the processor voluntarily without blocking.

extern void         MR_sched_yield(void);

// Append the given context onto the end of the run queue.

extern void         MR_schedule_context(MR_Context *ctxt);
|
|
|
|
#ifndef MR_HIGHLEVEL_CODE
// MR_idle() should be called by an engine without a context that is looking
// for more work. It transfers control to the MR_do_idle entry point via
// MR_GOTO, so it does not fall through.

MR_declare_entry(MR_do_idle);
#define MR_idle()                                                       \
    do {                                                                \
        MR_GOTO(MR_ENTRY(MR_do_idle));                                  \
    } while (0)
#endif
|
|
|
|
#ifndef MR_CONSERVATIVE_GC

// To figure out the maximum amount of heap we can reclaim on backtracking,
// we compare MR_hp with the MR_ctxt_hp.
//
// If MR_ctxt_hp == NULL then this is the first time this context has been
// scheduled, so the furthest back down the heap we can reclaim is to the
// current value of MR_hp.
//
// If MR_hp > MR_ctxt_hp, another context has allocated data on the heap
// since we were last scheduled, so the furthest back that we can reclaim is
// to the current value of MR_hp, so we set MR_min_hp_rec and the
// field of the same name in our context structure.
//
// If MR_hp < MR_ctxt_hp, then another context has truncated the heap on
// failure. For this to happen, it must be the case that last time we were
// scheduled, we did not allocate any heap during that period of execution,
// and that other context was the last one to allocate data on the heap.
// That being the case, the furthest back to which we can reset the heap is
// to the current value of hp. This is a conservative approximation - it is
// possible that the current value of hp is the same as some previous value
// that we held, and we are now contiguous with our older data, so this
// algorithm will lead to holes in the heap, though GC will reclaim these.
//
// If hp == MR_ctxt_hp then no other process has allocated any heap since we
// were last scheduled, so we can proceed as if we had not stopped, and the
// furthest back that we can backtrack is the same as it was last time we
// were executing.

#define MR_set_min_heap_reclamation_point(ctxt)                           \
    do {                                                                  \
        if (MR_hp != (ctxt)->MR_ctxt_hp || (ctxt)->MR_ctxt_hp == NULL) {  \
            MR_min_hp_rec = MR_hp;                                        \
            (ctxt)->MR_ctxt_min_hp_rec = MR_hp;                           \
        } else {                                                          \
            MR_min_hp_rec = (ctxt)->MR_ctxt_min_hp_rec;                   \
        }                                                                 \
    } while (0)

// Save the current heap pointer and minimum reclamation point into the
// given context, for restoration when the context is next scheduled.
#define MR_save_hp_in_context(ctxt)                                       \
    do {                                                                  \
        (ctxt)->MR_ctxt_hp = MR_hp;                                       \
        (ctxt)->MR_ctxt_min_hp_rec = MR_min_hp_rec;                       \
    } while (0)

#else

// With conservative GC there is no heap reclamation on failure,
// so both operations are no-ops.

#define MR_set_min_heap_reclamation_point(ctxt) do { } while (0)

#define MR_save_hp_in_context(ctxt)             do { } while (0)

#endif
|
|
|
|
// More conditional-compilation helpers in the style of MR_IF_THREAD_SAFE
// above: each MR_IF_FOO(x) expands to x only when the corresponding option
// is in effect (or, for the MR_IF_NOT_* variants, when it is not).
// They are used by MR_load_context and MR_save_context below.

#ifdef MR_USE_TRAIL
  #define MR_IF_USE_TRAIL(x) x
#else
  #define MR_IF_USE_TRAIL(x)
#endif

#ifdef MR_USE_MINIMAL_MODEL_STACK_COPY
  #define MR_IF_USE_MINIMAL_MODEL_STACK_COPY(x) x
#else
  #define MR_IF_USE_MINIMAL_MODEL_STACK_COPY(x)
#endif

#ifdef MR_EXEC_TRACE_INFO_IN_CONTEXT
  #define MR_IF_EXEC_TRACE_INFO_IN_CONTEXT(x) x
#else
  #define MR_IF_EXEC_TRACE_INFO_IN_CONTEXT(x)
#endif

#ifndef MR_HIGHLEVEL_CODE
  #define MR_IF_NOT_HIGHLEVEL_CODE(x) x
#else
  #define MR_IF_NOT_HIGHLEVEL_CODE(x)
#endif

#ifdef MR_THREADSCOPE
  #define MR_IF_THREADSCOPE(x) x
#else
  #define MR_IF_THREADSCOPE(x)
#endif

#ifdef MR_WORKSTEAL_POLLING
  #define MR_IF_NOT_WORKSTEAL_POLLING(x)
#else
  #define MR_IF_NOT_WORKSTEAL_POLLING(x) x
#endif
|
|
|
|
// MR_load_context(cptr) loads the context pointed to by cptr into the
// current engine: it copies the saved abstract machine registers (succip,
// sp, maxfr, curfr, trail and minimal-model registers, parent_sp) and the
// per-context memory zones into the engine's registers and MR_eng_context,
// then sets the minimum heap reclamation point. The inverse operation is
// MR_save_context below; the two must mention the same set of fields.
#define MR_load_context(cptr)                                                 \
    do {                                                                      \
        MR_Context  *load_context_c;                                          \
                                                                              \
        load_context_c = (cptr);                                              \
        MR_IF_NOT_HIGHLEVEL_CODE(                                             \
            MR_succip_word = (MR_Word) load_context_c->MR_ctxt_succip;        \
            MR_sp_word = (MR_Word) load_context_c->MR_ctxt_sp;                \
            MR_maxfr_word = (MR_Word) load_context_c->MR_ctxt_maxfr;          \
            MR_curfr_word = (MR_Word) load_context_c->MR_ctxt_curfr;          \
            MR_IF_USE_MINIMAL_MODEL_STACK_COPY(                               \
                MR_gen_next = load_context_c->MR_ctxt_gen_next;               \
                MR_cut_next = load_context_c->MR_ctxt_cut_next;               \
                MR_pneg_next = load_context_c->MR_ctxt_pneg_next;             \
            )                                                                 \
            MR_IF_THREAD_SAFE(                                                \
                MR_parent_sp = load_context_c->MR_ctxt_parent_sp;             \
            )                                                                 \
        )                                                                     \
        MR_IF_USE_TRAIL(                                                      \
            MR_IF_NOT_THREAD_SAFE(                                            \
                MR_trail_zone = load_context_c->MR_ctxt_trail_zone;           \
            )                                                                 \
            MR_IF_THREAD_SAFE(                                                \
                MR_ENGINE(MR_eng_context).MR_ctxt_trail_zone =                \
                    load_context_c->MR_ctxt_trail_zone;                       \
            )                                                                 \
            MR_trail_ptr = load_context_c->MR_ctxt_trail_ptr;                 \
            MR_ticket_counter = load_context_c->MR_ctxt_ticket_counter;       \
            MR_ticket_high_water = load_context_c->MR_ctxt_ticket_high_water; \
        )                                                                     \
        MR_IF_NOT_HIGHLEVEL_CODE(                                             \
            MR_ENGINE(MR_eng_context).MR_ctxt_detstack_zone =                 \
                load_context_c->MR_ctxt_detstack_zone;                        \
            MR_ENGINE(MR_eng_context).MR_ctxt_prev_detstack_zones =           \
                load_context_c->MR_ctxt_prev_detstack_zones;                  \
            MR_ENGINE(MR_eng_context).MR_ctxt_nondetstack_zone =              \
                load_context_c->MR_ctxt_nondetstack_zone;                     \
            MR_ENGINE(MR_eng_context).MR_ctxt_prev_nondetstack_zones =        \
                load_context_c->MR_ctxt_prev_nondetstack_zones;               \
            MR_IF_USE_MINIMAL_MODEL_STACK_COPY(                               \
                MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone =             \
                    load_context_c->MR_ctxt_genstack_zone;                    \
                MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone =             \
                    load_context_c->MR_ctxt_cutstack_zone;                    \
                MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone =            \
                    load_context_c->MR_ctxt_pnegstack_zone;                   \
                MR_gen_stack = (MR_GenStackFrame *)                           \
                    MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone->         \
                        MR_zone_min;                                          \
                MR_cut_stack = (MR_CutStackFrame *)                           \
                    MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone->         \
                        MR_zone_min;                                          \
                MR_pneg_stack = (MR_PNegStackFrame *)                         \
                    MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone->        \
                        MR_zone_min;                                          \
            )                                                                 \
            MR_IF_EXEC_TRACE_INFO_IN_CONTEXT(                                 \
                MR_trace_call_seqno = load_context_c->MR_ctxt_call_seqno;     \
                MR_trace_call_depth = load_context_c->MR_ctxt_call_depth;     \
                MR_trace_event_number = load_context_c->MR_ctxt_event_number; \
            )                                                                 \
        )                                                                     \
        MR_set_min_heap_reclamation_point(load_context_c);                    \
    } while (0)
|
|
|
|
// MR_save_context(cptr) is the inverse of MR_load_context above: it copies
// the current engine's abstract machine registers and the zone fields of
// MR_eng_context into the context pointed to by cptr, and finishes by
// saving the heap pointer via MR_save_hp_in_context. It must be kept in
// sync with MR_load_context: both must mention the same set of fields.
#define MR_save_context(cptr)                                                 \
    do {                                                                      \
        MR_Context  *save_context_c;                                          \
                                                                              \
        save_context_c = (cptr);                                              \
        MR_IF_NOT_HIGHLEVEL_CODE(                                             \
            save_context_c->MR_ctxt_succip = MR_succip;                       \
            save_context_c->MR_ctxt_sp = MR_sp;                               \
            save_context_c->MR_ctxt_maxfr = MR_maxfr;                         \
            save_context_c->MR_ctxt_curfr = MR_curfr;                         \
            MR_IF_USE_MINIMAL_MODEL_STACK_COPY(                               \
                save_context_c->MR_ctxt_gen_next = MR_gen_next;               \
                save_context_c->MR_ctxt_cut_next = MR_cut_next;               \
                save_context_c->MR_ctxt_pneg_next = MR_pneg_next;             \
            )                                                                 \
            MR_IF_THREAD_SAFE(                                                \
                save_context_c->MR_ctxt_parent_sp = MR_parent_sp;             \
            )                                                                 \
        )                                                                     \
        MR_IF_USE_TRAIL(                                                      \
            MR_IF_NOT_THREAD_SAFE(                                            \
                save_context_c->MR_ctxt_trail_zone = MR_trail_zone;           \
            )                                                                 \
            MR_IF_THREAD_SAFE(                                                \
                save_context_c->MR_ctxt_trail_zone =                          \
                    MR_ENGINE(MR_eng_context).MR_ctxt_trail_zone;             \
            )                                                                 \
            save_context_c->MR_ctxt_trail_ptr = MR_trail_ptr;                 \
            save_context_c->MR_ctxt_ticket_counter = MR_ticket_counter;       \
            save_context_c->MR_ctxt_ticket_high_water = MR_ticket_high_water; \
        )                                                                     \
        MR_IF_NOT_HIGHLEVEL_CODE(                                             \
            save_context_c->MR_ctxt_detstack_zone =                           \
                MR_ENGINE(MR_eng_context).MR_ctxt_detstack_zone;              \
            save_context_c->MR_ctxt_prev_detstack_zones =                     \
                MR_ENGINE(MR_eng_context).MR_ctxt_prev_detstack_zones;        \
            save_context_c->MR_ctxt_nondetstack_zone =                        \
                MR_ENGINE(MR_eng_context).MR_ctxt_nondetstack_zone;           \
            save_context_c->MR_ctxt_prev_nondetstack_zones =                  \
                MR_ENGINE(MR_eng_context).MR_ctxt_prev_nondetstack_zones;     \
            MR_IF_USE_MINIMAL_MODEL_STACK_COPY(                               \
                save_context_c->MR_ctxt_genstack_zone =                       \
                    MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone;          \
                save_context_c->MR_ctxt_cutstack_zone =                       \
                    MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone;          \
                save_context_c->MR_ctxt_pnegstack_zone =                      \
                    MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone;         \
                MR_assert(MR_gen_stack == (MR_GenStackFrame *)                \
                    MR_ENGINE(MR_eng_context).MR_ctxt_genstack_zone->         \
                        MR_zone_min);                                         \
                MR_assert(MR_cut_stack == (MR_CutStackFrame *)                \
                    MR_ENGINE(MR_eng_context).MR_ctxt_cutstack_zone->         \
                        MR_zone_min);                                         \
                MR_assert(MR_pneg_stack == (MR_PNegStackFrame *)              \
                    MR_ENGINE(MR_eng_context).MR_ctxt_pnegstack_zone->        \
                        MR_zone_min);                                         \
            )                                                                 \
            MR_IF_EXEC_TRACE_INFO_IN_CONTEXT(                                 \
                save_context_c->MR_ctxt_call_seqno = MR_trace_call_seqno;     \
                save_context_c->MR_ctxt_call_depth = MR_trace_call_depth;     \
                save_context_c->MR_ctxt_event_number = MR_trace_event_number; \
            )                                                                 \
        )                                                                     \
        MR_save_hp_in_context(save_context_c);                                \
    } while (0)
|
|
|
|
// Copy between two contexts the fields that are accessed via
// MR_eng_this_context (rather than via MR_eng_context or via abstract
// machine registers — see the field documentation above).
#define MR_copy_eng_this_context_fields(to_cptr, from_cptr)                 \
    do {                                                                    \
        /* It wouldn't be appropriate to copy the resume field. */          \
        to_cptr->MR_ctxt_thread_local_mutables =                            \
            from_cptr->MR_ctxt_thread_local_mutables;                       \
        /* It wouldn't be appropriate to copy the spark_deque field. */     \
        /* It wouldn't be appropriate to copy the saved_owners field. */    \
    } while (0)
|
|
|
|
////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifdef MR_LL_PARALLEL_CONJ

// A synchronization term for a parallel conjunction: the context that
// originated the conjunction, its parent stack pointer, and a counter
// (set to the number of branches by MR_init_sync_term below).
//
// If you change MR_SyncTerm_Struct you need to update configure.ac.
//
// MR_st_count is manipulated via atomic operations, therefore it is declared
// as volatile.

struct MR_SyncTerm_Struct {
    MR_Context              *MR_st_orig_context;
    MR_Word                 *MR_st_parent_sp;
    volatile MR_Unsigned    MR_st_count;
};

// The size assumed by the compiler (MR_SYNC_TERM_SIZE, set by configure)
// must match the actual size of the structure.
MR_STATIC_ASSERT(mercury_context,
    MR_SYNC_TERM_SIZE == MR_bytes_to_words(sizeof(struct MR_SyncTerm_Struct)));
|
|
|
|
// MR_init_sync_term(sync_term, nbranches, static_conj_id) initializes a
// sync term for a parallel conjunction with `nbranches' conjuncts, recording
// the current context and parent stack pointer. In threadscope grades it
// also posts a start-par-conj event using `static_conj_id'; in other grades
// that argument is unused.
//
// This used to be two near-identical definitions selected by
// #ifdef MR_THREADSCOPE; the only difference — the threadscope event post —
// is now expressed with the MR_IF_THREADSCOPE helper defined above, so the
// shared assignments exist in just one place.
#define MR_init_sync_term(sync_term, nbranches, static_conj_id)             \
    do {                                                                    \
        MR_SyncTerm *init_st = (MR_SyncTerm *) &(sync_term);                \
                                                                            \
        init_st->MR_st_orig_context = MR_ENGINE(MR_eng_this_context);       \
        init_st->MR_st_parent_sp = MR_parent_sp;                            \
        init_st->MR_st_count = (nbranches);                                 \
        MR_IF_THREADSCOPE(                                                  \
            MR_threadscope_post_start_par_conj(&(sync_term),                \
                static_conj_id);                                            \
        )                                                                   \
    } while (0)
|
|
|
|
// MR_fork_new_child(MR_SyncTerm st, MR_Code *child):
//
// Create a new spark to execute the code at `child'. The new spark is put
// on the context's spark queue. The current context resumes at `parent'.
// MR_parent_sp must already be set appropriately before this instruction
// is executed.
//
// In non-polling work-stealing grades, if the context is not tied to an
// exclusive engine and idle work-stealing engines exist, one of them is
// advised to attempt a steal.

#define MR_fork_new_child(sync_term, child)                                   \
    do {                                                                      \
        MR_Spark        fnc_spark;                                            \
        MR_SparkDeque   *fnc_deque;                                           \
        MR_EngineId     engine_id = MR_ENGINE(MR_eng_id);                     \
        MR_IF_THREADSCOPE(                                                    \
            MR_uint_least32_t   id;                                           \
        )                                                                     \
                                                                              \
        fnc_spark.MR_spark_sync_term = (MR_SyncTerm*) &(sync_term);           \
        fnc_spark.MR_spark_resume = (child);                                  \
        fnc_spark.MR_spark_thread_local_mutables = MR_THREAD_LOCAL_MUTABLES;  \
        MR_IF_THREADSCOPE(                                                    \
            id = MR_ENGINE(MR_eng_next_spark_id)++;                           \
            fnc_spark.MR_spark_id = (engine_id << 24)|(id & 0xFFFFFF);        \
        )                                                                     \
        fnc_deque = MR_ENGINE(MR_eng_spark_deque);                            \
        MR_wsdeque_push_bottom(fnc_deque, &fnc_spark);                        \
        MR_IF_THREADSCOPE(                                                    \
            MR_threadscope_post_sparking(&(sync_term), fnc_spark.MR_spark_id); \
        )                                                                     \
        MR_IF_NOT_WORKSTEAL_POLLING(                                          \
            if (MR_ENGINE(MR_eng_this_context)->MR_ctxt_exclusive_engine      \
                == MR_ENGINE_ID_NONE && MR_num_idle_ws_engines > 0)           \
            {                                                                 \
                union MR_engine_wake_action_data action_data;                 \
                action_data.MR_ewa_worksteal_engine = MR_ENGINE(MR_eng_id);   \
                MR_try_wake_ws_engine(MR_ENGINE(MR_eng_id),                   \
                    MR_ENGINE_ACTION_WORKSTEAL_ADVICE,                        \
                    &action_data, NULL);                                      \
            }                                                                 \
        )                                                                     \
    } while (0)
|
|
|
|
// This macro may be used as a condition for runtime parallelism decisions.
// It returns nonzero when parallelism is recommended (because there are
// enough CPUs to assign work to).
//
// This test calculates the length of a wsdeque each time it is called.
// The test will usually execute more often than the length of the
// queue changes. Therefore, it makes sense to update a protected counter
// each time a spark is pushed, popped or stolen from the queue. However I
// believe that these atomic operations could be more expensive than
// necessary.
//
// The current implementation computes the length of the queue each time this
// macro is evaluated; this requires no atomic operations and contains only
// one extra memory dereference whose cache line is probably already hot in
// the first-level cache.

#define MR_par_cond_local_wsdeque_length                                    \
    (MR_wsdeque_length(MR_ENGINE(MR_eng_spark_deque)) <                     \
        MR_granularity_wsdeque_length)
|
|
|
|
// Called (via MR_join_and_continue below) when a parallel conjunct reaches
// its join point. Returns the address of the code that this engine should
// execute next; the caller jumps to it.
extern MR_Code*
MR_do_join_and_continue(MR_SyncTerm *sync_term, MR_Code *join_label);
|
|
|
|
// Synchronise this conjunct on `sync_term', then jump to whatever code
// MR_do_join_and_continue says should run next on this engine.
#define MR_join_and_continue(sync_term, join_label)                         \
    do {                                                                    \
        MR_Code *jnc_target =                                               \
            MR_do_join_and_continue((MR_SyncTerm *) &(sync_term),           \
                (join_label));                                              \
        MR_GOTO(jnc_target);                                                \
    } while (0)
|
|
|
|
// This needs to come after the definition of MR_SparkDeque_Struct.
|
|
#include "mercury_wsdeque.h"
|
|
|
|
// This structure and function can be used to wake up a sleeping engine;
// they are exported here for use by the MR_fork_new_child macro above.

// No action is pending.

#define MR_ENGINE_ACTION_NONE 0x0000

// ACTION_CONTEXT applies when an engine is being given a context directly
// (see MR_ewa_context below).

#define MR_ENGINE_ACTION_CONTEXT 0x0001

// ACTION_SHUTDOWN: ask the engine to shut down.

#define MR_ENGINE_ACTION_SHUTDOWN 0x0002

// ACTION_WORKSTEAL_ADVICE: tell the engine which other engine's spark
// deque it should try to steal from; the engine id is passed in
// MR_ewa_worksteal_engine (see MR_fork_new_child above).

#define MR_ENGINE_ACTION_WORKSTEAL_ADVICE 0x0004

// ACTION_CONTEXT_ADVICE applies when a context is on the run queue that
// this engine should check.

#define MR_ENGINE_ACTION_CONTEXT_ADVICE 0x0008
|
|
|
|
// Extra data accompanying an engine wake-up action. Which member is
// meaningful depends on the action code passed to MR_try_wake_ws_engine.
union MR_engine_wake_action_data {
    // This is provided for workstealing actions, to let the engine know
    // where to look for work to steal.

    MR_EngineId MR_ewa_worksteal_engine;

    // This is provided for context actions.

    MR_Context *MR_ewa_context;
};
|
|
|
|
// Try to wake a sleeping work-stealing engine.
|
|
//
|
|
// preferred_engine - The engine we'd like to wake up, a nearby engine will
|
|
// often be chosen so it's okay to name the current engine
|
|
// in this field.
|
|
//
|
|
// action - The action to run, see the macros above.
|
|
//
|
|
// action_data - Extra data for the action, if not applicable pass NULL.
|
|
//
|
|
// target_engine - If the call succeeds and this parameter is non-null, the
|
|
// ID of the engine that received this message is written to
|
|
// this address.
|
|
//
|
|
// This returns MR_TRUE if successful, MR_FALSE otherwise.
|
|
|
|
extern MR_bool MR_try_wake_ws_engine(MR_EngineId perferred_engine,
|
|
int action,
|
|
union MR_engine_wake_action_data *action_data,
|
|
MR_EngineId *target_engine);
|
|
|
|
// Sanity checks on the sleep-synchronisation state of engine `id'.
// Presumably called at engine startup and shutdown respectively; the
// exact invariants checked are defined in the implementation file —
// confirm there.

extern void MR_verify_initial_engine_sleep_sync(MR_EngineId id);

extern void MR_verify_final_engine_sleep_sync(MR_EngineId id,
    MR_EngineType engine_type);
|
|
|
|
#ifdef MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
|
|
|
|
// These functions can be used to debug the runtime granularity control
|
|
// methods implemented above.
|
|
|
|
// decision is 1 if we choose to parallelise something and 0 if code should
|
|
// be run sequentially.
|
|
// This is not (yet) thread safe.
|
|
|
|
extern void MR_record_conditional_parallelism_decision(
|
|
MR_Unsigned decision);
|
|
|
|
// flush and close the log of conditional parallelism decisions
|
|
// This is not thread safe.
|
|
// This is a no-op if no parallelism decisions have been recorded.
|
|
|
|
extern void MR_write_out_conditional_parallelism_log(void);
|
|
|
|
#endif // MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
|
|
|
|
#endif // MR_LL_PARALLEL_CONJ
|
|
|
|
#endif // not MERCURY_CONTEXT_H
|