Improve thread pinning:

+ Now pins threads intelligently on SMT systems by balancing threads among
      cores.
    + Performs fewer migrations when pinning threads (if a thread's current
      CPU is a valid CPU for pinning, then it is not migrated).
    + Handle cases where the user requests more threads than available CPUs.
    + Handle cases where the process is restricted to a subset of CPUs by its
      environment.  (for instance, Linux cpuset(7))

This is largely made possible by the hwloc library
(http://www.open-mpi.org/projects/hwloc/).  However, hwloc is not required: the
runtime system will fall back to sched_setaffinity(); it will simply be less
intelligent WRT SMT.

runtime/mercury_context.h:
runtime/mercury_context.c:
    Do thread pinning either via hwloc or sched_setaffinity.  Previously only
    sched_setaffinity was used.

    Update the thread-pinning algorithm as follows:

    Include the general thread pinning code only if MR_HAVE_THREAD_PINNING is
    defined.

    Use a combination of sysconf and sched_getaffinity to detect the number of
    processors when hwloc isn't available.  This makes the runtime compatible
    with Linux cpuset(7) when hwloc isn't available.

configure.in:
Mmake.common.in:
    Detect presence of the hwloc library.

configure.in:
    Detect sched_getaffinity()

aclocal.m4:
acinclude.m4:
    Move aclocal.m4 to acinclude.m4, the aclocal program will build aclocal.m4
    and retrieve macros from the system and the contents of acinclude.m4.

Mmakefile:
    Create a make target for aclocal.m4.

runtime/Mmakefile:
    Link the runtime with libhwloc in low-level C parallel grades.

    Include CFLAGS for libhwloc.

scripts/ml.in:
    Link programs and libraries with libhwloc in low-level C parallel grades.

runtime/mercury_conf.h.in:
    Define MR_HAVE_HWLOC when it is available.

    Define MR_HAVE_SCHED_GETAFFINITY when it is available.

runtime/mercury_conf_param.h:
    Define MR_HAVE_THREAD_PINNING if either hwloc or [sched_setaffinity and
    sched_getaffinity] are available.

runtime/mercury_thread.c:
runtime/mercury_wrapper.c:
    Only call MR_pin_thread and MR_pin_primordial_thread if
    MR_HAVE_THREAD_PINNING is defined.

runtime/mercury_thread.h:
runtime/mercury_context.h:
    Move the declaration of MR_pin_primordial_thread to mercury_context.h from
    mercury_thread.h since its definition is in mercury_context.c.

    Require MR_HAVE_THREAD_PINNING for the declaration of
    MR_pin_primordial_thread.

runtime/mercury_wrapper.c:
    Conform to changes in mercury_context.h

INSTALL_CVS:
tools/test_mercury:
    Run aclocal at the right times while testing Mercury.
This commit is contained in:
Paul Bone
2011-10-13 02:42:21 +00:00
parent 1d0cd8d04f
commit a071eaba53
15 changed files with 454 additions and 129 deletions

View File

@@ -9,7 +9,7 @@
# You also need autoconf (version 2.58 or later) (and hence GNU m4)
# installed.
#
# Step 0. autoconf
# Step 0. aclocal && autoconf
#
# Step 1. ./configure
#
@@ -40,6 +40,7 @@
parallel=-j3
aclocal &&
autoconf &&
./configure &&
touch Mmake.params &&

View File

@@ -1,5 +1,5 @@
#-----------------------------------------------------------------------------#
# Copyright (C) 1995-2006, 2009-2010 The University of Melbourne.
# Copyright (C) 1995-2006, 2009-2011 The University of Melbourne.
# This file may only be copied under the terms of the GNU General
# Public Licence - see the file COPYING in the Mercury distribution.
#-----------------------------------------------------------------------------#
@@ -234,6 +234,8 @@ NSL_LIBRARY=@NSL_LIBRARY@
DL_LIBRARY=@DL_LIBRARY@
READLINE_LIBRARIES=@READLINE_LIBRARIES@
TERMCAP_LIBRARY=@TERMCAP_LIBRARY@
HWLOC_CFLAGS=@HWLOC_CFLAGS@
HWLOC_LIBS=@HWLOC_LIBS@
# Extensions to use
O=@OBJ_SUFFIX@

View File

@@ -370,6 +370,9 @@ cleanint:
#-----------------------------------------------------------------------------#
aclocal.m4: configure.in acinclude.m4
aclocal
configure: configure.in aclocal.m4
autoconf

View File

@@ -1265,7 +1265,7 @@ mercury_check_for_functions \
grantpt unlockpt ptsname tcgetattr tcsetattr ioctl \
access sleep opendir readdir closedir mkdir symlink readlink \
gettimeofday setenv putenv _putenv posix_spawn sched_setaffinity \
sched_getcpu sched_yield mkstemp
sched_getaffinity sched_getcpu sched_yield mkstemp
#-----------------------------------------------------------------------------#
@@ -5106,6 +5106,30 @@ AC_SUBST(USE_MSVCRT)
MERCURY_CHECK_READLINE
#-----------------------------------------------------------------------------#
#
# Check for libhwloc, http://www.open-mpi.org/projects/hwloc/
#
PKG_PROG_PKG_CONFIG
PKG_CHECK_MODULES(libhwloc, hwloc >= 1.0,
[
AC_DEFINE(MR_HAVE_HWLOC)
],
[
case "$LIBGRADES" in
$BEST_LLDS_BASE_GRADE.par.gc*)
MERCURY_MSG(["Warning: libhwloc not found, thread pinning in"])
MERCURY_MSG(["low-level C parallel grades may be less accurate."])
;;
*)
;;
esac
])
HWLOC_LIBS="$libhwloc_LIBS"
HWLOC_CFLAGS="$libhwloc_CFLAGS"
AC_SUBST(HWLOC_LIBS)
AC_SUBST(HWLOC_CFLAGS)
#-----------------------------------------------------------------------------#
#
# Check for flex and bison

View File

@@ -1,5 +1,5 @@
#-----------------------------------------------------------------------------#
# Copyright (C) 1998-2010 The University of Melbourne.
# Copyright (C) 1998-2011 The University of Melbourne.
# This file may only be copied under the terms of the GNU General
# Public License - see the file COPYING in the Mercury distribution.
#-----------------------------------------------------------------------------#
@@ -252,10 +252,14 @@ LDLIBS = $(SHARED_GC_LIBS)
THREADLIBS = \
` case "$(GRADE)" in \
*.par*|*.mps*) echo "-lpthread" ;; \
esac \
*.mps*) echo $(THREAD_LIBS) ;; \
*.hlc.par*) echo $(THREAD_LIBS) ;; \
*.par*) echo "$(THREAD_LIBS) $(HWLOC_LIBS)" ;; \
esac \
`
CFLAGS += $(HWLOC_CFLAGS)
$(HDR_CHECK_OBJS): mercury_conf.h
#-----------------------------------------------------------------------------#

View File

@@ -274,6 +274,7 @@
** MR_HAVE_POSIX_SPAWN we have the posix_spawn() function.
** MR_HAVE_FESETROUND we have the fesetround() function.
** MR_HAVE_SCHED_SETAFFINITY we have the sched_setaffinity() function.
** MR_HAVE_SCHED_GETAFFINITY we have the sched_getaffinity() function.
** MR_HAVE_SCHED_GETCPU we have the sched_getcpu() function (glibc specific).
** MR_HAVE_SCHED_YIELD we have the sched_yield() function.
** MR_HAVE_PTHREAD_MUTEXATTR_SETPSHARED we have the
@@ -342,6 +343,7 @@
#undef MR_HAVE_POSIX_SPAWN
#undef MR_HAVE_FESETROUND
#undef MR_HAVE_SCHED_SETAFFINITY
#undef MR_HAVE_SCHED_GETAFFINITY
#undef MR_HAVE_SCHED_GETCPU
#undef MR_HAVE_SCHED_YIELD
#undef MR_HAVE_PTHREAD_MUTEXATTR_SETPSHARED
@@ -419,6 +421,11 @@
#undef MR_THREAD_LOCAL_STORAGE
#undef MR_PTHREADS_WIN32
/*
** MR_HAVE_HWLOC is defined if the hwloc library is available.
*/
#undef MR_HAVE_HWLOC
/*
** The bytecode files represent floats in 64-bit IEEE format.
**

View File

@@ -1067,4 +1067,15 @@
/*---------------------------------------------------------------------------*/
/*
** MR_HAVE_THREAD_PINNING is defined if we can pin threads, either with
** sched_setaffinity or hwloc.
*/
#if (defined(MR_HAVE_SCHED_SETAFFINITY) && \
defined(MR_HAVE_SCHED_GETAFFINITY)) || defined(MR_HAVE_HWLOC)
#define MR_HAVE_THREAD_PINNING
#endif
/*---------------------------------------------------------------------------*/
#endif /* MERCURY_CONF_PARAM_H */

View File

@@ -46,6 +46,10 @@ ENDINIT
#include <sys/timeb.h> /* for _ftime() */
#endif
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_HWLOC)
#include <hwloc.h>
#endif
#include "mercury_memory_handlers.h"
#include "mercury_context.h"
#include "mercury_engine.h" /* for `MR_memdebug' */
@@ -161,12 +165,19 @@ static MR_Integer MR_profile_parallel_regular_context_kept = 0;
/*
** Local variables for thread pinning.
*/
#ifdef MR_LL_PARALLEL_CONJ
static MercuryLock MR_next_cpu_lock;
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
MR_bool MR_thread_pinning = MR_FALSE;
static MR_Unsigned MR_next_cpu = 0;
/* This is initialised the first the MR_pin_primordial_thread() is called */
static MercuryLock MR_thread_pinning_lock;
static unsigned MR_num_threads_left_to_pin;
static unsigned MR_num_processors;
MR_Unsigned MR_primordial_thread_cpu;
#ifdef MR_HAVE_HWLOC
static hwloc_topology_t MR_hw_topology;
static hwloc_cpuset_t MR_hw_available_pus = NULL;
#else /* MR_HAVE_SCHED_SETAFFINITY */
static cpu_set_t *MR_available_cpus;
#endif
#endif
#if defined(MR_LL_PARALLEL_CONJ) && \
@@ -217,7 +228,7 @@ MR_SparkDeque **MR_spark_deques = NULL;
#ifdef MR_LL_PARALLEL_CONJ
/*
** Try to wake up a sleeping message and tell it to do action. The engine
** Try to wake up a sleeping engine and tell it to do action. The engine
** is only woken if the engine is in one of the states in the bitfield states.
** If the engine is woken, this function returns MR_TRUE, otherwise it
** returns MR_FALSE.
@@ -233,9 +244,35 @@ try_wake_engine(MR_EngineId engine_id, int action,
static void
MR_write_out_profiling_parallel_execution(void);
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
#if defined(MR_LL_PARALLEL_CONJ)
static void
MR_setup_thread_pinning(void);
static MR_bool
MR_do_pin_thread(int cpu);
/*
** Determine which CPU this thread is currently running on.
*/
static int
MR_current_cpu(void);
/*
** Reset or initialize the cpuset that tracks which CPUs are available for
** binding.
*/
static void
MR_reset_available_cpus(void);
/*
** Mark the given CPU as unavailable for thread pinning. This may mark other
** CPUs as unavailable, if, for instance they share resources with this
** processor and we can place other tasks elsewhere to avoid this sharing.
** These resources are usually only considered for hardware threads that share
** cores.
*/
static void
MR_make_cpu_unavailable(int cpu);
#endif
/*---------------------------------------------------------------------------*/
@@ -253,9 +290,6 @@ MR_init_context_stuff(void)
pthread_mutex_init(&free_context_list_lock, MR_MUTEX_ATTR);
pthread_mutex_init(&MR_pending_contexts_lock, MR_MUTEX_ATTR);
#ifdef MR_LL_PARALLEL_CONJ
#ifdef MR_HAVE_SCHED_SETAFFINITY
pthread_mutex_init(&MR_next_cpu_lock, MR_MUTEX_ATTR);
#endif
#ifdef MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
pthread_mutex_init(&MR_par_cond_stats_lock, MR_MUTEX_ATTR);
#endif
@@ -268,40 +302,10 @@ MR_init_context_stuff(void)
MR_KEY_CREATE(&MR_backjump_next_choice_id_key, (void *)0);
#endif
/*
** If MR_num_threads is unset, configure it to match number of processors
** on the system. If we do this, then we prepare to set processor
** affinities later on.
*/
if (MR_num_threads == 0) {
#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
long result;
result = sysconf(_SC_NPROCESSORS_ONLN);
if (result < 1) {
/* We couldn't determine the number of processors. */
MR_num_threads = 1;
} else {
MR_num_threads = result;
/*
** On systems that don't support sched_setaffinity, we don't try
** to automatically enable thread pinning. This prevents a runtime
** warning that could unnecessarily confuse the user.
**/
#if defined(MR_LL_PARALLEL_CONJ) && \
defined(MR_HAVE_SCHED_SETAFFINITY)
/*
** Comment this back in to enable thread pinning by default
** if we autodetected the correct number of CPUs.
*/
/* MR_thread_pinning = MR_TRUE; */
#endif
}
#else /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
MR_num_threads = 1;
#endif /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
}
#ifdef MR_LL_PARALLEL_CONJ
#if defined(MR_HAVE_THREAD_PINNING)
MR_setup_thread_pinning();
#endif
MR_granularity_wsdeque_length =
MR_granularity_wsdeque_length_factor * MR_num_threads;
@@ -329,102 +333,353 @@ MR_init_context_stuff(void)
** Pin the primordial thread first to the CPU it is currently using
** (if support is available for thread pinning).
*/
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
unsigned
MR_pin_primordial_thread(void)
#if defined(MR_HAVE_THREAD_PINNING) && defined(MR_LL_PARALLEL_CONJ)
static unsigned
MR_pin_thread_no_locking(void)
{
unsigned cpu;
int temp;
unsigned i = 0;
/*
** We don't need locking to pin the primordial thread as it is called
** before any other threads exist.
*/
/*
** We go through the motions of thread pinning even when thread pinning is
** not supported as the allocation of CPUs to threads may be used later.
*/
#ifdef MR_HAVE_SCHED_GETCPU
temp = sched_getcpu();
if (temp == -1) {
MR_primordial_thread_cpu = 0;
#ifdef MR_HAVE_SCHED_SET_AFFINITY
if (MR_thread_pinning) {
perror("Warning: unable to determine the current CPU for "
"the primordial thread: ");
cpu = MR_current_cpu();
#ifdef MR_DEBUG_THREAD_PINNING
fprintf(stderr, "Currently running on cpu %d\n", cpu);
#endif
for (i = 0; i < MR_num_processors && MR_thread_pinning; i++) {
if (MR_do_pin_thread((cpu + i) % MR_num_processors)) {
#ifdef MR_DEBUG_THREAD_PINNING
fprintf(stderr, "Pinned to cpu %d\n", (cpu + i) % MR_num_processors);
fprintf(stderr, "Now running on cpu %d\n", MR_current_cpu());
#endif
MR_num_threads_left_to_pin--;
MR_make_cpu_unavailable((cpu + i) % MR_num_processors);
break;
}
if (!MR_thread_pinning) {
/*
** If MR_thread_pinning becomes false then an error prevented us
** from pinning the thread.
** When we fail to pin a thread but MR_thread_pinning remains true
** it means that that CPU has already had a thread pinned to it.
*/
fprintf(stderr, "Couldn't pin Mercury engine to processor");
break;
}
#endif
} else {
MR_primordial_thread_cpu = temp;
}
#else
MR_primordial_thread_cpu = 0;
#endif
#ifdef MR_HAVE_SCHED_SET_AFFINITY
if (MR_thread_pinning) {
MR_do_pin_thread(MR_primordial_thread_cpu);
}
#endif
return MR_primordial_thread_cpu;
return (cpu + 1) % MR_num_processors;
}
#endif /* defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) */
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
unsigned
MR_pin_thread(void)
{
unsigned cpu;
/*
** We go through the motions of thread pinning even when thread pinning
** is not supported, as the allocation of CPUs to threads may be
** used later.
*/
MR_LOCK(&MR_next_cpu_lock, "MR_pin_thread");
if (MR_next_cpu == MR_primordial_thread_cpu) {
/*
** Skip the CPU that the primordial thread was pinned on.
*/
MR_next_cpu++;
}
cpu = MR_next_cpu++;
MR_UNLOCK(&MR_next_cpu_lock, "MR_pin_thread");
#ifdef MR_HAVE_SCHED_SETAFFINITY
if (MR_thread_pinning) {
MR_do_pin_thread(cpu);
}
#endif
MR_LOCK(&MR_thread_pinning_lock, "MR_pin_thread");
cpu = MR_pin_thread_no_locking();
MR_UNLOCK(&MR_thread_pinning_lock, "MR_pin_thread");
return cpu;
}
#endif /* defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) */
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
static void
void
MR_pin_primordial_thread(void)
{
/*
** We don't need locking to pin the primordial thread as it is called
** before any other threads exist.
*/
MR_primordial_thread_cpu = MR_pin_thread_no_locking();
}
static void MR_setup_thread_pinning(void)
{
unsigned num_processors;
#ifdef MR_HAVE_HWLOC
if (-1 == hwloc_topology_init(&MR_hw_topology)) {
MR_fatal_error("Error allocating libhwloc topology object");
}
if (-1 == hwloc_topology_load(MR_hw_topology)) {
MR_fatal_error("Error detecting hardware topology (hwloc)");
}
#endif
/*
** Setup num processors
*/
MR_reset_available_cpus();
#ifdef MR_HAVE_HWLOC
num_processors = hwloc_cpuset_weight(MR_hw_available_pus);
#elif defined(MR_HAVE_SCHED_GETAFFINITY)
/*
** This looks redundant but it's not. MR_num_processors is a guess that was
** gathered by using sysconf. But the number of CPUs in the CPU_SET is the
** actual number of CPUs that this process is restricted to.
*/
#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
#else
/*
** The user may have supplied MR_num_processors
*/
num_processors = (MR_num_processors > 1 ? MR_num_processors : 1)
#endif
num_processors = CPU_COUNT_S(num_processors, MR_available_cpus);
#endif
MR_num_processors = num_processors;
/*
** If MR_num_threads is unset, configure it to match number of processors
** on the system. If we do this, then we prepare to set processor
** affinities later on.
*/
if (MR_num_threads == 0) {
MR_num_threads = num_processors;
}
MR_num_threads_left_to_pin = MR_num_threads;
#ifdef MR_DEBUG_THREAD_PINNING
fprintf(stderr, "Detected %d available processors, will use %d threads\n",
MR_num_processors, MR_num_threads);
#endif
pthread_mutex_init(&MR_thread_pinning_lock, MR_MUTEX_ATTR);
/*
** Comment this back in to enable thread pinning by default
** if we autodetected the number of CPUs without error.
*/
#if 0
if (MR_num_threads > 1) {
MR_thread_pinning = MR_TRUE;
}
#endif
}
/*
** Determine which CPU this thread is currently running on.
*/
static int MR_current_cpu(void)
{
#if defined(MR_HAVE_SCHED_GETCPU)
int os_cpu;
#if defined(MR_HAVE_HWLOC)
hwloc_obj_t pu;
#endif
os_cpu = sched_getcpu();
if (-1 == os_cpu) {
os_cpu = 0;
if (MR_thread_pinning) {
perror("Warning: unable to determine the current CPU for "
"this thread: ");
}
}
#if defined(MR_HAVE_HWLOC)
pu = hwloc_get_pu_obj_by_os_index(MR_hw_topology, os_cpu);
return pu->logical_index;
#else
return os_cpu;
#endif
#else /* ! MR_HAVE_SCHED_GETCPU */
/* We have no idea! */
return 0;
#endif
}
static MR_bool
MR_do_pin_thread(int cpu)
{
cpu_set_t cpus;
/*
** Make sure that we're allowed to bind to this CPU.
*/
#if defined(MR_HAVE_HWLOC)
hwloc_obj_t pu;
if (cpu < CPU_SETSIZE) {
CPU_ZERO(&cpus);
CPU_SET(cpu, &cpus);
if (sched_setaffinity(0, sizeof(cpu_set_t), &cpus) == -1) {
perror("Warning: Couldn't set CPU affinity: ");
/*
** If this failed once, it will probably fail again,
** so we disable it.
*/
MR_thread_pinning = MR_FALSE;
}
} else {
perror("Warning: Couldn't set CPU affinity due to a static "
"system limit: ");
MR_thread_pinning = MR_FALSE;
if (hwloc_cpuset_iszero(MR_hw_available_pus)) {
/*
** Each available CPU already has a thread pinned to it. Reset the
** available_pus set so that we can oversubscribe CPUs but still
** attempt to balance load.
*/
MR_reset_available_cpus();
}
pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
if (!hwloc_cpuset_intersects(MR_hw_available_pus, pu->cpuset)) {
return MR_FALSE;
}
#elif defined(MR_HAVE_SCHED_SETAFFINITY)
if (CPU_COUNT_S(MR_num_processors, MR_available_cpus) == 0) {
/*
** As above, reset the available cpus.
*/
MR_reset_available_cpus();
}
if (!CPU_ISSET_S(cpu, MR_num_processors, MR_available_cpus)) {
return MR_FALSE;
}
#endif
#if defined(MR_HAVE_HWLOC)
errno = hwloc_set_cpubind(MR_hw_topology, pu->cpuset,
HWLOC_CPUBIND_THREAD);
if (errno != 0) {
perror("Warning: Couldn't set CPU affinity: ");
MR_thread_pinning = MR_FALSE;
return MR_FALSE;
}
#elif defined(MR_HAVE_SCHED_SETAFFINITY)
cpu_set_t *cpus;
cpus = CPU_ALLOC(MR_num_processors);
CPU_ZERO_S(MR_num_processors, cpus);
CPU_SET_S(cpu, MR_num_processors, cpus);
if (sched_setaffinity(0, CPU_ALLOC_SIZE(MR_num_processors), cpus) == -1) {
perror("Warning: Couldn't set CPU affinity: ");
/*
** If this failed once, it will probably fail again,
** so we disable it.
*/
MR_thread_pinning = MR_FALSE;
return MR_FALSE;
}
#endif
return MR_TRUE;
}
static void MR_reset_available_cpus(void)
{
#if defined(MR_HAVE_HWLOC)
hwloc_cpuset_t inherited_binding;
/*
** Gather the cpuset that our parent process bound this process to.
**
** (For information about how to deliberately restrict a process and its
** sub-processes to a set of CPUs on Linux, see cpuset(7).)
*/
inherited_binding = hwloc_cpuset_alloc();
hwloc_get_cpubind(MR_hw_topology, inherited_binding, HWLOC_CPUBIND_PROCESS);
/*
** Set the available processors to the union of inherited_binding and the
** cpuset we're allowed to use as reported by libhwloc. In my tests with
** libhwloc_1.0-1 (Debian) hwloc reported that all cpus on the system are
** available; it didn't exclude CPUs not in the process's cpuset(7).
*/
if (MR_hw_available_pus == NULL) {
MR_hw_available_pus = hwloc_cpuset_alloc();
}
hwloc_cpuset_and(MR_hw_available_pus, inherited_binding,
hwloc_topology_get_allowed_cpuset(MR_hw_topology));
hwloc_cpuset_free(inherited_binding);
#elif defined(MR_HAVE_SCHED_GETAFFINITY)
unsigned num_processors;
#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
#else
/*
** The user may have supplied MR_num_processors
*/
num_processors = (MR_num_processors > 1 ? MR_num_processors : 1)
#endif
if (MR_available_cpus == NULL) {
MR_available_cpus = CPU_ALLOC(num_processors);
}
if (-1 == sched_getaffinity(0, CPU_ALLOC_SIZE(num_processors),
MR_available_cpus))
{
perror("Couldn't get CPU affinity");
MR_thread_pinning = MR_FALSE;
CPU_FREE(MR_available_cpus);
MR_available_cpus = NULL;
}
#endif
}
#if defined(MR_HAVE_HWLOC)
static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu);
#endif
static void MR_make_cpu_unavailable(int cpu)
{
#if defined(MR_HAVE_HWLOC)
hwloc_obj_t pu;
pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
MR_make_pu_unavailable(pu);
#elif defined(MR_HAVE_SCHED_SETAFFINITY)
CPU_CLR_S(cpu, MR_num_processors, MR_available_cpus);
#endif
}
#if defined(MR_HAVE_HWLOC)
static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu) {
hwloc_obj_t core;
static int siblings_to_make_unavailable;
int i;
#ifdef MR_DEBUG_THREAD_PINNING
char * cpusetstr;
hwloc_cpuset_asprintf(&cpusetstr, MR_hw_available_pus);
fprintf(stderr, "Old available CPU set: %s\n", cpusetstr);
free(cpusetstr);
hwloc_cpuset_asprintf(&cpusetstr, pu->cpuset);
fprintf(stderr, "Making this CPU set unavailable: %s\n", cpusetstr);
free(cpusetstr);
#endif
hwloc_cpuset_andnot(MR_hw_available_pus, MR_hw_available_pus, pu->cpuset);
#ifdef MR_DEBUG_THREAD_PINNING
hwloc_cpuset_asprintf(&cpusetstr, MR_hw_available_pus);
fprintf(stderr, "New available CPU set: %s\n", cpusetstr);
free(cpusetstr);
#endif
siblings_to_make_unavailable = hwloc_cpuset_weight(MR_hw_available_pus) -
MR_num_threads_left_to_pin;
if (siblings_to_make_unavailable > 0) {
/*
** Remove sibling processing units that share a core with the one we've just removed.
*/
core = pu->parent;
if (core->type != HWLOC_OBJ_CORE) {
return MR_FALSE;
}
for (i = 0;
(i < core->arity && siblings_to_make_unavailable > 0);
i++) {
if (core->children[i] == pu) {
continue;
}
if (hwloc_cpuset_intersects(core->children[i]->cpuset,
MR_hw_available_pus)) {
if (!MR_make_pu_unavailable(core->children[i])) {
return MR_FALSE;
}
}
}
}
return MR_TRUE;
}
#endif
#endif /* MR_HAVE_THREAD_PINNING && MR_LL_PARALLEL_CONJ */
void
MR_finalize_context_stuff(void)
{

View File

@@ -468,12 +468,19 @@ extern void MR_init_context_stuff(void);
** be pinned to if pinning was both enabled and supported. That is a valid
** value is always returned even if the thread is not actually pinned.
*/
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
extern unsigned
#if defined(MR_LL_PARALLEL_CONJ)
#if defined(MR_HAVE_THREAD_PINNING)
extern void
MR_pin_primordial_thread(void);
extern unsigned
MR_pin_thread(void);
/*
** The CPU that the primordial thread is running on.
*/
extern MR_Unsigned MR_primordial_thread_cpu;
#endif
/*
** Shutdown all the engines.
*/

View File

@@ -115,7 +115,7 @@ MR_init_thread(MR_when_to_use when_to_use)
MercuryEngine *eng;
#ifdef MR_THREAD_SAFE
#ifdef MR_LL_PARALLEL_CONJ
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
unsigned cpu;
#endif
@@ -129,13 +129,17 @@ MR_init_thread(MR_when_to_use when_to_use)
#ifdef MR_LL_PARALLEL_CONJ
switch (when_to_use) {
case MR_use_later:
#ifdef MR_HAVE_THREAD_PINNING
cpu = MR_pin_thread();
#endif
break;
case MR_use_now:
/*
** Don't pin the primordial thread here, it's already been done.
*/
#ifdef MR_HAVE_THREAD_PINNING
cpu = MR_primordial_thread_cpu;
#endif
break;
/*
** TODO: We may use the cpu value here to determine which CPUs which

View File

@@ -211,11 +211,6 @@ MR_null_thread(void);
*/
extern MercuryThreadKey MR_exception_handler_key;
/*
** The CPU that the primordial thread is running on.
*/
extern MR_Unsigned MR_primordial_thread_cpu;
#else /* not MR_THREAD_SAFE */
#define MR_LOCK(nothing, from) do { } while (0)

View File

@@ -667,7 +667,9 @@ mercury_runtime_init(int argc, char **argv)
#else
#if defined(MR_LL_PARALLEL_CONJ)
#if defined(MR_HAVE_THREAD_PINNING)
MR_pin_primordial_thread();
#endif
#if defined(MR_THREADSCOPE)
/*
** We must setup threadscope before we setup the first engine.

View File

@@ -50,6 +50,7 @@ LDFLAGS_FOR_THREADS="@LDFLAGS_FOR_THREADS@"
LDFLAGS_FOR_TRACE="@LDFLAGS_FOR_TRACE@"
LD_LIBFLAGS_FOR_THREADS="@LD_LIBFLAGS_FOR_THREADS@"
THREAD_LIBS="@THREAD_LIBS@"
HWLOC_LIBS="@HWLOC_LIBS@"
TRACE_BASE_LIBS_SYSTEM="@TRACE_BASE_LIBS_SYSTEM@"
TMPDIR=${TMPDIR=/tmp}
@@ -370,6 +371,13 @@ case $use_thread_libs.$make_shared_lib in
THREAD_LIBS=""
;;
esac
case "$GRADE" in
hlc.*.par*)
;;
*.par*)
THREAD_LIBS="$THREAD_LIBS $HWLOC_LIBS"
;;
esac
# Set the correct flags if we're to use the MS Visual C runtime.
use_msvcrt=@USE_MSVCRT@

View File

@@ -735,6 +735,7 @@ esac
# XXX building the depend target in parallel sometimes fails so we don't
# do that at the moment - it's probably not worth doing anyway.
#
aclocal || { false; exit 1; }
autoconf || { false; exit 1; }
rm -f config.cache
./configure --prefix=$INSTALL_DIR $CONFIG_OPTS || { false; exit 1; }
@@ -862,6 +863,7 @@ case $HOST in $ROTD_HOST)
: > Mmake.params &&
rm -f so_locations &&
rm -f .enable_lib_grades &&
aclocal &&
autoconf &&
mercury_cv_low_tag_bits=2 \
mercury_cv_bits_per_word=32 \