mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 01:13:30 +00:00
Improve thread pinning:
+ Now pins threads intelligently on SMT systems by balancing threads among
cores.
+ Performs fewer migrations when pinning threads (if a thread's current
CPU is a valid CPU for pinning, then it is not migrated).
+ Handle cases where the user requests more threads than available CPUs.
+ Handle cases where the process is restricted to a subset of CPUs by its
environment. (for instance, Linux cpuset(7))
This is largely made possible by the hwloc library
(http://www.open-mpi.org/projects/hwloc/). However, hwloc is not required: the
runtime system will fall back to sched_setaffinity(); it will simply be less
intelligent with respect to SMT.
runtime/mercury_context.h:
runtime/mercury_context.c:
Do thread pinning either via hwloc or sched_setaffinity. Previously only
sched_setaffinity was used.
Update the thread-pinning algorithm as described above.
Include the general thread pinning code only if MR_HAVE_THREAD_PINNING is
defined.
Use a combination of sysconf and sched_getaffinity to detect the number of
processors when hwloc isn't available. This makes the runtime compatible
with Linux cpuset(7) when hwloc isn't available.
configure.in:
Mmake.common.in:
Detect presence of the hwloc library.
configure.in:
Detect sched_getaffinity()
aclocal.m4:
acinclude.m4:
Move aclocal.m4 to acinclude.m4; the aclocal program will build aclocal.m4
from macros retrieved from the system and the contents of acinclude.m4.
Mmakefile:
Create a make target for aclocal.m4.
runtime/Mmakefile:
Link the runtime with libhwloc in low-level C parallel grades.
Include CFLAGS for libhwloc.
scripts/ml.in:
Link programs and libraries with libhwloc in low-level C parallel grades.
runtime/mercury_conf.h.in:
Define MR_HAVE_HWLOC when it is available.
Define MR_HAVE_SCHED_GETAFFINITY when it is available.
runtime/mercury_conf_param.h:
Define MR_HAVE_THREAD_PINNING if either hwloc or [sched_setaffinity and
sched_getaffinity] are available.
runtime/mercury_thread.c:
runtime/mercury_wrapper.c:
Only call MR_pin_thread and MR_pin_primordial_thread if
MR_HAVE_THREAD_PINNING is defined.
runtime/mercury_thread.h:
runtime/mercury_context.h:
Move the declaration of MR_pin_primordial_thread to mercury_context.h from
mercury_thread.h since its definition is in mercury_context.c.
Require MR_HAVE_THREAD_PINNING for the declaration of
MR_pin_primordial_thread.
runtime/mercury_wrapper.c:
Conform to changes in mercury_context.h
INSTALL_CVS:
tools/test_mercury:
Run aclocal at the right times while testing Mercury.
This commit is contained in:
@@ -9,7 +9,7 @@
|
||||
# You also need autoconf (version 2.58 or later) (and hence GNU m4)
|
||||
# installed.
|
||||
#
|
||||
# Step 0. autoconf
|
||||
# Step 0. aclocal && autoconf
|
||||
#
|
||||
# Step 1. ./configure
|
||||
#
|
||||
@@ -40,6 +40,7 @@
|
||||
|
||||
parallel=-j3
|
||||
|
||||
aclocal &&
|
||||
autoconf &&
|
||||
./configure &&
|
||||
touch Mmake.params &&
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#-----------------------------------------------------------------------------#
|
||||
# Copyright (C) 1995-2006, 2009-2010 The University of Melbourne.
|
||||
# Copyright (C) 1995-2006, 2009-2011 The University of Melbourne.
|
||||
# This file may only be copied under the terms of the GNU General
|
||||
# Public Licence - see the file COPYING in the Mercury distribution.
|
||||
#-----------------------------------------------------------------------------#
|
||||
@@ -234,6 +234,8 @@ NSL_LIBRARY=@NSL_LIBRARY@
|
||||
DL_LIBRARY=@DL_LIBRARY@
|
||||
READLINE_LIBRARIES=@READLINE_LIBRARIES@
|
||||
TERMCAP_LIBRARY=@TERMCAP_LIBRARY@
|
||||
HWLOC_CFLAGS=@HWLOC_CFLAGS@
|
||||
HWLOC_LIBS=@HWLOC_LIBS@
|
||||
|
||||
# Extensions to use
|
||||
O=@OBJ_SUFFIX@
|
||||
|
||||
@@ -370,6 +370,9 @@ cleanint:
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
aclocal.m4: configure.in acinclude.m4
|
||||
aclocal
|
||||
|
||||
configure: configure.in aclocal.m4
|
||||
autoconf
|
||||
|
||||
|
||||
26
configure.in
26
configure.in
@@ -1265,7 +1265,7 @@ mercury_check_for_functions \
|
||||
grantpt unlockpt ptsname tcgetattr tcsetattr ioctl \
|
||||
access sleep opendir readdir closedir mkdir symlink readlink \
|
||||
gettimeofday setenv putenv _putenv posix_spawn sched_setaffinity \
|
||||
sched_getcpu sched_yield mkstemp
|
||||
sched_getaffinity sched_getcpu sched_yield mkstemp
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
@@ -5106,6 +5106,30 @@ AC_SUBST(USE_MSVCRT)
|
||||
|
||||
MERCURY_CHECK_READLINE
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
#
|
||||
# Check for libhwloc, http://www.open-mpi.org/projects/hwloc/
|
||||
#
|
||||
PKG_PROG_PKG_CONFIG
|
||||
PKG_CHECK_MODULES(libhwloc, hwloc >= 1.0,
|
||||
[
|
||||
AC_DEFINE(MR_HAVE_HWLOC)
|
||||
],
|
||||
[
|
||||
case "$LIBGRADES" in
|
||||
$BEST_LLDS_BASE_GRADE.par.gc*)
|
||||
MERCURY_MSG(["Warning: libhwloc not found, thread pinning in"])
|
||||
MERCURY_MSG(["low-level C parallel grades may be less accurate."])
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
])
|
||||
HWLOC_LIBS="$libhwloc_LIBS"
|
||||
HWLOC_CFLAGS="$libhwloc_CFLAGS"
|
||||
AC_SUBST(HWLOC_LIBS)
|
||||
AC_SUBST(HWLOC_CFLAGS)
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
#
|
||||
# Check for flex and bison
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#-----------------------------------------------------------------------------#
|
||||
# Copyright (C) 1998-2010 The University of Melbourne.
|
||||
# Copyright (C) 1998-2011 The University of Melbourne.
|
||||
# This file may only be copied under the terms of the GNU General
|
||||
# Public License - see the file COPYING in the Mercury distribution.
|
||||
#-----------------------------------------------------------------------------#
|
||||
@@ -252,10 +252,14 @@ LDLIBS = $(SHARED_GC_LIBS)
|
||||
|
||||
THREADLIBS = \
|
||||
` case "$(GRADE)" in \
|
||||
*.par*|*.mps*) echo "-lpthread" ;; \
|
||||
esac \
|
||||
*.mps*) echo $(THREAD_LIBS) ;; \
|
||||
*.hlc.par*) echo $(THREAD_LIBS) ;; \
|
||||
*.par*) echo "$(THREAD_LIBS) $(HWLOC_LIBS)" ;; \
|
||||
esac \
|
||||
`
|
||||
|
||||
CFLAGS += $(HWLOC_CFLAGS)
|
||||
|
||||
$(HDR_CHECK_OBJS): mercury_conf.h
|
||||
|
||||
#-----------------------------------------------------------------------------#
|
||||
|
||||
@@ -274,6 +274,7 @@
|
||||
** MR_HAVE_POSIX_SPAWN we have the posix_spawn() function.
|
||||
** MR_HAVE_FESETROUND we have the fesetround() function.
|
||||
** MR_HAVE_SCHED_SETAFFINITY we have the sched_setaffinity() function.
|
||||
** MR_HAVE_SCHED_GETAFFINITY we have the sched_getaffinity() function.
|
||||
** MR_HAVE_SCHED_GETCPU we have the sched_getcpu() function (glibc specific).
|
||||
** MR_HAVE_SCHED_YIELD we have the sched_yield() function.
|
||||
** MR_HAVE_PTHREAD_MUTEXATTR_SETPSHARED we have the
|
||||
@@ -342,6 +343,7 @@
|
||||
#undef MR_HAVE_POSIX_SPAWN
|
||||
#undef MR_HAVE_FESETROUND
|
||||
#undef MR_HAVE_SCHED_SETAFFINITY
|
||||
#undef MR_HAVE_SCHED_GETAFFINITY
|
||||
#undef MR_HAVE_SCHED_GETCPU
|
||||
#undef MR_HAVE_SCHED_YIELD
|
||||
#undef MR_HAVE_PTHREAD_MUTEXATTR_SETPSHARED
|
||||
@@ -419,6 +421,11 @@
|
||||
#undef MR_THREAD_LOCAL_STORAGE
|
||||
#undef MR_PTHREADS_WIN32
|
||||
|
||||
/*
|
||||
** MR_HAVE_HWLOC is defined if the hwloc library is available.
|
||||
*/
|
||||
#undef MR_HAVE_HWLOC
|
||||
|
||||
/*
|
||||
** The bytecode files represent floats in 64-bit IEEE format.
|
||||
**
|
||||
|
||||
@@ -1067,4 +1067,15 @@
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/*
|
||||
** MR_HAVE_THREAD_PINNING is defined if we can pin threads, either with
|
||||
** sched_setaffinity or hwloc.
|
||||
*/
|
||||
#if (defined(MR_HAVE_SCHED_SETAFFINITY) && \
|
||||
defined(MR_HAVE_SCHED_GETAFFINITY)) || defined(MR_HAVE_HWLOC)
|
||||
#define MR_HAVE_THREAD_PINNING
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* MERCURY_CONF_PARAM_H */
|
||||
|
||||
@@ -46,6 +46,10 @@ ENDINIT
|
||||
#include <sys/timeb.h> /* for _ftime() */
|
||||
#endif
|
||||
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_HWLOC)
|
||||
#include <hwloc.h>
|
||||
#endif
|
||||
|
||||
#include "mercury_memory_handlers.h"
|
||||
#include "mercury_context.h"
|
||||
#include "mercury_engine.h" /* for `MR_memdebug' */
|
||||
@@ -161,12 +165,19 @@ static MR_Integer MR_profile_parallel_regular_context_kept = 0;
|
||||
/*
|
||||
** Local variables for thread pinning.
|
||||
*/
|
||||
#ifdef MR_LL_PARALLEL_CONJ
|
||||
static MercuryLock MR_next_cpu_lock;
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
|
||||
MR_bool MR_thread_pinning = MR_FALSE;
|
||||
static MR_Unsigned MR_next_cpu = 0;
|
||||
/* This is initialised the first time MR_pin_primordial_thread() is called */
|
||||
|
||||
static MercuryLock MR_thread_pinning_lock;
|
||||
static unsigned MR_num_threads_left_to_pin;
|
||||
static unsigned MR_num_processors;
|
||||
MR_Unsigned MR_primordial_thread_cpu;
|
||||
#ifdef MR_HAVE_HWLOC
|
||||
static hwloc_topology_t MR_hw_topology;
|
||||
static hwloc_cpuset_t MR_hw_available_pus = NULL;
|
||||
#else /* MR_HAVE_SCHED_SETAFFINITY */
|
||||
static cpu_set_t *MR_available_cpus;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && \
|
||||
@@ -217,7 +228,7 @@ MR_SparkDeque **MR_spark_deques = NULL;
|
||||
|
||||
#ifdef MR_LL_PARALLEL_CONJ
|
||||
/*
|
||||
** Try to wake up a sleeping message and tell it to do action. The engine
|
||||
** Try to wake up a sleeping engine and tell it to do action. The engine
|
||||
** is only woken if the engine is in one of the states in the bitfield states.
|
||||
** If the engine is woken, this function returns MR_TRUE, otherwise it
|
||||
** returns MR_FALSE.
|
||||
@@ -233,9 +244,35 @@ try_wake_engine(MR_EngineId engine_id, int action,
|
||||
static void
|
||||
MR_write_out_profiling_parallel_execution(void);
|
||||
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
|
||||
#if defined(MR_LL_PARALLEL_CONJ)
|
||||
static void
|
||||
MR_setup_thread_pinning(void);
|
||||
|
||||
static MR_bool
|
||||
MR_do_pin_thread(int cpu);
|
||||
|
||||
/*
|
||||
** Determine which CPU this thread is currently running on.
|
||||
*/
|
||||
static int
|
||||
MR_current_cpu(void);
|
||||
|
||||
/*
|
||||
** Reset or initialize the cpuset that tracks which CPUs are available for
|
||||
** binding.
|
||||
*/
|
||||
static void
|
||||
MR_reset_available_cpus(void);
|
||||
|
||||
/*
|
||||
** Mark the given CPU as unavailable for thread pinning. This may mark other
|
||||
** CPUs as unavailable, if, for instance they share resources with this
|
||||
** processor and we can place other tasks elsewhere to avoid this sharing.
|
||||
** These resources are usually only considered for hardware threads that share
|
||||
** cores.
|
||||
*/
|
||||
static void
|
||||
MR_make_cpu_unavailable(int cpu);
|
||||
#endif
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
@@ -253,9 +290,6 @@ MR_init_context_stuff(void)
|
||||
pthread_mutex_init(&free_context_list_lock, MR_MUTEX_ATTR);
|
||||
pthread_mutex_init(&MR_pending_contexts_lock, MR_MUTEX_ATTR);
|
||||
#ifdef MR_LL_PARALLEL_CONJ
|
||||
#ifdef MR_HAVE_SCHED_SETAFFINITY
|
||||
pthread_mutex_init(&MR_next_cpu_lock, MR_MUTEX_ATTR);
|
||||
#endif
|
||||
#ifdef MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
|
||||
pthread_mutex_init(&MR_par_cond_stats_lock, MR_MUTEX_ATTR);
|
||||
#endif
|
||||
@@ -268,40 +302,10 @@ MR_init_context_stuff(void)
|
||||
MR_KEY_CREATE(&MR_backjump_next_choice_id_key, (void *)0);
|
||||
#endif
|
||||
|
||||
/*
|
||||
** If MR_num_threads is unset, configure it to match number of processors
|
||||
** on the system. If we do this, then we prepare to set processor
|
||||
** affinities later on.
|
||||
*/
|
||||
if (MR_num_threads == 0) {
|
||||
#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
|
||||
long result;
|
||||
|
||||
result = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
if (result < 1) {
|
||||
/* We couldn't determine the number of processors. */
|
||||
MR_num_threads = 1;
|
||||
} else {
|
||||
MR_num_threads = result;
|
||||
/*
|
||||
** On systems that don't support sched_setaffinity, we don't try
|
||||
** to automatically enable thread pinning. This prevents a runtime
|
||||
** warning that could unnecessarily confuse the user.
|
||||
**/
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && \
|
||||
defined(MR_HAVE_SCHED_SETAFFINITY)
|
||||
/*
|
||||
** Comment this back in to enable thread pinning by default
|
||||
** if we autodetected the correct number of CPUs.
|
||||
*/
|
||||
/* MR_thread_pinning = MR_TRUE; */
|
||||
#endif
|
||||
}
|
||||
#else /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
|
||||
MR_num_threads = 1;
|
||||
#endif /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
|
||||
}
|
||||
#ifdef MR_LL_PARALLEL_CONJ
|
||||
#if defined(MR_HAVE_THREAD_PINNING)
|
||||
MR_setup_thread_pinning();
|
||||
#endif
|
||||
MR_granularity_wsdeque_length =
|
||||
MR_granularity_wsdeque_length_factor * MR_num_threads;
|
||||
|
||||
@@ -329,102 +333,353 @@ MR_init_context_stuff(void)
|
||||
** Pin the primordial thread first to the CPU it is currently using
|
||||
** (if support is available for thread pinning).
|
||||
*/
|
||||
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
|
||||
unsigned
|
||||
MR_pin_primordial_thread(void)
|
||||
#if defined(MR_HAVE_THREAD_PINNING) && defined(MR_LL_PARALLEL_CONJ)
|
||||
static unsigned
|
||||
MR_pin_thread_no_locking(void)
|
||||
{
|
||||
unsigned cpu;
|
||||
int temp;
|
||||
unsigned i = 0;
|
||||
|
||||
/*
|
||||
** We don't need locking to pin the primordial thread as it is called
|
||||
** before any other threads exist.
|
||||
*/
|
||||
/*
|
||||
** We go through the motions of thread pinning even when thread pinning is
|
||||
** not supported as the allocation of CPUs to threads may be used later.
|
||||
*/
|
||||
#ifdef MR_HAVE_SCHED_GETCPU
|
||||
temp = sched_getcpu();
|
||||
if (temp == -1) {
|
||||
MR_primordial_thread_cpu = 0;
|
||||
#ifdef MR_HAVE_SCHED_SET_AFFINITY
|
||||
if (MR_thread_pinning) {
|
||||
perror("Warning: unable to determine the current CPU for "
|
||||
"the primordial thread: ");
|
||||
cpu = MR_current_cpu();
|
||||
#ifdef MR_DEBUG_THREAD_PINNING
|
||||
fprintf(stderr, "Currently running on cpu %d\n", cpu);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < MR_num_processors && MR_thread_pinning; i++) {
|
||||
if (MR_do_pin_thread((cpu + i) % MR_num_processors)) {
|
||||
#ifdef MR_DEBUG_THREAD_PINNING
|
||||
fprintf(stderr, "Pinned to cpu %d\n", (cpu + i) % MR_num_processors);
|
||||
fprintf(stderr, "Now running on cpu %d\n", MR_current_cpu());
|
||||
#endif
|
||||
MR_num_threads_left_to_pin--;
|
||||
MR_make_cpu_unavailable((cpu + i) % MR_num_processors);
|
||||
break;
|
||||
}
|
||||
if (!MR_thread_pinning) {
|
||||
/*
|
||||
** If MR_thread_pinning becomes false then an error prevented us
|
||||
** from pinning the thread.
|
||||
** When we fail to pin a thread but MR_thread_pinning remains true
|
||||
** it means that that CPU has already had a thread pinned to it.
|
||||
*/
|
||||
fprintf(stderr, "Couldn't pin Mercury engine to processor");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
MR_primordial_thread_cpu = temp;
|
||||
}
|
||||
#else
|
||||
MR_primordial_thread_cpu = 0;
|
||||
#endif
|
||||
#ifdef MR_HAVE_SCHED_SET_AFFINITY
|
||||
if (MR_thread_pinning) {
|
||||
MR_do_pin_thread(MR_primordial_thread_cpu);
|
||||
}
|
||||
#endif
|
||||
return MR_primordial_thread_cpu;
|
||||
return (cpu + 1) % MR_num_processors;
|
||||
}
|
||||
#endif /* defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) */
|
||||
|
||||
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
|
||||
unsigned
|
||||
MR_pin_thread(void)
|
||||
{
|
||||
unsigned cpu;
|
||||
|
||||
/*
|
||||
** We go through the motions of thread pinning even when thread pinning
|
||||
** is not supported, as the allocation of CPUs to threads may be
|
||||
** used later.
|
||||
*/
|
||||
MR_LOCK(&MR_next_cpu_lock, "MR_pin_thread");
|
||||
if (MR_next_cpu == MR_primordial_thread_cpu) {
|
||||
/*
|
||||
** Skip the CPU that the primordial thread was pinned on.
|
||||
*/
|
||||
MR_next_cpu++;
|
||||
}
|
||||
cpu = MR_next_cpu++;
|
||||
MR_UNLOCK(&MR_next_cpu_lock, "MR_pin_thread");
|
||||
|
||||
#ifdef MR_HAVE_SCHED_SETAFFINITY
|
||||
if (MR_thread_pinning) {
|
||||
MR_do_pin_thread(cpu);
|
||||
}
|
||||
#endif
|
||||
MR_LOCK(&MR_thread_pinning_lock, "MR_pin_thread");
|
||||
cpu = MR_pin_thread_no_locking();
|
||||
MR_UNLOCK(&MR_thread_pinning_lock, "MR_pin_thread");
|
||||
|
||||
return cpu;
|
||||
}
|
||||
#endif /* defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) */
|
||||
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
|
||||
static void
|
||||
void
|
||||
MR_pin_primordial_thread(void)
|
||||
{
|
||||
/*
|
||||
** We don't need locking to pin the primordial thread as it is called
|
||||
** before any other threads exist.
|
||||
*/
|
||||
MR_primordial_thread_cpu = MR_pin_thread_no_locking();
|
||||
}
|
||||
|
||||
static void MR_setup_thread_pinning(void)
|
||||
{
|
||||
unsigned num_processors;
|
||||
|
||||
#ifdef MR_HAVE_HWLOC
|
||||
if (-1 == hwloc_topology_init(&MR_hw_topology)) {
|
||||
MR_fatal_error("Error allocating libhwloc topology object");
|
||||
}
|
||||
if (-1 == hwloc_topology_load(MR_hw_topology)) {
|
||||
MR_fatal_error("Error detecting hardware topology (hwloc)");
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Setup num processors
|
||||
*/
|
||||
MR_reset_available_cpus();
|
||||
#ifdef MR_HAVE_HWLOC
|
||||
num_processors = hwloc_cpuset_weight(MR_hw_available_pus);
|
||||
#elif defined(MR_HAVE_SCHED_GETAFFINITY)
|
||||
/*
|
||||
** This looks redundant but it's not. MR_num_processors is a guess that was
|
||||
** gathered by using sysconf. But the number of CPUs in the CPU_SET is the
|
||||
** actual number of CPUs that this process is restricted to.
|
||||
*/
|
||||
#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
|
||||
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#else
|
||||
/*
|
||||
** The user may have supplied MR_num_processors
|
||||
*/
|
||||
num_processors = (MR_num_processors > 1 ? MR_num_processors : 1)
|
||||
#endif
|
||||
num_processors = CPU_COUNT_S(num_processors, MR_available_cpus);
|
||||
#endif
|
||||
MR_num_processors = num_processors;
|
||||
|
||||
/*
|
||||
** If MR_num_threads is unset, configure it to match number of processors
|
||||
** on the system. If we do this, then we prepare to set processor
|
||||
** affinities later on.
|
||||
*/
|
||||
if (MR_num_threads == 0) {
|
||||
MR_num_threads = num_processors;
|
||||
}
|
||||
MR_num_threads_left_to_pin = MR_num_threads;
|
||||
|
||||
#ifdef MR_DEBUG_THREAD_PINNING
|
||||
fprintf(stderr, "Detected %d available processors, will use %d threads\n",
|
||||
MR_num_processors, MR_num_threads);
|
||||
#endif
|
||||
|
||||
pthread_mutex_init(&MR_thread_pinning_lock, MR_MUTEX_ATTR);
|
||||
|
||||
/*
|
||||
** Comment this back in to enable thread pinning by default
|
||||
** if we autodetected the number of CPUs without error.
|
||||
*/
|
||||
#if 0
|
||||
if (MR_num_threads > 1) {
|
||||
MR_thread_pinning = MR_TRUE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
** Determine which CPU this thread is currently running on.
|
||||
*/
|
||||
static int MR_current_cpu(void)
|
||||
{
|
||||
#if defined(MR_HAVE_SCHED_GETCPU)
|
||||
int os_cpu;
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
hwloc_obj_t pu;
|
||||
#endif
|
||||
|
||||
os_cpu = sched_getcpu();
|
||||
if (-1 == os_cpu) {
|
||||
os_cpu = 0;
|
||||
|
||||
if (MR_thread_pinning) {
|
||||
perror("Warning: unable to determine the current CPU for "
|
||||
"this thread: ");
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
pu = hwloc_get_pu_obj_by_os_index(MR_hw_topology, os_cpu);
|
||||
return pu->logical_index;
|
||||
#else
|
||||
return os_cpu;
|
||||
#endif
|
||||
|
||||
#else /* ! MR_HAVE_SCHED_GETCPU */
|
||||
/* We have no idea! */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static MR_bool
|
||||
MR_do_pin_thread(int cpu)
|
||||
{
|
||||
cpu_set_t cpus;
|
||||
/*
|
||||
** Make sure that we're allowed to bind to this CPU.
|
||||
*/
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
hwloc_obj_t pu;
|
||||
|
||||
if (cpu < CPU_SETSIZE) {
|
||||
CPU_ZERO(&cpus);
|
||||
CPU_SET(cpu, &cpus);
|
||||
if (sched_setaffinity(0, sizeof(cpu_set_t), &cpus) == -1) {
|
||||
perror("Warning: Couldn't set CPU affinity: ");
|
||||
/*
|
||||
** If this failed once, it will probably fail again,
|
||||
** so we disable it.
|
||||
*/
|
||||
MR_thread_pinning = MR_FALSE;
|
||||
}
|
||||
} else {
|
||||
perror("Warning: Couldn't set CPU affinity due to a static "
|
||||
"system limit: ");
|
||||
MR_thread_pinning = MR_FALSE;
|
||||
if (hwloc_cpuset_iszero(MR_hw_available_pus)) {
|
||||
/*
|
||||
** Each available CPU already has a thread pinned to it. Reset the
|
||||
** available_pus set so that we can oversubscribe CPUs but still
|
||||
** attempt to balance load.
|
||||
*/
|
||||
MR_reset_available_cpus();
|
||||
}
|
||||
|
||||
pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
|
||||
if (!hwloc_cpuset_intersects(MR_hw_available_pus, pu->cpuset)) {
|
||||
return MR_FALSE;
|
||||
}
|
||||
#elif defined(MR_HAVE_SCHED_SETAFFINITY)
|
||||
if (CPU_COUNT_S(MR_num_processors, MR_available_cpus) == 0) {
|
||||
/*
|
||||
** As above, reset the available cpus.
|
||||
*/
|
||||
MR_reset_available_cpus();
|
||||
}
|
||||
if (!CPU_ISSET_S(cpu, MR_num_processors, MR_available_cpus)) {
|
||||
return MR_FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
errno = hwloc_set_cpubind(MR_hw_topology, pu->cpuset,
|
||||
HWLOC_CPUBIND_THREAD);
|
||||
if (errno != 0) {
|
||||
perror("Warning: Couldn't set CPU affinity: ");
|
||||
MR_thread_pinning = MR_FALSE;
|
||||
return MR_FALSE;
|
||||
}
|
||||
#elif defined(MR_HAVE_SCHED_SETAFFINITY)
|
||||
cpu_set_t *cpus;
|
||||
|
||||
cpus = CPU_ALLOC(MR_num_processors);
|
||||
|
||||
CPU_ZERO_S(MR_num_processors, cpus);
|
||||
CPU_SET_S(cpu, MR_num_processors, cpus);
|
||||
if (sched_setaffinity(0, CPU_ALLOC_SIZE(MR_num_processors), cpus) == -1) {
|
||||
perror("Warning: Couldn't set CPU affinity: ");
|
||||
/*
|
||||
** If this failed once, it will probably fail again,
|
||||
** so we disable it.
|
||||
*/
|
||||
MR_thread_pinning = MR_FALSE;
|
||||
return MR_FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
return MR_TRUE;
|
||||
}
|
||||
|
||||
static void MR_reset_available_cpus(void)
|
||||
{
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
hwloc_cpuset_t inherited_binding;
|
||||
|
||||
/*
|
||||
** Gather the cpuset that our parent process bound this process to.
|
||||
**
|
||||
** (For information about how to deliberately restrict a process and its
|
||||
** sub-processes to a set of CPUs on Linux see cpuset(7).)
|
||||
*/
|
||||
inherited_binding = hwloc_cpuset_alloc();
|
||||
hwloc_get_cpubind(MR_hw_topology, inherited_binding, HWLOC_CPUBIND_PROCESS);
|
||||
|
||||
/*
|
||||
** Set the available processors to the intersection of inherited_binding and the
|
||||
** cpuset we're allowed to use as reported by libhwloc. In my tests with
|
||||
** libhwloc_1.0-1 (Debian) hwloc reported that all cpus on the system are
|
||||
** available; it didn't exclude cpus not in the process's cpuset(7).
|
||||
*/
|
||||
if (MR_hw_available_pus == NULL) {
|
||||
MR_hw_available_pus = hwloc_cpuset_alloc();
|
||||
}
|
||||
hwloc_cpuset_and(MR_hw_available_pus, inherited_binding,
|
||||
hwloc_topology_get_allowed_cpuset(MR_hw_topology));
|
||||
|
||||
hwloc_cpuset_free(inherited_binding);
|
||||
#elif defined(MR_HAVE_SCHED_GETAFFINITY)
|
||||
unsigned num_processors;
|
||||
|
||||
#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
|
||||
num_processors = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
#else
|
||||
/*
|
||||
** The user may have supplied MR_num_processors
|
||||
*/
|
||||
num_processors = (MR_num_processors > 1 ? MR_num_processors : 1)
|
||||
#endif
|
||||
|
||||
if (MR_available_cpus == NULL) {
|
||||
MR_available_cpus = CPU_ALLOC(num_processors);
|
||||
}
|
||||
|
||||
if (-1 == sched_getaffinity(0, CPU_ALLOC_SIZE(num_processors),
|
||||
MR_available_cpus))
|
||||
{
|
||||
perror("Couldn't get CPU affinity");
|
||||
MR_thread_pinning = MR_FALSE;
|
||||
CPU_FREE(MR_available_cpus);
|
||||
MR_available_cpus = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu);
|
||||
#endif
|
||||
|
||||
static void MR_make_cpu_unavailable(int cpu)
|
||||
{
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
hwloc_obj_t pu;
|
||||
pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
|
||||
MR_make_pu_unavailable(pu);
|
||||
#elif defined(MR_HAVE_SCHED_SETAFFINITY)
|
||||
CPU_CLR_S(cpu, MR_num_processors, MR_available_cpus);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(MR_HAVE_HWLOC)
|
||||
static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu) {
|
||||
hwloc_obj_t core;
|
||||
static int siblings_to_make_unavailable;
|
||||
int i;
|
||||
|
||||
#ifdef MR_DEBUG_THREAD_PINNING
|
||||
char * cpusetstr;
|
||||
|
||||
hwloc_cpuset_asprintf(&cpusetstr, MR_hw_available_pus);
|
||||
fprintf(stderr, "Old available CPU set: %s\n", cpusetstr);
|
||||
free(cpusetstr);
|
||||
hwloc_cpuset_asprintf(&cpusetstr, pu->cpuset);
|
||||
fprintf(stderr, "Making this CPU set unavailable: %s\n", cpusetstr);
|
||||
free(cpusetstr);
|
||||
#endif
|
||||
|
||||
hwloc_cpuset_andnot(MR_hw_available_pus, MR_hw_available_pus, pu->cpuset);
|
||||
|
||||
#ifdef MR_DEBUG_THREAD_PINNING
|
||||
hwloc_cpuset_asprintf(&cpusetstr, MR_hw_available_pus);
|
||||
fprintf(stderr, "New available CPU set: %s\n", cpusetstr);
|
||||
free(cpusetstr);
|
||||
#endif
|
||||
|
||||
siblings_to_make_unavailable = hwloc_cpuset_weight(MR_hw_available_pus) -
|
||||
MR_num_threads_left_to_pin;
|
||||
|
||||
if (siblings_to_make_unavailable > 0) {
|
||||
/*
|
||||
** Remove sibling processing units that share a core with the one we've just removed.
|
||||
*/
|
||||
core = pu->parent;
|
||||
if (core->type != HWLOC_OBJ_CORE) {
|
||||
return MR_FALSE;
|
||||
}
|
||||
|
||||
for (i = 0;
|
||||
(i < core->arity && siblings_to_make_unavailable > 0);
|
||||
i++) {
|
||||
if (core->children[i] == pu) {
|
||||
continue;
|
||||
}
|
||||
if (hwloc_cpuset_intersects(core->children[i]->cpuset,
|
||||
MR_hw_available_pus)) {
|
||||
if (!MR_make_pu_unavailable(core->children[i])) {
|
||||
return MR_FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return MR_TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MR_HAVE_THREAD_PINNING && MR_LL_PARALLEL_CONJ */
|
||||
|
||||
void
|
||||
MR_finalize_context_stuff(void)
|
||||
{
|
||||
|
||||
@@ -468,12 +468,19 @@ extern void MR_init_context_stuff(void);
|
||||
** be pinned to if pinning was both enabled and supported. That is a valid
|
||||
** value is always returned even if the thread is not actually pinned.
|
||||
*/
|
||||
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
|
||||
extern unsigned
|
||||
#if defined(MR_LL_PARALLEL_CONJ)
|
||||
#if defined(MR_HAVE_THREAD_PINNING)
|
||||
extern void
|
||||
MR_pin_primordial_thread(void);
|
||||
extern unsigned
|
||||
MR_pin_thread(void);
|
||||
|
||||
/*
|
||||
** The CPU that the primordial thread is running on.
|
||||
*/
|
||||
extern MR_Unsigned MR_primordial_thread_cpu;
|
||||
#endif
|
||||
|
||||
/*
|
||||
** Shutdown all the engines.
|
||||
*/
|
||||
|
||||
@@ -115,7 +115,7 @@ MR_init_thread(MR_when_to_use when_to_use)
|
||||
MercuryEngine *eng;
|
||||
|
||||
#ifdef MR_THREAD_SAFE
|
||||
#ifdef MR_LL_PARALLEL_CONJ
|
||||
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
|
||||
unsigned cpu;
|
||||
#endif
|
||||
|
||||
@@ -129,13 +129,17 @@ MR_init_thread(MR_when_to_use when_to_use)
|
||||
#ifdef MR_LL_PARALLEL_CONJ
|
||||
switch (when_to_use) {
|
||||
case MR_use_later:
|
||||
#ifdef MR_HAVE_THREAD_PINNING
|
||||
cpu = MR_pin_thread();
|
||||
#endif
|
||||
break;
|
||||
case MR_use_now:
|
||||
/*
|
||||
** Don't pin the primordial thread here, it's already been done.
|
||||
*/
|
||||
#ifdef MR_HAVE_THREAD_PINNING
|
||||
cpu = MR_primordial_thread_cpu;
|
||||
#endif
|
||||
break;
|
||||
/*
|
||||
** TODO: We may use the cpu value here to determine which CPUs which
|
||||
|
||||
@@ -211,11 +211,6 @@ MR_null_thread(void);
|
||||
*/
|
||||
extern MercuryThreadKey MR_exception_handler_key;
|
||||
|
||||
/*
|
||||
** The CPU that the primordial thread is running on.
|
||||
*/
|
||||
extern MR_Unsigned MR_primordial_thread_cpu;
|
||||
|
||||
#else /* not MR_THREAD_SAFE */
|
||||
|
||||
#define MR_LOCK(nothing, from) do { } while (0)
|
||||
|
||||
@@ -667,7 +667,9 @@ mercury_runtime_init(int argc, char **argv)
|
||||
#else
|
||||
|
||||
#if defined(MR_LL_PARALLEL_CONJ)
|
||||
#if defined(MR_HAVE_THREAD_PINNING)
|
||||
MR_pin_primordial_thread();
|
||||
#endif
|
||||
#if defined(MR_THREADSCOPE)
|
||||
/*
|
||||
** We must setup threadscope before we setup the first engine.
|
||||
|
||||
@@ -50,6 +50,7 @@ LDFLAGS_FOR_THREADS="@LDFLAGS_FOR_THREADS@"
|
||||
LDFLAGS_FOR_TRACE="@LDFLAGS_FOR_TRACE@"
|
||||
LD_LIBFLAGS_FOR_THREADS="@LD_LIBFLAGS_FOR_THREADS@"
|
||||
THREAD_LIBS="@THREAD_LIBS@"
|
||||
HWLOC_LIBS="@HWLOC_LIBS@"
|
||||
TRACE_BASE_LIBS_SYSTEM="@TRACE_BASE_LIBS_SYSTEM@"
|
||||
|
||||
TMPDIR=${TMPDIR=/tmp}
|
||||
@@ -370,6 +371,13 @@ case $use_thread_libs.$make_shared_lib in
|
||||
THREAD_LIBS=""
|
||||
;;
|
||||
esac
|
||||
case "$GRADE" in
|
||||
hlc.*.par*)
|
||||
;;
|
||||
*.par*)
|
||||
THREAD_LIBS="$THREAD_LIBS $HWLOC_LIBS"
|
||||
;;
|
||||
esac
|
||||
|
||||
# Set the correct flags if we're to use the MS Visual C runtime.
|
||||
use_msvcrt=@USE_MSVCRT@
|
||||
|
||||
@@ -735,6 +735,7 @@ esac
|
||||
# XXX building the depend target in parallel sometimes fails so we don't
|
||||
# do that at the moment - it's probably not worth doing anyway.
|
||||
#
|
||||
aclocal || { false; exit 1; }
|
||||
autoconf || { false; exit 1; }
|
||||
rm -f config.cache
|
||||
./configure --prefix=$INSTALL_DIR $CONFIG_OPTS || { false; exit 1; }
|
||||
@@ -862,6 +863,7 @@ case $HOST in $ROTD_HOST)
|
||||
: > Mmake.params &&
|
||||
rm -f so_locations &&
|
||||
rm -f .enable_lib_grades &&
|
||||
aclocal &&
|
||||
autoconf &&
|
||||
mercury_cv_low_tag_bits=2 \
|
||||
mercury_cv_bits_per_word=32 \
|
||||
|
||||
Reference in New Issue
Block a user