mirror of
https://github.com/openbsd/src.git
synced 2026-04-24 06:04:47 +00:00
Some new intel machines have a new 3rd tier of cpus called LP-E which are
E-core (Atom) without L3 cache. These cpus are Lethargic, and it sucks when processes migrate to them. This introduces sysctl hw.blockcpu= which takes a sequence of up to 4 letters: S (for SMT), P (regular performance cpu), E (efficient cpu, generally 50% to 80% as fast), and L (lethargic cpu), which is even slower. By setting this, you can select cpus to kick out of the scheduler. The default is SL. The hw.smt sysctl remains for now but we will eventually delete it. hw.smt changes and follows hw.blockcpu=S. ok kettenis mlarkin
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
.\" $OpenBSD: sysctl.2,v 1.70 2025/09/16 09:19:43 florian Exp $
|
||||
.\" $OpenBSD: sysctl.2,v 1.71 2026/03/31 16:46:22 deraadt Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
@@ -27,7 +27,7 @@
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.Dd $Mdocdate: September 16 2025 $
|
||||
.Dd $Mdocdate: March 31 2026 $
|
||||
.Dt SYSCTL 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
@@ -274,6 +274,7 @@ privileges may change the value.
|
||||
.It Dv HW_SETPERF Ta "integer" Ta "yes"
|
||||
.It Dv HW_SMT Ta "integer" Ta "yes"
|
||||
.It Dv HW_UCOMNAMES Ta "string" Ta "no"
|
||||
.It Dv HW_BLOCKCPU Ta "string" Ta "yes"
|
||||
.It Dv HW_USERMEM Ta "integer" Ta "no"
|
||||
.It Dv HW_USERMEM64 Ta "int64_t" Ta "no"
|
||||
.It Dv HW_UUID Ta "string" Ta "no"
|
||||
@@ -421,7 +422,21 @@ is set to
|
||||
.It Dv HW_SMT Pq Va hw.smt
|
||||
If set to 1, enable simultaneous multithreading (SMT) on CPUs that
|
||||
support it.
|
||||
Disabled by default.
|
||||
Deprecated, use
|
||||
.Va hw.blockcpu
|
||||
instead.
|
||||
.It Dv HW_BLOCKCPU Pq Va hw.blockcpu
|
||||
A series of characters
|
||||
.Ar ( S
|
||||
(SMT),
|
||||
.Ar P
|
||||
(Performance core),
|
||||
.Ar E
|
||||
(Efficiency core),
|
||||
and
|
||||
.Ar L
|
||||
(Lethargic, Low Power Efficiency core)
|
||||
representing CPU types to avoid scheduling tasks onto.
|
||||
.It Dv HW_UCOMNAMES Pq Va hw.ucomnames
|
||||
A comma-separated list of currently attached
|
||||
.Xr ucom 4
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: identcpu.c,v 1.152 2025/09/14 15:52:28 mlarkin Exp $ */
|
||||
/* $OpenBSD: identcpu.c,v 1.153 2026/03/31 16:46:22 deraadt Exp $ */
|
||||
/* $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */
|
||||
|
||||
/*
|
||||
@@ -831,6 +831,7 @@ cpu_topology(struct cpu_info *ci)
|
||||
u_int32_t apicid, max_apicid = 0, max_coreid = 0;
|
||||
u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
|
||||
u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;
|
||||
char type[8], *typ = type;
|
||||
|
||||
/* We need at least apicid at CPUID 1 */
|
||||
if (ci->ci_cpuid_level < 1)
|
||||
@@ -865,7 +866,10 @@ cpu_topology(struct cpu_info *ci)
|
||||
|
||||
/* Cut logical thread_id into core id, and smt id in a core */
|
||||
ci->ci_core_id = thread_id / nthreads;
|
||||
ci->ci_smt_id = thread_id % nthreads;
|
||||
if (ci->ci_smt_id) {
|
||||
ci->ci_cputype |= CPUTYP_SMT;
|
||||
*typ++ = 'S';
|
||||
}
|
||||
} else if (ci->ci_vendor == CPUV_INTEL) {
|
||||
/* We only support leaf 1/4 detection */
|
||||
if (ci->ci_cpuid_level < 4)
|
||||
@@ -888,10 +892,36 @@ cpu_topology(struct cpu_info *ci)
|
||||
pkg_mask = ~0U << core_bits;
|
||||
|
||||
ci->ci_smt_id = apicid & smt_mask;
|
||||
if (ci->ci_smt_id) {
|
||||
ci->ci_cputype |= CPUTYP_SMT;
|
||||
*typ++ = 'S';
|
||||
}
|
||||
ci->ci_core_id = (apicid & core_mask) >> smt_bits;
|
||||
ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
|
||||
|
||||
if (ci->ci_cpuid_level >= 0x1a) {
|
||||
CPUID_LEAF(0x1a, 0, eax, ebx, ecx, edx);
|
||||
if ((eax >> 24) == 0x20) {
|
||||
CPUID_LEAF(4, 3, eax, ebx, ecx, edx);
|
||||
if (eax == 0) {
|
||||
/* No L3 cache is classified as Lethargic */
|
||||
ci->ci_cputype |= CPUTYP_L;
|
||||
*typ++ = 'L';
|
||||
} else {
|
||||
ci->ci_cputype |= CPUTYP_E;
|
||||
*typ++ = 'E';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else
|
||||
goto no_topology;
|
||||
if ((ci->ci_cputype & (CPUTYP_E | CPUTYP_L)) == 0) {
|
||||
ci->ci_cputype |= CPUTYP_P;
|
||||
*typ++ = 'P';
|
||||
}
|
||||
*typ ='\0';
|
||||
|
||||
#ifdef DEBUG
|
||||
printf("cpu%d: smt %u, core %u, pkg %u "
|
||||
"(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
|
||||
@@ -900,14 +930,15 @@ cpu_topology(struct cpu_info *ci)
|
||||
apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
|
||||
core_mask, pkg_bits, pkg_mask);
|
||||
#else
|
||||
printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
|
||||
ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
|
||||
printf("cpu%d: smt %u, core %u, package %u, type %s\n", ci->ci_cpuid,
|
||||
ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id, type);
|
||||
|
||||
#endif
|
||||
return;
|
||||
/* We can't map, so consider ci_core_id as ci_cpuid */
|
||||
no_topology:
|
||||
#endif
|
||||
ci->ci_cputype = CPUTYP_P;
|
||||
ci->ci_smt_id = 0;
|
||||
ci->ci_core_id = ci->ci_cpuid;
|
||||
ci->ci_pkg_id = 0;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: machdep.c,v 1.307 2026/03/11 16:18:42 kettenis Exp $ */
|
||||
/* $OpenBSD: machdep.c,v 1.308 2026/03/31 16:46:22 deraadt Exp $ */
|
||||
/* $NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $ */
|
||||
|
||||
/*-
|
||||
@@ -1483,6 +1483,8 @@ init_x86_64(paddr_t first_avail)
|
||||
int x, ist;
|
||||
uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30;
|
||||
|
||||
sched_blockcpu = CPUTYP_SMT | CPUTYP_L;
|
||||
|
||||
/*
|
||||
* locore0 mapped 2 pages for use as GHCB before pmap is initialized.
|
||||
*/
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: cpu.h,v 1.184 2026/03/11 16:18:42 kettenis Exp $ */
|
||||
/* $OpenBSD: cpu.h,v 1.185 2026/03/31 16:46:22 deraadt Exp $ */
|
||||
/* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */
|
||||
|
||||
/*-
|
||||
@@ -188,6 +188,7 @@ struct cpu_info {
|
||||
int ci_inatomic; /* [o] */
|
||||
|
||||
#define __HAVE_CPU_TOPOLOGY
|
||||
u_int32_t ci_cputype; /* [I] */
|
||||
u_int32_t ci_smt_id; /* [I] */
|
||||
u_int32_t ci_core_id; /* [I] */
|
||||
u_int32_t ci_pkg_id; /* [I] */
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: kern_sched.c,v 1.113 2025/06/12 20:37:58 deraadt Exp $ */
|
||||
/* $OpenBSD: kern_sched.c,v 1.114 2026/03/31 16:46:22 deraadt Exp $ */
|
||||
/*
|
||||
* Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
|
||||
*
|
||||
@@ -54,8 +54,9 @@ uint64_t sched_stolen; /* Times we stole proc from other cpus */
|
||||
uint64_t sched_choose; /* Times we chose a cpu */
|
||||
uint64_t sched_wasidle; /* Times we came out of idle */
|
||||
|
||||
/* Only schedule processes on sibling CPU threads when true. */
|
||||
int sched_smt;
|
||||
#ifdef __HAVE_CPU_TOPOLOGY
|
||||
int sched_blockcpu; /* Types of cpu to not schedule on */
|
||||
#endif
|
||||
|
||||
/*
|
||||
* A few notes about cpu_switchto that is implemented in MD code.
|
||||
@@ -153,7 +154,7 @@ sched_idle(void *v)
|
||||
* After that just go away and properly reenter once idle.
|
||||
*/
|
||||
#ifdef __HAVE_CPU_TOPOLOGY
|
||||
if (sched_smt || ci->ci_smt_id == 0)
|
||||
if ((ci->ci_cputype & sched_blockcpu) == 0)
|
||||
cpuset_add(&sched_all_cpus, ci);
|
||||
#else
|
||||
cpuset_add(&sched_all_cpus, ci);
|
||||
@@ -659,7 +660,7 @@ sched_start_secondary_cpus(void)
|
||||
atomic_clearbits_int(&spc->spc_schedflags,
|
||||
SPCF_SHOULDHALT | SPCF_HALTED);
|
||||
#ifdef __HAVE_CPU_TOPOLOGY
|
||||
if (!sched_smt && ci->ci_smt_id > 0)
|
||||
if (ci->ci_cputype & sched_blockcpu)
|
||||
continue;
|
||||
#endif
|
||||
cpuset_add(&sched_all_cpus, ci);
|
||||
@@ -847,33 +848,108 @@ cpu_is_online(struct cpu_info *ci)
|
||||
|
||||
#ifndef SMALL_KERNEL
|
||||
int
|
||||
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
|
||||
sched_cpuadjust(int newblockcpu)
|
||||
{
|
||||
CPU_INFO_ITERATOR cii;
|
||||
struct cpu_info *ci;
|
||||
int err, newsmt;
|
||||
int inset;
|
||||
|
||||
newsmt = sched_smt;
|
||||
err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
|
||||
if (err)
|
||||
return err;
|
||||
if (newsmt == sched_smt)
|
||||
if (newblockcpu == sched_blockcpu)
|
||||
return 0;
|
||||
|
||||
sched_smt = newsmt;
|
||||
sched_blockcpu = newblockcpu;
|
||||
CPU_INFO_FOREACH(cii, ci) {
|
||||
if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
|
||||
continue;
|
||||
if (ci->ci_smt_id == 0)
|
||||
continue;
|
||||
if (sched_smt)
|
||||
cpuset_add(&sched_all_cpus, ci);
|
||||
else
|
||||
cpuset_del(&sched_all_cpus, ci);
|
||||
inset = cpuset_isset(&sched_all_cpus, ci);
|
||||
if (ci->ci_cputype & sched_blockcpu) {
|
||||
if (inset)
|
||||
cpuset_del(&sched_all_cpus, ci);
|
||||
} else {
|
||||
if (!inset)
|
||||
cpuset_add(&sched_all_cpus, ci);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* emulate hw.smt temporarily */
|
||||
int
|
||||
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
|
||||
{
|
||||
int err, newsmt = 1, newblockcpu = 0;
|
||||
|
||||
#ifdef CPUTYP_SMT
|
||||
if (sched_blockcpu & CPUTYP_SMT)
|
||||
newsmt = 0;
|
||||
#endif
|
||||
err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
|
||||
if (err || newp == NULL)
|
||||
return err;
|
||||
#ifdef CPUTYP_SMT
|
||||
if (newsmt)
|
||||
newblockcpu &= ~CPUTYP_SMT;
|
||||
#endif
|
||||
return sched_cpuadjust(newblockcpu);
|
||||
}
|
||||
|
||||
/*
 * hw.blockcpu sysctl handler.
 *
 * The value is a string with one letter per blocked cpu type:
 * 'S' (CPUTYP_SMT), 'P' (CPUTYP_P), 'E' (CPUTYP_E) and 'L' (CPUTYP_L).
 * A read reports the letters for the bits currently set in
 * sched_blockcpu.  A write parses the supplied string into a bitmask
 * and passes it to sched_cpuadjust().
 *
 * Returns the error from sysctl_rdstring()/sysctl_string(), EINVAL if
 * the new string contains a character that is not a supported type
 * letter, or otherwise the result of sched_cpuadjust().
 */
int
sysctl_hwblockcpu(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	int err, newblockcpu;
	char type[8], *typ = type;

	/* Render the current mask: at most four letters plus the NUL. */
#ifdef CPUTYP_SMT
	if (sched_blockcpu & CPUTYP_SMT)
		*typ++ = 'S';
#endif
#ifdef CPUTYP_P
	if (sched_blockcpu & CPUTYP_P)
		*typ++ = 'P';
#endif
#ifdef CPUTYP_E
	if (sched_blockcpu & CPUTYP_E)
		*typ++ = 'E';
#endif
#ifdef CPUTYP_L
	if (sched_blockcpu & CPUTYP_L)
		*typ++ = 'L';
#endif
	*typ = '\0';
	if (newp == NULL)
		return sysctl_rdstring(oldp, oldlenp, newp, type);

	/*
	 * The loop below parses type after this call, so sysctl_string()
	 * is expected to have stored the new value there.
	 */
	err = sysctl_string(oldp, oldlenp, newp, newlen, type, sizeof type);
	if (err)
		return err;

	/* Each case is guarded by the CPUTYP_* constant it actually uses. */
	for (newblockcpu = 0, typ = type; *typ; typ++) {
		switch (*typ) {
#ifdef CPUTYP_SMT
		case 'S':
			newblockcpu |= CPUTYP_SMT;
			break;
#endif
#ifdef CPUTYP_P
		case 'P':
			newblockcpu |= CPUTYP_P;
			break;
#endif
#ifdef CPUTYP_E
		case 'E':
			newblockcpu |= CPUTYP_E;
			break;
#endif
#ifdef CPUTYP_L
		case 'L':
			newblockcpu |= CPUTYP_L;
			break;
#endif
		default:
			return (EINVAL);
		}
	}
	return sched_cpuadjust(newblockcpu);
}
|
||||
|
||||
#endif /* SMALL_KERNEL */
|
||||
|
||||
#endif /* __HAVE_CPU_TOPOLOGY */
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: kern_sysctl.c,v 1.486 2026/03/25 05:05:41 deraadt Exp $ */
|
||||
/* $OpenBSD: kern_sysctl.c,v 1.487 2026/03/31 16:46:22 deraadt Exp $ */
|
||||
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
|
||||
|
||||
/*-
|
||||
@@ -884,6 +884,7 @@ hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
|
||||
case HW_UCOMNAMES:
|
||||
#ifdef __HAVE_CPU_TOPOLOGY
|
||||
case HW_SMT:
|
||||
case HW_BLOCKCPU:
|
||||
#endif
|
||||
#endif /* !SMALL_KERNEL */
|
||||
{
|
||||
@@ -984,6 +985,8 @@ hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
|
||||
#ifdef __HAVE_CPU_TOPOLOGY
|
||||
case HW_SMT:
|
||||
return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
|
||||
case HW_BLOCKCPU:
|
||||
return (sysctl_hwblockcpu(oldp, oldlenp, newp, newlen));
|
||||
#endif
|
||||
case HW_BATTERY:
|
||||
return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: sched.h,v 1.77 2025/06/09 10:57:46 claudio Exp $ */
|
||||
/* $OpenBSD: sched.h,v 1.78 2026/03/31 16:46:21 deraadt Exp $ */
|
||||
/* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */
|
||||
|
||||
/*-
|
||||
@@ -179,8 +179,15 @@ void sched_barrier(struct cpu_info *ci);
|
||||
int sysctl_hwsetperf(void *, size_t *, void *, size_t);
|
||||
int sysctl_hwperfpolicy(void *, size_t *, void *, size_t);
|
||||
int sysctl_hwsmt(void *, size_t *, void *, size_t);
|
||||
int sysctl_hwblockcpu(void *, size_t *, void *, size_t);
|
||||
int sysctl_hwncpuonline(void);
|
||||
|
||||
#define CPUTYP_SMT 0x01 /* SMT cpu */
|
||||
#define CPUTYP_P 0x02 /* Performance core */
|
||||
#define CPUTYP_E 0x04 /* Efficiency core */
|
||||
#define CPUTYP_L 0x08 /* Lethargic, Low Power Efficiency core */
|
||||
extern int sched_blockcpu;
|
||||
|
||||
#ifdef MULTIPROCESSOR
|
||||
void sched_start_secondary_cpus(void);
|
||||
void sched_stop_secondary_cpus(void);
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: sysctl.h,v 1.246 2025/07/31 09:05:11 mvs Exp $ */
|
||||
/* $OpenBSD: sysctl.h,v 1.247 2026/03/31 16:46:21 deraadt Exp $ */
|
||||
/* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */
|
||||
|
||||
/*
|
||||
@@ -926,7 +926,8 @@ struct kinfo_file {
|
||||
#define HW_POWER 26 /* int: machine has wall-power */
|
||||
#define HW_BATTERY 27 /* node: battery */
|
||||
#define HW_UCOMNAMES 28 /* strings: ucom names */
|
||||
#define HW_MAXID 29 /* number of valid hw ids */
|
||||
#define HW_BLOCKCPU 29 /* string: cpu types to block */
|
||||
#define HW_MAXID 30 /* number of valid hw ids */
|
||||
|
||||
#define CTL_HW_NAMES { \
|
||||
{ 0, 0 }, \
|
||||
@@ -958,6 +959,7 @@ struct kinfo_file {
|
||||
{ "power", CTLTYPE_INT }, \
|
||||
{ "battery", CTLTYPE_NODE }, \
|
||||
{ "ucomnames", CTLTYPE_STRING }, \
|
||||
{ "blockcpu", CTLTYPE_STRING }, \
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
Reference in New Issue
Block a user