1
0
mirror of https://github.com/openbsd/src.git synced 2026-04-24 06:04:47 +00:00

Some new intel machines have a new 3rd tier of cpus called LP-E which are

E-core (Atom) without L3 cache.  These cpus are Lethargic, and it sucks
when processes migrate to them.
This introduces sysctl hw.blockcpu= which takes a sequence of 4 letters.
S (for SMT), P (regular performance cpu), E (efficient cpu, generally
80% to 50% as fast), and L (lethargic cpu) which are even slower.
By setting this, you can select cpus to kick out of the scheduler.  The
default is SL.
The hw.smt sysctl remains for now but we will eventually delete it.
hw.smt changes and follows hw.blockcpu=S.
ok kettenis mlarkin
This commit is contained in:
deraadt
2026-03-31 16:46:21 +00:00
parent 9f2496a895
commit 7ce80b6577
8 changed files with 171 additions and 34 deletions

View File

@@ -1,4 +1,4 @@
.\" $OpenBSD: sysctl.2,v 1.70 2025/09/16 09:19:43 florian Exp $
.\" $OpenBSD: sysctl.2,v 1.71 2026/03/31 16:46:22 deraadt Exp $
.\"
.\" Copyright (c) 1993
.\" The Regents of the University of California. All rights reserved.
@@ -27,7 +27,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd $Mdocdate: September 16 2025 $
.Dd $Mdocdate: March 31 2026 $
.Dt SYSCTL 2
.Os
.Sh NAME
@@ -274,6 +274,7 @@ privileges may change the value.
.It Dv HW_SETPERF Ta "integer" Ta "yes"
.It Dv HW_SMT Ta "integer" Ta "yes"
.It Dv HW_UCOMNAMES Ta "string" Ta "no"
.It Dv HW_BLOCKCPU Ta "string" Ta "yes"
.It Dv HW_USERMEM Ta "integer" Ta "no"
.It Dv HW_USERMEM64 Ta "int64_t" Ta "no"
.It Dv HW_UUID Ta "string" Ta "no"
@@ -421,7 +422,21 @@ is set to
.It Dv HW_SMT Pq Va hw.smt
If set to 1, enable simultaneous multithreading (SMT) on CPUs that
support it.
Disabled by default.
Deprecated, use
.Va hw.blockcpu
instead.
.It Dv HW_BLOCKCPU Pq Va hw.blockcpu
A series of characters
.Ar ( S
(SMT),
.Ar P
(Performance core),
.Ar E
(Efficiency core),
and
.Ar L
(Lethargic, Low Power Efficiency core)
representing CPU types to avoid scheduling tasks onto.
.It Dv HW_UCOMNAMES Pq Va hw.ucomnames
A comma-separated list of currently attached
.Xr ucom 4

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: identcpu.c,v 1.152 2025/09/14 15:52:28 mlarkin Exp $ */
/* $OpenBSD: identcpu.c,v 1.153 2026/03/31 16:46:22 deraadt Exp $ */
/* $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */
/*
@@ -831,6 +831,7 @@ cpu_topology(struct cpu_info *ci)
u_int32_t apicid, max_apicid = 0, max_coreid = 0;
u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;
char type[8], *typ = type;
/* We need at least apicid at CPUID 1 */
if (ci->ci_cpuid_level < 1)
@@ -865,7 +866,10 @@ cpu_topology(struct cpu_info *ci)
/* Cut logical thread_id into core id, and smt id in a core */
ci->ci_core_id = thread_id / nthreads;
ci->ci_smt_id = thread_id % nthreads;
if (ci->ci_smt_id) {
ci->ci_cputype |= CPUTYP_SMT;
*typ++ = 'S';
}
} else if (ci->ci_vendor == CPUV_INTEL) {
/* We only support leaf 1/4 detection */
if (ci->ci_cpuid_level < 4)
@@ -888,10 +892,36 @@ cpu_topology(struct cpu_info *ci)
pkg_mask = ~0U << core_bits;
ci->ci_smt_id = apicid & smt_mask;
if (ci->ci_smt_id) {
ci->ci_cputype |= CPUTYP_SMT;
*typ++ = 'S';
}
ci->ci_core_id = (apicid & core_mask) >> smt_bits;
ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
if (ci->ci_cpuid_level >= 0x1a) {
CPUID_LEAF(0x1a, 0, eax, ebx, ecx, edx);
if ((eax >> 24) == 0x20) {
CPUID_LEAF(4, 3, eax, ebx, ecx, edx);
if (eax == 0) {
/* No L3 cache is classified as Lethargic */
ci->ci_cputype |= CPUTYP_L;
*typ++ = 'L';
} else {
ci->ci_cputype |= CPUTYP_E;
*typ++ = 'E';
}
}
}
} else
goto no_topology;
if ((ci->ci_cputype & (CPUTYP_E | CPUTYP_L)) == 0) {
ci->ci_cputype |= CPUTYP_P;
*typ++ = 'P';
}
*typ ='\0';
#ifdef DEBUG
printf("cpu%d: smt %u, core %u, pkg %u "
"(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
@@ -900,14 +930,15 @@ cpu_topology(struct cpu_info *ci)
apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
core_mask, pkg_bits, pkg_mask);
#else
printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
printf("cpu%d: smt %u, core %u, package %u, type %s\n", ci->ci_cpuid,
ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id, type);
#endif
return;
/* We can't map, so consider ci_core_id as ci_cpuid */
no_topology:
#endif
ci->ci_cputype = CPUTYP_P;
ci->ci_smt_id = 0;
ci->ci_core_id = ci->ci_cpuid;
ci->ci_pkg_id = 0;

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: machdep.c,v 1.307 2026/03/11 16:18:42 kettenis Exp $ */
/* $OpenBSD: machdep.c,v 1.308 2026/03/31 16:46:22 deraadt Exp $ */
/* $NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $ */
/*-
@@ -1483,6 +1483,8 @@ init_x86_64(paddr_t first_avail)
int x, ist;
uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30;
sched_blockcpu = CPUTYP_SMT | CPUTYP_L;
/*
* locore0 mapped 2 pages for use as GHCB before pmap is initialized.
*/

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: cpu.h,v 1.184 2026/03/11 16:18:42 kettenis Exp $ */
/* $OpenBSD: cpu.h,v 1.185 2026/03/31 16:46:22 deraadt Exp $ */
/* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */
/*-
@@ -188,6 +188,7 @@ struct cpu_info {
int ci_inatomic; /* [o] */
#define __HAVE_CPU_TOPOLOGY
u_int32_t ci_cputype; /* [I] */
u_int32_t ci_smt_id; /* [I] */
u_int32_t ci_core_id; /* [I] */
u_int32_t ci_pkg_id; /* [I] */

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: kern_sched.c,v 1.113 2025/06/12 20:37:58 deraadt Exp $ */
/* $OpenBSD: kern_sched.c,v 1.114 2026/03/31 16:46:22 deraadt Exp $ */
/*
* Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
*
@@ -54,8 +54,9 @@ uint64_t sched_stolen; /* Times we stole proc from other cpus */
uint64_t sched_choose; /* Times we chose a cpu */
uint64_t sched_wasidle; /* Times we came out of idle */
/* Only schedule processes on sibling CPU threads when true. */
int sched_smt;
#ifdef __HAVE_CPU_TOPOLOGY
int sched_blockcpu; /* Types of cpu to not schedule on */
#endif
/*
* A few notes about cpu_switchto that is implemented in MD code.
@@ -153,7 +154,7 @@ sched_idle(void *v)
* After that just go away and properly reenter once idle.
*/
#ifdef __HAVE_CPU_TOPOLOGY
if (sched_smt || ci->ci_smt_id == 0)
if ((ci->ci_cputype & sched_blockcpu) == 0)
cpuset_add(&sched_all_cpus, ci);
#else
cpuset_add(&sched_all_cpus, ci);
@@ -659,7 +660,7 @@ sched_start_secondary_cpus(void)
atomic_clearbits_int(&spc->spc_schedflags,
SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
if (!sched_smt && ci->ci_smt_id > 0)
if (ci->ci_cputype & sched_blockcpu)
continue;
#endif
cpuset_add(&sched_all_cpus, ci);
@@ -847,33 +848,108 @@ cpu_is_online(struct cpu_info *ci)
#ifndef SMALL_KERNEL
int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
sched_cpuadjust(int newblockcpu)
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
int err, newsmt;
int inset;
newsmt = sched_smt;
err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
if (err)
return err;
if (newsmt == sched_smt)
if (newblockcpu == sched_blockcpu)
return 0;
sched_smt = newsmt;
sched_blockcpu = newblockcpu;
CPU_INFO_FOREACH(cii, ci) {
if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
continue;
if (ci->ci_smt_id == 0)
continue;
if (sched_smt)
cpuset_add(&sched_all_cpus, ci);
else
cpuset_del(&sched_all_cpus, ci);
inset = cpuset_isset(&sched_all_cpus, ci);
if (ci->ci_cputype & sched_blockcpu) {
if (inset)
cpuset_del(&sched_all_cpus, ci);
} else {
if (!inset)
cpuset_add(&sched_all_cpus, ci);
}
}
return 0;
}
/* emulate hw.smt temporarily */
int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
int err, newsmt = 1, newblockcpu = 0;
#ifdef CPUTYP_SMT
if (sched_blockcpu & CPUTYP_SMT)
newsmt = 0;
#endif
err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
if (err || newp == NULL)
return err;
#ifdef CPUTYP_SMT
if (newsmt)
newblockcpu &= ~CPUTYP_SMT;
#endif
return sched_cpuadjust(newblockcpu);
}
/*
 * hw.blockcpu sysctl handler.
 *
 * Renders the cpu types set in sched_blockcpu as a string of letters
 * (S, P, E, L).  When a new value is supplied, the string is parsed back
 * into a CPUTYP_* mask and handed to sched_cpuadjust().  Returns EINVAL
 * if the new string contains a character that is not a known type
 * letter, otherwise the result of the sysctl helper or sched_cpuadjust().
 */
int
sysctl_hwblockcpu(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	int err, newblockcpu;
	char type[8], *typ = type;

	/* Current mask as text: at most four letters plus the NUL. */
#ifdef CPUTYP_SMT
	if (sched_blockcpu & CPUTYP_SMT)
		*typ++ = 'S';
#endif
#ifdef CPUTYP_P
	if (sched_blockcpu & CPUTYP_P)
		*typ++ = 'P';
#endif
#ifdef CPUTYP_E
	if (sched_blockcpu & CPUTYP_E)
		*typ++ = 'E';
#endif
#ifdef CPUTYP_L
	if (sched_blockcpu & CPUTYP_L)
		*typ++ = 'L';
#endif
	*typ = '\0';

	if (newp == NULL)
		return sysctl_rdstring(oldp, oldlenp, newp, type);

	err = sysctl_string(oldp, oldlenp, newp, newlen, type, sizeof type);
	if (err)
		return err;

	/* type[] now holds the new value; rebuild the mask from scratch. */
	for (newblockcpu = 0, typ = type; *typ; typ++) {
		switch (*typ) {
#ifdef CPUTYP_SMT
		case 'S':
			newblockcpu |= CPUTYP_SMT;
			break;
#endif
#ifdef CPUTYP_P
		case 'P':
			newblockcpu |= CPUTYP_P;
			break;
#endif
		/* Each letter is guarded by the macro it actually uses. */
#ifdef CPUTYP_E
		case 'E':
			newblockcpu |= CPUTYP_E;
			break;
#endif
#ifdef CPUTYP_L
		case 'L':
			newblockcpu |= CPUTYP_L;
			break;
#endif
		default:
			return (EINVAL);
		}
	}
	return sched_cpuadjust(newblockcpu);
}
#endif /* SMALL_KERNEL */
#endif /* __HAVE_CPU_TOPOLOGY */

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: kern_sysctl.c,v 1.486 2026/03/25 05:05:41 deraadt Exp $ */
/* $OpenBSD: kern_sysctl.c,v 1.487 2026/03/31 16:46:22 deraadt Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
@@ -884,6 +884,7 @@ hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
case HW_UCOMNAMES:
#ifdef __HAVE_CPU_TOPOLOGY
case HW_SMT:
case HW_BLOCKCPU:
#endif
#endif /* !SMALL_KERNEL */
{
@@ -984,6 +985,8 @@ hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
#ifdef __HAVE_CPU_TOPOLOGY
case HW_SMT:
return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
case HW_BLOCKCPU:
return (sysctl_hwblockcpu(oldp, oldlenp, newp, newlen));
#endif
case HW_BATTERY:
return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: sched.h,v 1.77 2025/06/09 10:57:46 claudio Exp $ */
/* $OpenBSD: sched.h,v 1.78 2026/03/31 16:46:21 deraadt Exp $ */
/* $NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $ */
/*-
@@ -179,8 +179,15 @@ void sched_barrier(struct cpu_info *ci);
int sysctl_hwsetperf(void *, size_t *, void *, size_t);
int sysctl_hwperfpolicy(void *, size_t *, void *, size_t);
int sysctl_hwsmt(void *, size_t *, void *, size_t);
int sysctl_hwblockcpu(void *, size_t *, void *, size_t);
int sysctl_hwncpuonline(void);
/*
 * CPU type bits.  MD code ORs these into ci_cputype for each cpu; the
 * scheduler leaves a cpu out of sched_all_cpus when any of its type bits
 * is also set in sched_blockcpu.
 */
#define CPUTYP_SMT 0x01 /* SMT cpu */
#define CPUTYP_P 0x02 /* Performance core */
#define CPUTYP_E 0x04 /* Efficiency core */
#define CPUTYP_L 0x08 /* Lethargic, Low Power Efficiency core */
/* Mask of CPUTYP_* bits naming the cpu types not to schedule on. */
extern int sched_blockcpu;
#ifdef MULTIPROCESSOR
void sched_start_secondary_cpus(void);
void sched_stop_secondary_cpus(void);

View File

@@ -1,4 +1,4 @@
/* $OpenBSD: sysctl.h,v 1.246 2025/07/31 09:05:11 mvs Exp $ */
/* $OpenBSD: sysctl.h,v 1.247 2026/03/31 16:46:21 deraadt Exp $ */
/* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */
/*
@@ -926,7 +926,8 @@ struct kinfo_file {
#define HW_POWER 26 /* int: machine has wall-power */
#define HW_BATTERY 27 /* node: battery */
#define HW_UCOMNAMES 28 /* strings: ucom names */
#define HW_MAXID 29 /* number of valid hw ids */
#define HW_BLOCKCPU 29 /* string: cpu types to block */
#define HW_MAXID 30 /* number of valid hw ids */
#define CTL_HW_NAMES { \
{ 0, 0 }, \
@@ -958,6 +959,7 @@ struct kinfo_file {
{ "power", CTLTYPE_INT }, \
{ "battery", CTLTYPE_NODE }, \
{ "ucomnames", CTLTYPE_STRING }, \
{ "blockcpu", CTLTYPE_STRING }, \
}
/*