diff --git a/lib/libc/sys/sysctl.2 b/lib/libc/sys/sysctl.2
index ce4da218d63..a0d3dd438ae 100644
--- a/lib/libc/sys/sysctl.2
+++ b/lib/libc/sys/sysctl.2
@@ -1,4 +1,4 @@
-.\"	$OpenBSD: sysctl.2,v 1.70 2025/09/16 09:19:43 florian Exp $
+.\"	$OpenBSD: sysctl.2,v 1.71 2026/03/31 16:46:22 deraadt Exp $
 .\"
 .\" Copyright (c) 1993
 .\"	The Regents of the University of California.  All rights reserved.
@@ -27,7 +27,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd $Mdocdate: September 16 2025 $
+.Dd $Mdocdate: March 31 2026 $
 .Dt SYSCTL 2
 .Os
 .Sh NAME
@@ -274,6 +274,7 @@ privileges may change the value.
 .It Dv HW_SETPERF Ta "integer" Ta "yes"
 .It Dv HW_SMT Ta "integer" Ta "yes"
 .It Dv HW_UCOMNAMES Ta "string" Ta "no"
+.It Dv HW_BLOCKCPU Ta "string" Ta "yes"
 .It Dv HW_USERMEM Ta "integer" Ta "no"
 .It Dv HW_USERMEM64 Ta "int64_t" Ta "no"
 .It Dv HW_UUID Ta "string" Ta "no"
@@ -421,7 +422,21 @@ is set to
 .It Dv HW_SMT Pq Va hw.smt
 If set to 1, enable simultaneous multithreading (SMT) on CPUs that
 support it.
-Disabled by default.
+Deprecated, use
+.Va hw.blockcpu
+instead.
+.It Dv HW_BLOCKCPU Pq Va hw.blockcpu
+A series of characters
+.Ar S
+(SMT),
+.Ar P
+(Performance core),
+.Ar E
+(Efficiency core),
+and
+.Ar L
+(Lethargic, Low Power Efficiency core)
+representing CPU types to avoid scheduling tasks onto.
.It Dv HW_UCOMNAMES Pq Va hw.ucomnames A comma-separated list of currently attached .Xr ucom 4 diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c index ac4e845a1aa..527a199a3ee 100644 --- a/sys/arch/amd64/amd64/identcpu.c +++ b/sys/arch/amd64/amd64/identcpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: identcpu.c,v 1.152 2025/09/14 15:52:28 mlarkin Exp $ */ +/* $OpenBSD: identcpu.c,v 1.153 2026/03/31 16:46:22 deraadt Exp $ */ /* $NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $ */ /* @@ -831,6 +831,7 @@ cpu_topology(struct cpu_info *ci) u_int32_t apicid, max_apicid = 0, max_coreid = 0; u_int32_t smt_bits = 0, core_bits, pkg_bits = 0; u_int32_t smt_mask = 0, core_mask, pkg_mask = 0; + char type[8], *typ = type; /* We need at least apicid at CPUID 1 */ if (ci->ci_cpuid_level < 1) @@ -865,7 +866,10 @@ cpu_topology(struct cpu_info *ci) /* Cut logical thread_id into core id, and smt id in a core */ ci->ci_core_id = thread_id / nthreads; - ci->ci_smt_id = thread_id % nthreads; + if (ci->ci_smt_id) { + ci->ci_cputype |= CPUTYP_SMT; + *typ++ = 'S'; + } } else if (ci->ci_vendor == CPUV_INTEL) { /* We only support leaf 1/4 detection */ if (ci->ci_cpuid_level < 4) @@ -888,10 +892,36 @@ cpu_topology(struct cpu_info *ci) pkg_mask = ~0U << core_bits; ci->ci_smt_id = apicid & smt_mask; + if (ci->ci_smt_id) { + ci->ci_cputype |= CPUTYP_SMT; + *typ++ = 'S'; + } ci->ci_core_id = (apicid & core_mask) >> smt_bits; ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits; + + if (ci->ci_cpuid_level >= 0x1a) { + CPUID_LEAF(0x1a, 0, eax, ebx, ecx, edx); + if ((eax >> 24) == 0x20) { + CPUID_LEAF(4, 3, eax, ebx, ecx, edx); + if (eax == 0) { + /* No L3 cache is classified as Lethargic */ + ci->ci_cputype |= CPUTYP_L; + *typ++ = 'L'; + } else { + ci->ci_cputype |= CPUTYP_E; + *typ++ = 'E'; + } + } + } + } else goto no_topology; + if ((ci->ci_cputype & (CPUTYP_E | CPUTYP_L)) == 0) { + ci->ci_cputype |= CPUTYP_P; + *typ++ = 'P'; + } + *typ ='\0'; + #ifdef DEBUG printf("cpu%d: smt 
%u, core %u, pkg %u "
 	    "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
@@ -900,14 +930,15 @@ cpu_topology(struct cpu_info *ci)
 	    apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
 	    core_mask, pkg_bits, pkg_mask);
 #else
-	printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
-	    ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
+	printf("cpu%d: smt %u, core %u, package %u, type %s\n", ci->ci_cpuid,
+	    ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id, type);
 #endif
 
 	return;
 	/* We can't map, so consider ci_core_id as ci_cpuid */
 no_topology:
 #endif
+	ci->ci_cputype = CPUTYP_P;
 	ci->ci_smt_id = 0;
 	ci->ci_core_id = ci->ci_cpuid;
 	ci->ci_pkg_id = 0;
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 33bb4dfc47b..98eddde4ab0 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: machdep.c,v 1.307 2026/03/11 16:18:42 kettenis Exp $	*/
+/*	$OpenBSD: machdep.c,v 1.308 2026/03/31 16:46:22 deraadt Exp $	*/
 /*	$NetBSD: machdep.c,v 1.3 2003/05/07 22:58:18 fvdl Exp $	*/
 
 /*-
@@ -1483,6 +1483,8 @@ init_x86_64(paddr_t first_avail)
 	int x, ist;
 	uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30;
 
+	sched_blockcpu = CPUTYP_SMT | CPUTYP_L;
+
 	/*
 	 * locore0 mapped 2 pages for use as GHCB before pmap is initialized.
*/ diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h index c0df3670517..15423f5d5f2 100644 --- a/sys/arch/amd64/include/cpu.h +++ b/sys/arch/amd64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.184 2026/03/11 16:18:42 kettenis Exp $ */ +/* $OpenBSD: cpu.h,v 1.185 2026/03/31 16:46:22 deraadt Exp $ */ /* $NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $ */ /*- @@ -188,6 +188,7 @@ struct cpu_info { int ci_inatomic; /* [o] */ #define __HAVE_CPU_TOPOLOGY + u_int32_t ci_cputype; /* [I] */ u_int32_t ci_smt_id; /* [I] */ u_int32_t ci_core_id; /* [I] */ u_int32_t ci_pkg_id; /* [I] */ diff --git a/sys/kern/kern_sched.c b/sys/kern/kern_sched.c index 74183e6bb68..ead659aafb5 100644 --- a/sys/kern/kern_sched.c +++ b/sys/kern/kern_sched.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sched.c,v 1.113 2025/06/12 20:37:58 deraadt Exp $ */ +/* $OpenBSD: kern_sched.c,v 1.114 2026/03/31 16:46:22 deraadt Exp $ */ /* * Copyright (c) 2007, 2008 Artur Grabowski * @@ -54,8 +54,9 @@ uint64_t sched_stolen; /* Times we stole proc from other cpus */ uint64_t sched_choose; /* Times we chose a cpu */ uint64_t sched_wasidle; /* Times we came out of idle */ -/* Only schedule processes on sibling CPU threads when true. */ -int sched_smt; +#ifdef __HAVE_CPU_TOPOLOGY +int sched_blockcpu; /* Types of cpu to not schedule on */ +#endif /* * A few notes about cpu_switchto that is implemented in MD code. @@ -153,7 +154,7 @@ sched_idle(void *v) * After that just go away and properly reenter once idle. 
*/ #ifdef __HAVE_CPU_TOPOLOGY - if (sched_smt || ci->ci_smt_id == 0) + if ((ci->ci_cputype & sched_blockcpu) == 0) cpuset_add(&sched_all_cpus, ci); #else cpuset_add(&sched_all_cpus, ci); @@ -659,7 +660,7 @@ sched_start_secondary_cpus(void) atomic_clearbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT | SPCF_HALTED); #ifdef __HAVE_CPU_TOPOLOGY - if (!sched_smt && ci->ci_smt_id > 0) + if (ci->ci_cputype & sched_blockcpu) continue; #endif cpuset_add(&sched_all_cpus, ci); @@ -847,33 +848,108 @@ cpu_is_online(struct cpu_info *ci) #ifndef SMALL_KERNEL int -sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen) +sched_cpuadjust(int newblockcpu) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; - int err, newsmt; + int inset; - newsmt = sched_smt; - err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1); - if (err) - return err; - if (newsmt == sched_smt) + if (newblockcpu == sched_blockcpu) return 0; - - sched_smt = newsmt; + sched_blockcpu = newblockcpu; CPU_INFO_FOREACH(cii, ci) { if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) continue; - if (ci->ci_smt_id == 0) - continue; - if (sched_smt) - cpuset_add(&sched_all_cpus, ci); - else - cpuset_del(&sched_all_cpus, ci); + inset = cpuset_isset(&sched_all_cpus, ci); + if (ci->ci_cputype & sched_blockcpu) { + if (inset) + cpuset_del(&sched_all_cpus, ci); + } else { + if (!inset) + cpuset_add(&sched_all_cpus, ci); + } } - return 0; } + +/* emulate hw.smt temporarily */ +int +sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int err, newsmt = 1, newblockcpu = 0; + +#ifdef CPUTYP_SMT + if (sched_blockcpu & CPUTYP_SMT) + newsmt = 0; +#endif + err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1); + if (err || newp == NULL) + return err; +#ifdef CPUTYP_SMT + if (newsmt) + newblockcpu &= ~CPUTYP_SMT; +#endif + return sched_cpuadjust(newblockcpu); +} + +int +sysctl_hwblockcpu(void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int err, newblockcpu; + char type[8], 
*typ = type;
+
+#ifdef CPUTYP_SMT
+	if (sched_blockcpu & CPUTYP_SMT)
+		*typ++ = 'S';
+#endif
+#ifdef CPUTYP_P
+	if (sched_blockcpu & CPUTYP_P)
+		*typ++ = 'P';
+#endif
+#ifdef CPUTYP_E
+	if (sched_blockcpu & CPUTYP_E)
+		*typ++ = 'E';
+#endif
+#ifdef CPUTYP_L
+	if (sched_blockcpu & CPUTYP_L)
+		*typ++ = 'L';
+#endif
+	*typ = '\0';
+	if (newp == NULL)
+		return sysctl_rdstring(oldp, oldlenp, newp, type);
+
+	err = sysctl_string(oldp, oldlenp, newp, newlen, type, sizeof type);
+	if (err)
+		return err;
+	for (newblockcpu = 0, typ = type; *typ; typ++) {
+		switch (*typ) {
+#ifdef CPUTYP_SMT
+		case 'S':
+			newblockcpu |= CPUTYP_SMT;
+			break;
+#endif
+#ifdef CPUTYP_P
+		case 'P':
+			newblockcpu |= CPUTYP_P;
+			break;
+#endif
+#ifdef CPUTYP_E
+		case 'E':
+			newblockcpu |= CPUTYP_E;
+			break;
+#endif
+#ifdef CPUTYP_L
+		case 'L':
+			newblockcpu |= CPUTYP_L;
+			break;
+#endif
+		default:
+			return (EINVAL);
+		}
+	}
+	return sched_cpuadjust(newblockcpu);
+}
+
 #endif /* SMALL_KERNEL */
 #endif /* __HAVE_CPU_TOPOLOGY */
 
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index e196df44620..d15f359dc0e 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_sysctl.c,v 1.486 2026/03/25 05:05:41 deraadt Exp $	*/
+/*	$OpenBSD: kern_sysctl.c,v 1.487 2026/03/31 16:46:22 deraadt Exp $	*/
 /*	$NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $	*/
 
 /*-
@@ -884,6 +884,7 @@ hw_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 	case HW_UCOMNAMES:
 #ifdef __HAVE_CPU_TOPOLOGY
 	case HW_SMT:
+	case HW_BLOCKCPU:
 #endif
 #endif /* !SMALL_KERNEL */
 	{
@@ -984,6 +985,8 @@ hw_sysctl_locked(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 #ifdef __HAVE_CPU_TOPOLOGY
 	case HW_SMT:
 		return (sysctl_hwsmt(oldp, oldlenp, newp, newlen));
+	case HW_BLOCKCPU:
+		return (sysctl_hwblockcpu(oldp, oldlenp, newp, newlen));
 #endif
 	case HW_BATTERY:
 		return (sysctl_hwbattery(name + 1, namelen - 1, oldp, oldlenp,
diff --git a/sys/sys/sched.h 
b/sys/sys/sched.h
index 64c7044204c..6814a6bbd6b 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: sched.h,v 1.77 2025/06/09 10:57:46 claudio Exp $	*/
+/*	$OpenBSD: sched.h,v 1.78 2026/03/31 16:46:21 deraadt Exp $	*/
 /*	$NetBSD: sched.h,v 1.2 1999/02/28 18:14:58 ross Exp $	*/
 
 /*-
@@ -179,8 +179,15 @@ void sched_barrier(struct cpu_info *ci);
 int sysctl_hwsetperf(void *, size_t *, void *, size_t);
 int sysctl_hwperfpolicy(void *, size_t *, void *, size_t);
 int sysctl_hwsmt(void *, size_t *, void *, size_t);
+int sysctl_hwblockcpu(void *, size_t *, void *, size_t);
 int sysctl_hwncpuonline(void);
 
+#define CPUTYP_SMT	0x01	/* SMT cpu */
+#define CPUTYP_P	0x02	/* Performance core */
+#define CPUTYP_E	0x04	/* Efficiency core */
+#define CPUTYP_L	0x08	/* Lethargic, Low Power Efficiency core */
+extern int sched_blockcpu;
+
 #ifdef MULTIPROCESSOR
 void sched_start_secondary_cpus(void);
 void sched_stop_secondary_cpus(void);
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 5dd7eebfa38..90682f2c80d 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: sysctl.h,v 1.246 2025/07/31 09:05:11 mvs Exp $	*/
+/*	$OpenBSD: sysctl.h,v 1.247 2026/03/31 16:46:21 deraadt Exp $	*/
 /*	$NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $	*/
 
 /*
@@ -926,7 +926,8 @@ struct kinfo_file {
 #define	HW_POWER		26	/* int: machine has wall-power */
 #define	HW_BATTERY		27	/* node: battery */
 #define	HW_UCOMNAMES		28	/* strings: ucom names */
-#define	HW_MAXID		29	/* number of valid hw ids */
+#define	HW_BLOCKCPU		29	/* string: cpu types to block */
+#define	HW_MAXID		30	/* number of valid hw ids */
 
 #define	CTL_HW_NAMES { \
 	{ 0, 0 }, \
@@ -958,6 +959,7 @@ struct kinfo_file {
 	{ "power", CTLTYPE_INT }, \
 	{ "battery", CTLTYPE_NODE }, \
 	{ "ucomnames", CTLTYPE_STRING }, \
+	{ "blockcpu", CTLTYPE_STRING }, \
 }
 
 /*