cpu.go

  1// Copyright 2018 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package cpu implements processor feature detection for
  6// various CPU architectures.
  7package cpu
  8
  9import (
 10	"os"
 11	"strings"
 12)
 13
// Initialized reports whether the CPU features were initialized.
//
// For some GOOS/GOARCH combinations initialization of the CPU features depends
// on reading an operating system specific file, e.g. /proc/self/auxv on
// linux/arm. Initialized will report false if reading the file fails.
var Initialized bool
 20
// CacheLinePad is used to pad structs to avoid false sharing.
// It occupies cacheLineSize bytes; cacheLineSize is declared outside this
// section of the file.
type CacheLinePad struct{ _ [cacheLineSize]byte }
 23
// X86 contains the supported CPU features of the
// current X86/AMD64 platform. If the current platform
// is not X86/AMD64 then all feature flags are false.
//
// X86 is padded (a CacheLinePad before and after the flags) to avoid
// false sharing. Further, HasAVX and HasAVX2 are only set if the OS
// supports XMM and YMM registers in addition to the CPUID feature bit
// being set.
var X86 struct {
	_                   CacheLinePad
	HasAES              bool // AES hardware implementation (AES NI)
	HasADX              bool // Multi-precision add-carry instruction extensions
	HasAVX              bool // Advanced vector extension
	HasAVX2             bool // Advanced vector extension 2
	HasAVX512           bool // Advanced vector extension 512
	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions
	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
	HasAVX512IFMA       bool // Advanced vector extension 512 Integer Fused Multiply Add
	HasAVX512VBMI       bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
	HasAVX5124VNNIW     bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
	HasAVX5124FMAPS     bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
	HasAVX512VPOPCNTDQ  bool // Advanced vector extension 512 Double and quad word population count instructions
	HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
	HasAVX512VNNI       bool // Advanced vector extension 512 Vector Neural Network Instructions
	HasAVX512GFNI       bool // Advanced vector extension 512 Galois field New Instructions
	HasAVX512VAES       bool // Advanced vector extension 512 Vector AES instructions
	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
	HasAMXTile          bool // Advanced Matrix Extension Tile instructions
	HasAMXInt8          bool // Advanced Matrix Extension Int8 instructions
	HasAMXBF16          bool // Advanced Matrix Extension BFloat16 instructions
	HasBMI1             bool // Bit manipulation instruction set 1
	HasBMI2             bool // Bit manipulation instruction set 2
	HasCX16             bool // Compare and exchange 16 Bytes
	HasERMS             bool // Enhanced REP for MOVSB and STOSB
	HasFMA              bool // Fused-multiply-add instructions
	HasOSXSAVE          bool // OS supports XSAVE/XRSTOR for saving/restoring XMM registers.
	HasPCLMULQDQ        bool // PCLMULQDQ instruction - most often used for AES-GCM
	HasPOPCNT           bool // Hamming weight instruction POPCNT.
	HasRDRAND           bool // RDRAND instruction (on-chip random number generator)
	HasRDSEED           bool // RDSEED instruction (on-chip random number generator)
	HasSSE2             bool // Streaming SIMD extension 2 (always available on amd64)
	HasSSE3             bool // Streaming SIMD extension 3
	HasSSSE3            bool // Supplemental streaming SIMD extension 3
	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
	HasAVXIFMA          bool // Advanced vector extension Integer Fused Multiply Add
	HasAVXVNNI          bool // Advanced vector extension Vector Neural Network Instructions
	HasAVXVNNIInt8      bool // Advanced vector extension Vector Neural Network Int8 instructions
	_                   CacheLinePad
}
 80
// ARM64 contains the supported CPU features of the
// current ARMv8(aarch64) platform. If the current platform
// is not arm64 then all feature flags are false.
//
// ARM64 is padded (a CacheLinePad before and after the flags) to avoid
// false sharing.
var ARM64 struct {
	_           CacheLinePad
	HasFP       bool // Floating-point instruction set (always available)
	HasASIMD    bool // Advanced SIMD (always available)
	HasEVTSTRM  bool // Event stream support
	HasAES      bool // AES hardware implementation
	HasPMULL    bool // Polynomial multiplication instruction set
	HasSHA1     bool // SHA1 hardware implementation
	HasSHA2     bool // SHA2 hardware implementation
	HasCRC32    bool // CRC32 hardware implementation
	HasATOMICS  bool // Atomic memory operation instruction set
	HasFPHP     bool // Half precision floating-point instruction set
	HasASIMDHP  bool // Advanced SIMD half precision instruction set
	HasCPUID    bool // CPUID identification scheme registers
	HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
	HasJSCVT    bool // Javascript conversion from floating-point to integer
	HasFCMA     bool // Floating-point multiplication and addition of complex numbers
	HasLRCPC    bool // Release Consistent processor consistent support
	HasDCPOP    bool // Persistent memory support
	HasSHA3     bool // SHA3 hardware implementation
	HasSM3      bool // SM3 hardware implementation
	HasSM4      bool // SM4 hardware implementation
	HasASIMDDP  bool // Advanced SIMD double precision instruction set (NOTE(review): "DP" likely denotes the dot product extension - confirm)
	HasSHA512   bool // SHA512 hardware implementation
	HasSVE      bool // Scalable Vector Extensions
	HasSVE2     bool // Scalable Vector Extensions 2
	HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
	HasDIT      bool // Data Independent Timing support
	HasI8MM     bool // Advanced SIMD Int8 matrix multiplication instructions
	_           CacheLinePad
}
115
// ARM contains the supported CPU features of the current ARM (32-bit) platform.
// All feature flags are false if:
//  1. the current platform is not arm, or
//  2. the current operating system is not Linux.
//
// ARM is padded (a CacheLinePad before and after the flags) to avoid
// false sharing.
var ARM struct {
	_           CacheLinePad
	HasSWP      bool // SWP instruction support
	HasHALF     bool // Half-word load and store support
	HasTHUMB    bool // ARM Thumb instruction set
	Has26BIT    bool // Address space limited to 26-bits
	HasFASTMUL  bool // 32-bit operand, 64-bit result multiplication support
	HasFPA      bool // Floating point arithmetic support
	HasVFP      bool // Vector floating point support
	HasEDSP     bool // DSP Extensions support
	HasJAVA     bool // Java instruction set
	HasIWMMXT   bool // Intel Wireless MMX technology support
	HasCRUNCH   bool // MaverickCrunch context switching and handling
	HasTHUMBEE  bool // Thumb EE instruction set
	HasNEON     bool // NEON instruction set
	HasVFPv3    bool // Vector floating point version 3 support
	HasVFPv3D16 bool // Vector floating point version 3 D8-D15
	HasTLS      bool // Thread local storage support
	HasVFPv4    bool // Vector floating point version 4 support
	HasIDIVA    bool // Integer divide instruction support in ARM mode
	HasIDIVT    bool // Integer divide instruction support in Thumb mode
	HasVFPD32   bool // Vector floating point version 3 D15-D31
	HasLPAE     bool // Large Physical Address Extensions
	HasEVTSTRM  bool // Event stream support
	HasAES      bool // AES hardware implementation
	HasPMULL    bool // Polynomial multiplication instruction set
	HasSHA1     bool // SHA1 hardware implementation
	HasSHA2     bool // SHA2 hardware implementation
	HasCRC32    bool // CRC32 hardware implementation
	_           CacheLinePad
}
151
// Loong64 holds CPU feature flags. The booleans in Loong64 contain the
// correspondingly named cpu feature bit.
// The struct is padded (a CacheLinePad before and after the flags) to avoid
// false sharing.
var Loong64 struct {
	_         CacheLinePad
	HasLSX    bool // support 128-bit vector extension
	HasLASX   bool // support 256-bit vector extension
	HasCRC32  bool // support CRC instruction
	HasLAM_BH bool // support AM{SWAP/ADD}[_DB].{B/H} instruction
	HasLAMCAS bool // support AMCAS[_DB].{B/H/W/D} instruction
	_         CacheLinePad
}
163
// MIPS64X contains the supported CPU features of the current mips64/mips64le
// platforms. If the current platform is not mips64/mips64le or the current
// operating system is not Linux then all feature flags are false.
//
// MIPS64X is padded (a CacheLinePad before and after the flag) to avoid
// false sharing.
var MIPS64X struct {
	_      CacheLinePad
	HasMSA bool // MIPS SIMD architecture
	_      CacheLinePad
}
172
// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
// If the current platform is not ppc64/ppc64le then all feature flags are false.
//
// For ppc64/ppc64le, it is safe to check only for the ISA level starting with
// ISA v3.00, since there are no optional categories. There are some exceptions
// that also require kernel support to work (DARN, SCV), so there are feature
// bits for those as well.
//
// The struct is padded (a CacheLinePad before and after the flags) to avoid
// false sharing.
var PPC64 struct {
	_        CacheLinePad
	HasDARN  bool // Hardware random number generator (requires kernel enablement)
	HasSCV   bool // Syscall vectored (requires kernel enablement)
	IsPOWER8 bool // ISA v2.07 (POWER8)
	IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
	_        CacheLinePad
}
188
// S390X contains the supported CPU features of the current IBM Z
// (s390x) platform. If the current platform is not IBM Z then all
// feature flags are false.
//
// S390X is padded (a CacheLinePad before and after the flags) to avoid
// false sharing. Further, HasVX is only set if the OS supports vector
// registers in addition to the STFLE feature bit being set.
var S390X struct {
	_         CacheLinePad
	HasZARCH  bool // z/Architecture mode is active [mandatory]
	HasSTFLE  bool // store facility list extended
	HasLDISP  bool // long (20-bit) displacements
	HasEIMM   bool // 32-bit immediates
	HasDFP    bool // decimal floating point
	HasETF3EH bool // ETF-3 enhanced
	HasMSA    bool // message security assist (CPACF)
	HasAES    bool // KM-AES{128,192,256} functions
	HasAESCBC bool // KMC-AES{128,192,256} functions
	HasAESCTR bool // KMCTR-AES{128,192,256} functions
	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
	HasGHASH  bool // KIMD-GHASH function
	HasSHA1   bool // K{I,L}MD-SHA-1 functions
	HasSHA256 bool // K{I,L}MD-SHA-256 functions
	HasSHA512 bool // K{I,L}MD-SHA-512 functions
	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
	HasVX     bool // vector facility
	HasVXE    bool // vector-enhancements facility 1
	_         CacheLinePad
}
218
// RISCV64 contains the supported CPU features and performance characteristics for riscv64
// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate
// the presence of RISC-V extensions; HasFastMisaligned describes a performance
// characteristic instead.
//
// It is safe to assume that all the RV64G extensions are supported and so they are omitted from
// this structure. As riscv64 Go programs require at least RV64G, the code that populates
// this structure cannot run successfully if some of the RV64G extensions are missing.
//
// The struct is padded (a CacheLinePad before and after the flags) to avoid false sharing.
var RISCV64 struct {
	_                 CacheLinePad
	HasFastMisaligned bool // Fast misaligned accesses
	HasC              bool // Compressed instruction-set extension
	HasV              bool // Vector extension compatible with RVV 1.0
	HasZba            bool // Address generation instructions extension
	HasZbb            bool // Basic bit-manipulation extension
	HasZbs            bool // Single-bit instructions extension
	HasZvbb           bool // Vector Basic Bit-manipulation
	HasZvbc           bool // Vector Carryless Multiplication
	HasZvkb           bool // Vector Cryptography Bit-manipulation
	HasZvkt           bool // Vector Data-Independent Execution Latency
	HasZvkg           bool // Vector GCM/GMAC
	HasZvkn           bool // NIST Algorithm Suite (AES/SHA256/SHA512)
	HasZvknc          bool // NIST Algorithm Suite with carryless multiply
	HasZvkng          bool // NIST Algorithm Suite with GCM
	HasZvks           bool // ShangMi Algorithm Suite
	HasZvksc          bool // ShangMi Algorithm Suite with carryless multiplication
	HasZvksg          bool // ShangMi Algorithm Suite with GCM
	_                 CacheLinePad
}
248
// init runs the architecture specific setup and then applies any GODEBUG
// cpu.* overrides.
//
// The call order matters: processOptions walks the options slice and reads
// and writes the feature flags its entries point at, so it has to run last.
// NOTE(review): archInit and initOptions are defined outside this section;
// presumably archInit detects the features and initOptions fills options -
// confirm against the arch specific files.
func init() {
	archInit()
	initOptions()
	processOptions()
}
254
// options contains the cpu debug options that can be used in GODEBUG
// (as cpu.<name>=on or cpu.<name>=off entries).
// Options are arch dependent and are added by the arch specific initOptions functions.
// Features that are mandatory for the specific GOARCH should have the Required field set
// (e.g. SSE2 on amd64).
var options []option
260
// option describes a single cpu feature that can be toggled through GODEBUG.
// Option names should be lower case. e.g. avx instead of AVX.
type option struct {
	Name      string // compared against the text between "cpu." and "=" in a GODEBUG entry
	Feature   *bool  // feature flag that processOptions overwrites with Enable
	Specified bool   // whether feature value was specified in GODEBUG
	Enable    bool   // whether feature should be enabled
	Required  bool   // whether feature is mandatory and can not be disabled
}
269
270func processOptions() {
271	env := os.Getenv("GODEBUG")
272field:
273	for env != "" {
274		field := ""
275		i := strings.IndexByte(env, ',')
276		if i < 0 {
277			field, env = env, ""
278		} else {
279			field, env = env[:i], env[i+1:]
280		}
281		if len(field) < 4 || field[:4] != "cpu." {
282			continue
283		}
284		i = strings.IndexByte(field, '=')
285		if i < 0 {
286			print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n")
287			continue
288		}
289		key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
290
291		var enable bool
292		switch value {
293		case "on":
294			enable = true
295		case "off":
296			enable = false
297		default:
298			print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
299			continue field
300		}
301
302		if key == "all" {
303			for i := range options {
304				options[i].Specified = true
305				options[i].Enable = enable || options[i].Required
306			}
307			continue field
308		}
309
310		for i := range options {
311			if options[i].Name == key {
312				options[i].Specified = true
313				options[i].Enable = enable
314				continue field
315			}
316		}
317
318		print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n")
319	}
320
321	for _, o := range options {
322		if !o.Specified {
323			continue
324		}
325
326		if o.Enable && !*o.Feature {
327			print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n")
328			continue
329		}
330
331		if !o.Enable && o.Required {
332			print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n")
333			continue
334		}
335
336		*o.Feature = o.Enable
337	}
338}