Commit 388ff6bc authored by Matthew Holt

diagnostics: Implemented collection functions and create first metrics

- Also implemented robust error handling and failovers
- Vendored klauspost/cpuid
parent 8f0b44b8
...@@ -27,6 +27,7 @@ import ( ...@@ -27,6 +27,7 @@ import (
"strings" "strings"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/klauspost/cpuid"
"github.com/mholt/caddy" "github.com/mholt/caddy"
"github.com/mholt/caddy/caddytls" "github.com/mholt/caddy/caddytls"
"github.com/mholt/caddy/diagnostics" "github.com/mholt/caddy/diagnostics"
...@@ -51,6 +52,7 @@ func init() { ...@@ -51,6 +52,7 @@ func init() {
flag.StringVar(&caddytls.DefaultEmail, "email", "", "Default ACME CA account email address") flag.StringVar(&caddytls.DefaultEmail, "email", "", "Default ACME CA account email address")
flag.DurationVar(&acme.HTTPClient.Timeout, "catimeout", acme.HTTPClient.Timeout, "Default ACME CA HTTP timeout") flag.DurationVar(&acme.HTTPClient.Timeout, "catimeout", acme.HTTPClient.Timeout, "Default ACME CA HTTP timeout")
flag.StringVar(&logfile, "log", "", "Process log file") flag.StringVar(&logfile, "log", "", "Process log file")
flag.BoolVar(&noDiag, "no-diagnostics", false, "Disable diagnostic reporting")
flag.StringVar(&caddy.PidFile, "pidfile", "", "Path to write pid file") flag.StringVar(&caddy.PidFile, "pidfile", "", "Path to write pid file")
flag.BoolVar(&caddy.Quiet, "quiet", false, "Quiet mode (no initialization output)") flag.BoolVar(&caddy.Quiet, "quiet", false, "Quiet mode (no initialization output)")
flag.StringVar(&revoke, "revoke", "", "Hostname for which to revoke the certificate") flag.StringVar(&revoke, "revoke", "", "Hostname for which to revoke the certificate")
...@@ -88,7 +90,9 @@ func Run() { ...@@ -88,7 +90,9 @@ func Run() {
} }
// initialize diagnostics client // initialize diagnostics client
initDiagnostics() if !noDiag {
initDiagnostics()
}
// Check for one-time actions // Check for one-time actions
if revoke != "" { if revoke != "" {
...@@ -146,6 +150,23 @@ func Run() { ...@@ -146,6 +150,23 @@ func Run() {
// Execute instantiation events // Execute instantiation events
caddy.EmitEvent(caddy.InstanceStartupEvent, instance) caddy.EmitEvent(caddy.InstanceStartupEvent, instance)
// Begin diagnostics (these are no-ops if diagnostics disabled)
diagnostics.Set("caddy_version", appVersion)
// TODO: plugins
diagnostics.Set("num_listeners", len(instance.Servers()))
diagnostics.Set("os", runtime.GOOS)
diagnostics.Set("arch", runtime.GOARCH)
diagnostics.Set("cpu", struct {
NumLogical int `json:"num_logical"`
AESNI bool `json:"aes_ni"`
BrandName string `json:"brand_name"`
}{
NumLogical: runtime.NumCPU(),
AESNI: cpuid.CPU.AesNi(),
BrandName: cpuid.CPU.BrandName,
})
diagnostics.StartEmitting()
// Twiddle your thumbs // Twiddle your thumbs
instance.Wait() instance.Wait()
} }
...@@ -321,6 +342,7 @@ var ( ...@@ -321,6 +342,7 @@ var (
version bool version bool
plugins bool plugins bool
validate bool validate bool
noDiag bool
) )
// Build information obtained with the help of -ldflags // Build information obtained with the help of -ldflags
......
...@@ -29,6 +29,7 @@ import ( ...@@ -29,6 +29,7 @@ import (
"github.com/mholt/caddy/caddyfile" "github.com/mholt/caddy/caddyfile"
"github.com/mholt/caddy/caddyhttp/staticfiles" "github.com/mholt/caddy/caddyhttp/staticfiles"
"github.com/mholt/caddy/caddytls" "github.com/mholt/caddy/caddytls"
"github.com/mholt/caddy/diagnostics"
) )
const serverType = "http" const serverType = "http"
...@@ -205,6 +206,8 @@ func (h *httpContext) MakeServers() ([]caddy.Server, error) { ...@@ -205,6 +206,8 @@ func (h *httpContext) MakeServers() ([]caddy.Server, error) {
} }
} }
diagnostics.Set("num_sites", len(h.siteConfigs))
// we must map (group) each config to a bind address // we must map (group) each config to a bind address
groups, err := groupSiteConfigsByListenAddr(h.siteConfigs) groups, err := groupSiteConfigsByListenAddr(h.siteConfigs)
if err != nil { if err != nil {
......
...@@ -36,6 +36,7 @@ import ( ...@@ -36,6 +36,7 @@ import (
"github.com/mholt/caddy" "github.com/mholt/caddy"
"github.com/mholt/caddy/caddyhttp/staticfiles" "github.com/mholt/caddy/caddyhttp/staticfiles"
"github.com/mholt/caddy/caddytls" "github.com/mholt/caddy/caddytls"
"github.com/mholt/caddy/diagnostics"
) )
// Server is the HTTP server implementation. // Server is the HTTP server implementation.
...@@ -345,6 +346,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { ...@@ -345,6 +346,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
} }
}() }()
go diagnostics.AppendUniqueString("user_agent", r.Header.Get("User-Agent"))
// copy the original, unchanged URL into the context // copy the original, unchanged URL into the context
// so it can be referenced by middlewares // so it can be referenced by middlewares
urlCopy := *r.URL urlCopy := *r.URL
......
...@@ -26,6 +26,7 @@ import ( ...@@ -26,6 +26,7 @@ import (
"time" "time"
"github.com/mholt/caddy" "github.com/mholt/caddy"
"github.com/mholt/caddy/diagnostics"
"github.com/xenolf/lego/acme" "github.com/xenolf/lego/acme"
) )
...@@ -276,6 +277,8 @@ Attempts: ...@@ -276,6 +277,8 @@ Attempts:
break break
} }
go diagnostics.Increment("acme_certificates_obtained")
return nil return nil
} }
...@@ -350,8 +353,9 @@ func (c *ACMEClient) Renew(name string) error { ...@@ -350,8 +353,9 @@ func (c *ACMEClient) Renew(name string) error {
return errors.New("too many renewal attempts; last error: " + err.Error()) return errors.New("too many renewal attempts; last error: " + err.Error())
} }
// Executes Cert renew events
caddy.EmitEvent(caddy.CertRenewEvent, name) caddy.EmitEvent(caddy.CertRenewEvent, name)
go diagnostics.Increment("acme_certificates_obtained")
go diagnostics.Increment("acme_certificates_renewed")
return saveCertResource(storage, newCertMeta) return saveCertResource(storage, newCertMeta)
} }
......
// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package diagnostics
import (
"log"
"github.com/google/uuid"
)
// Init initializes this package so that it may
// be used. Do not call this function more than
// once. Init panics if it is called more than
// once or if the UUID value is empty (that is,
// the all-zero nil UUID). Once this function is
// called, the rest of the package may safely be
// used. If this function is not called, the
// collector functions may still be invoked, but
// they will be no-ops.
func Init(instanceID uuid.UUID) {
	if enabled {
		panic("already initialized")
	}
	// A uuid.UUID is a fixed-size byte array, so its String()
	// form is never the empty string; an unset ID is the
	// all-zero value, which the uuid package exposes as uuid.Nil.
	if instanceID == uuid.Nil {
		panic("empty UUID")
	}
	instanceUUID = instanceID
	enabled = true
}
// StartEmitting kicks off the update cycle: it sends the
// current payload as the first update, and subsequent
// updates keep being scheduled until StopEmitting is called.
//
// The first update is sent from a new goroutine, so this
// function does not block on the network.
//
// It panics if an update timer has already been scheduled
// or if an update is currently in progress (i.e. emitting
// was already started). It is a no-op if this package was
// not initialized.
func StartEmitting() {
	if !enabled {
		return
	}

	// a non-nil timer means the cycle was started before
	updateTimerMu.Lock()
	alreadyScheduled := updateTimer != nil
	updateTimerMu.Unlock()
	if alreadyScheduled {
		panic("updates already started")
	}

	// an in-flight update also means we were started before
	updateMu.Lock()
	inFlight := updating
	updateMu.Unlock()
	if inFlight {
		panic("update already in progress")
	}

	go logEmit(false)
}
// StopEmitting sends the current payload one last time
// and ends the update cycle; no further updates are
// scheduled afterwards. Unlike StartEmitting, the final
// update is sent synchronously.
//
// It is a no-op if the package was never initialized
// or if no update timer is currently set.
func StopEmitting() {
	if !enabled {
		return
	}

	// without a scheduled timer there is nothing to stop
	updateTimerMu.Lock()
	scheduled := updateTimer != nil
	updateTimerMu.Unlock()

	if scheduled {
		logEmit(true)
	}
}
// Set puts a value in the buffer to be included
// in the next emission. It overwrites any
// previous value.
//
// If key is new and the buffer already holds
// maxBufferItems items, the value is dropped.
// Overwriting an existing key is always allowed,
// since it does not add an item to the buffer.
//
// This function is safe for multiple goroutines,
// and it is recommended to call this using the
// go keyword after the call to StartEmitting so
// it doesn't block crucial code.
// It is a no-op if the package was not initialized.
func Set(key string, val interface{}) {
	if !enabled {
		return
	}
	bufferMu.Lock()
	defer bufferMu.Unlock()

	// only new keys count against the size limit
	if _, exists := buffer[key]; !exists {
		if bufferItemCount >= maxBufferItems {
			return
		}
		bufferItemCount++
	}
	buffer[key] = val
}
// Append adds value to the end of the list stored under key,
// creating the list if key is not in the buffer yet. Every
// appended value counts as one buffer item; once the buffer
// holds maxBufferItems items, further values are dropped.
// If key already holds something that is not a list, an
// error is logged and nothing is changed.
// It is a no-op if the package was not initialized.
//
// TODO: is this function needed/useful?
func Append(key string, value interface{}) {
	if !enabled {
		return
	}
	bufferMu.Lock()
	if bufferItemCount >= maxBufferItems {
		bufferMu.Unlock()
		return
	}
	// TODO: Test this...
	existing, found := buffer[key]
	list, isList := existing.([]interface{})
	switch {
	case found && !isList:
		// key is taken by a different kind of value
		bufferMu.Unlock()
		log.Printf("[PANIC] Diagnostics: key %s already used for non-slice value", key)
		return
	case list == nil:
		// new key (or an empty nil list): start a fresh list
		buffer[key] = []interface{}{value}
	default:
		buffer[key] = append(list, value)
	}
	bufferItemCount++
	bufferMu.Unlock()
}
// AppendUniqueString adds value to a set named key.
// Set items are unordered. Values in the set
// are unique, but repeat values are counted.
//
// If key is new, a new set will be created.
// If key maps to a type that is not a string
// set, an error is logged, and this is a no-op.
//
// Every distinct value stored counts as one buffer
// item. When the buffer already holds maxBufferItems
// items, new distinct values are dropped so that the
// set cannot grow without bound (values may come from
// untrusted input such as request headers); counting
// a repeat of an already-stored value is still allowed
// because it needs no additional memory.
// It is a no-op if the package was not initialized.
func AppendUniqueString(key, value string) {
	if !enabled {
		return
	}
	bufferMu.Lock()
	bufVal, inBuffer := buffer[key]
	mapVal, mapOk := bufVal.(map[string]int)
	if inBuffer && !mapOk {
		bufferMu.Unlock()
		log.Printf("[PANIC] Diagnostics: key %s already used for non-map value", key)
		return
	}
	// repeat value: just bump its counter (reading a nil map is safe)
	if _, seen := mapVal[value]; seen {
		mapVal[value]++
		bufferMu.Unlock()
		return
	}
	// new distinct value: it must fit within the size limit
	if bufferItemCount >= maxBufferItems {
		bufferMu.Unlock()
		return
	}
	if mapVal == nil {
		mapVal = make(map[string]int)
		buffer[key] = mapVal
	}
	mapVal[value] = 1
	bufferItemCount++
	bufferMu.Unlock()
}
// AppendUniqueInt adds value to a set named key.
// Set items are unordered. Values in the set
// are unique, but repeat values are counted.
//
// If key is new, a new set will be created.
// If key maps to a type that is not an integer
// set, an error is logged, and this is a no-op.
//
// Every distinct value stored counts as one buffer
// item. When the buffer already holds maxBufferItems
// items, new distinct values are dropped so that the
// set cannot grow without bound; counting a repeat of
// an already-stored value is still allowed because it
// needs no additional memory.
// It is a no-op if the package was not initialized.
func AppendUniqueInt(key string, value int) {
	if !enabled {
		return
	}
	bufferMu.Lock()
	bufVal, inBuffer := buffer[key]
	mapVal, mapOk := bufVal.(map[int]int)
	if inBuffer && !mapOk {
		bufferMu.Unlock()
		log.Printf("[PANIC] Diagnostics: key %s already used for non-map value", key)
		return
	}
	// repeat value: just bump its counter (reading a nil map is safe)
	if _, seen := mapVal[value]; seen {
		mapVal[value]++
		bufferMu.Unlock()
		return
	}
	// new distinct value: it must fit within the size limit
	if bufferItemCount >= maxBufferItems {
		bufferMu.Unlock()
		return
	}
	if mapVal == nil {
		mapVal = make(map[int]int)
		buffer[key] = mapVal
	}
	mapVal[value] = 1
	bufferItemCount++
	bufferMu.Unlock()
}
// Increment adds 1 to a value named key.
// If it does not exist, it is created with
// a value of 1. If key maps to a type that
// is not an integer, an error is logged,
// and this is a no-op. It is also a no-op
// if the package was not initialized.
func Increment(key string) {
	incrementOrDecrement(key, true)
}
// Decrement is the same as Increment except
// it subtracts 1; a key that does not exist
// yet is therefore created with a value of -1.
func Decrement(key string) {
	incrementOrDecrement(key, false)
}
// incrementOrDecrement adjusts the integer counter stored
// under key by one: up if inc is true, down if inc is false.
// A missing key starts from zero and counts as a new buffer
// item, so it is dropped if the buffer is already full.
// If key holds a non-integer value, an error is logged and
// nothing is changed. It is a no-op if the package was not
// initialized.
func incrementOrDecrement(key string, inc bool) {
	if !enabled {
		return
	}

	delta := -1
	if inc {
		delta = 1
	}

	bufferMu.Lock()
	current, exists := buffer[key]
	count, isInt := current.(int)
	switch {
	case exists && !isInt:
		// key is taken by a different kind of value
		bufferMu.Unlock()
		log.Printf("[PANIC] Diagnostics: key %s already used for non-integer value", key)
		return
	case !exists:
		// a new counter needs room in the buffer
		if bufferItemCount >= maxBufferItems {
			bufferMu.Unlock()
			return
		}
		bufferItemCount++
	}
	buffer[key] = count + delta
	bufferMu.Unlock()
}
...@@ -12,14 +12,252 @@ ...@@ -12,14 +12,252 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// Package diagnostics implements the client for server-side diagnostics
// of the network. Functions in this package are synchronous and blocking
// unless otherwise specified. For convenience, most functions here do
// not return errors, but errors are logged to the standard logger.
//
// To use this package, first call Init(). You can then call any of the
// collection/aggregation functions. Call StartEmitting() when you are
// ready to begin sending diagnostic updates.
//
// When collecting metrics (functions like Set, Append*, or Increment),
// it may be desirable and even recommended to invoke them in a new
// goroutine (use the go keyword) in case there is lock contention;
// they are thread-safe (unless noted), and you may not want them to
// block the main thread of execution. However, sometimes blocking
// may be necessary too; for example, adding startup metrics to the
// buffer before the call to StartEmitting().
package diagnostics package diagnostics
import ( import (
"bytes"
"encoding/json"
"fmt"
"log"
"net/http"
"strings"
"sync"
"time"
"github.com/google/uuid" "github.com/google/uuid"
) )
func Init(uuid uuid.UUID) { // logEmit calls emit and then logs the error, if any.
instanceUUID = uuid func logEmit(final bool) {
err := emit(final)
if err != nil {
log.Printf("[ERROR] Sending diganostics: %v", err)
}
}
// emit sends an update to the diagnostics server.
// If final is true, no future updates will be scheduled.
// Otherwise, the next update will be scheduled.
func emit(final bool) error {
if !enabled {
return fmt.Errorf("diagnostics not enabled")
}
// ensure only one update happens at a time;
// skip update if previous one still in progress
updateMu.Lock()
if updating {
updateMu.Unlock()
log.Println("[NOTICE] Skipping this diagnostics update because previous one is still working")
return nil
}
updating = true
updateMu.Unlock()
defer func() {
updateMu.Lock()
updating = false
updateMu.Unlock()
}()
// terminate any pending update if this is the last one
if final {
updateTimerMu.Lock()
updateTimer.Stop()
updateTimer = nil
updateTimerMu.Unlock()
}
payloadBytes, err := makePayloadAndResetBuffer()
if err != nil {
return err
}
// this will hold the server's reply
var reply Response
// transmit the payload - use a loop to retry in case of failure
for i := 0; i < 4; i++ {
if i > 0 && err != nil {
// don't hammer the server; first failure might have been
// a fluke, but back off more after that
log.Printf("[WARNING] Sending diagnostics (attempt %d): %v - waiting and retrying", i, err)
time.Sleep(time.Duration(i*i*i) * time.Second)
}
// send it
var resp *http.Response
resp, err = httpClient.Post(endpoint+instanceUUID.String(), "application/json", bytes.NewReader(payloadBytes))
if err != nil {
continue
}
// ensure we can read the response
if ct := resp.Header.Get("Content-Type"); (resp.StatusCode < 300 || resp.StatusCode >= 400) &&
!strings.Contains(ct, "json") {
err = fmt.Errorf("diagnostics server replied with unknown content-type: %s", ct)
resp.Body.Close()
continue
}
// read the response body
err = json.NewDecoder(resp.Body).Decode(&reply)
resp.Body.Close() // close response body as soon as we're done with it
if err != nil {
continue
}
// ensure we won't slam the diagnostics server
if reply.NextUpdate < 1*time.Second {
reply.NextUpdate = defaultUpdateInterval
}
// make sure we didn't send the update too soon; if so,
// just wait and try again -- this is a special case of
// error that we handle differently, as you can see
if resp.StatusCode == http.StatusTooManyRequests {
log.Printf("[NOTICE] Sending diagnostics: we were too early; waiting %s before trying again", reply.NextUpdate)
time.Sleep(reply.NextUpdate)
continue
} else if resp.StatusCode >= 400 {
err = fmt.Errorf("diagnostics server returned status code %d", resp.StatusCode)
continue
}
break
}
if err == nil {
// (remember, if there was an error, we return it
// below, so it will get logged if it's supposed to)
log.Println("[INFO] Sending diagnostics: success")
}
// even if there was an error after retrying, we should
// schedule the next update using our default update
// interval because the server might be healthy later
// schedule the next update (if this wasn't the last one and
// if the remote server didn't tell us to stop sending)
if !final && !reply.Stop {
updateTimerMu.Lock()
updateTimer = time.AfterFunc(reply.NextUpdate, func() {
logEmit(false)
})
updateTimerMu.Unlock()
}
return err
} }
// makePayloadAndResetBuffer takes everything collected so
// far out of the buffer, leaves a fresh empty buffer (and a
// zero item count) in its place, and returns the JSON-encoded
// payload to send to the server. Because the buffer is reset
// here, the collected data exists only in the returned bytes;
// if they are lost, so is the payload.
func makePayloadAndResetBuffer() ([]byte, error) {
	// swap the buffer for an empty one while holding the lock;
	// the old map is then owned exclusively by this function
	bufferMu.Lock()
	collected := buffer
	buffer, bufferItemCount = make(map[string]interface{}), 0
	bufferMu.Unlock()

	// encode payload in preparation for transmission
	return json.Marshal(Payload{
		InstanceID: instanceUUID.String(),
		Timestamp:  time.Now().UTC(),
		Data:       collected,
	})
}
// Response contains the body of a response from the
// diagnostics server.
type Response struct {
	// NextUpdate is how long to wait before the next update.
	// As a time.Duration it is decoded from a JSON number of
	// nanoseconds. emit replaces values below one second with
	// defaultUpdateInterval.
	NextUpdate time.Duration `json:"next_update"`

	// Stop instructs the client to stop sending diagnostics
	// to the server. This would only be done under extenuating
	// circumstances, but we are prepared for it nonetheless.
	Stop bool `json:"stop,omitempty"`

	// Error will be populated with an error message, if any.
	// This field should be empty if the status code is < 400.
	Error string `json:"error,omitempty"`
}
// Payload is the data that gets sent to the diagnostics server.
// It is encoded as JSON by makePayloadAndResetBuffer.
type Payload struct {
	// The universally unique ID of the instance
	InstanceID string `json:"instance_id"`

	// The UTC timestamp of the transmission
	Timestamp time.Time `json:"timestamp"`

	// The metrics: a snapshot of the collection buffer,
	// keyed by metric name; omitted from the JSON if empty
	Data map[string]interface{} `json:"data,omitempty"`
}
// httpClient should be used for HTTP requests. It
// is configured with a timeout for reliability.
var httpClient = http.Client{Timeout: 1 * time.Minute}

// buffer holds the data that we are building up to send.
// It is keyed by metric name and swapped for a fresh map
// each time a payload is made.
var buffer = make(map[string]interface{})

// bufferItemCount is the number of items counted against
// maxBufferItems; it is reset to 0 together with buffer.
var bufferItemCount = 0
var bufferMu sync.RWMutex // protects both the buffer and its count

// updating is used to ensure only one
// update happens at a time. It is true
// while emit is sending an update.
var updating bool
var updateMu sync.Mutex // protects updating

// updateTimer fires off the next update.
// If no update is scheduled, this is nil.
var updateTimer *time.Timer
var updateTimerMu sync.Mutex // protects updateTimer
// instanceUUID is the ID of the current instance.
// This MUST be set to emit diagnostics.
var instanceUUID uuid.UUID var instanceUUID uuid.UUID
// enabled indicates whether the package has
// been initialized and can be actively used.
// It is set to true by Init; the other functions
// in this package read it without holding a lock.
var enabled bool
const (
	// endpoint is the base URL to remote diagnostics server;
	// the instance ID will be appended to it.
	// NOTE(review): this points at localhost over plain HTTP,
	// presumably a development placeholder - confirm before release.
	endpoint = "http://localhost:8081/update/"

	// defaultUpdateInterval is how long to wait before emitting
	// more diagnostic data. This value is only used if the
	// client receives a nonsensical value, or the server doesn't
	// send one at all, indicating a likely problem with the server.
	// Thus, this value should be a long duration to help alleviate
	// extra load on the server.
	defaultUpdateInterval = 1 * time.Hour

	// maxBufferItems is the maximum number of items we'll allow
	// in the buffer before we start dropping new ones, in a
	// rough (simple) attempt to keep memory use under control.
	maxBufferItems = 100000
)
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// Package cpuid provides information about the CPU running the current program.
//
// CPU features are detected on startup, and kept for fast access through the life of the application.
// Currently x86 / x64 (AMD64) is supported.
//
// You can access the CPU information by accessing the shared CPU variable of the cpuid library.
//
// Package home: https://github.com/klauspost/cpuid
package cpuid
import "strings"
// Vendor is a representation of a CPU vendor.
type Vendor int

// Known CPU and hypervisor vendors. Other is the zero value.
const (
	Other Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC
	KVM  // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
)
// CPU feature flags. Each constant is a distinct single bit
// (1 << iota), so flags can be combined into one bitmask and
// tested with the & operator.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
// flagNames maps each feature flag to its human-readable name.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
}
// CPUInfo contains information about the detected system CPU.
// The fields are filled in by Detect.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details, as returned by hasSGX
	maxFunc   uint32     // value of maxFunctionID() at detection time
	maxExFunc uint32     // value of maxExtendedFunction() at detection time
}
// Low-level CPU query primitives, held as function variables.
// They are not assigned in this part of the file; presumably
// initCPU installs the platform-specific implementations
// (TODO confirm against the platform files).
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
// init prepares the package at program startup: it calls
// initCPU (defined outside this part of the file) and then
// runs Detect so that the exported CPU variable is populated
// before any other code uses it.
func init() {
	initCPU()
	Detect()
}
// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
func Detect() {
	// the maximum supported function IDs are recorded first
	// (presumably the helpers below consult them - keep this order)
	CPU.maxFunc = maxFunctionID()
	CPU.maxExFunc = maxExtendedFunction()
	CPU.BrandName = brandName()
	CPU.CacheLine = cacheLine()
	CPU.Family, CPU.Model = familyModel()
	CPU.Features = support()
	// SGX details are only looked up with knowledge of the SGX feature bit
	CPU.SGX = hasSGX(CPU.Features&SGX != 0)
	CPU.ThreadsPerCore = threadsPerCore()
	CPU.LogicalCores = logicalCores()
	CPU.PhysicalCores = physicalCores()
	CPU.VendorID = vendorID()
	CPU.cacheSize()
}
// Generated here: http://play.golang.org/p/BxFH2Gdc0G
//
// Each accessor below reports whether a single feature
// bit is set in c.Features.

// Cmov indicates support of CMOV instructions
func (c CPUInfo) Cmov() bool {
	return c.Features&CMOV != 0
}

// Amd3dnow indicates support of AMD 3DNOW! instructions
func (c CPUInfo) Amd3dnow() bool {
	return c.Features&AMD3DNOW != 0
}

// Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
func (c CPUInfo) Amd3dnowExt() bool {
	return c.Features&AMD3DNOWEXT != 0
}

// MMX indicates support of MMX instructions
func (c CPUInfo) MMX() bool {
	return c.Features&MMX != 0
}

// MMXExt indicates support of MMXEXT instructions
// (SSE integer functions or AMD MMX ext)
func (c CPUInfo) MMXExt() bool {
	return c.Features&MMXEXT != 0
}

// SSE indicates support of SSE instructions
func (c CPUInfo) SSE() bool {
	return c.Features&SSE != 0
}

// SSE2 indicates support of SSE 2 instructions
func (c CPUInfo) SSE2() bool {
	return c.Features&SSE2 != 0
}

// SSE3 indicates support of SSE 3 instructions
func (c CPUInfo) SSE3() bool {
	return c.Features&SSE3 != 0
}

// SSSE3 indicates support of SSSE3 instructions
func (c CPUInfo) SSSE3() bool {
	return c.Features&SSSE3 != 0
}

// SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
func (c CPUInfo) SSE4() bool {
	return c.Features&SSE4 != 0
}

// SSE42 indicates support of SSE4.2 instructions
func (c CPUInfo) SSE42() bool {
	return c.Features&SSE42 != 0
}
// AVX indicates support of AVX instructions
// and operating system support of AVX instructions
func (c CPUInfo) AVX() bool {
	return c.Features&AVX != 0
}

// AVX2 indicates support of AVX2 instructions
func (c CPUInfo) AVX2() bool {
	return c.Features&AVX2 != 0
}

// FMA3 indicates support of FMA3 instructions
func (c CPUInfo) FMA3() bool {
	return c.Features&FMA3 != 0
}

// FMA4 indicates support of FMA4 instructions
func (c CPUInfo) FMA4() bool {
	return c.Features&FMA4 != 0
}

// XOP indicates support of XOP instructions
func (c CPUInfo) XOP() bool {
	return c.Features&XOP != 0
}

// F16C indicates support of F16C instructions
// (half-precision floating-point conversion)
func (c CPUInfo) F16C() bool {
	return c.Features&F16C != 0
}

// BMI1 indicates support of BMI1 instructions
// (Bit Manipulation Instruction Set 1)
func (c CPUInfo) BMI1() bool {
	return c.Features&BMI1 != 0
}

// BMI2 indicates support of BMI2 instructions
// (Bit Manipulation Instruction Set 2)
func (c CPUInfo) BMI2() bool {
	return c.Features&BMI2 != 0
}

// TBM indicates support of TBM instructions
// (AMD Trailing Bit Manipulation)
func (c CPUInfo) TBM() bool {
	return c.Features&TBM != 0
}

// Lzcnt indicates support of LZCNT instruction
func (c CPUInfo) Lzcnt() bool {
	return c.Features&LZCNT != 0
}

// Popcnt indicates support of POPCNT instruction
func (c CPUInfo) Popcnt() bool {
	return c.Features&POPCNT != 0
}

// HTT indicates the processor has Hyperthreading enabled
func (c CPUInfo) HTT() bool {
	return c.Features&HTT != 0
}
// SSE2Slow indicates that SSE2 may be slow on this processor
func (c CPUInfo) SSE2Slow() bool {
return c.Features&SSE2SLOW != 0
}
// SSE3Slow indicates that SSE3 may be slow on this processor
func (c CPUInfo) SSE3Slow() bool {
return c.Features&SSE3SLOW != 0
}
// AesNi indicates support of AES-NI instructions
// (Advanced Encryption Standard New Instructions)
func (c CPUInfo) AesNi() bool {
return c.Features&AESNI != 0
}
// Clmul indicates support of CLMUL instructions
// (Carry-less Multiplication)
func (c CPUInfo) Clmul() bool {
return c.Features&CLMUL != 0
}
// NX indicates support of NX (No-Execute) bit
func (c CPUInfo) NX() bool {
return c.Features&NX != 0
}
// SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
func (c CPUInfo) SSE4A() bool {
return c.Features&SSE4A != 0
}
// HLE indicates support of Hardware Lock Elision
func (c CPUInfo) HLE() bool {
return c.Features&HLE != 0
}
// RTM indicates support of Restricted Transactional Memory
func (c CPUInfo) RTM() bool {
return c.Features&RTM != 0
}
// Rdrand reports whether the RDRAND instruction flag is set in c.Features.
func (c CPUInfo) Rdrand() bool {
	masked := c.Features & RDRAND
	return masked != 0
}
// Rdseed reports whether the RDSEED instruction flag is set in c.Features.
func (c CPUInfo) Rdseed() bool {
	masked := c.Features & RDSEED
	return masked != 0
}
// ADX reports whether the Intel ADX (Multi-Precision Add-Carry Instruction
// Extensions) flag is set in c.Features.
func (c CPUInfo) ADX() bool {
	masked := c.Features & ADX
	return masked != 0
}
// SHA reports whether the Intel SHA Extensions flag is set in c.Features.
func (c CPUInfo) SHA() bool {
	masked := c.Features & SHA
	return masked != 0
}
// AVX512F reports whether the AVX-512 Foundation flag is set in c.Features.
func (c CPUInfo) AVX512F() bool {
	masked := c.Features & AVX512F
	return masked != 0
}
// AVX512DQ reports whether the AVX-512 Doubleword and Quadword Instructions
// flag is set in c.Features.
func (c CPUInfo) AVX512DQ() bool {
	masked := c.Features & AVX512DQ
	return masked != 0
}
// AVX512IFMA reports whether the AVX-512 Integer Fused Multiply-Add
// Instructions flag is set in c.Features.
func (c CPUInfo) AVX512IFMA() bool {
	masked := c.Features & AVX512IFMA
	return masked != 0
}
// AVX512PF reports whether the AVX-512 Prefetch Instructions flag is set in
// c.Features.
func (c CPUInfo) AVX512PF() bool {
	masked := c.Features & AVX512PF
	return masked != 0
}
// AVX512ER reports whether the AVX-512 Exponential and Reciprocal
// Instructions flag is set in c.Features.
func (c CPUInfo) AVX512ER() bool {
	masked := c.Features & AVX512ER
	return masked != 0
}
// AVX512CD reports whether the AVX-512 Conflict Detection Instructions flag
// is set in c.Features.
func (c CPUInfo) AVX512CD() bool {
	masked := c.Features & AVX512CD
	return masked != 0
}
// AVX512BW reports whether the AVX-512 Byte and Word Instructions flag is
// set in c.Features.
func (c CPUInfo) AVX512BW() bool {
	masked := c.Features & AVX512BW
	return masked != 0
}
// AVX512VL reports whether the AVX-512 Vector Length Extensions flag is set
// in c.Features.
func (c CPUInfo) AVX512VL() bool {
	masked := c.Features & AVX512VL
	return masked != 0
}
// AVX512VBMI reports whether the AVX-512 Vector Bit Manipulation
// Instructions flag is set in c.Features.
func (c CPUInfo) AVX512VBMI() bool {
	masked := c.Features & AVX512VBMI
	return masked != 0
}
// MPX reports whether the Intel MPX (Memory Protection Extensions) flag is
// set in c.Features.
func (c CPUInfo) MPX() bool {
	masked := c.Features & MPX
	return masked != 0
}
// ERMS reports whether the Enhanced REP MOVSB/STOSB flag is set in
// c.Features.
func (c CPUInfo) ERMS() bool {
	masked := c.Features & ERMS
	return masked != 0
}
// RDTSCP reports whether the RDTSCP instruction flag is set in c.Features.
func (c CPUInfo) RDTSCP() bool {
	masked := c.Features & RDTSCP
	return masked != 0
}
// CX16 reports whether the CMPXCHG16B instruction flag is set in
// c.Features.
func (c CPUInfo) CX16() bool {
	masked := c.Features & CX16
	return masked != 0
}
// TSX reports whether Intel TSX is fully available. TSX is split into HLE
// (Hardware Lock Elision) and RTM (Restricted Transactional Memory), so this
// returns true only when both of those feature flags are set.
func (c CPUInfo) TSX() bool {
	// Both bits must be present; the previous code tested MPX instead of HLE.
	const tsx = HLE | RTM
	return c.Features&tsx == tsx
}
// Atom reports whether the ATOM flag (Atom processor) is set in c.Features.
func (c CPUInfo) Atom() bool {
	masked := c.Features & ATOM
	return masked != 0
}
// Intel reports whether the detected vendor ID is Intel.
func (c CPUInfo) Intel() bool {
	isIntel := c.VendorID == Intel
	return isIntel
}
// AMD reports whether the detected vendor ID is AMD.
func (c CPUInfo) AMD() bool {
	isAMD := c.VendorID == AMD
	return isAMD
}
// Transmeta reports whether the detected vendor ID is Transmeta.
func (c CPUInfo) Transmeta() bool {
	isTransmeta := c.VendorID == Transmeta
	return isTransmeta
}
// NSC reports whether the detected vendor ID is National Semiconductor.
func (c CPUInfo) NSC() bool {
	isNSC := c.VendorID == NSC
	return isNSC
}
// VIA reports whether the detected vendor ID is VIA.
func (c CPUInfo) VIA() bool {
	isVIA := c.VendorID == VIA
	return isVIA
}
// RTCounter reads the 64-bit time-stamp counter through the RDTSCP
// instruction. It returns 0 when the CPU does not report RDTSCP support.
func (c CPUInfo) RTCounter() uint64 {
	if c.RDTSCP() {
		// The counter comes back split across two 32-bit registers.
		lo, _, _, hi := rdtscpAsm()
		return uint64(hi)<<32 | uint64(lo)
	}
	return 0
}
// Ia32TscAux returns the IA32_TSC_AUX value delivered by RDTSCP.
// Its meaning is OS dependent; on Linux it carries information about the
// cpu/core the code is currently running on.
// It returns 0 when the CPU does not report RDTSCP support.
func (c CPUInfo) Ia32TscAux() uint32 {
	if c.RDTSCP() {
		_, _, aux, _ := rdtscpAsm()
		return aux
	}
	return 0
}
// LogicalCPU returns the logical CPU the calling code is executing on, taken
// from the top byte of EBX of CPUID leaf 1.
// The value is likely to change whenever the OS re-schedules the running
// thread onto another CPU.
// It returns -1 if the current core cannot be detected.
func (c CPUInfo) LogicalCPU() int {
	if c.maxFunc == 0 {
		// CPUID leaf 1 is not available.
		return -1
	}
	_, regB, _, _ := cpuid(1)
	id := regB >> 24
	return int(id)
}
// VM reports whether the CPU vendor ID is one of the known hypervisor IDs
// (MSVM, KVM, VMware or XenHVM). This is only a hint and will very likely
// have many false negatives.
func (c CPUInfo) VM() bool {
	id := c.VendorID
	return id == MSVM || id == KVM || id == VMware || id == XenHVM
}
// Flags contains detected cpu features and caracteristics
type Flags uint64
// String returns a string representation of the detected
// CPU features.
func (f Flags) String() string {
return strings.Join(f.Strings(), ",")
}
// Strings returns and array of the detected features.
func (f Flags) Strings() []string {
s := support()
r := make([]string, 0, 20)
for i := uint(0); i < 64; i++ {
key := Flags(1 << i)
val := flagNames[key]
if s&key != 0 {
r = append(r, val)
}
}
return r
}
// maxExtendedFunction returns EAX of CPUID leaf 0x80000000, which callers in
// this file compare against extended leaf numbers before querying them.
func maxExtendedFunction() uint32 {
	highest, _, _, _ := cpuid(0x80000000)
	return highest
}
// maxFunctionID returns EAX of CPUID leaf 0, which callers in this file
// compare against basic leaf numbers before querying them.
func maxFunctionID() uint32 {
	highest, _, _, _ := cpuid(0)
	return highest
}
// brandName assembles the brand string from the registers of CPUID leaves
// 0x80000002 through 0x80000004 and trims surrounding spaces. It returns
// "unknown" when those leaves are not available.
func brandName() string {
	if maxExtendedFunction() < 0x80000004 {
		return "unknown"
	}
	regs := make([]uint32, 0, 48)
	for leaf := uint32(0x80000002); leaf <= 0x80000004; leaf++ {
		eax, ebx, ecx, edx := cpuid(leaf)
		regs = append(regs, eax, ebx, ecx, edx)
	}
	raw := valAsString(regs...)
	return strings.Trim(string(raw), " ")
}
// threadsPerCore returns the number of hardware threads per physical core.
// It returns 1 whenever the value cannot be determined: on non-Intel CPUs,
// when CPUID leaf 4 is unavailable, or when the reported numbers do not
// yield a positive result. The result is never less than 1, so callers may
// divide by it safely.
func threadsPerCore() int {
	mfi := maxFunctionID()
	if mfi < 0x4 || vendorID() != Intel {
		return 1
	}

	if mfi >= 0xb {
		// Leaf 0xb, sub-leaf 0: the low 16 bits of EBX hold the count.
		_, b, _, _ := cpuidex(0xb, 0)
		if n := int(b & 0xffff); n > 0 {
			return n
		}
		return 1
	}

	// Older CPUs: derive the value from leaf 1 and leaf 4.
	_, b, _, d := cpuid(1)
	if d&(1<<28) == 0 {
		return 1
	}
	// logical is the logical core count from CPUID.1:EBX[23:16].
	logical := (b >> 16) & 255
	if logical <= 1 {
		return 1
	}
	a4, _, _, _ := cpuid(4)
	// physical cores
	cores := (a4 >> 26) + 1
	// Integer division yields 0 when fewer logical than physical cores are
	// reported; fall back to 1 instead of handing callers a zero divisor.
	if threads := int(logical / cores); threads > 0 {
		return threads
	}
	return 1
}
// logicalCores returns the logical core count reported by CPUID for Intel
// and AMD processors, and 0 for every other vendor or when the required leaf
// is unavailable.
func logicalCores() int {
	maxLeaf := maxFunctionID()
	vendor := vendorID()

	if vendor == AMD {
		_, regB, _, _ := cpuid(1)
		return int((regB >> 16) & 0xff)
	}
	if vendor != Intel {
		return 0
	}

	if maxLeaf >= 0xb {
		_, regB, _, _ := cpuidex(0xb, 1)
		return int(regB & 0xffff)
	}

	// Use this on old Intel processors
	if maxLeaf < 1 {
		return 0
	}
	// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
	// that can be assigned to logical processors in a physical package.
	// The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
	_, regB, _, _ := cpuid(1)
	return int((regB >> 16) & 0xff)
}
// familyModel returns the CPU family and model numbers computed from EAX of
// CPUID leaf 1, or (0, 0) when that leaf is unavailable.
func familyModel() (int, int) {
	if maxFunctionID() < 0x1 {
		return 0, 0
	}
	sig, _, _, _ := cpuid(1)
	baseFamily, extFamily := (sig>>8)&0xf, (sig>>20)&0xff
	baseModel, extModel := (sig>>4)&0xf, (sig>>12)&0xf0
	return int(baseFamily + extFamily), int(baseModel + extModel)
}
// physicalCores returns the number of physical cores: logical cores divided
// by threads per core on Intel, the low byte of ECX of CPUID leaf 0x80000008
// plus one on AMD, and 0 when the count cannot be determined.
func physicalCores() int {
	vendor := vendorID()
	if vendor == Intel {
		return logicalCores() / threadsPerCore()
	}
	if vendor == AMD && maxExtendedFunction() >= 0x80000008 {
		_, _, regC, _ := cpuid(0x80000008)
		return int(regC&0xff) + 1
	}
	return 0
}
// vendorMapping translates the 12-byte vendor string returned by CPUID leaf 0
// into a Vendor value. Strings that are not listed are reported as Other by
// vendorID.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
"AMDisbetter!": AMD,
"AuthenticAMD": AMD,
"CentaurHauls": VIA,
"GenuineIntel": Intel,
"TransmetaCPU": Transmeta,
"GenuineTMx86": Transmeta,
"Geode by NSC": NSC,
"VIA VIA VIA ": VIA,
"KVMKVMKVMKVM": KVM,
"Microsoft Hv": MSVM,
"VMwareVMware": VMware,
"XenVMMXenVMM": XenHVM,
}
// vendorID reads the vendor string from CPUID leaf 0 (registers EBX, EDX,
// ECX in that order) and maps it through vendorMapping. Unknown strings
// yield Other.
func vendorID() Vendor {
	_, regB, regC, regD := cpuid(0)
	name := string(valAsString(regB, regD, regC))
	if vend, known := vendorMapping[name]; known {
		return vend
	}
	return Other
}
// cacheLine returns the cache line size in bytes, or 0 if it cannot be
// determined. The value from CPUID leaf 1 is preferred; extended leaf
// 0x80000006 is consulted only when leaf 1 reports zero.
func cacheLine() int {
	if maxFunctionID() < 0x1 {
		return 0
	}
	_, regB, _, _ := cpuid(1)
	size := (regB & 0xff00) >> 5 // cflush size
	if size != 0 {
		return int(size)
	}
	if maxExtendedFunction() >= 0x80000006 {
		_, _, regC, _ := cpuid(0x80000006)
		size = regC & 0xff // cacheline size
	}
	// TODO: Read from Cache and TLB Information
	return int(size)
}
// cacheSize fills c.Cache with the L1 data, L1 instruction, L2 and L3 cache
// sizes in bytes. Every field is first set to -1 and stays that way when the
// corresponding cache cannot be detected. Only Intel and AMD are handled.
func (c *CPUInfo) cacheSize() {
	c.Cache.L1D = -1
	c.Cache.L1I = -1
	c.Cache.L2 = -1
	c.Cache.L3 = -1

	switch vendorID() {
	case Intel:
		if maxFunctionID() < 4 {
			return
		}
		// Walk the sub-leaves of CPUID leaf 4 until one reports cache
		// type 0, which ends the list.
		for i := uint32(0); ; i++ {
			eax, ebx, ecx, _ := cpuidex(4, i)
			cacheType := eax & 15
			if cacheType == 0 {
				return
			}
			cacheLevel := (eax >> 5) & 7
			coherency := int(ebx&0xfff) + 1
			partitions := int((ebx>>12)&0x3ff) + 1
			associativity := int((ebx>>22)&0x3ff) + 1
			sets := int(ecx) + 1
			size := associativity * partitions * coherency * sets

			switch cacheLevel {
			case 1:
				switch cacheType {
				case 1:
					// 1 = Data Cache
					c.Cache.L1D = size
				case 2:
					// 2 = Instruction Cache
					c.Cache.L1I = size
				default:
					// Any other type at level 1 serves both roles: fill
					// whichever of the two has not been reported yet.
					if c.Cache.L1D < 0 {
						c.Cache.L1D = size
					}
					if c.Cache.L1I < 0 {
						c.Cache.L1I = size
					}
				}
			case 2:
				c.Cache.L2 = size
			case 3:
				c.Cache.L3 = size
			}
		}
	case AMD:
		// Untested.
		if maxExtendedFunction() < 0x80000005 {
			return
		}
		_, _, ecx, edx := cpuid(0x80000005)
		c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
		c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)

		if maxExtendedFunction() < 0x80000006 {
			return
		}
		_, _, ecx, _ = cpuid(0x80000006)
		c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
	}
}
// SGXSupport describes the SGX (Software Guard Extensions) capabilities as
// filled in by hasSGX from CPUID leaf 0x12, sub-leaf 0.
type SGXSupport struct {
// Available is the availability value passed to hasSGX; the remaining
// fields are left at their zero values when it is false.
Available bool
// SGX1Supported mirrors bit 0 of EAX.
SGX1Supported bool
// SGX2Supported mirrors bit 1 of EAX.
SGX2Supported bool
// MaxEnclaveSizeNot64 is 2 raised to the low byte of EDX.
MaxEnclaveSizeNot64 int64
// MaxEnclaveSize64 is 2 raised to bits 15:8 of EDX.
MaxEnclaveSize64 int64
}
// hasSGX builds an SGXSupport value. When available is false only the
// Available field is set; otherwise CPUID leaf 0x12, sub-leaf 0 supplies the
// SGX1/SGX2 bits and the two maximum enclave sizes (stored as powers of two).
func hasSGX(available bool) (rval SGXSupport) {
	rval.Available = available
	if available {
		eax, _, _, edx := cpuidex(0x12, 0)
		log2Not64 := edx & 0xFF
		log2In64 := (edx >> 8) & 0xFF

		rval.SGX1Supported = eax&0x01 != 0
		rval.SGX2Supported = eax&0x02 != 0
		rval.MaxEnclaveSizeNot64 = 1 << log2Not64 // pow 2
		rval.MaxEnclaveSize64 = 1 << log2In64     // pow 2
	}
	return rval
}
// support queries CPUID (and XGETBV where OS support must be confirmed) and
// returns the detected feature set as a Flags bitmask. It returns 0 when
// CPUID leaf 1 is not available (maxFunctionID() < 1).
//
// Three sources are read in order: leaf 1 (ECX/EDX), leaf 7 sub-leaf 0
// (EBX/ECX) when available, and extended leaf 0x80000001 (ECX/EDX) when
// available. Later sections test bits accumulated in rval by earlier ones,
// so the order of the sections matters.
func support() Flags {
mfi := maxFunctionID()
vend := vendorID()
if mfi < 0x1 {
return 0
}
rval := uint64(0)
// Leaf 1: c is ECX, d is EDX.
_, _, c, d := cpuid(1)
if (d & (1 << 15)) != 0 {
rval |= CMOV
}
if (d & (1 << 23)) != 0 {
rval |= MMX
}
// EDX bit 25 sets both MMXEXT and SSE.
if (d & (1 << 25)) != 0 {
rval |= MMXEXT
}
if (d & (1 << 25)) != 0 {
rval |= SSE
}
if (d & (1 << 26)) != 0 {
rval |= SSE2
}
if (c & 1) != 0 {
rval |= SSE3
}
if (c & 0x00000200) != 0 {
rval |= SSSE3
}
if (c & 0x00080000) != 0 {
rval |= SSE4
}
if (c & 0x00100000) != 0 {
rval |= SSE42
}
if (c & (1 << 25)) != 0 {
rval |= AESNI
}
if (c & (1 << 1)) != 0 {
rval |= CLMUL
}
if c&(1<<23) != 0 {
rval |= POPCNT
}
if c&(1<<30) != 0 {
rval |= RDRAND
}
if c&(1<<29) != 0 {
rval |= F16C
}
if c&(1<<13) != 0 {
rval |= CX16
}
// HTT is only reported on Intel, and only when more than one thread per
// core is actually detected.
if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
if threadsPerCore() > 1 {
rval |= HTT
}
}
// Check XGETBV, OSXSAVE and AVX bits
if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
// Check for OS support
eax, _ := xgetbv(0)
if (eax & 0x6) == 0x6 {
rval |= AVX
if (c & 0x00001000) != 0 {
rval |= FMA3
}
}
}
// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 {
_, ebx, ecx, _ := cpuidex(7, 0)
if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
rval |= AVX2
}
// BMI2 is only examined when BMI1 is present.
if (ebx & 0x00000008) != 0 {
rval |= BMI1
if (ebx & 0x00000100) != 0 {
rval |= BMI2
}
}
if ebx&(1<<2) != 0 {
rval |= SGX
}
if ebx&(1<<4) != 0 {
rval |= HLE
}
if ebx&(1<<9) != 0 {
rval |= ERMS
}
if ebx&(1<<11) != 0 {
rval |= RTM
}
if ebx&(1<<14) != 0 {
rval |= MPX
}
if ebx&(1<<18) != 0 {
rval |= RDSEED
}
if ebx&(1<<19) != 0 {
rval |= ADX
}
if ebx&(1<<29) != 0 {
rval |= SHA
}
// Only detect AVX-512 features if XGETBV is supported
// (c here is still ECX of leaf 1).
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
// Check for OS support
eax, _ := xgetbv(0)
// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
// ZMM16-ZMM31 state are enabled by OS)
// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
if ebx&(1<<16) != 0 {
rval |= AVX512F
}
if ebx&(1<<17) != 0 {
rval |= AVX512DQ
}
if ebx&(1<<21) != 0 {
rval |= AVX512IFMA
}
if ebx&(1<<26) != 0 {
rval |= AVX512PF
}
if ebx&(1<<27) != 0 {
rval |= AVX512ER
}
if ebx&(1<<28) != 0 {
rval |= AVX512CD
}
if ebx&(1<<30) != 0 {
rval |= AVX512BW
}
if ebx&(1<<31) != 0 {
rval |= AVX512VL
}
// ecx
if ecx&(1<<1) != 0 {
rval |= AVX512VBMI
}
}
}
}
if maxExtendedFunction() >= 0x80000001 {
// From here on c and d shadow the leaf 1 values and hold ECX/EDX of
// extended leaf 0x80000001.
_, _, c, d := cpuid(0x80000001)
// LZCNT support also sets POPCNT.
if (c & (1 << 5)) != 0 {
rval |= LZCNT
rval |= POPCNT
}
if (d & (1 << 31)) != 0 {
rval |= AMD3DNOW
}
if (d & (1 << 30)) != 0 {
rval |= AMD3DNOWEXT
}
if (d & (1 << 23)) != 0 {
rval |= MMX
}
if (d & (1 << 22)) != 0 {
rval |= MMXEXT
}
if (c & (1 << 6)) != 0 {
rval |= SSE4A
}
if d&(1<<20) != 0 {
rval |= NX
}
if d&(1<<27) != 0 {
rval |= RDTSCP
}
/* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
than SSE2 often enough to utilize this special-case flag.
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
so that SSE2 is used unless explicitly disabled by checking
AV_CPU_FLAG_SSE2SLOW. */
if vendorID() != Intel &&
rval&SSE2 != 0 && (c&0x00000040) == 0 {
rval |= SSE2SLOW
}
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
* used unless the OS has AVX support. */
if (rval & AVX) != 0 {
if (c & 0x00000800) != 0 {
rval |= XOP
}
if (c & 0x00010000) != 0 {
rval |= FMA4
}
}
if vendorID() == Intel {
family, model := familyModel()
if family == 6 && (model == 9 || model == 13 || model == 14) {
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
* 6/14 (core1 "yonah") theoretically support sse2, but it's
* usually slower than mmx. */
if (rval & SSE2) != 0 {
rval |= SSE2SLOW
}
if (rval & SSE3) != 0 {
rval |= SSE3SLOW
}
}
/* The Atom processor has SSSE3 support, which is useful in many cases,
* but sometimes the SSSE3 version is slower than the SSE2 equivalent
* on the Atom, but is generally faster on other processors supporting
* SSSE3. This flag allows for selectively disabling certain SSSE3
* functions on the Atom. */
if family == 6 && model == 28 {
rval |= ATOM
}
}
}
return Flags(rval)
}
// valAsString unpacks each 32-bit value into four bytes, least significant
// byte first, and returns the concatenation truncated at the first zero
// byte. With no zero byte present the full 4*len(values) bytes are returned.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 4*len(values))
	for idx, word := range values {
		chunk := out[idx*4 : idx*4+4]
		// Store the whole word before scanning it for a terminator.
		for k := range chunk {
			chunk[k] = byte(word >> (8 * uint(k)))
		}
		for k, ch := range chunk {
			if ch == 0 {
				return out[:idx*4+k]
			}
		}
	}
	return out
}
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX loaded from op and CX cleared, then stores
// AX, BX, CX and DX into the four result slots (386 frame layout).
TEXT ·asmCpuid(SB), 7, $0
XORL CX, CX // no sub-leaf: CX is zeroed before CPUID
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX loaded from op and CX loaded from op2, then stores
// AX, BX, CX and DX into the four result slots (386 frame layout).
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and stores
// AX and DX into the two result slots (386 frame layout).
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores AX, BX, CX and DX into
// the four result slots. Callers in this package use only eax/edx (the
// counter halves) and ecx (IA32_TSC_AUX); the ebx slot receives whatever BX
// held.
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build amd64,!gccgo
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX loaded from op and CX cleared, then stores
// AX, BX, CX and DX into the four result slots, which start at offset 8 in
// the amd64 frame layout.
TEXT ·asmCpuid(SB), 7, $0
XORQ CX, CX // no sub-leaf: CX is zeroed before CPUID
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// Executes CPUID with AX loaded from op and CX loaded from op2, then stores
// AX, BX, CX and DX into the four result slots (amd64 frame layout).
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
// Loads index into CX, executes XGETBV (emitted as raw bytes) and stores
// AX and DX into the two result slots, which start at offset 8 in the amd64
// frame layout.
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// Executes RDTSCP (emitted as raw bytes) and stores AX, BX, CX and DX into
// the four result slots. Callers in this package use only eax/edx (the
// counter halves) and ecx (IA32_TSC_AUX); the ebx slot receives whatever BX
// held.
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo amd64,!gccgo
package cpuid
// The four functions below have no Go body; they are implemented by the
// TEXT symbols of the same names in the package's assembly sources.

// asmCpuid executes CPUID for leaf op with the sub-leaf register cleared.
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// asmCpuidex executes CPUID for leaf op and sub-leaf op2.
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// asmXgetbv executes XGETBV with index in CX and returns AX and DX.
func asmXgetbv(index uint32) (eax, edx uint32)
// asmRdtscpAsm executes RDTSCP and returns the register contents.
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// initCPU points the package-level cpuid, cpuidex, xgetbv and rdtscpAsm
// hooks at their assembly implementations.
func initCPU() {
	cpuid, cpuidex = asmCpuid, asmCpuidex
	xgetbv, rdtscpAsm = asmXgetbv, asmRdtscpAsm
}
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build !amd64,!386 gccgo
package cpuid
// initCPU installs fallback implementations of the cpuid, cpuidex, xgetbv
// and rdtscpAsm hooks that ignore their arguments and return all-zero
// register values.
func initCPU() {
	zeroRegs := func() (uint32, uint32, uint32, uint32) {
		return 0, 0, 0, 0
	}
	cpuid = func(uint32) (uint32, uint32, uint32, uint32) {
		return zeroRegs()
	}
	cpuidex = func(uint32, uint32) (uint32, uint32, uint32, uint32) {
		return zeroRegs()
	}
	xgetbv = func(uint32) (uint32, uint32) {
		return 0, 0
	}
	rdtscpAsm = zeroRegs
}
package cpuid
//go:generate go run private-gen.go
//go:generate gofmt -w ./private
// +build ignore
package main
import (
"bytes"
"fmt"
"go/ast"
"go/parser"
"go/printer"
"go/token"
"io"
"io/ioutil"
"log"
"os"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// Generator configuration.
var (
	// inFiles are parsed, rewritten and written to the output package.
	inFiles = []string{"cpuid.go", "cpuid_test.go"}

	// copyFiles are copied to the output package without modification.
	copyFiles = []string{"cpuid_amd64.s", "cpuid_386.s", "detect_ref.go", "detect_intel.go"}

	// fileSet is shared by the parser, the printer and comment handling.
	fileSet = token.NewFileSet()

	// reWrites are applied to every input file before the per-identifier
	// lower-casing rewrites collected in main.
	reWrites = []rewrite{
		initRewrite("CPUInfo -> cpuInfo"),
		initRewrite("Vendor -> vendor"),
		initRewrite("Flags -> flags"),
		initRewrite("Detect -> detect"),
		initRewrite("CPU -> cpu"),
	}

	// excludeNames lists lower-cased identifier names that main leaves
	// untouched.
	excludeNames = map[string]bool{
		"string": true, "join": true, "trim": true,
		// cpuid_test.go
		"t": true, "println": true, "logf": true, "log": true, "fatalf": true, "fatal": true,
	}

	// excludePrefixes lists lower-cased name prefixes that main leaves
	// untouched.
	excludePrefixes = []string{"test", "benchmark"}
)
// main generates the "private" variant of the package.
//
// Each file in inFiles is parsed, passed through the fixed reWrites, and then
// through one rewrite per exported identifier that lower-cases its name
// (identifiers matching excludePrefixes or excludeNames are left alone). The
// result is printed, everything before the package clause is replaced by a
// generated-code notice, and the file is written into the output directory.
// Finally the files in copyFiles are copied there unchanged; those whose name
// does not start with "cpuid" get a "cpuid_" prefix.
//
// Any failure terminates the program through log.Fatalf.
func main() {
	Package := "private"
	parserMode := parser.ParseComments
	exported := make(map[string]rewrite)
	for _, file := range inFiles {
		// ReadFile opens, reads and closes the input in one step.
		src, err := ioutil.ReadFile(file)
		if err != nil {
			log.Fatalf("reading input: %s", err)
		}

		astfile, err := parser.ParseFile(fileSet, file, src, parserMode)
		if err != nil {
			log.Fatalf("parsing input: %s", err)
		}

		for _, rw := range reWrites {
			astfile = rw(astfile)
		}

		// Collect a lower-casing rewrite for every exported identifier.
		// The map is shared across input files on purpose, so names seen
		// in an earlier file are rewritten in later ones as well.
		ast.Inspect(astfile, func(n ast.Node) bool {
			x, ok := n.(*ast.Ident)
			if !ok || !x.IsExported() {
				return true
			}
			t := strings.ToLower(x.Name)
			for _, pre := range excludePrefixes {
				if strings.HasPrefix(t, pre) {
					return true
				}
			}
			if !excludeNames[t] {
				exported[x.Name] = initRewrite(x.Name + " -> " + t)
			}
			return true
		})

		for _, rw := range exported {
			astfile = rw(astfile)
		}

		var buf bytes.Buffer
		if err := printer.Fprint(&buf, fileSet, astfile); err != nil {
			log.Fatalf("printing output: %s", err)
		}

		// Remove package documentation and insert information
		s := buf.String()
		ind := strings.Index(s, "\npackage cpuid")
		if ind < 0 {
			// Slicing with -1 would panic; report the real problem.
			log.Fatalf("package clause not found in %s", file)
		}
		s = s[ind:]
		s = "// Generated, DO NOT EDIT,\n" +
			"// but copy it to your own project and rename the package.\n" +
			"// See more at http://github.com/klauspost/cpuid\n" +
			s

		outputName := Package + string(os.PathSeparator) + file

		err = ioutil.WriteFile(outputName, []byte(s), 0644)
		if err != nil {
			log.Fatalf("writing output: %s", err)
		}
		log.Println("Generated", outputName)
	}

	for _, file := range copyFiles {
		dst := ""
		if strings.HasPrefix(file, "cpuid") {
			dst = Package + string(os.PathSeparator) + file
		} else {
			dst = Package + string(os.PathSeparator) + "cpuid_" + file
		}
		err := copyFile(file, dst)
		if err != nil {
			log.Fatalf("copying file: %s", err)
		}
		log.Println("Copied", dst)
	}
}
// copyFile copies the file src to dst. Both must be regular files (dst may
// also not exist yet). When src and dst already refer to the same file
// nothing is copied and nil is returned; otherwise the contents of src are
// written to dst.
func copyFile(src, dst string) (err error) {
	srcInfo, err := os.Stat(src)
	if err != nil {
		return err
	}
	if !srcInfo.Mode().IsRegular() {
		// cannot copy non-regular files (e.g., directories,
		// symlinks, devices, etc.)
		return fmt.Errorf("CopyFile: non-regular source file %s (%q)", srcInfo.Name(), srcInfo.Mode().String())
	}

	dstInfo, err := os.Stat(dst)
	switch {
	case err == nil:
		if !dstInfo.Mode().IsRegular() {
			return fmt.Errorf("CopyFile: non-regular destination file %s (%q)", dstInfo.Name(), dstInfo.Mode().String())
		}
		if os.SameFile(srcInfo, dstInfo) {
			return nil
		}
	case !os.IsNotExist(err):
		return err
	}
	// dst is missing or is a different regular file: copy the contents.
	return copyFileContents(src, dst)
}
// copyFileContents writes the contents of the file named src into the file
// named dst. dst is created if it does not exist and truncated if it does,
// so its previous contents are fully replaced. The data is synced before
// returning; an error from closing dst is reported when nothing failed
// earlier.
func copyFileContents(src, dst string) (err error) {
	source, err := os.Open(src)
	if err != nil {
		return err
	}
	defer source.Close()

	target, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer func() {
		// Keep the first error; only surface the close error otherwise.
		if closeErr := target.Close(); err == nil {
			err = closeErr
		}
	}()

	if _, err = io.Copy(target, source); err == nil {
		err = target.Sync()
	}
	return err
}
// rewrite transforms a parsed file and returns the transformed file.
type rewrite func(*ast.File) *ast.File

// initRewrite turns a rule of the form "pattern -> replacement" into a
// rewrite. A rule that does not split into exactly two parts around "->"
// prints a message to standard error and exits with status 2.
// Mostly copied from gofmt
func initRewrite(rewriteRule string) rewrite {
	parts := strings.Split(rewriteRule, "->")
	if len(parts) != 2 {
		fmt.Fprintf(os.Stderr, "rewrite rule must be of the form 'pattern -> replacement'\n")
		os.Exit(2)
	}
	pattern, replace := parseExpr(parts[0], "pattern"), parseExpr(parts[1], "replacement")
	return func(file *ast.File) *ast.File {
		return rewriteFile(pattern, replace, file)
	}
}
// parseExpr parses s as a Go expression and returns it. what names the role
// of s in the error message. On a parse error the message is printed to
// standard error and the program exits with status 2.
// It might make sense to expand this to allow statement patterns,
// but there are problems with preserving formatting and also
// with what a wildcard for a statement looks like.
func parseExpr(s, what string) ast.Expr {
	expr, err := parser.ParseExpr(s)
	if err == nil {
		return expr
	}
	fmt.Fprintf(os.Stderr, "parsing %s %s at %s\n", what, s, err)
	os.Exit(2)
	return expr
}
// Keep this function for debugging.
/*
func dump(msg string, val reflect.Value) {
fmt.Printf("%s:\n", msg)
ast.Print(fileSet, val.Interface())
fmt.Println()
}
*/
// rewriteFile applies the rewrite rule 'pattern -> replace' to an entire file.
// It walks p bottom-up through apply, replaces every node that matches
// pattern with a copy of replace positioned at the matched node, and then
// rebuilds the file's comment list from the comment map taken before the
// rewrite.
func rewriteFile(pattern, replace ast.Expr, p *ast.File) *ast.File {
cmap := ast.NewCommentMap(fileSet, p, p.Comments)
// m holds wildcard bindings; it is shared by all invocations of rewriteVal
// and cleared at the start of each one.
m := make(map[string]reflect.Value)
pat := reflect.ValueOf(pattern)
repl := reflect.ValueOf(replace)
var rewriteVal func(val reflect.Value) reflect.Value
rewriteVal = func(val reflect.Value) reflect.Value {
// don't bother if val is invalid to start with
if !val.IsValid() {
return reflect.Value{}
}
for k := range m {
delete(m, k)
}
// Rewrite the children first, then try to match this node itself.
val = apply(rewriteVal, val)
if match(m, pat, val) {
val = subst(m, repl, reflect.ValueOf(val.Interface().(ast.Node).Pos()))
}
return val
}
r := apply(rewriteVal, reflect.ValueOf(p)).Interface().(*ast.File)
r.Comments = cmap.Filter(r).Comments() // recreate comments list
return r
}
// set assigns y to x via x.Set(y) and shields the caller from the panics
// reflect raises when x cannot take y: string panics mentioning
// "type mismatch" or "not assignable" are swallowed and the assignment is
// skipped. Any other panic is re-raised. Nothing happens when x is not
// settable or y is invalid.
func set(x, y reflect.Value) {
	if !(x.CanSet() && y.IsValid()) {
		return
	}
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		msg, isString := r.(string)
		if isString && (strings.Contains(msg, "type mismatch") || strings.Contains(msg, "not assignable")) {
			// x cannot be set to y - ignore this rewrite
			return
		}
		panic(r)
	}()
	x.Set(y)
}
// Values/types for special cases.
// These are compared against reflect types in apply, match and subst to
// single out AST objects, scopes, identifiers, positions and call
// expressions.
var (
objectPtrNil = reflect.ValueOf((*ast.Object)(nil)) // replacement for *ast.Object values
scopePtrNil = reflect.ValueOf((*ast.Scope)(nil)) // replacement for *ast.Scope values
identType = reflect.TypeOf((*ast.Ident)(nil))
objectPtrType = reflect.TypeOf((*ast.Object)(nil))
positionType = reflect.TypeOf(token.NoPos)
callExprType = reflect.TypeOf((*ast.CallExpr)(nil))
scopePtrType = reflect.TypeOf((*ast.Scope)(nil))
)
// apply replaces each AST field x in val with f(x), returning val.
// To avoid extra conversions, f operates on the reflect.Value form.
// An invalid val yields the zero reflect.Value; *ast.Object and *ast.Scope
// values are replaced by typed nil pointers instead of being traversed.
func apply(f func(reflect.Value) reflect.Value, val reflect.Value) reflect.Value {
	if !val.IsValid() {
		return reflect.Value{}
	}

	switch val.Type() {
	case objectPtrType:
		// *ast.Objects introduce cycles and are likely incorrect after
		// rewrite; don't follow them but replace with nil instead
		return objectPtrNil
	case scopePtrType:
		// similarly for scopes: they are likely incorrect after a rewrite;
		// replace them with nil
		return scopePtrNil
	}

	target := reflect.Indirect(val)
	switch target.Kind() {
	case reflect.Slice:
		for idx := 0; idx < target.Len(); idx++ {
			elem := target.Index(idx)
			set(elem, f(elem))
		}
	case reflect.Struct:
		for idx := 0; idx < target.NumField(); idx++ {
			field := target.Field(idx)
			set(field, f(field))
		}
	case reflect.Interface:
		set(target, f(target.Elem()))
	}
	return val
}
// isWildcard reports whether s consists of exactly one rune and that rune is
// a lower-case letter.
func isWildcard(s string) bool {
	r, width := utf8.DecodeRuneInString(s)
	if width != len(s) {
		return false
	}
	return unicode.IsLower(r)
}
// match returns true if pattern matches val,
// recording wildcard submatches in m.
// If m == nil, match checks whether pattern == val.
func match(m map[string]reflect.Value, pattern, val reflect.Value) bool {
// Wildcard matches any expression. If it appears multiple
// times in the pattern, it must match the same expression
// each time.
if m != nil && pattern.IsValid() && pattern.Type() == identType {
name := pattern.Interface().(*ast.Ident).Name
if isWildcard(name) && val.IsValid() {
// wildcards only match valid (non-nil) expressions.
if _, ok := val.Interface().(ast.Expr); ok && !val.IsNil() {
if old, ok := m[name]; ok {
// Already bound: the new occurrence must equal the first one.
return match(nil, old, val)
}
m[name] = val
return true
}
}
}
// Otherwise, pattern and val must match recursively.
if !pattern.IsValid() || !val.IsValid() {
return !pattern.IsValid() && !val.IsValid()
}
if pattern.Type() != val.Type() {
return false
}
// Special cases.
switch pattern.Type() {
case identType:
// For identifiers, only the names need to match
// (and none of the other *ast.Object information).
// This is a common case, handle it all here instead
// of recursing down any further via reflection.
p := pattern.Interface().(*ast.Ident)
v := val.Interface().(*ast.Ident)
return p == nil && v == nil || p != nil && v != nil && p.Name == v.Name
case objectPtrType, positionType:
// object pointers and token positions always match
return true
case callExprType:
// For calls, the Ellipsis fields (token.Position) must
// match since that is how f(x) and f(x...) are different.
// Check them here but fall through for the remaining fields.
p := pattern.Interface().(*ast.CallExpr)
v := val.Interface().(*ast.CallExpr)
if p.Ellipsis.IsValid() != v.Ellipsis.IsValid() {
return false
}
}
// Generic structural comparison on the pointed-to values.
p := reflect.Indirect(pattern)
v := reflect.Indirect(val)
if !p.IsValid() || !v.IsValid() {
return !p.IsValid() && !v.IsValid()
}
switch p.Kind() {
case reflect.Slice:
if p.Len() != v.Len() {
return false
}
for i := 0; i < p.Len(); i++ {
if !match(m, p.Index(i), v.Index(i)) {
return false
}
}
return true
case reflect.Struct:
for i := 0; i < p.NumField(); i++ {
if !match(m, p.Field(i), v.Field(i)) {
return false
}
}
return true
case reflect.Interface:
return match(m, p.Elem(), v.Elem())
}
// Handle token integers, etc.
return p.Interface() == v.Interface()
}
// subst returns a copy of pattern with values from m substituted in place
// of wildcards and pos used as the position of tokens from the pattern.
// if m == nil, subst returns a copy of pattern and doesn't change the line
// number information.
func subst(m map[string]reflect.Value, pattern reflect.Value, pos reflect.Value) reflect.Value {
if !pattern.IsValid() {
return reflect.Value{}
}
// Wildcard gets replaced with map value.
if m != nil && pattern.Type() == identType {
name := pattern.Interface().(*ast.Ident).Name
if isWildcard(name) {
if old, ok := m[name]; ok {
// Copy the bound value as is: nil map and invalid pos mean no
// further substitution and no position changes.
return subst(nil, old, reflect.Value{})
}
}
}
if pos.IsValid() && pattern.Type() == positionType {
// use new position only if old position was valid in the first place
if old := pattern.Interface().(token.Pos); !old.IsValid() {
return pattern
}
return pos
}
// Otherwise copy.
switch p := pattern; p.Kind() {
case reflect.Slice:
v := reflect.MakeSlice(p.Type(), p.Len(), p.Len())
for i := 0; i < p.Len(); i++ {
v.Index(i).Set(subst(m, p.Index(i), pos))
}
return v
case reflect.Struct:
v := reflect.New(p.Type()).Elem()
for i := 0; i < p.NumField(); i++ {
v.Field(i).Set(subst(m, p.Field(i), pos))
}
return v
case reflect.Ptr:
// A nil pointer is copied as a nil pointer of the same type.
v := reflect.New(p.Type()).Elem()
if elem := p.Elem(); elem.IsValid() {
v.Set(subst(m, elem, pos).Addr())
}
return v
case reflect.Interface:
v := reflect.New(p.Type()).Elem()
if elem := p.Elem(); elem.IsValid() {
v.Set(subst(m, elem, pos))
}
return v
}
// Remaining kinds are returned without copying.
return pattern
}
// Generated, DO NOT EDIT,
// but copy it to your own project and rename the package.
// See more at http://github.com/klauspost/cpuid
package cpuid
import "strings"
// vendor is a representation of a CPU vendor.
type vendor int
// Known vendor values; other is the zero value.
const (
other vendor = iota
intel
amd
via
transmeta
nsc
kvm // Kernel-based Virtual Machine
msvm // Microsoft Hyper-V or Windows Virtual PC
vmware
xenhvm
)
// Feature flags. Each constant occupies its own bit (1 << iota), so values
// can be combined and tested with bitwise operators.
const (
cmov = 1 << iota // i686 CMOV
nx // NX (No-Execute) bit
amd3dnow // AMD 3DNOW
amd3dnowext // AMD 3DNowExt
mmx // standard MMX
mmxext // SSE integer functions or AMD MMX ext
sse // SSE functions
sse2 // P4 SSE functions
sse3 // Prescott SSE3 functions
ssse3 // Conroe SSSE3 functions
sse4 // Penryn SSE4.1 functions
sse4a // AMD Barcelona microarchitecture SSE4a instructions
sse42 // Nehalem SSE4.2 functions
avx // AVX functions
avx2 // AVX2 functions
fma3 // Intel FMA 3
fma4 // Bulldozer FMA4 functions
xop // Bulldozer XOP functions
f16c // Half-precision floating-point conversion
bmi1 // Bit Manipulation Instruction Set 1
bmi2 // Bit Manipulation Instruction Set 2
tbm // AMD Trailing Bit Manipulation
lzcnt // LZCNT instruction
popcnt // POPCNT instruction
aesni // Advanced Encryption Standard New Instructions
clmul // Carry-less Multiplication
htt // Hyperthreading (enabled)
hle // Hardware Lock Elision
rtm // Restricted Transactional Memory
rdrand // RDRAND instruction is available
rdseed // RDSEED instruction is available
adx // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
sha // Intel SHA Extensions
avx512f // AVX-512 Foundation
avx512dq // AVX-512 Doubleword and Quadword Instructions
avx512ifma // AVX-512 Integer Fused Multiply-Add Instructions
avx512pf // AVX-512 Prefetch Instructions
avx512er // AVX-512 Exponential and Reciprocal Instructions
avx512cd // AVX-512 Conflict Detection Instructions
avx512bw // AVX-512 Byte and Word Instructions
avx512vl // AVX-512 Vector Length Extensions
avx512vbmi // AVX-512 Vector Bit Manipulation Instructions
mpx // Intel MPX (Memory Protection Extensions)
erms // Enhanced REP MOVSB/STOSB
rdtscp // RDTSCP Instruction
cx16 // CMPXCHG16B Instruction
sgx // Software Guard Extensions
// Performance indicators
sse2slow // SSE2 is supported, but usually not faster
sse3slow // SSE3 is supported, but usually not faster
atom // Atom processor, some SSSE3 instructions are slower
)
// flagNames maps each single-bit feature flag to its printable name.
var flagNames = map[flags]string{
cmov: "CMOV", // i686 CMOV
nx: "NX", // NX (No-Execute) bit
amd3dnow: "AMD3DNOW", // AMD 3DNOW
amd3dnowext: "AMD3DNOWEXT", // AMD 3DNowExt
mmx: "MMX", // Standard MMX
mmxext: "MMXEXT", // SSE integer functions or AMD MMX ext
sse: "SSE", // SSE functions
sse2: "SSE2", // P4 SSE2 functions
sse3: "SSE3", // Prescott SSE3 functions
ssse3: "SSSE3", // Conroe SSSE3 functions
sse4: "SSE4.1", // Penryn SSE4.1 functions
sse4a: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
sse42: "SSE4.2", // Nehalem SSE4.2 functions
avx: "AVX", // AVX functions
avx2: "AVX2", // AVX2 functions
fma3: "FMA3", // Intel FMA 3
fma4: "FMA4", // Bulldozer FMA4 functions
xop: "XOP", // Bulldozer XOP functions
f16c: "F16C", // Half-precision floating-point conversion
bmi1: "BMI1", // Bit Manipulation Instruction Set 1
bmi2: "BMI2", // Bit Manipulation Instruction Set 2
tbm: "TBM", // AMD Trailing Bit Manipulation
lzcnt: "LZCNT", // LZCNT instruction
popcnt: "POPCNT", // POPCNT instruction
aesni: "AESNI", // Advanced Encryption Standard New Instructions
clmul: "CLMUL", // Carry-less Multiplication
htt: "HTT", // Hyperthreading (enabled)
hle: "HLE", // Hardware Lock Elision
rtm: "RTM", // Restricted Transactional Memory
rdrand: "RDRAND", // RDRAND instruction is available
rdseed: "RDSEED", // RDSEED instruction is available
adx: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
sha: "SHA", // Intel SHA Extensions
avx512f: "AVX512F", // AVX-512 Foundation
avx512dq: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
avx512ifma: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
avx512pf: "AVX512PF", // AVX-512 Prefetch Instructions
avx512er: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
avx512cd: "AVX512CD", // AVX-512 Conflict Detection Instructions
avx512bw: "AVX512BW", // AVX-512 Byte and Word Instructions
avx512vl: "AVX512VL", // AVX-512 Vector Length Extensions
avx512vbmi: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
mpx: "MPX", // Intel MPX (Memory Protection Extensions)
erms: "ERMS", // Enhanced REP MOVSB/STOSB
rdtscp: "RDTSCP", // RDTSCP Instruction
cx16: "CX16", // CMPXCHG16B Instruction
sgx: "SGX", // Software Guard Extensions
// Performance indicators
sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
sse3slow: "SSE3SLOW", // SSE3 supported, but usually not faster
atom: "ATOM", // Atom processor, some SSSE3 instructions are slower
}
// cpuInfo contains information about the detected system CPU.
// Every field is filled in by detect.
type cpuInfo struct {
brandname string // Brand name reported by the CPU
vendorid vendor // Comparable CPU vendor ID
features flags // Features of the CPU
physicalcores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
threadspercore int // Number of threads per physical core. Will be 1 if undetectable.
logicalcores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
family int // CPU family number
model int // CPU model number
cacheline int // Cache line size in bytes. Will be 0 if undetectable.
cache struct {
l1i int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
l1d int // L1 Data Cache (per core or shared). Will be -1 if undetected
l2 int // L2 Cache (per core or shared). Will be -1 if undetected
l3 int // L3 Cache (per core or shared). Will be -1 if undetected
}
sgx sgxsupport // SGX details as returned by hasSGX
maxFunc uint32 // Value returned by maxFunctionID (EAX of CPUID leaf 0)
maxExFunc uint32 // Value returned by maxExtendedFunction (EAX of CPUID leaf 0x80000000)
}
// Low-level CPU query hooks. They start out nil and must be assigned
// before any detection helper is called.
var (
	// cpuid runs CPUID for leaf op and returns the four result registers.
	cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
	// cpuidex runs CPUID for leaf op with sub-leaf op2.
	cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
	// xgetbv reads the extended control register selected by index.
	xgetbv func(index uint32) (eax, edx uint32)
	// rdtscpAsm runs RDTSCP and returns the raw register contents.
	rdtscpAsm func() (eax, ebx, ecx, edx uint32)
)
// cpu contains information about the CPU as detected on startup,
// or when detect was last called.
//
// Use this as the primary entry point to your data.
var cpu cpuInfo
// init installs the CPU query hooks via initCPU and then runs detect so
// that cpu is populated during package initialisation.
func init() {
initCPU() // must come first: the detection helpers call through the hooks it assigns
detect()
}
// Detect will re-detect current CPU info.
// This will replace the content of the exported CPU variable.
//
// Unless you expect the CPU to change while you are running your program
// you should not need to call this function.
// If you call this, you must ensure that no other goroutine is accessing the
// exported CPU variable.
func detect() {
cpu.maxFunc = maxFunctionID()
cpu.maxExFunc = maxExtendedFunction()
cpu.brandname = brandName()
cpu.cacheline = cacheLine()
cpu.family, cpu.model = familyModel()
cpu.features = support()
cpu.sgx = hasSGX(cpu.features&sgx != 0)
cpu.threadspercore = threadsPerCore()
cpu.logicalcores = logicalCores()
cpu.physicalcores = physicalCores()
cpu.vendorid = vendorID()
cpu.cacheSize()
}
// Generated here: http://play.golang.org/p/BxFH2Gdc0G
// cmov reports whether the detected feature set includes the CMOV
// instructions.
func (c cpuInfo) cmov() bool {
	masked := c.features & cmov
	return masked != 0
}
// amd3dnow reports whether the detected feature set includes the
// AMD 3DNOW! instructions.
func (c cpuInfo) amd3dnow() bool {
	masked := c.features & amd3dnow
	return masked != 0
}
// amd3dnowext reports whether the detected feature set includes the
// AMD 3DNOW! Extended instructions.
func (c cpuInfo) amd3dnowext() bool {
	masked := c.features & amd3dnowext
	return masked != 0
}
// mmx reports whether the detected feature set includes the MMX
// instructions.
func (c cpuInfo) mmx() bool {
	masked := c.features & mmx
	return masked != 0
}
// mmxext reports whether the detected feature set includes the MMXEXT
// instructions (SSE integer functions or AMD MMX ext).
func (c cpuInfo) mmxext() bool {
	masked := c.features & mmxext
	return masked != 0
}
// sse reports whether the detected feature set includes the SSE
// instructions.
func (c cpuInfo) sse() bool {
	masked := c.features & sse
	return masked != 0
}
// sse2 reports whether the detected feature set includes the SSE2
// instructions.
func (c cpuInfo) sse2() bool {
	masked := c.features & sse2
	return masked != 0
}
// sse3 reports whether the detected feature set includes the SSE3
// instructions.
func (c cpuInfo) sse3() bool {
	masked := c.features & sse3
	return masked != 0
}
// ssse3 reports whether the detected feature set includes the SSSE3
// instructions.
func (c cpuInfo) ssse3() bool {
	masked := c.features & ssse3
	return masked != 0
}
// sse4 reports whether the detected feature set includes the SSE4
// (also called SSE4.1) instructions.
func (c cpuInfo) sse4() bool {
	masked := c.features & sse4
	return masked != 0
}
// sse42 reports whether the detected feature set includes the SSE4.2
// instructions.
func (c cpuInfo) sse42() bool {
	masked := c.features & sse42
	return masked != 0
}
// avx reports whether the detected feature set includes AVX, which
// covers both the instructions themselves and operating system support
// for them.
func (c cpuInfo) avx() bool {
	masked := c.features & avx
	return masked != 0
}
// avx2 reports whether the detected feature set includes the AVX2
// instructions.
func (c cpuInfo) avx2() bool {
	masked := c.features & avx2
	return masked != 0
}
// fma3 reports whether the detected feature set includes the FMA3
// instructions.
func (c cpuInfo) fma3() bool {
	masked := c.features & fma3
	return masked != 0
}
// fma4 reports whether the detected feature set includes the FMA4
// instructions.
func (c cpuInfo) fma4() bool {
	masked := c.features & fma4
	return masked != 0
}
// xop reports whether the detected feature set includes the XOP
// instructions.
func (c cpuInfo) xop() bool {
	masked := c.features & xop
	return masked != 0
}
// f16c reports whether the detected feature set includes the F16C
// (half-precision floating-point conversion) instructions.
func (c cpuInfo) f16c() bool {
	masked := c.features & f16c
	return masked != 0
}
// bmi1 reports whether the detected feature set includes the BMI1
// (Bit Manipulation Instruction Set 1) instructions.
func (c cpuInfo) bmi1() bool {
	masked := c.features & bmi1
	return masked != 0
}
// bmi2 reports whether the detected feature set includes the BMI2
// (Bit Manipulation Instruction Set 2) instructions.
func (c cpuInfo) bmi2() bool {
	masked := c.features & bmi2
	return masked != 0
}
// tbm reports whether the detected feature set includes the TBM
// (AMD Trailing Bit Manipulation) instructions.
func (c cpuInfo) tbm() bool {
	masked := c.features & tbm
	return masked != 0
}
// lzcnt reports whether the detected feature set includes the LZCNT
// instruction.
func (c cpuInfo) lzcnt() bool {
	masked := c.features & lzcnt
	return masked != 0
}
// popcnt reports whether the detected feature set includes the POPCNT
// instruction.
func (c cpuInfo) popcnt() bool {
	masked := c.features & popcnt
	return masked != 0
}
// htt reports whether the processor was detected as having
// Hyperthreading enabled.
func (c cpuInfo) htt() bool {
	masked := c.features & htt
	return masked != 0
}
// sse2slow reports whether SSE2 was flagged as potentially slow on this
// processor.
func (c cpuInfo) sse2slow() bool {
	masked := c.features & sse2slow
	return masked != 0
}
// sse3slow reports whether SSE3 was flagged as potentially slow on this
// processor.
func (c cpuInfo) sse3slow() bool {
	masked := c.features & sse3slow
	return masked != 0
}
// aesni reports whether the detected feature set includes the AES-NI
// (Advanced Encryption Standard New Instructions) instructions.
func (c cpuInfo) aesni() bool {
	masked := c.features & aesni
	return masked != 0
}
// clmul reports whether the detected feature set includes the CLMUL
// (Carry-less Multiplication) instructions.
func (c cpuInfo) clmul() bool {
	masked := c.features & clmul
	return masked != 0
}
// nx reports whether the detected feature set includes the NX
// (No-Execute) bit.
func (c cpuInfo) nx() bool {
	masked := c.features & nx
	return masked != 0
}
// sse4a reports whether the detected feature set includes the AMD
// Barcelona microarchitecture SSE4a instructions.
func (c cpuInfo) sse4a() bool {
	masked := c.features & sse4a
	return masked != 0
}
// hle reports whether the detected feature set includes Hardware Lock
// Elision.
func (c cpuInfo) hle() bool {
	masked := c.features & hle
	return masked != 0
}
// rtm reports whether the detected feature set includes Restricted
// Transactional Memory.
func (c cpuInfo) rtm() bool {
	masked := c.features & rtm
	return masked != 0
}
// rdrand reports whether the RDRAND instruction was detected as
// available.
func (c cpuInfo) rdrand() bool {
	masked := c.features & rdrand
	return masked != 0
}
// rdseed reports whether the RDSEED instruction was detected as
// available.
func (c cpuInfo) rdseed() bool {
	masked := c.features & rdseed
	return masked != 0
}
// adx reports whether the detected feature set includes Intel ADX
// (Multi-Precision Add-Carry Instruction Extensions).
func (c cpuInfo) adx() bool {
	masked := c.features & adx
	return masked != 0
}
// sha reports whether the detected feature set includes the Intel SHA
// Extensions.
func (c cpuInfo) sha() bool {
	masked := c.features & sha
	return masked != 0
}
// avx512f reports whether the detected feature set includes AVX-512
// Foundation.
func (c cpuInfo) avx512f() bool {
	masked := c.features & avx512f
	return masked != 0
}
// avx512dq reports whether the detected feature set includes the
// AVX-512 Doubleword and Quadword Instructions.
func (c cpuInfo) avx512dq() bool {
	masked := c.features & avx512dq
	return masked != 0
}
// avx512ifma reports whether the detected feature set includes the
// AVX-512 Integer Fused Multiply-Add Instructions.
func (c cpuInfo) avx512ifma() bool {
	masked := c.features & avx512ifma
	return masked != 0
}
// avx512pf reports whether the detected feature set includes the
// AVX-512 Prefetch Instructions.
func (c cpuInfo) avx512pf() bool {
	masked := c.features & avx512pf
	return masked != 0
}
// avx512er reports whether the detected feature set includes the
// AVX-512 Exponential and Reciprocal Instructions.
func (c cpuInfo) avx512er() bool {
	masked := c.features & avx512er
	return masked != 0
}
// avx512cd reports whether the detected feature set includes the
// AVX-512 Conflict Detection Instructions.
func (c cpuInfo) avx512cd() bool {
	masked := c.features & avx512cd
	return masked != 0
}
// avx512bw reports whether the detected feature set includes the
// AVX-512 Byte and Word Instructions.
func (c cpuInfo) avx512bw() bool {
	masked := c.features & avx512bw
	return masked != 0
}
// avx512vl reports whether the detected feature set includes the
// AVX-512 Vector Length Extensions.
func (c cpuInfo) avx512vl() bool {
	masked := c.features & avx512vl
	return masked != 0
}
// avx512vbmi reports whether the detected feature set includes the
// AVX-512 Vector Bit Manipulation Instructions.
func (c cpuInfo) avx512vbmi() bool {
	masked := c.features & avx512vbmi
	return masked != 0
}
// mpx reports whether the detected feature set includes Intel MPX
// (Memory Protection Extensions).
func (c cpuInfo) mpx() bool {
	masked := c.features & mpx
	return masked != 0
}
// erms reports whether the detected feature set includes Enhanced
// REP MOVSB/STOSB.
func (c cpuInfo) erms() bool {
	masked := c.features & erms
	return masked != 0
}
// rdtscp reports whether the RDTSCP instruction was detected as
// available.
func (c cpuInfo) rdtscp() bool {
	masked := c.features & rdtscp
	return masked != 0
}
// cx16 reports whether the CMPXCHG16B instruction was detected as
// available.
func (c cpuInfo) cx16() bool {
	masked := c.features & cx16
	return masked != 0
}
// tsx reports whether Intel TSX is available. TSX detection is split into
// HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory),
// so this returns true only when both of those flags are set.
func (c cpuInfo) tsx() bool {
	// Both halves are required; MPX is unrelated to TSX and must not be
	// part of this mask.
	const both = hle | rtm
	return c.features&both == both
}
// atom reports whether the processor was detected as an Atom.
func (c cpuInfo) atom() bool {
	masked := c.features & atom
	return masked != 0
}
// intel reports whether the detected vendor ID is Intel.
func (c cpuInfo) intel() bool {
	vend := c.vendorid
	return vend == intel
}
// amd reports whether the detected vendor ID is AMD.
func (c cpuInfo) amd() bool {
	vend := c.vendorid
	return vend == amd
}
// transmeta reports whether the detected vendor ID is Transmeta.
func (c cpuInfo) transmeta() bool {
	vend := c.vendorid
	return vend == transmeta
}
// nsc reports whether the detected vendor ID is National Semiconductor.
func (c cpuInfo) nsc() bool {
	vend := c.vendorid
	return vend == nsc
}
// via reports whether the detected vendor ID is VIA.
func (c cpuInfo) via() bool {
	vend := c.vendorid
	return vend == via
}
// rtcounter returns the 64-bit time-stamp counter read with the RDTSCP
// instruction. It returns 0 when the rdtscp feature flag is not set.
func (c cpuInfo) rtcounter() uint64 {
	if c.rdtscp() {
		// The first result is the low 32 bits, the fourth the high 32 bits.
		lo, _, _, hi := rdtscpAsm()
		return uint64(hi)<<32 | uint64(lo)
	}
	return 0
}
// ia32tscaux returns the IA32_TSC_AUX part of the RDTSCP result.
// The value is OS dependent, but on Linux it contains information about
// the current cpu/core the code is running on.
// It returns 0 when the rdtscp feature flag is not set.
func (c cpuInfo) ia32tscaux() uint32 {
	if c.rdtscp() {
		_, _, aux, _ := rdtscpAsm()
		return aux
	}
	return 0
}
// logicalcpu returns the logical CPU the code is currently executing on,
// taken from the top byte of EBX of CPUID leaf 1.
// The value is likely to change when the OS re-schedules the running
// thread to another CPU.
// It returns -1 when leaf 1 is not available.
func (c cpuInfo) logicalcpu() int {
	if c.maxFunc >= 1 {
		_, ebx, _, _ := cpuid(1)
		return int(ebx >> 24)
	}
	return -1
}
// vm reports whether the vendor ID is one of the known hypervisor IDs
// (msvm, kvm, vmware, xenhvm), suggesting the code runs in a virtual
// machine. This is only a hint and will very likely have many false
// negatives.
func (c cpuInfo) vm() bool {
	v := c.vendorid
	return v == msvm || v == kvm || v == vmware || v == xenhvm
}
// flags contains detected cpu features and characteristics,
// stored as a 64-bit set.
type flags uint64
// String returns the feature names produced by strings, joined with
// commas.
func (f flags) String() string {
	names := f.strings()
	return strings.Join(names, ",")
}
// strings returns the names of the features set in f, ordered from the
// lowest bit to the highest. Names are looked up in flagNames; a set bit
// without an entry there contributes an empty string.
func (f flags) strings() []string {
	r := make([]string, 0, 20)
	for i := uint(0); i < 64; i++ {
		key := flags(1 << i)
		// Test the receiver itself rather than re-running detection, so
		// the result describes f and not whatever the current CPU reports.
		if f&key != 0 {
			r = append(r, flagNames[key])
		}
	}
	return r
}
// maxExtendedFunction returns EAX of CPUID leaf 0x80000000, which the
// other helpers compare against the extended leaf they want to query.
func maxExtendedFunction() uint32 {
	highest, _, _, _ := cpuid(0x80000000)
	return highest
}
// maxFunctionID returns EAX of CPUID leaf 0, which the other helpers
// compare against the standard leaf they want to query.
func maxFunctionID() uint32 {
	highest, _, _, _ := cpuid(0)
	return highest
}
// brandName returns the brand string assembled from CPUID leaves
// 0x80000002 through 0x80000004 with surrounding spaces trimmed, or
// "unknown" when those leaves are not available.
func brandName() string {
	if maxExtendedFunction() < 0x80000004 {
		return "unknown"
	}
	regs := make([]uint32, 0, 48)
	for leaf := uint32(0x80000002); leaf < 0x80000005; leaf++ {
		a, b, c, d := cpuid(leaf)
		regs = append(regs, a, b, c, d)
	}
	return strings.Trim(string(valAsString(regs...)), " ")
}
// threadsPerCore returns the number of threads per physical core.
//
// The result is always at least 1: it is 1 for non-Intel vendors, when
// the required CPUID leaves are missing, and whenever the computed value
// would otherwise be zero. Callers divide by this value, so it must never
// be 0.
func threadsPerCore() int {
	mfi := maxFunctionID()
	if mfi < 0x4 || vendorID() != intel {
		return 1
	}

	if mfi < 0xb {
		// Legacy path: derive the ratio from leaf 1 and leaf 4.
		_, b, _, d := cpuid(1)
		if d&(1<<28) == 0 {
			return 1
		}
		// logical holds the logical core count from leaf 1.
		logical := (b >> 16) & 255
		if logical <= 1 {
			return 1
		}
		a4, _, _, _ := cpuid(4)
		// Physical core count from leaf 4; always in the range 1..64.
		cores := (a4 >> 26) + 1
		// Integer division yields 0 when the core count exceeds the
		// logical count; report 1 in that case instead of 0.
		if tpc := int(logical) / int(cores); tpc > 0 {
			return tpc
		}
		return 1
	}

	_, b, _, _ := cpuidex(0xb, 0)
	if n := int(b & 0xffff); n != 0 {
		return n
	}
	return 1
}
// logicalCores returns the logical core count reported by CPUID, or 0
// when the vendor is neither Intel nor AMD or the needed leaf is missing.
func logicalCores() int {
	mfi := maxFunctionID()
	vend := vendorID()

	if vend == amd {
		_, ebx, _, _ := cpuid(1)
		return int((ebx >> 16) & 0xff)
	}
	if vend != intel {
		return 0
	}

	if mfi >= 0xb {
		_, ebx, _, _ := cpuidex(0xb, 1)
		return int(ebx & 0xffff)
	}
	// Older Intel processors: fall back to leaf 1.
	if mfi < 1 {
		return 0
	}
	// CPUID.1:EBX[23:16] represents the maximum number of addressable IDs
	// (initial APIC ID) that can be assigned to logical processors in a
	// physical package. The value may differ from the number of logical
	// processors actually present in the package.
	_, ebx, _, _ := cpuid(1)
	return int((ebx >> 16) & 0xff)
}
// familyModel returns the CPU family and model numbers decoded from EAX
// of CPUID leaf 1, or 0, 0 when that leaf is not available.
func familyModel() (int, int) {
	if maxFunctionID() < 0x1 {
		return 0, 0
	}
	eax, _, _, _ := cpuid(1)

	baseFamily := (eax >> 8) & 0xf
	extFamily := (eax >> 20) & 0xff
	baseModel := (eax >> 4) & 0xf
	// Bits 19:16 are kept in place as the upper nibble of the model.
	extModel := (eax >> 12) & 0xf0

	return int(baseFamily + extFamily), int(baseModel + extModel)
}
// physicalCores returns the number of physical cores, or 0 when it cannot
// be determined. On Intel it is logicalCores divided by threadsPerCore;
// on AMD it is read from CPUID leaf 0x80000008 when that leaf exists.
func physicalCores() int {
	vend := vendorID()
	if vend == intel {
		return logicalCores() / threadsPerCore()
	}
	if vend == amd && maxExtendedFunction() >= 0x80000008 {
		_, _, ecx, _ := cpuid(0x80000008)
		return int(ecx&0xff) + 1
	}
	return 0
}
// vendorMapping maps the 12-character CPUID vendor string to a vendor ID.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]vendor{
"AMDisbetter!": amd,
"AuthenticAMD": amd,
"CentaurHauls": via,
"GenuineIntel": intel,
"TransmetaCPU": transmeta,
"GenuineTMx86": transmeta,
"Geode by NSC": nsc,
"VIA VIA VIA ": via,
"KVMKVMKVMKVM": kvm,
"Microsoft Hv": msvm,
"VMwareVMware": vmware,
"XenVMMXenVMM": xenhvm,
}
// vendorID reads the vendor string from CPUID leaf 0 and maps it through
// vendorMapping. Unrecognised strings yield other.
func vendorID() vendor {
	// The string is spread over EBX, EDX, ECX, in that order.
	_, ebx, ecx, edx := cpuid(0)
	name := string(valAsString(ebx, edx, ecx))
	if vend, known := vendorMapping[name]; known {
		return vend
	}
	return other
}
// cacheLine returns the cache line size in bytes, or 0 when it cannot be
// determined. It prefers the CLFLUSH size from CPUID leaf 1 and falls
// back to leaf 0x80000006 when that value is zero.
func cacheLine() int {
	if maxFunctionID() < 0x1 {
		return 0
	}
	_, ebx, _, _ := cpuid(1)
	// EBX[15:8] shifted right by 5, i.e. the field value times 8.
	if size := (ebx & 0xff00) >> 5; size != 0 {
		return int(size)
	}
	if maxExtendedFunction() < 0x80000006 {
		return 0
	}
	_, _, ecx, _ := cpuid(0x80000006)
	// TODO: Read from Cache and TLB Information
	return int(ecx & 0xff)
}
// cacheSize fills c.cache with the detected cache sizes in bytes.
// Every level is first reset to -1 and stays -1 when it cannot be
// determined. Only Intel and AMD vendor IDs are handled; for any other
// vendor all levels remain -1.
func (c *cpuInfo) cacheSize() {
	c.cache.l1d = -1
	c.cache.l1i = -1
	c.cache.l2 = -1
	c.cache.l3 = -1

	switch vendorID() {
	case intel:
		if maxFunctionID() < 4 {
			return
		}
		// Walk the sub-leaves of CPUID leaf 4 until a zero cache type
		// marks the end of the list.
		for i := uint32(0); ; i++ {
			eax, ebx, ecx, _ := cpuidex(4, i)
			cacheType := eax & 15
			if cacheType == 0 {
				return
			}
			cacheLevel := (eax >> 5) & 7
			coherency := int(ebx&0xfff) + 1
			partitions := int((ebx>>12)&0x3ff) + 1
			associativity := int((ebx>>22)&0x3ff) + 1
			sets := int(ecx) + 1
			size := associativity * partitions * coherency * sets

			switch cacheLevel {
			case 1:
				switch cacheType {
				case 1:
					// 1 = Data Cache
					c.cache.l1d = size
				case 2:
					// 2 = Instruction Cache
					c.cache.l1i = size
				default:
					// Any other type: use this size for whichever of
					// the two L1 values has not been set yet.
					if c.cache.l1d < 0 {
						c.cache.l1d = size
					}
					if c.cache.l1i < 0 {
						c.cache.l1i = size
					}
				}
			case 2:
				c.cache.l2 = size
			case 3:
				c.cache.l3 = size
			}
		}
	case amd:
		// Untested.
		if maxExtendedFunction() < 0x80000005 {
			return
		}
		// The raw fields are multiplied by 1024 to give bytes.
		_, _, ecx, edx := cpuid(0x80000005)
		c.cache.l1d = int(((ecx >> 24) & 0xFF) * 1024)
		c.cache.l1i = int(((edx >> 24) & 0xFF) * 1024)
		if maxExtendedFunction() < 0x80000006 {
			return
		}
		_, _, ecx, _ = cpuid(0x80000006)
		c.cache.l2 = int(((ecx >> 16) & 0xFFFF) * 1024)
	}
}
// sgxsupport describes SGX (Software Guard Extensions) support as
// reported by hasSGX.
type sgxsupport struct {
available bool // copy of the flag passed to hasSGX
sgx1supported bool // bit 0 of EAX from CPUID leaf 0x12, sub-leaf 0
sgx2supported bool // bit 1 of EAX from CPUID leaf 0x12, sub-leaf 0
maxenclavesizenot64 int64 // 2 to the power of EDX[7:0] of the same leaf
maxenclavesize64 int64 // 2 to the power of EDX[15:8] of the same leaf
}
// hasSGX returns the SGX details for the CPU. When available is false
// only the zero value is returned and CPUID is not queried; otherwise
// the details are decoded from CPUID leaf 0x12, sub-leaf 0.
func hasSGX(available bool) (rval sgxsupport) {
	if !available {
		return sgxsupport{}
	}
	eax, _, _, edx := cpuidex(0x12, 0)
	return sgxsupport{
		available:     true,
		sgx1supported: eax&0x01 != 0,
		sgx2supported: eax&0x02 != 0,
		// Both sizes are stored as exponents of two.
		maxenclavesizenot64: int64(1) << (edx & 0xFF),
		maxenclavesize64:    int64(1) << ((edx >> 8) & 0xFF),
	}
}
// support queries CPUID (and XGETBV where needed) and returns the set of
// detected feature flags. It returns 0 when CPUID leaf 1 is unavailable.
//
// The flags come from three sources, in this order: leaf 1, leaf 7
// (when available) and extended leaf 0x80000001 (when available).
// Some later checks depend on flags set earlier, e.g. avx2, xop and fma4
// are only set when avx has already been set.
func support() flags {
mfi := maxFunctionID()
vend := vendorID()
if mfi < 0x1 {
return 0
}
rval := uint64(0)
// Leaf 1: basic feature bits in ECX (c) and EDX (d).
_, _, c, d := cpuid(1)
if (d & (1 << 15)) != 0 {
rval |= cmov
}
if (d & (1 << 23)) != 0 {
rval |= mmx
}
// mmxext and sse are both keyed off the same bit (EDX bit 25).
if (d & (1 << 25)) != 0 {
rval |= mmxext
}
if (d & (1 << 25)) != 0 {
rval |= sse
}
if (d & (1 << 26)) != 0 {
rval |= sse2
}
if (c & 1) != 0 {
rval |= sse3
}
if (c & 0x00000200) != 0 {
rval |= ssse3
}
if (c & 0x00080000) != 0 {
rval |= sse4
}
if (c & 0x00100000) != 0 {
rval |= sse42
}
if (c & (1 << 25)) != 0 {
rval |= aesni
}
if (c & (1 << 1)) != 0 {
rval |= clmul
}
if c&(1<<23) != 0 {
rval |= popcnt
}
if c&(1<<30) != 0 {
rval |= rdrand
}
if c&(1<<29) != 0 {
rval |= f16c
}
if c&(1<<13) != 0 {
rval |= cx16
}
// htt is only reported on Intel, and only when more than one thread
// per core is actually detected.
if vend == intel && (d&(1<<28)) != 0 && mfi >= 4 {
if threadsPerCore() > 1 {
rval |= htt
}
}
// Check XGETBV, OXSAVE and AVX bits
if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
// Check for OS support
eax, _ := xgetbv(0)
if (eax & 0x6) == 0x6 {
rval |= avx
if (c & 0x00001000) != 0 {
rval |= fma3
}
}
}
// Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
if mfi >= 7 {
_, ebx, ecx, _ := cpuidex(7, 0)
if (rval&avx) != 0 && (ebx&0x00000020) != 0 {
rval |= avx2
}
// bmi2 is only considered when bmi1 is present.
if (ebx & 0x00000008) != 0 {
rval |= bmi1
if (ebx & 0x00000100) != 0 {
rval |= bmi2
}
}
if ebx&(1<<2) != 0 {
rval |= sgx
}
if ebx&(1<<4) != 0 {
rval |= hle
}
if ebx&(1<<9) != 0 {
rval |= erms
}
if ebx&(1<<11) != 0 {
rval |= rtm
}
if ebx&(1<<14) != 0 {
rval |= mpx
}
if ebx&(1<<18) != 0 {
rval |= rdseed
}
if ebx&(1<<19) != 0 {
rval |= adx
}
if ebx&(1<<29) != 0 {
rval |= sha
}
// Only detect AVX-512 features if XGETBV is supported
// (c here is still ECX of leaf 1).
if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
// Check for OS support
eax, _ := xgetbv(0)
// Verify that XCR0[7:5] = '111b' (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
// ZMM16-ZMM31 state are enabled by OS)
// and that XCR0[2:1] = '11b' (XMM state and YMM state are enabled by OS).
if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
if ebx&(1<<16) != 0 {
rval |= avx512f
}
if ebx&(1<<17) != 0 {
rval |= avx512dq
}
if ebx&(1<<21) != 0 {
rval |= avx512ifma
}
if ebx&(1<<26) != 0 {
rval |= avx512pf
}
if ebx&(1<<27) != 0 {
rval |= avx512er
}
if ebx&(1<<28) != 0 {
rval |= avx512cd
}
if ebx&(1<<30) != 0 {
rval |= avx512bw
}
if ebx&(1<<31) != 0 {
rval |= avx512vl
}
// ecx
if ecx&(1<<1) != 0 {
rval |= avx512vbmi
}
}
}
}
// Extended leaf 0x80000001; c and d are shadowed by its ECX and EDX.
if maxExtendedFunction() >= 0x80000001 {
_, _, c, d := cpuid(0x80000001)
// The lzcnt bit also sets popcnt.
if (c & (1 << 5)) != 0 {
rval |= lzcnt
rval |= popcnt
}
if (d & (1 << 31)) != 0 {
rval |= amd3dnow
}
if (d & (1 << 30)) != 0 {
rval |= amd3dnowext
}
if (d & (1 << 23)) != 0 {
rval |= mmx
}
if (d & (1 << 22)) != 0 {
rval |= mmxext
}
if (c & (1 << 6)) != 0 {
rval |= sse4a
}
if d&(1<<20) != 0 {
rval |= nx
}
if d&(1<<27) != 0 {
rval |= rdtscp
}
/* Allow for selectively disabling SSE2 functions on AMD processors
with SSE2 support but not SSE4a. This includes Athlon64, some
Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
than SSE2 often enough to utilize this special-case flag.
AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
so that SSE2 is used unless explicitly disabled by checking
AV_CPU_FLAG_SSE2SLOW. */
if vendorID() != intel &&
rval&sse2 != 0 && (c&0x00000040) == 0 {
rval |= sse2slow
}
/* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
* used unless the OS has AVX support. */
if (rval & avx) != 0 {
if (c & 0x00000800) != 0 {
rval |= xop
}
if (c & 0x00010000) != 0 {
rval |= fma4
}
}
if vendorID() == intel {
family, model := familyModel()
if family == 6 && (model == 9 || model == 13 || model == 14) {
/* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
* 6/14 (core1 "yonah") theoretically support sse2, but it's
* usually slower than mmx. */
if (rval & sse2) != 0 {
rval |= sse2slow
}
if (rval & sse3) != 0 {
rval |= sse3slow
}
}
/* The Atom processor has SSSE3 support, which is useful in many cases,
* but sometimes the SSSE3 version is slower than the SSE2 equivalent
* on the Atom, but is generally faster on other processors supporting
* SSSE3. This flag allows for selectively disabling certain SSSE3
* functions on the Atom. */
if family == 6 && model == 28 {
rval |= atom
}
}
}
return flags(rval)
}
// valAsString unpacks each 32-bit value into four bytes, least
// significant byte first, and returns them concatenated. Decoding stops
// at the first zero byte, which is not included in the result.
func valAsString(values ...uint32) []byte {
	out := make([]byte, 0, 4*len(values))
	for _, word := range values {
		for shift := uint(0); shift < 32; shift += 8 {
			b := byte(word >> shift)
			if b == 0 {
				return out
			}
			out = append(out, b)
		}
	}
	return out
}
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// 386 version: runs CPUID with AX=op and CX cleared, then stores
// AX, BX, CX and DX into the four result slots.
TEXT ·asmCpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// 386 version: runs CPUID with AX=op and CX=op2, then stores
// AX, BX, CX and DX into the four result slots.
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
// 386 version: loads index into CX, executes XGETBV (emitted as raw
// bytes) and stores AX and DX into the two result slots.
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// 386 version: executes RDTSCP (emitted as raw bytes) and stores
// AX, BX, CX and DX into the four result slots.
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build amd64,!gccgo
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// amd64 version: runs CPUID with AX=op and CX cleared, then stores
// AX, BX, CX and DX into the four result slots, which start at offset 8.
TEXT ·asmCpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// amd64 version: runs CPUID with AX=op and CX=op2, then stores
// AX, BX, CX and DX into the four result slots.
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
// amd64 version: loads index into CX, executes XGETBV (emitted as raw
// bytes) and stores AX and DX into the two result slots.
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// amd64 version: executes RDTSCP (emitted as raw bytes) and stores
// AX, BX, CX and DX into the four result slots.
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build 386,!gccgo amd64,!gccgo
package cpuid
// Body-less declarations of the CPU query routines; this file contains no
// Go implementation for them. initCPU assigns them to the package hooks.

// asmCpuid is the routine assigned to the cpuid hook.
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
// asmCpuidex is the routine assigned to the cpuidex hook.
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
// asmXgetbv is the routine assigned to the xgetbv hook.
func asmXgetbv(index uint32) (eax, edx uint32)
// asmRdtscpAsm is the routine assigned to the rdtscpAsm hook.
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
// initCPU points the package-level CPU query hooks at the asm* routines
// declared in this file.
func initCPU() {
	cpuid, cpuidex = asmCpuid, asmCpuidex
	xgetbv, rdtscpAsm = asmXgetbv, asmRdtscpAsm
}
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
// +build !amd64,!386 gccgo
package cpuid
// initCPU installs stand-in CPU query hooks for builds excluded from the
// assembly implementation by the build constraint on this file. Every
// hook ignores its arguments and returns zeros.
func initCPU() {
	cpuid = func(uint32) (uint32, uint32, uint32, uint32) { return 0, 0, 0, 0 }
	cpuidex = func(uint32, uint32) (uint32, uint32, uint32, uint32) { return 0, 0, 0, 0 }
	xgetbv = func(uint32) (uint32, uint32) { return 0, 0 }
	rdtscpAsm = func() (uint32, uint32, uint32, uint32) { return 0, 0, 0, 0 }
}
...@@ -117,6 +117,14 @@ ...@@ -117,6 +117,14 @@
"path": "/basic", "path": "/basic",
"notests": true "notests": true
}, },
{
"importpath": "github.com/klauspost/cpuid",
"repository": "https://github.com/klauspost/cpuid",
"vcs": "git",
"revision": "ae832f27941af41db13bd6d8efd2493e3b22415a",
"branch": "master",
"notests": true
},
{ {
"importpath": "github.com/lucas-clemente/aes12", "importpath": "github.com/lucas-clemente/aes12",
"repository": "https://github.com/lucas-clemente/aes12", "repository": "https://github.com/lucas-clemente/aes12",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment