/**
 * This is a quick change of core.cpuid, to be usable in programs without a runtime.
 *
 * Copyright: Copyright Don Clugston 2007 - 2009.
 *            Copyright Auburn Sounds 2017.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk>
 * Source:    $(DRUNTIMESRC core/_cpuid.d)
 */
module dplug.core.cpuid;

@trusted:
nothrow:
@nogc:

// If optimizing for a particular processor, it is generally better
// to identify based on features rather than model. NOTE: Normally
// it's only worthwhile to optimise for the latest Intel and AMD CPU,
// with a backup for other CPUs.
// Pentium    -- preferPentium1()
// PMMX       --   + mmx()
// PPro       -- default
// PII        --   + mmx()
// PIII       --   + mmx() + sse()
// PentiumM   --   + mmx() + sse() + sse2()
// Pentium4   -- preferPentium4()
// PentiumD   --   + isX86_64()
// Core2      -- default + isX86_64()
// AMD K5     -- preferPentium1()
// AMD K6     --   + mmx()
// AMD K6-II  --   + mmx() + amd3dnow()
// AMD K7     -- preferAthlon()
// AMD K8     --   + sse2()
// AMD K10    --   + isX86_64()
// Cyrix 6x86 -- preferPentium1()
//    6x86MX  --   + mmx()

// Single version identifier covering both the 32-bit and 64-bit x86 inline assemblers.
version(D_InlineAsm_X86)
{
    version = InlineAsm_X86_Any;
}
else version(D_InlineAsm_X86_64)
{
    version = InlineAsm_X86_Any;
}

public:

/// Cache size and behaviour
struct CacheInfo
{
    /// Size of the cache, in kilobytes, per CPU.
    /// For L1 unified (data + code) caches, this size is half the physical size.
    /// (we don't halve it for larger sizes, since normally
    /// data size is much greater than code size for critical loops).
    size_t size;
    /// Number of ways of associativity, eg:
    /// $(UL
    /// $(LI 1 = direct mapped)
    /// $(LI 2 = 2-way set associative)
    /// $(LI 3 = 3-way set associative)
    /// $(LI ubyte.max = fully associative)
    /// )
    ubyte associativity;
    /// Number of bytes read into the cache when a cache miss occurs.
    uint lineSize;
}

public:
    /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.)
    // Note: When we deprecate it, we simply make it private.
    __gshared CacheInfo[5] datacache;

@property
{
    /// The data caches. If there are fewer than 5 physical cache levels,
    /// the remaining levels have their size set to size_t.max/1024 kilobytes
    /// (== entire memory space).
    const(CacheInfo)[5] dataCaches() { return _dataCaches; }

    /// Returns vendor string, for display purposes only.
    /// Do NOT use this to determine features!
    /// Note that some CPUs have programmable vendorIDs.
    string vendor()     {return _vendor;}
    /// Returns processor string, for display purposes only
    string processor()  {return _processor;}

    /// Does it have an x87 FPU on-chip?
    bool x87onChip()    {return _x87onChip;}
    /// Is MMX supported?
    bool mmx()          {return _mmx;}
    /// Is SSE supported?
    bool sse()          {return _sse;}
    /// Is SSE2 supported?
    bool sse2()         {return _sse2;}
    /// Is SSE3 supported?
    bool sse3()         {return _sse3;}
    /// Is SSSE3 supported?
    bool ssse3()        {return _ssse3;}
    /// Is SSE4.1 supported?
    bool sse41()        {return _sse41;}
    /// Is SSE4.2 supported?
    bool sse42()        {return _sse42;}
    /// Is SSE4a supported?
    bool sse4a()        {return _sse4a;}
    /// Is AES supported
    bool aes()          {return _aes;}
    /// Is pclmulqdq supported
    bool hasPclmulqdq() {return _hasPclmulqdq;}
    /// Is rdrand supported
    bool hasRdrand()    {return _hasRdrand;}
    /// Is AVX supported
    bool avx()          {return _avx;}
    /// Is VEX-Encoded AES supported
    bool vaes()         {return _vaes;}
    /// Is vpclmulqdq supported
    bool hasVpclmulqdq(){return _hasVpclmulqdq; }
    /// Is FMA supported
    bool fma()          {return _fma;}
    /// Is FP16C supported
    bool fp16c()        {return _fp16c;}
    /// Is AVX2 supported
    bool avx2()         {return _avx2;}
    /// Is HLE (hardware lock elision) supported
    bool hle()          {return _hle;}
    /// Is RTM (restricted transactional memory) supported
    bool rtm()          {return _rtm;}
    /// Is rdseed supported
    bool hasRdseed()    {return _hasRdseed;}
    /// Is SHA supported
    bool hasSha()       {return _hasSha;}
    /// Is AMD 3DNOW supported?
    bool amd3dnow()     {return _amd3dnow;}
    /// Is AMD 3DNOW Ext supported?
    bool amd3dnowExt()  {return _amd3dnowExt;}
    /// Are AMD extensions to MMX supported?
    bool amdMmx()       {return _amdMmx;}
    /// Is fxsave/fxrstor supported?
    bool hasFxsr()      {return _hasFxsr;}
    /// Is cmov supported?
    bool hasCmov()      {return _hasCmov;}
    /// Is rdtsc supported?
    bool hasRdtsc()     {return _hasRdtsc;}
    /// Is cmpxchg8b supported?
    bool hasCmpxchg8b() {return _hasCmpxchg8b;}
    /// Is cmpxchg16b supported?
    bool hasCmpxchg16b() {return _hasCmpxchg16b;}
    /// Is SYSENTER/SYSEXIT supported?
    bool hasSysEnterSysExit() {return _hasSysEnterSysExit;}
    /// Is 3DNow prefetch supported?
    bool has3dnowPrefetch()   {return _has3dnowPrefetch;}
    /// Are LAHF and SAHF supported in 64-bit mode?
    bool hasLahfSahf()        {return _hasLahfSahf;}
    /// Is POPCNT supported?
    bool hasPopcnt()    {return _hasPopcnt;}
    /// Is LZCNT supported?
    bool hasLzcnt()     {return _hasLzcnt;}
    /// Is this an Intel64 or AMD 64?
    bool isX86_64()     {return _isX86_64;}

    /// Is this an IA64 (Itanium) processor?
    bool isItanium()    { return _isItanium; }

    /// Is hyperthreading supported?
    bool hyperThreading()   { return _hyperThreading; }
    /// Returns number of threads per CPU
    uint threadsPerCPU()    {return _threadsPerCPU;}
    /// Returns number of cores in CPU
    uint coresPerCPU()      {return _coresPerCPU;}

    /// Optimisation hints for assembly code.
    ///
    /// For forward compatibility, the CPU is compared against different
    /// microarchitectures. For 32-bit x86, comparisons are made against
    /// the Intel PPro/PII/PIII/PM family.
    ///
    /// The major 32-bit x86 microarchitecture 'dynasties' have been:
    ///
    /// $(UL
    ///  $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). )
    ///  $(LI AMD Athlon (K7, K8, K10). )
    ///  $(LI Intel NetBurst (Pentium 4, Pentium D). )
    ///  $(LI In-order Pentium (Pentium1, PMMX, Atom) )
    /// )
    ///
    /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta,
    /// Cyrix, Rise) were mostly in-order.
    ///
    /// Some new processors do not fit into the existing categories:
    ///
    /// $(UL
    ///  $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. )
    ///  $(LI Centaur Isiah = VIA Nano (family 6, model F) is an out-of-order core. )
    /// )
    ///
    /// Within each dynasty, the optimisation techniques are largely
    /// identical (eg, use instruction pairing for group 4). Major
    /// instruction set improvements occur within each dynasty.

    /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code?
    bool preferAthlon() { return _preferAthlon; }
    /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code?
    bool preferPentium4() { return _preferPentium4; }
    /// Does this CPU perform better on Pentium I code than Pentium Pro code?
    bool preferPentium1() { return _preferPentium1; }
}

private __gshared
{
    /* Backing storage for the query properties above. The properties are
     * plain functions so that they stay backwards compatible with code that
     * called them with (). These fields are ordinary __gshared variables
     * (not immutable); they are written by initializeCpuid().
     */
    CacheInfo[5] _dataCaches;
    string _vendor;
    string _processor;
    bool _x87onChip;
    bool _mmx;
    bool _sse;
    bool _sse2;
    bool _sse3;
    bool _ssse3;
    bool _sse41;
    bool _sse42;
    bool _sse4a;
    bool _aes;
    bool _hasPclmulqdq;
    bool _hasRdrand;
    bool _avx;
    bool _vaes;
    bool _hasVpclmulqdq;
    bool _fma;
    bool _fp16c;
    bool _avx2;
    bool _hle;
    bool _rtm;
    bool _hasRdseed;
    bool _hasSha;
    bool _amd3dnow;
    bool _amd3dnowExt;
    bool _amdMmx;
    bool _hasFxsr;
    bool _hasCmov;
    bool _hasRdtsc;
    bool _hasCmpxchg8b;
    bool _hasCmpxchg16b;
    bool _hasSysEnterSysExit;
    bool _has3dnowPrefetch;
    bool _hasLahfSahf;
    bool _hasPopcnt;
    bool _hasLzcnt;
    bool _isX86_64;
    bool _isItanium;
    bool _hyperThreading;
    uint _threadsPerCPU;
    uint _coresPerCPU;
    bool _preferAthlon;
    bool _preferPentium4;
    bool _preferPentium1;
}

__gshared:
    // All these values are set only once, and never subsequently modified.
public:
    /// $(RED Warning: This field will be turned into a property in a future release.)
    ///
    /// Processor type (vendor-dependent).
    /// This should be visible ONLY for display purposes.
    uint stepping, model, family;
    /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.)
    uint numCacheLevels = 1;
    /// The number of cache levels in the CPU.
    @property uint cacheLevels() { return numCacheLevels; }
private:

// Raw CPUID results gathered by cpuidX86() and decoded by initializeCpuid().
struct CpuFeatures
{
    bool probablyIntel; // true = _probably_ an Intel processor, might be faking
    bool probablyAMD; // true = _probably_ an AMD processor
    string processorName; // slice of processorNameBuffer, or "Unknown CPU"
    char [12] vendorID; // CPUID leaf 0: EBX, EDX, ECX in that order
    char [48] processorNameBuffer; // CPUID leaves 0x8000_0002..0x8000_0004
    uint features = 0;     // mmx, sse, sse2, hyperthreading, etc (CPUID leaf 1, EDX)
    uint miscfeatures = 0; // sse3, etc. (CPUID leaf 1, ECX)
    uint extfeatures = 0;  // HLE, AVX2, RTM, etc. (CPUID leaf 7 sub-leaf 0, EBX)
    uint amdfeatures = 0;  // 3DNow!, mmxext, etc (CPUID leaf 0x8000_0001, EDX)
    uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc (CPUID leaf 0x8000_0001, ECX)
    ulong xfeatures = 0;   // XFEATURES_ENABLED_MASK (EDX:EAX from xgetbv with ECX = 0)
    uint maxCores = 1;
    uint maxThreads = 1;
}

// The single module-wide instance filled in by cpuidX86().
__gshared CpuFeatures cpuFeatures;

/* Hide from the optimizer where cf (a register) is coming from, so that
 * cf doesn't get "optimized away". The idea is to reference
 * the global data through cf so not so many fixups are inserted
 * into the executable image.
 */
CpuFeatures* getCpuFeatures() @nogc nothrow
{
    pragma(inline, false); // must stay out of line for the trick above to work
    return &cpuFeatures;
}

// Note that this may indicate multi-core rather than hyperthreading.
@property bool hyperThreadingBit()    { return (cpuFeatures.features&HTT_BIT)!=0;}

// feature flags CPUID1_EDX
enum : uint
{
    FPU_BIT = 1,
    TIMESTAMP_BIT = 1<<4, // rdtsc
    MDSR_BIT = 1<<5,      // RDMSR/WRMSR
    CMPXCHG8B_BIT = 1<<8,
    SYSENTERSYSEXIT_BIT = 1<<11,
    CMOV_BIT = 1<<15,
    MMX_BIT = 1<<23,
    FXSR_BIT = 1<<24,
    SSE_BIT = 1<<25,
    SSE2_BIT = 1<<26,
    HTT_BIT = 1<<28,
    IA64_BIT = 1<<30
}
// feature flags misc CPUID1_ECX
enum : uint
{
    SSE3_BIT = 1,
    PCLMULQDQ_BIT = 1<<1, // from AVX
    MWAIT_BIT = 1<<3,
    SSSE3_BIT = 1<<9,
    FMA_BIT = 1<<12,     // from AVX
    CMPXCHG16B_BIT = 1<<13,
    SSE41_BIT = 1<<19,
    SSE42_BIT = 1<<20,
    POPCNT_BIT = 1<<23,
    AES_BIT = 1<<25, // AES instructions from AVX
    OSXSAVE_BIT = 1<<27, // Used for AVX
    AVX_BIT = 1<<28,
    FP16C_BIT = 1<<29,
    RDRAND_BIT = 1<<30,
}
// Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX.
enum : uint
{
    FSGSBASE_BIT = 1 << 0,
    BMI1_BIT = 1 << 3,
    HLE_BIT = 1 << 4,
    AVX2_BIT = 1 << 5,
    SMEP_BIT = 1 << 7,
    BMI2_BIT = 1 << 8,
    ERMS_BIT = 1 << 9,
    INVPCID_BIT = 1 << 10,
    RTM_BIT = 1 << 11,
    RDSEED_BIT = 1 << 18,
    SHA_BIT = 1 << 29,
}
// feature flags XFEATURES_ENABLED_MASK
enum : ulong
{
    XF_FP_BIT  = 0x1,
    XF_SSE_BIT = 0x2,
    XF_YMM_BIT = 0x4,
}
// AMD feature flags CPUID80000001_EDX
enum : uint
{
    AMD_MMX_BIT = 1<<22,
//  FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions.
    FFXSR_BIT = 1<<25,
    PAGE1GB_BIT = 1<<26, // support for 1GB pages
    RDTSCP_BIT = 1<<27,
    AMD64_BIT = 1<<29,
    AMD_3DNOW_EXT_BIT = 1<<30,
    AMD_3DNOW_BIT = 1<<31
}
// AMD misc feature flags CPUID80000001_ECX
enum : uint
{
    LAHFSAHF_BIT = 1,
    LZCNT_BIT = 1<<5,
    SSE4A_BIT = 1<<6,
    AMD_3DNOW_PREFETCH_BIT = 1<<8,
}


version(InlineAsm_X86_Any) {
// Note that this code will also work for Itanium in x86 mode.

// Highest supported standard / extended CPUID leaf, as reported by
// CPUID leaf 0 and leaf 0x8000_0000. Set by cpuidX86().
__gshared uint max_cpuid, max_extended_cpuid;

// CPUID2: "cache and tlb information"
// Decodes the one-byte cache descriptors returned by CPUID leaf 2 and stores
// the data-cache ones into datacache[0..3].
void getcacheinfoCPUID2()
{
    // We are only interested in the data caches
    void decipherCpuid2(ubyte x) @nogc nothrow {
        if (x==0) return;
        // Values from http://www.sandpile.org/ia32/cpuid.htm.
        // Includes Itanium and non-Intel CPUs.
        //
        static immutable ubyte [63] ids = [
            // level 1 data cache (9 entries)
            0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68,
            // level 2 cache (29 entries)
            0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F,
            0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E,
            0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81,
            // level 3 cache
            0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D,

            0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE,
            0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC
        ];
        static immutable uint [63] sizes = [
            8, 16, 16, 64, 16, 24, 8, 16, 32,
            128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512,
            256, 512, 1024, 2048, 512, 1024, 4096, 6*1024,
            128, 192, 128, 256, 384, 512, 3072, 512, 128,
            512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024,

            512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024,
            2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024
        ];
        // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative
        static immutable ubyte [63] ways = [
            2, 4, 4, 8, 8, 6, 4, 4, 4,
            4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2,
            8, 8, 8, 8, 4, 8, 16, 24,
            4, 6, 2, 4, 6, 4, 12, 8, 8,
            4, 8, 8, 8, 4, 8, 12, 16, 12, 16,
            4, 4, 4, 8, 8, 8, 12, 12, 12,
            16, 16, 16, 24, 24, 24
        ];
        // Index of the first L2 / first L3 descriptor in the tables above.
        // These must match the table layout: 9 L1 entries, then 29 L2 entries.
        // (The previous values 8 and 37 misfiled 0x68 as L2 and 0x81 as L3.)
        enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 }
        for (size_t i=0; i< ids.length; ++i) {
            if (x==ids[i]) {
                int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2;
                // On family 0xF model 6, descriptor 0x49 describes an L3 cache.
                if (x==0x49 && family==0xF && model==0x6) level=2;
                datacache[level].size=sizes[i];
                datacache[level].associativity=ways[i];
                // level is an index (0 = L1 .. 2 = L3), so the L3 test is
                // "level == 2"; the former "level == 3" could never be true.
                if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80)
                                   || x==0x86 || x==0x87
                                   || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){
                    datacache[level].lineSize = 64;
                } else datacache[level].lineSize = 32;
                break; // descriptor ids are unique
            }
        }
    }

    uint[4] a;
    bool firstTime = true;
    // On a multi-core system, this could theoretically fail, but it's only used
    // for old single-core CPUs.
    uint numinfos = 1;
    do {
        asm pure nothrow @nogc {
            mov EAX, 2;
            cpuid;
            mov a, EAX;
            mov a+4, EBX;
            mov a+8, ECX;
            mov a+12, EDX;
        }
        if (firstTime) {
            if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) {
                // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080.
                // These are NOT standard Intel values
                // (TLB = 32 entry, 4 way associative, 4K pages)
                // (L1 cache = 16K, 4way, linesize16)
                datacache[0].size=8;
                datacache[0].associativity=4;
                datacache[0].lineSize=16;
                return;
            }
            // lsb of a is how many times to loop.
            numinfos = a[0] & 0xFF;
            // A reported count of 0 would make "--numinfos" below wrap around
            // and spin for ~4 billion iterations; treat it as a single pass.
            if (numinfos == 0) numinfos = 1;
            // and otherwise it should be ignored
            a[0] &= 0xFFFF_FF00;
            firstTime = false;
        }
        for (int c=0; c<4;++c) {
            // high bit set == no info.
            if (a[c] & 0x8000_0000) continue;
            decipherCpuid2(cast(ubyte)(a[c] & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF));
            decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF));
        }
    } while (--numinfos);
}

// CPUID4: "Deterministic cache parameters" leaf
// Enumerates sub-leaves until a "no more caches" entry is returned, recording
// data and unified caches in datacache[] and updating cpuFeatures.maxCores.
void getcacheinfoCPUID4()
{
    int cachenum = 0;
    for(;;) {
        uint a, b, number_of_sets;
        asm pure nothrow @nogc {
            mov EAX, 4;
            mov ECX, cachenum;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov number_of_sets, ECX;
        }
        ++cachenum;
        // EAX[4:0] = cache type: 0 none, 1 data, 2 instruction, 3 unified.
        immutable uint cacheType = a & 0x1F;
        if (cacheType==0) break; // no more caches
        uint numthreads = ((a>>14) & 0xFFF)  + 1;
        uint numcores = ((a>>26) & 0x3F) + 1;
        if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        if (cacheType!=1 && cacheType!=3) continue; // we only want data & unified caches

        ++number_of_sets;
        // A level field of 0 wraps to 255 here and is rejected just below.
        ubyte level = cast(ubyte)(((a>>5)&7)-1);
        // ">=" (not ">"): level == datacache.length is already out of bounds.
        if (level >= datacache.length) continue; // ignore deep caches
        datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1);
        datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size
        uint line_partitions = ((b >> 12)& 0x3FF) + 1;
        // Size = number of sets * associativity * cachelinesize * linepartitions
        // and must convert to Kb, also dividing by the number of hyperthreads using this cache.
        ulong sz = (datacache[level].associativity< ubyte.max)? number_of_sets *
            datacache[level].associativity : number_of_sets;
        datacache[level].size = cast(uint)(
                (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024));
        if (level == 0 && cacheType==3) {
            // Halve the size for unified L1 caches
            datacache[level].size/=2;
        }
    }
}

// CPUID8000_0005 & 6
// Reads L1 data-cache info from leaf 0x8000_0005 and, when available, L2/L3
// info from leaf 0x8000_0006 (the AMD method, also used by other vendors).
void getAMDcacheinfo()
{
    uint c5, c6, d6;
    asm pure nothrow @nogc {
        mov EAX, 0x8000_0005; // L1 cache
        cpuid;
        // EAX has L1_TLB_4M.
        // EBX has L1_TLB_4K
        // EDX has L1 instruction cache
        mov c5, ECX;
    }

    datacache[0].size = ( (c5>>24) & 0xFF);
    datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF);
    datacache[0].lineSize = c5 & 0xFF;

    if (max_extended_cpuid >= 0x8000_0006) {
        // AMD K6-III or K6-2+ or later.
        // Kept as uint: the CPU reports (cores - 1) in a byte, and adding 1 to
        // 255 in a ubyte would wrap to 0 and divide by zero below.
        uint numcores = 1;
        if (max_extended_cpuid >=0x8000_0008) {
            ubyte coresMinusOne = 0;
            asm pure nothrow @nogc {
                mov EAX, 0x8000_0008;
                cpuid;
                mov coresMinusOne, CL;
            }
            numcores = coresMinusOne + 1u;
            if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores;
        }
        asm pure nothrow @nogc {
            mov EAX, 0x8000_0006; // L2/L3 cache
            cpuid;
            mov c6, ECX; // L2 cache info
            mov d6, EDX; // L3 cache info
        }

        // Maps the 4-bit associativity code to a number of ways.
        static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ];
        datacache[1].size = (c6>>16) & 0xFFFF;
        datacache[1].associativity = assocmap[(c6>>12)&0xF];
        datacache[1].lineSize = c6 & 0xFF;

        // The L3 cache value is TOTAL, not per core.
        datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1.
        datacache[2].associativity = assocmap[(d6>>12)&0xF];
        datacache[2].lineSize = d6 & 0xFF;
    }
}

// For Intel CoreI7 and later, use function 0x0B
// to determine number of processors.
void getCpuInfo0B()
{
    int level=0;
    int threadsPerCore;
    uint a, b, c, d;
    do {
        asm pure nothrow @nogc {
            mov EAX, 0x0B;
            mov ECX, level;
            cpuid;
            mov a, EAX;
            mov b, EBX;
            mov c, ECX;
            mov d, EDX;
        }
        if (b!=0) {
           // I'm not sure about this. The docs state that there
           // are 2 hyperthreads per core if HT is factory enabled.
            if (level==0)
                threadsPerCore = b & 0xFFFF;
            else if (level==1) {
                cpuFeatures.maxThreads = b & 0xFFFF;
                // threadsPerCore is still 0 if sub-leaf 0 reported EBX == 0;
                // leave maxCores untouched rather than dividing by zero.
                if (threadsPerCore > 0)
                    cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore;
            }

        }
        ++level;
    } while (a!=0 || b!=0);

}

// Queries CPUID and fills in cpuFeatures, stepping/model/family,
// max_cpuid/max_extended_cpuid and datacache[]. Must only be called when
// hasCPUID() is true.
void cpuidX86()
{
    auto cf = getCpuFeatures();

    char * venptr = cf.vendorID.ptr;
    uint a, b, c, d, a2;
    version(D_InlineAsm_X86)
    {
        asm pure nothrow @nogc {
            mov EAX, 0;
            cpuid;
            mov a, EAX;
            mov EAX, venptr;
            mov [EAX], EBX;
            mov [EAX + 4], EDX;
            mov [EAX + 8], ECX;
        }
    }
    else version(D_InlineAsm_X86_64)
    {
        asm pure nothrow @nogc {
            mov EAX, 0;
            cpuid;
            mov a, EAX;
            mov RAX, venptr;
            mov [RAX], EBX;
            mov [RAX + 4], EDX;
            mov [RAX + 8], ECX;
        }
    }
    asm pure nothrow @nogc {
        mov EAX, 0x8000_0000;
        cpuid;
        mov a2, EAX;
    }
    max_cpuid = a;
    max_extended_cpuid = a2;


    cf.probablyIntel = cf.vendorID == "GenuineIntel";
    cf.probablyAMD = cf.vendorID == "AuthenticAMD";
    uint apic = 0; // brand index, apic id
    asm pure nothrow @nogc {
        mov EAX, 1; // model, stepping
        cpuid;
        mov a, EAX;
        mov apic, EBX;
        mov c, ECX;
        mov d, EDX;
    }
    cf.features = d;
    cf.miscfeatures = c;
    // Keep the CPUID.1 EAX value in its own variable: a/c/d are reused as
    // scratch by the queries below.
    immutable uint signature = a;

    if (max_cpuid >= 7)
    {
        uint ext;

        asm pure nothrow @nogc
        {
            mov EAX, 7; // Structured extended feature leaf.
            mov ECX, 0; // Main leaf.
            cpuid;
            mov ext, EBX; // HLE, AVX2, RTM, etc.
        }

        cf.extfeatures = ext;
    }

    if (cf.miscfeatures & OSXSAVE_BIT)
    {
        // Dedicated locals: storing the xgetbv result into 'a' used to
        // destroy the CPUID.1 signature that stepping/family/model need.
        uint xlo, xhi;
        asm pure nothrow @nogc {
            mov ECX, 0;
            xgetbv;
            mov xhi, EDX;
            mov xlo, EAX;
        }
        cf.xfeatures = cast(ulong)xhi << 32 | xlo;
    }
    cf.amdfeatures = 0;
    cf.amdmiscfeatures = 0;
    if (max_extended_cpuid >= 0x8000_0001) {
        asm pure nothrow @nogc {
            mov EAX, 0x8000_0001;
            cpuid;
            mov c, ECX;
            mov d, EDX;
        }
        cf.amdmiscfeatures = c;
        cf.amdfeatures = d;
    }
    // Try to detect fraudulent vendorIDs
    // (test the raw bit: the amd3dnow property reads _amd3dnow, which is not
    // assigned until the end of initializeCpuid()).
    if (cf.amdfeatures & AMD_3DNOW_BIT) cf.probablyIntel = false;

    stepping = signature & 0xF;
    uint fbase = (signature >> 8) & 0xF;
    uint mbase = (signature >> 4) & 0xF;
    family = ((fbase == 0xF) || (fbase == 0)) ? fbase + ((signature >> 20) & 0xFF) : fbase;
    model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ?
         mbase + ((signature >> 12) & 0xF0) : mbase;

    if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) {
        // determine max number of cores for AMD
        asm pure nothrow @nogc {
            mov EAX, 0x8000_0008;
            cpuid;
            mov c, ECX;
        }
        //http://support.amd.com/TechDocs/25481.pdf pg.36
        cf.maxCores = 1;
        if (hyperThreadingBit) {
            cf.maxCores += c & 0xFF;
        }
    }

    if (max_extended_cpuid >= 0x8000_0004) {
        char *procptr = cf.processorNameBuffer.ptr;
        version(D_InlineAsm_X86)
        {
            asm pure nothrow @nogc {
                push ESI;
                mov ESI, procptr;
                mov EAX, 0x8000_0002;
                cpuid;
                mov [ESI], EAX;
                mov [ESI+4], EBX;
                mov [ESI+8], ECX;
                mov [ESI+12], EDX;
                mov EAX, 0x8000_0003;
                cpuid;
                mov [ESI+16], EAX;
                mov [ESI+20], EBX;
                mov [ESI+24], ECX;
                mov [ESI+28], EDX;
                mov EAX, 0x8000_0004;
                cpuid;
                mov [ESI+32], EAX;
                mov [ESI+36], EBX;
                mov [ESI+40], ECX;
                mov [ESI+44], EDX;
                pop ESI;
            }
        }
        else version(D_InlineAsm_X86_64)
        {
            asm pure nothrow @nogc {
                push RSI;
                mov RSI, procptr;
                mov EAX, 0x8000_0002;
                cpuid;
                mov [RSI], EAX;
                mov [RSI+4], EBX;
                mov [RSI+8], ECX;
                mov [RSI+12], EDX;
                mov EAX, 0x8000_0003;
                cpuid;
                mov [RSI+16], EAX;
                mov [RSI+20], EBX;
                mov [RSI+24], ECX;
                mov [RSI+28], EDX;
                mov EAX, 0x8000_0004;
                cpuid;
                mov [RSI+32], EAX;
                mov [RSI+36], EBX;
                mov [RSI+40], ECX;
                mov [RSI+44], EDX;
                pop RSI;
            }
        }
        // Intel P4 and PM pad at front with spaces.
        // Other CPUs pad at end with nulls.
        // Both scans are bounded so that an all-space or all-NUL buffer yields
        // an empty name instead of indexing outside the buffer.
        size_t start = 0, end = cf.processorNameBuffer.length;
        while (start < end && cf.processorNameBuffer[start] == ' ') { ++start; }
        while (end > start && cf.processorNameBuffer[end-1] == 0) { --end; }
        cf.processorName = cast(string)(cf.processorNameBuffer[start..end]);
    } else {
        cf.processorName = "Unknown CPU";
    }
    // Determine cache sizes

    // Intel docs specify that they return 0 for 0x8000_0005.
    // AMD docs do not specify the behaviour for 0004 and 0002.
    // Centaur/VIA and most other manufacturers use the AMD method,
    // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2!
    // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour
    // for CPUID80000005. But Geode GX uses the AMD method

    // Deal with Geode GX1 - make it same as MediaGX MMX.
    if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) {
        max_extended_cpuid = 0x8000_0004;
    }
    // Therefore, we try the AMD method unless it's an Intel chip.
    // If we still have no info, try the Intel methods.
    datacache[0].size = 0;
    if (max_cpuid<2 || !cf.probablyIntel) {
        if (max_extended_cpuid >= 0x8000_0005) {
            getAMDcacheinfo();
        } else if (cf.probablyAMD) {
            // According to AMDProcRecognitionAppNote, this means CPU
            // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4)
            // Am5x86 has 16Kb 4-way unified data & code cache.
            datacache[0].size = 8;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else {
            // Some obscure CPU.
            // Values for Cyrix 6x86MX (family 6, model 0)
            datacache[0].size = 64;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        }
    }
    if ((datacache[0].size == 0) && max_cpuid>=4) {
        getcacheinfoCPUID4();
    }
    if ((datacache[0].size == 0) && max_cpuid>=2) {
        getcacheinfoCPUID2();
    }
    if (datacache[0].size == 0) {
        // Pentium, PMMX, late model 486, or an obscure CPU
        // (test the raw bit: the mmx property reads _mmx, which is not
        // assigned until the end of initializeCpuid()).
        if ((cf.features & MMX_BIT) != 0) { // Pentium MMX. Also has 8kB code cache.
            datacache[0].size = 16;
            datacache[0].associativity = 4;
            datacache[0].lineSize = 32;
        } else { // Pentium 1 (which also has 8kB code cache)
                 // or 486.
            // Cyrix 6x86: 16, 4way, 32 linesize
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
        }
    }
    if (max_cpuid >=0x0B) {
        // For Intel i7 and later, use function 0x0B to determine
        // cores and hyperthreads.
        getCpuInfo0B();
    } else {
        if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF;
        else cf.maxThreads = cf.maxCores;
    }
}

// Return true if the cpuid instruction is supported.
// BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated as 486 machines.
bool hasCPUID()
{
    version(D_InlineAsm_X86_64)
        return true;
    else version(D_InlineAsm_X86)
    {
        // CPUID is available iff bit 21 (the ID flag) of EFLAGS can be toggled.
        uint flags;
        asm nothrow @nogc {
            pushfd;
            pop EAX;
            mov flags, EAX;
            xor EAX, 0x0020_0000;
            push EAX;
            popfd;
            pushfd;
            pop EAX;
            xor flags, EAX;
        }
        return (flags & 0x0020_0000) !=0;
    }
}

} else { // inline asm X86

    // No x86 inline assembler: CPUID cannot be queried.
    bool hasCPUID() { return false; }

    // Fallback that only fills in a Pentium-1-like L1 data cache guess.
    void cpuidX86()
    {
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
    }
}

/*
// TODO: Implement this function with OS support
void cpuidPPC()
{
    enum :int  { PPC601, PPC603, PPC603E, PPC604,
                 PPC604E, PPC620, PPCG3, PPCG4, PPCG5 }

    // TODO:
    // asm { mfpvr; } returns the CPU version but unfortunately it can
    // only be used in kernel mode. So OS support is required.
    int cputype = PPC603;

    // 601 has a 8KB combined data & code L1 cache.
    uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64];
    ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8];
    uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512];
    uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0];

    datacache[0].size = sizes[cputype];
    datacache[0].associativity = ways[cputype];
    datacache[0].lineSize = (cputype==PPCG5)? 128 :
        (cputype == PPC620 || cputype == PPCG3)? 64 : 32;
    datacache[1].size = L2size[cputype];
    datacache[2].size = L3size[cputype];
    datacache[1].lineSize = datacache[0].lineSize;
    datacache[2].lineSize = datacache[0].lineSize;
}

// TODO: Implement this function with OS support
void cpuidSparc()
{
    // UltaSparcIIi  : L1 = 16,  2way. L2 = 512, 4 way.
    // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192.
    // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way
    // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024.
    // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024.
    // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way.
}
*/

// Guards initializeCpuid() so that detection runs only once.
__gshared initializedCpuid = false;

shared static this()
{
    initializeCpuid();
}

/// Performs CPU detection and fills in every value exposed by this module.
/// It is called from the module constructor, and may also be called
/// explicitly by programs in which module constructors do not run.
/// Calls after the first one return immediately.
public void initializeCpuid()
{
    if (initializedCpuid)
        return;

    initializedCpuid = true;

    auto cf = getCpuFeatures();

    if (hasCPUID()) {
        cpuidX86();
    } else {
        // it's a 386 or 486, or a Cyrix 6x86.
        //Probably still has an external cache.
    }
    if (datacache[0].size==0) {
            // Guess same as Pentium 1.
            datacache[0].size = 8;
            datacache[0].associativity = 2;
            datacache[0].lineSize = 32;
    }
    numCacheLevels = 1;
    // And now fill up all the unused levels with full memory space.
    for (size_t i=1; i< datacache.length; ++i) {
        if (datacache[i].size==0) {
            // Set all remaining levels of cache equal to full address space.
            datacache[i].size = size_t.max/1024;
            datacache[i].associativity = 1;
            datacache[i].lineSize = datacache[i-1].lineSize;
        }
        else
            ++numCacheLevels;
    }

    // Publish the decoded values through the private backing fields.

    _dataCaches =     datacache;
    _vendor =         cast(string)cf.vendorID;
    _processor =      cf.processorName;
    _x87onChip =      (cf.features&FPU_BIT)!=0;
    _mmx =            (cf.features&MMX_BIT)!=0;
    _sse =            (cf.features&SSE_BIT)!=0;
    _sse2 =           (cf.features&SSE2_BIT)!=0;
    _sse3 =           (cf.miscfeatures&SSE3_BIT)!=0;
    _ssse3 =          (cf.miscfeatures&SSSE3_BIT)!=0;
    _sse41 =          (cf.miscfeatures&SSE41_BIT)!=0;
    _sse42 =          (cf.miscfeatures&SSE42_BIT)!=0;
    _sse4a =          (cf.amdmiscfeatures&SSE4A_BIT)!=0;
    _aes =            (cf.miscfeatures&AES_BIT)!=0;
    _hasPclmulqdq =   (cf.miscfeatures&PCLMULQDQ_BIT)!=0;
    _hasRdrand =      (cf.miscfeatures&RDRAND_BIT)!=0;

    // AVX needs both the CPU feature bit and OS support for saving the
    // SSE and YMM register state (as reported by xgetbv).
    enum avx_mask = XF_SSE_BIT|XF_YMM_BIT;
    _avx =            (cf.xfeatures & avx_mask) == avx_mask && (cf.miscfeatures&AVX_BIT)!=0;

    // The following read avx/aes/hasPclmulqdq through the properties, which
    // is valid here because their backing fields were assigned just above.
    _vaes =           avx && aes;
    _hasVpclmulqdq =  avx && hasPclmulqdq;
    _fma =            avx && (cf.miscfeatures&FMA_BIT)!=0;
    _fp16c =          avx && (cf.miscfeatures&FP16C_BIT)!=0;
    _avx2 =           avx && (cf.extfeatures & AVX2_BIT) != 0;
    _hle =            (cf.extfeatures & HLE_BIT) != 0;
    _rtm =            (cf.extfeatures & RTM_BIT) != 0;
    _hasRdseed =      (cf.extfeatures&RDSEED_BIT)!=0;
    _hasSha =         (cf.extfeatures&SHA_BIT)!=0;
    _amd3dnow =       (cf.amdfeatures&AMD_3DNOW_BIT)!=0;
    _amd3dnowExt =    (cf.amdfeatures&AMD_3DNOW_EXT_BIT)!=0;
    _amdMmx =         (cf.amdfeatures&AMD_MMX_BIT)!=0;
    _hasFxsr =        (cf.features&FXSR_BIT)!=0;
    _hasCmov =        (cf.features&CMOV_BIT)!=0;
    _hasRdtsc =       (cf.features&TIMESTAMP_BIT)!=0;
    _hasCmpxchg8b =   (cf.features&CMPXCHG8B_BIT)!=0;
    _hasCmpxchg16b =  (cf.miscfeatures&CMPXCHG16B_BIT)!=0;
    _hasSysEnterSysExit =
        // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early PentiumII.
        // (REF: www.geoffchappell.com).
        (cf.probablyIntel && (family < 6 || (family==6 && (model< 3 || (model==3 && stepping<3)))))
            ? false
            : (cf.features & SYSENTERSYSEXIT_BIT)!=0;
    _has3dnowPrefetch = (cf.amdmiscfeatures&AMD_3DNOW_PREFETCH_BIT)!=0;
    _hasLahfSahf =    (cf.amdmiscfeatures&LAHFSAHF_BIT)!=0;
    _hasPopcnt =      (cf.miscfeatures&POPCNT_BIT)!=0;
    _hasLzcnt =       (cf.amdmiscfeatures&LZCNT_BIT)!=0;
    _isX86_64 =       (cf.amdfeatures&AMD64_BIT)!=0;
    _isItanium =      (cf.features&IA64_BIT)!=0;
    _hyperThreading = cf.maxThreads>cf.maxCores;
    _threadsPerCPU =  cf.maxThreads;
    _coresPerCPU =    cf.maxCores;
    _preferAthlon =   cf.probablyAMD && family >=6;
    _preferPentium4 = cf.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6 || (family==6 && model < 0xF && !cf.probablyIntel);
}