2016年8月17日星期三

枚举INTEL CPU的CACHE信息

#include "stdafx.h"

/*
功能:枚举INTEL CPU 的各个层次的CACHE信息,如大小,类型等。
英文说法是:enumerate the deterministic cache parameters for each level of the cache hierarchy.

参考:WRK和INTEL等的资料。

made by correy
made at 2016.07.01
homepage:http://correy.webs.com
*/

#include <Windows.h>
#include <assert.h>

#include <stdio.h>
#include <nmmintrin.h>
#include <immintrin.h>
#include <intrin.h>

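//The following structures are taken from the WRK (Windows Research Kernel); the field comments come from Intel's documentation.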

// Structure of Intel deterministic cache information returned by CPUID instruction
typedef enum _INTEL_CACHE_TYPE {
    // Values of the cache type field, bits 04-00 of EAX.
    IntelCacheNull        = 0, // Null - no more caches
    IntelCacheData        = 1, // Data cache
    IntelCacheInstruction = 2, // Instruction cache
    IntelCacheUnified     = 3, // Unified cache
    IntelCacheRam         = 4, // 4-31 are documented as reserved
    IntelCacheTrace       = 5
} INTEL_CACHE_TYPE;

// EAX value of the cache-parameter query, accessible either as the raw 32-bit
// register image (Ulong) or through the bit-fields below.
// NOTE(review): the overlay relies on the compiler allocating bit-fields starting
// at the least significant bit and packing them into one 32-bit unit - confirm
// when building with anything other than the original compiler.
typedef union INTEL_CACHE_INFO_EAX {
    ULONG Ulong; // raw register value
    struct {
        INTEL_CACHE_TYPE Type : 5; // Bits 04-00: cache type (see INTEL_CACHE_TYPE); 0 means no more caches
        ULONG Level : 3;           // Bits 07-05: cache level (starts at 1)
        ULONG SelfInitializing : 1;// Bit 08: self-initializing cache level (does not need software initialization)
        ULONG FullyAssociative : 1;// Bit 09: fully associative cache
        ULONG Reserved : 4;        // Bits 13-10: reserved
        ULONG ThreadsSharing : 12; // Bits 25-14: maximum number of addressable IDs for logical processors sharing this cache (the program adds 1 before printing it)
        ULONG ProcessorCores : 6;  // Bits 31-26: maximum number of addressable IDs for processor cores in the physical package (the program adds 1 before printing it)
    };
} INTEL_CACHE_INFO_EAX, *PINTEL_CACHE_INFO_EAX;

// EBX value of the cache-parameter query, accessible either as the raw 32-bit
// register image (Ulong) or through the bit-fields below.
// Each field holds the real quantity minus one: the size formula used by the
// program is (W + 1) * (P + 1) * (L + 1) * (Sets + 1).
typedef union INTEL_CACHE_INFO_EBX {
    ULONG Ulong; // raw register value
    struct {
        ULONG LineSize : 12;     // Bits 11-00: L = system coherency line size
        ULONG Partitions : 10;   // Bits 21-12: P = physical line partitions
        ULONG Associativity : 10;// Bits 31-22: W = ways of associativity
    };
} INTEL_CACHE_INFO_EBX, *PINTEL_CACHE_INFO_EBX;


BOOL is_support_intel()
/*
Reports whether the processor identifies itself as an Intel CPU.

Executes CPUID with InfoType 0, rebuilds the 12-character vendor
identification string and compares it (case-insensitively) with "GenuineIntel".

Returns TRUE when the strings match, FALSE otherwise.
The caller is expected to have verified that CPUID is available.
*/
{
    char CPUString[0x20];
    int CPUInfo[4] = { -1 };

    // InfoType 0: CPUInfo[0] receives the number of valid Ids, the other three
    // elements receive the identification string.
    __cpuid(CPUInfo, 0);

    // The identification string is not stored in linear order: the pieces have
    // to be concatenated as CPUInfo[1], CPUInfo[3], CPUInfo[2].
    // memcpy is used instead of storing through an int* cast so that the char
    // buffer is never accessed through an incompatible, possibly misaligned type.
    memset(CPUString, 0, sizeof(CPUString));
    memcpy(CPUString, &CPUInfo[1], sizeof(CPUInfo[1]));
    memcpy(CPUString + 4, &CPUInfo[3], sizeof(CPUInfo[3]));
    memcpy(CPUString + 8, &CPUInfo[2], sizeof(CPUInfo[2]));

    return (_stricmp(CPUString, "GenuineIntel") == 0) ? TRUE : FALSE;
}


BOOL is_support_cpuid()
/*
Determines whether the CPU supports the CPUID instruction.

The probe tries to change the ID flag (bit 21) of EFLAGS: if software is able
to change that bit, CPUID is available.

The bit is toggled (XOR) rather than set (OR). With OR, a flag that already
was 1 would read back unchanged and the function would wrongly report that
CPUID is missing.

Returns TRUE when the ID flag could be changed, FALSE otherwise.
The original EFLAGS value is restored before returning.
*/
{
    const SIZE_T idFlag = 0x200000; // EFLAGS bit 21
    SIZE_T eflags1;
    SIZE_T eflags2;

    eflags1 = __readeflags();
    __writeeflags(eflags1 ^ idFlag); // try to flip the ID flag
    eflags2 = __readeflags();
    __writeeflags(eflags1);          // put the original flags back

    // Only the ID bit is compared so that unrelated flag changes cannot
    // influence the answer.
    return (((eflags1 ^ eflags2) & idFlag) != 0) ? TRUE : FALSE;
}


int _tmain(int argc, _TCHAR* argv [])
{
    if (!is_support_cpuid())
    {
        return 0;
    }

    if (!is_support_intel())
    {
        return 0;
    }

    int CPUInfo[4] = { -1 };
    unsigned    nExIds;
    __cpuid(CPUInfo, 0);
    nExIds = CPUInfo[0];
    if (nExIds < 4)
    {
        return 0;
    }

    //下面的算法参照WindowsResearchKernel-WRK\WRK-v1.2\base\ntos\ke\amd64\initkr.c文件中的KiSetCacheInformationIntel函数,当然还有INTEL的文档。
    INTEL_CACHE_INFO_EAX CacheInfoEax;
    INTEL_CACHE_INFO_EBX CacheInfoEbx;
    ULONG Index = 0;//Valid index values start from 0.
    ULONGLONG CacheSize;

    int nCores = 0;
    int nCacheType = 0;
    int nCacheLevel = 0;
    int nMaxThread = 0;
    int nSysLineSize = 0;
    int nPhysicalLinePartitions = 0;
    int nWaysAssociativity = 0;
    int nNumberSets = 0;
    int    bSelfInit = false;
    int    bFullyAssociative = false;

    for (;; Index += 1)
    {
        __cpuidex(CPUInfo, 4, Index); //注意:80000006H还有个信息。
        CacheInfoEax.Ulong = CPUInfo[0];
        CacheInfoEbx.Ulong = CPUInfo[1];

        if (CacheInfoEax.Type == IntelCacheNull) {
            break;//下面INTEL也说出了结束的标志。
        }

        //另一种退出方式是:https://msdn.microsoft.com/en-us/library/hskdteyh(v=vs.100).aspx
        if (!(CPUInfo[0] & 0xf0))
            break;

        if (Index == 0)
        {
            nCores = CPUInfo[0] >> 26;
            printf_s("\n\nNumber of Cores = %d\n", nCores + 1);//感觉这个是错的。
        }

        nCacheType = (CPUInfo[0] & 0x1f);
        nCacheLevel = (CPUInfo[0] & 0xe0) >> 5;
        bSelfInit = (CPUInfo[0] & 0x100) >> 8;
        bFullyAssociative = (CPUInfo[0] & 0x200) >> 9;
        nMaxThread = (CPUInfo[0] & 0x03ffc000) >> 14;
        nSysLineSize = (CPUInfo[1] & 0x0fff);
        nPhysicalLinePartitions = (CPUInfo[1] & 0x03ff000) >> 12;
        nWaysAssociativity = (CPUInfo[1]) >> 22;
        nNumberSets = CPUInfo[2];

        printf_s("\n");
        printf_s("ECX Index %d\n", Index);
        switch (nCacheType)
        {
        case 0:
            printf_s("   Type: Null\n");
            break;
        case 1:
            printf_s("   Type: Data Cache\n");
            break;
        case 2:
            printf_s("   Type: Instruction Cache\n");
            break;
        case 3:
            printf_s("   Type: Unified Cache\n");
            break;
        default:
            printf_s("   Type: Unknown\n");
        }

        printf_s("   Level = %d\n", nCacheLevel + 1);//感觉无须加一。INTEL说了:starts at 1。估计微软的人还认为:starts at 0。
        if (bSelfInit) {
            printf_s("   Self Initializing\n");
        }
        else {
            printf_s("   Not Self Initializing\n");
        }

        if (bFullyAssociative) {
            printf_s("   Is Fully Associatve\n");
        }
        else {
            printf_s("   Is Not Fully Associatve\n");
        }

        printf_s("   Max Threads = %d\n", nMaxThread + 1);
        //printf_s("   System Line Size = %d\n", nSysLineSize + 1);
        //printf_s("   Physical Line Partions = %d\n", nPhysicalLinePartitions + 1);
        //printf_s("   Ways of Associativity = %d\n", nWaysAssociativity + 1);
        //printf_s("   Number of Sets = %d\n", nNumberSets + 1);

        //微软网站上的几个CPUID例子是没有计算CPU cache大小的,如:https://msdn.microsoft.com/en-us/library/hskdteyh(v=vs.100).aspx ,这只是简单的列出值而已。

        //WRK如是说:
        // Cache size = Ways x Partitions x LineSize x Sets.
        // N.B. For fully-associative cache, the "Sets" returned from cpuid is actually the number of entries, not the "Ways".
        // Therefore the formula of evaluating the cache size below will still hold.

        /*
        INTEL如是说:

        INPUT EAX = 04H: Returns Deterministic Cache Parameters for Each Level

        When CPUID executes with EAX set to 04H and ECX contains an index value,
        the processor returns encoded data that describe a set of deterministic cache parameters (for the cache level associated with the input in ECX).
        Valid index values start from 0.

        Software can enumerate the deterministic cache parameters for each level of the cache hierarchy starting with an index value of 0,
        until the parameters report the value associated with the cache type field is 0.
        The architecturally defined fields reported by deterministic cache parameters are documented in Table 3-17.

        This Cache Size in Bytes
        = (Ways + 1) * (Partitions + 1) * (Line_Size + 1) * (Sets + 1)
        = (EBX[31:22] + 1) * (EBX[21:12] + 1) * (EBX[11:0] + 1) * (ECX + 1)

        The CPUID leaf 04H also reports data that can be used to derive the topology of processor cores in a physical package.
        This information is constant for all valid index values.
        Software can query the raw data reported by executing CPUID with EAX=04H and ECX=0 and use it as part of the topology enumeration algorithm described in Chapter 8,
        “Multiple-Processor Management,” in the Intel? 64 and IA-32 Architectures Software Developer’s Manual, Volume 3A.
        */

        CacheSize = (CacheInfoEbx.Associativity + 1) * (CacheInfoEbx.Partitions + 1) * (CacheInfoEbx.LineSize + 1) * (CPUInfo[2] + 1);

        ULONGLONG Cache_Size = (nWaysAssociativity + 1) * (nPhysicalLinePartitions + 1) * (nSysLineSize + 1) * (nNumberSets + 1);
        assert(Cache_Size == CacheSize);
        /*
        其实:
        nWaysAssociativity可命名为Ways
        nPhysicalLinePartitions可命名为Partitions
        nSysLineSize可命名为Line_Size
        nNumberSets可命名为Sets
        */

        if (CacheSize >= (1024 * 1024)) {
            printf_s("   CacheSize = %dMB.\n", CacheSize / (1024 * 1024));
        }
        else {
            printf_s("   CacheSize = %dKB.\n", CacheSize / 1024);
        }
    }

    return 0;
}

没有评论:

发表评论