mirror of
https://github.com/rn10950/RetroZilla.git
synced 2024-11-14 03:30:17 +01:00
814 lines
23 KiB
C
814 lines
23 KiB
C
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "mpi.h"
|
|
|
|
/*
|
|
* This file implements a single function: s_mpi_getProcessorLineSize();
|
|
* s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
|
|
* if a cache exists, or zero if there is no cache. If more than one
|
|
* cache line exists, it should return the smallest line size (which is
|
|
* usually the L1 cache).
|
|
*
|
|
* mp_modexp uses this information to make sure that private key information
|
|
* isn't being leaked through the cache.
|
|
*
|
|
* Currently the file returns good data for most modern x86 processors, and
|
|
* reasonable data on 64-bit ppc processors. All other processors are assumed
|
|
* to have a cache line size of 32 bytes unless modified by target.mk.
|
|
*
|
|
*/
|
|
|
|
#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
|
|
/* X86 processors have special instructions that tell us about the cache */
|
|
#include "string.h"
|
|
|
|
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
|
|
#define AMD_64 1
|
|
#endif
|
|
|
|
/* Generic CPUID function */
|
|
#if defined(AMD_64)
|
|
|
|
#if defined(__GNUC__)
|
|
|
|
/*
 * Execute the CPUID instruction for leaf 'op' and store the resulting
 * EAX, EBX, ECX and EDX register values through the four out-pointers.
 * (x86-64 build, GCC-style inline assembly.)
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* volatile: every call must really execute cpuid; callers in this file
     * issue the same leaf several times in a row and expect a fresh query
     * each time. */
    __asm__ __volatile__("cpuid\n\t"
                         : "=a" (*eax),
                           "=b" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
|
|
|
|
#elif defined(_MSC_VER)
|
|
|
|
#include <intrin.h>
|
|
|
|
/*
 * Execute CPUID for leaf 'op' through the MSVC __cpuid intrinsic and store
 * the four result registers through the out-pointers.
 * (x86-64 build, MSVC.)
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* __cpuid fills the array in the order eax, ebx, ecx, edx */
    int intrinsic_out[4];

    __cpuid(intrinsic_out, (int)op);
    /* go through unsigned int so the register bit patterns are copied
     * without sign extension */
    *eax = (unsigned int)intrinsic_out[0];
    *ebx = (unsigned int)intrinsic_out[1];
    *ecx = (unsigned int)intrinsic_out[2];
    *edx = (unsigned int)intrinsic_out[3];
}
|
|
|
|
#endif
|
|
|
|
#else /* !defined(AMD_64) */
|
|
|
|
/* x86 */
|
|
|
|
#if defined(__GNUC__)
|
|
/*
 * Execute the CPUID instruction for leaf 'op' and store the resulting
 * EAX, EBX, ECX and EDX register values through the four out-pointers.
 * (32-bit x86 build, GCC-style inline assembly.)
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* sigh GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. Use edi to store ebx and pass the
     * value returned in ebx from cpuid through edi. */
    __asm__ __volatile__("mov %%ebx,%%edi\n\t"   /* stash caller's ebx */
                         "cpuid\n\t"
                         "xchgl %%ebx,%%edi\n\t" /* restore ebx, result -> edi */
                         : "=a" (*eax),
                           "=D" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
|
|
|
|
/*
|
|
* try flipping a processor flag to determine CPU type
|
|
*/
|
|
/*
 * Try to toggle the EFLAGS bit(s) given in 'flag'.
 *
 * Returns the set of bits that actually changed (zero when the processor
 * refused to flip them). The original EFLAGS value is restored before
 * returning.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;

    __asm__ __volatile__(
        "pushfl\n\t"            /* get the flags */
        "popl %0\n\t"
        "movl %0,%1\n\t"        /* save the original flags */
        "xorl %2,%0\n\t"        /* flip the bit */
        "pushl %0\n\t"          /* set the flags */
        "popfl\n\t"
        "pushfl\n\t"            /* get the flags again (for return) */
        "popl %0\n\t"
        "pushl %1\n\t"          /* restore the original flags */
        "popfl\n\t"
        : "=r" (changedFlags),
          "=r" (originalFlags),
          "+r" (flag)           /* read-write so it never shares a register
                                 * with the two outputs written before it
                                 * is consumed */
        :
        : "cc");
    return changedFlags ^ originalFlags;
}
|
|
|
|
#elif defined(_MSC_VER)
|
|
|
|
/*
|
|
* windows versions of the above assembler
|
|
*/
|
|
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
|
|
void freebl_cpuid(unsigned long op, unsigned long *Reax,
|
|
unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
|
|
{
|
|
unsigned long Leax, Lebx, Lecx, Ledx;
|
|
__asm {
|
|
pushad
|
|
mov eax,op
|
|
wcpuid
|
|
mov Leax,eax
|
|
mov Lebx,ebx
|
|
mov Lecx,ecx
|
|
mov Ledx,edx
|
|
popad
|
|
}
|
|
*Reax = Leax;
|
|
*Rebx = Lebx;
|
|
*Recx = Lecx;
|
|
*Redx = Ledx;
|
|
}
|
|
|
|
static unsigned long changeFlag(unsigned long flag)
|
|
{
|
|
unsigned long changedFlags, originalFlags;
|
|
__asm {
|
|
push eax
|
|
push ebx
|
|
pushfd /* get the flags */
|
|
pop eax
|
|
push eax /* save the flags on the stack */
|
|
mov originalFlags,eax /* save the original flags */
|
|
mov ebx,flag
|
|
xor eax,ebx /* flip the bit */
|
|
push eax /* set the flags */
|
|
popfd
|
|
pushfd /* get the flags again (for return) */
|
|
pop eax
|
|
popfd /* restore the original flags */
|
|
mov changedFlags,eax
|
|
pop ebx
|
|
pop eax
|
|
}
|
|
return changedFlags ^ originalFlags;
|
|
}
|
|
#endif
|
|
|
|
#endif
|
|
|
|
#if !defined(AMD_64)
|
|
#define AC_FLAG 0x40000
|
|
#define ID_FLAG 0x200000
|
|
|
|
/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
|
|
static int is386()
|
|
{
|
|
return changeFlag(AC_FLAG) == 0;
|
|
}
|
|
|
|
/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
|
|
static int is486()
|
|
{
|
|
return changeFlag(ID_FLAG) == 0;
|
|
}
|
|
#endif
|
|
|
|
|
|
/*
|
|
* table for Intel Cache.
|
|
* See Intel Application Note AP-485 for more information
|
|
*/
|
|
|
|
/* Storage type of the 'type' field in the cache descriptor table; one byte
 * per entry. */
typedef unsigned char CacheTypeEntry;
|
|
|
|
/*
 * Classification of a cache descriptor byte. The 'i' / 'd' suffixes mark
 * the instruction and data variants of a level.
 */
typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;
|
|
|
|
struct _cache {
|
|
CacheTypeEntry type;
|
|
unsigned char lineSize;
|
|
};
|
|
static const struct _cache CacheMap[256] = {
|
|
/* 00 */ {Cache_NONE, 0 },
|
|
/* 01 */ {Cache_TLBi, 0 },
|
|
/* 02 */ {Cache_TLBi, 0 },
|
|
/* 03 */ {Cache_TLBd, 0 },
|
|
/* 04 */ {Cache_TLBd, },
|
|
/* 05 */ {Cache_UNKNOWN, 0 },
|
|
/* 06 */ {Cache_L1i, 32 },
|
|
/* 07 */ {Cache_UNKNOWN, 0 },
|
|
/* 08 */ {Cache_L1i, 32 },
|
|
/* 09 */ {Cache_UNKNOWN, 0 },
|
|
/* 0a */ {Cache_L1d, 32 },
|
|
/* 0b */ {Cache_UNKNOWN, 0 },
|
|
/* 0c */ {Cache_L1d, 32 },
|
|
/* 0d */ {Cache_UNKNOWN, 0 },
|
|
/* 0e */ {Cache_UNKNOWN, 0 },
|
|
/* 0f */ {Cache_UNKNOWN, 0 },
|
|
/* 10 */ {Cache_UNKNOWN, 0 },
|
|
/* 11 */ {Cache_UNKNOWN, 0 },
|
|
/* 12 */ {Cache_UNKNOWN, 0 },
|
|
/* 13 */ {Cache_UNKNOWN, 0 },
|
|
/* 14 */ {Cache_UNKNOWN, 0 },
|
|
/* 15 */ {Cache_UNKNOWN, 0 },
|
|
/* 16 */ {Cache_UNKNOWN, 0 },
|
|
/* 17 */ {Cache_UNKNOWN, 0 },
|
|
/* 18 */ {Cache_UNKNOWN, 0 },
|
|
/* 19 */ {Cache_UNKNOWN, 0 },
|
|
/* 1a */ {Cache_UNKNOWN, 0 },
|
|
/* 1b */ {Cache_UNKNOWN, 0 },
|
|
/* 1c */ {Cache_UNKNOWN, 0 },
|
|
/* 1d */ {Cache_UNKNOWN, 0 },
|
|
/* 1e */ {Cache_UNKNOWN, 0 },
|
|
/* 1f */ {Cache_UNKNOWN, 0 },
|
|
/* 20 */ {Cache_UNKNOWN, 0 },
|
|
/* 21 */ {Cache_UNKNOWN, 0 },
|
|
/* 22 */ {Cache_L3, 64 },
|
|
/* 23 */ {Cache_L3, 64 },
|
|
/* 24 */ {Cache_UNKNOWN, 0 },
|
|
/* 25 */ {Cache_L3, 64 },
|
|
/* 26 */ {Cache_UNKNOWN, 0 },
|
|
/* 27 */ {Cache_UNKNOWN, 0 },
|
|
/* 28 */ {Cache_UNKNOWN, 0 },
|
|
/* 29 */ {Cache_L3, 64 },
|
|
/* 2a */ {Cache_UNKNOWN, 0 },
|
|
/* 2b */ {Cache_UNKNOWN, 0 },
|
|
/* 2c */ {Cache_L1d, 64 },
|
|
/* 2d */ {Cache_UNKNOWN, 0 },
|
|
/* 2e */ {Cache_UNKNOWN, 0 },
|
|
/* 2f */ {Cache_UNKNOWN, 0 },
|
|
/* 30 */ {Cache_L1i, 64 },
|
|
/* 31 */ {Cache_UNKNOWN, 0 },
|
|
/* 32 */ {Cache_UNKNOWN, 0 },
|
|
/* 33 */ {Cache_UNKNOWN, 0 },
|
|
/* 34 */ {Cache_UNKNOWN, 0 },
|
|
/* 35 */ {Cache_UNKNOWN, 0 },
|
|
/* 36 */ {Cache_UNKNOWN, 0 },
|
|
/* 37 */ {Cache_UNKNOWN, 0 },
|
|
/* 38 */ {Cache_UNKNOWN, 0 },
|
|
/* 39 */ {Cache_L2, 64 },
|
|
/* 3a */ {Cache_UNKNOWN, 0 },
|
|
/* 3b */ {Cache_L2, 64 },
|
|
/* 3c */ {Cache_L2, 64 },
|
|
/* 3d */ {Cache_UNKNOWN, 0 },
|
|
/* 3e */ {Cache_UNKNOWN, 0 },
|
|
/* 3f */ {Cache_UNKNOWN, 0 },
|
|
/* 40 */ {Cache_L2, 0 },
|
|
/* 41 */ {Cache_L2, 32 },
|
|
/* 42 */ {Cache_L2, 32 },
|
|
/* 43 */ {Cache_L2, 32 },
|
|
/* 44 */ {Cache_L2, 32 },
|
|
/* 45 */ {Cache_L2, 32 },
|
|
/* 46 */ {Cache_UNKNOWN, 0 },
|
|
/* 47 */ {Cache_UNKNOWN, 0 },
|
|
/* 48 */ {Cache_UNKNOWN, 0 },
|
|
/* 49 */ {Cache_UNKNOWN, 0 },
|
|
/* 4a */ {Cache_UNKNOWN, 0 },
|
|
/* 4b */ {Cache_UNKNOWN, 0 },
|
|
/* 4c */ {Cache_UNKNOWN, 0 },
|
|
/* 4d */ {Cache_UNKNOWN, 0 },
|
|
/* 4e */ {Cache_UNKNOWN, 0 },
|
|
/* 4f */ {Cache_UNKNOWN, 0 },
|
|
/* 50 */ {Cache_TLBi, 0 },
|
|
/* 51 */ {Cache_TLBi, 0 },
|
|
/* 52 */ {Cache_TLBi, 0 },
|
|
/* 53 */ {Cache_UNKNOWN, 0 },
|
|
/* 54 */ {Cache_UNKNOWN, 0 },
|
|
/* 55 */ {Cache_UNKNOWN, 0 },
|
|
/* 56 */ {Cache_UNKNOWN, 0 },
|
|
/* 57 */ {Cache_UNKNOWN, 0 },
|
|
/* 58 */ {Cache_UNKNOWN, 0 },
|
|
/* 59 */ {Cache_UNKNOWN, 0 },
|
|
/* 5a */ {Cache_UNKNOWN, 0 },
|
|
/* 5b */ {Cache_TLBd, 0 },
|
|
/* 5c */ {Cache_TLBd, 0 },
|
|
/* 5d */ {Cache_TLBd, 0 },
|
|
/* 5e */ {Cache_UNKNOWN, 0 },
|
|
/* 5f */ {Cache_UNKNOWN, 0 },
|
|
/* 60 */ {Cache_UNKNOWN, 0 },
|
|
/* 61 */ {Cache_UNKNOWN, 0 },
|
|
/* 62 */ {Cache_UNKNOWN, 0 },
|
|
/* 63 */ {Cache_UNKNOWN, 0 },
|
|
/* 64 */ {Cache_UNKNOWN, 0 },
|
|
/* 65 */ {Cache_UNKNOWN, 0 },
|
|
/* 66 */ {Cache_L1d, 64 },
|
|
/* 67 */ {Cache_L1d, 64 },
|
|
/* 68 */ {Cache_L1d, 64 },
|
|
/* 69 */ {Cache_UNKNOWN, 0 },
|
|
/* 6a */ {Cache_UNKNOWN, 0 },
|
|
/* 6b */ {Cache_UNKNOWN, 0 },
|
|
/* 6c */ {Cache_UNKNOWN, 0 },
|
|
/* 6d */ {Cache_UNKNOWN, 0 },
|
|
/* 6e */ {Cache_UNKNOWN, 0 },
|
|
/* 6f */ {Cache_UNKNOWN, 0 },
|
|
/* 70 */ {Cache_Trace, 1 },
|
|
/* 71 */ {Cache_Trace, 1 },
|
|
/* 72 */ {Cache_Trace, 1 },
|
|
/* 73 */ {Cache_UNKNOWN, 0 },
|
|
/* 74 */ {Cache_UNKNOWN, 0 },
|
|
/* 75 */ {Cache_UNKNOWN, 0 },
|
|
/* 76 */ {Cache_UNKNOWN, 0 },
|
|
/* 77 */ {Cache_UNKNOWN, 0 },
|
|
/* 78 */ {Cache_UNKNOWN, 0 },
|
|
/* 79 */ {Cache_L2, 64 },
|
|
/* 7a */ {Cache_L2, 64 },
|
|
/* 7b */ {Cache_L2, 64 },
|
|
/* 7c */ {Cache_L2, 64 },
|
|
/* 7d */ {Cache_UNKNOWN, 0 },
|
|
/* 7e */ {Cache_UNKNOWN, 0 },
|
|
/* 7f */ {Cache_UNKNOWN, 0 },
|
|
/* 80 */ {Cache_UNKNOWN, 0 },
|
|
/* 81 */ {Cache_UNKNOWN, 0 },
|
|
/* 82 */ {Cache_L2, 32 },
|
|
/* 83 */ {Cache_L2, 32 },
|
|
/* 84 */ {Cache_L2, 32 },
|
|
/* 85 */ {Cache_L2, 32 },
|
|
/* 86 */ {Cache_L2, 64 },
|
|
/* 87 */ {Cache_L2, 64 },
|
|
/* 88 */ {Cache_UNKNOWN, 0 },
|
|
/* 89 */ {Cache_UNKNOWN, 0 },
|
|
/* 8a */ {Cache_UNKNOWN, 0 },
|
|
/* 8b */ {Cache_UNKNOWN, 0 },
|
|
/* 8c */ {Cache_UNKNOWN, 0 },
|
|
/* 8d */ {Cache_UNKNOWN, 0 },
|
|
/* 8e */ {Cache_UNKNOWN, 0 },
|
|
/* 8f */ {Cache_UNKNOWN, 0 },
|
|
/* 90 */ {Cache_UNKNOWN, 0 },
|
|
/* 91 */ {Cache_UNKNOWN, 0 },
|
|
/* 92 */ {Cache_UNKNOWN, 0 },
|
|
/* 93 */ {Cache_UNKNOWN, 0 },
|
|
/* 94 */ {Cache_UNKNOWN, 0 },
|
|
/* 95 */ {Cache_UNKNOWN, 0 },
|
|
/* 96 */ {Cache_UNKNOWN, 0 },
|
|
/* 97 */ {Cache_UNKNOWN, 0 },
|
|
/* 98 */ {Cache_UNKNOWN, 0 },
|
|
/* 99 */ {Cache_UNKNOWN, 0 },
|
|
/* 9a */ {Cache_UNKNOWN, 0 },
|
|
/* 9b */ {Cache_UNKNOWN, 0 },
|
|
/* 9c */ {Cache_UNKNOWN, 0 },
|
|
/* 9d */ {Cache_UNKNOWN, 0 },
|
|
/* 9e */ {Cache_UNKNOWN, 0 },
|
|
/* 9f */ {Cache_UNKNOWN, 0 },
|
|
/* a0 */ {Cache_UNKNOWN, 0 },
|
|
/* a1 */ {Cache_UNKNOWN, 0 },
|
|
/* a2 */ {Cache_UNKNOWN, 0 },
|
|
/* a3 */ {Cache_UNKNOWN, 0 },
|
|
/* a4 */ {Cache_UNKNOWN, 0 },
|
|
/* a5 */ {Cache_UNKNOWN, 0 },
|
|
/* a6 */ {Cache_UNKNOWN, 0 },
|
|
/* a7 */ {Cache_UNKNOWN, 0 },
|
|
/* a8 */ {Cache_UNKNOWN, 0 },
|
|
/* a9 */ {Cache_UNKNOWN, 0 },
|
|
/* aa */ {Cache_UNKNOWN, 0 },
|
|
/* ab */ {Cache_UNKNOWN, 0 },
|
|
/* ac */ {Cache_UNKNOWN, 0 },
|
|
/* ad */ {Cache_UNKNOWN, 0 },
|
|
/* ae */ {Cache_UNKNOWN, 0 },
|
|
/* af */ {Cache_UNKNOWN, 0 },
|
|
/* b0 */ {Cache_TLBi, 0 },
|
|
/* b1 */ {Cache_UNKNOWN, 0 },
|
|
/* b2 */ {Cache_UNKNOWN, 0 },
|
|
/* b3 */ {Cache_TLBd, 0 },
|
|
/* b4 */ {Cache_UNKNOWN, 0 },
|
|
/* b5 */ {Cache_UNKNOWN, 0 },
|
|
/* b6 */ {Cache_UNKNOWN, 0 },
|
|
/* b7 */ {Cache_UNKNOWN, 0 },
|
|
/* b8 */ {Cache_UNKNOWN, 0 },
|
|
/* b9 */ {Cache_UNKNOWN, 0 },
|
|
/* ba */ {Cache_UNKNOWN, 0 },
|
|
/* bb */ {Cache_UNKNOWN, 0 },
|
|
/* bc */ {Cache_UNKNOWN, 0 },
|
|
/* bd */ {Cache_UNKNOWN, 0 },
|
|
/* be */ {Cache_UNKNOWN, 0 },
|
|
/* bf */ {Cache_UNKNOWN, 0 },
|
|
/* c0 */ {Cache_UNKNOWN, 0 },
|
|
/* c1 */ {Cache_UNKNOWN, 0 },
|
|
/* c2 */ {Cache_UNKNOWN, 0 },
|
|
/* c3 */ {Cache_UNKNOWN, 0 },
|
|
/* c4 */ {Cache_UNKNOWN, 0 },
|
|
/* c5 */ {Cache_UNKNOWN, 0 },
|
|
/* c6 */ {Cache_UNKNOWN, 0 },
|
|
/* c7 */ {Cache_UNKNOWN, 0 },
|
|
/* c8 */ {Cache_UNKNOWN, 0 },
|
|
/* c9 */ {Cache_UNKNOWN, 0 },
|
|
/* ca */ {Cache_UNKNOWN, 0 },
|
|
/* cb */ {Cache_UNKNOWN, 0 },
|
|
/* cc */ {Cache_UNKNOWN, 0 },
|
|
/* cd */ {Cache_UNKNOWN, 0 },
|
|
/* ce */ {Cache_UNKNOWN, 0 },
|
|
/* cf */ {Cache_UNKNOWN, 0 },
|
|
/* d0 */ {Cache_UNKNOWN, 0 },
|
|
/* d1 */ {Cache_UNKNOWN, 0 },
|
|
/* d2 */ {Cache_UNKNOWN, 0 },
|
|
/* d3 */ {Cache_UNKNOWN, 0 },
|
|
/* d4 */ {Cache_UNKNOWN, 0 },
|
|
/* d5 */ {Cache_UNKNOWN, 0 },
|
|
/* d6 */ {Cache_UNKNOWN, 0 },
|
|
/* d7 */ {Cache_UNKNOWN, 0 },
|
|
/* d8 */ {Cache_UNKNOWN, 0 },
|
|
/* d9 */ {Cache_UNKNOWN, 0 },
|
|
/* da */ {Cache_UNKNOWN, 0 },
|
|
/* db */ {Cache_UNKNOWN, 0 },
|
|
/* dc */ {Cache_UNKNOWN, 0 },
|
|
/* dd */ {Cache_UNKNOWN, 0 },
|
|
/* de */ {Cache_UNKNOWN, 0 },
|
|
/* df */ {Cache_UNKNOWN, 0 },
|
|
/* e0 */ {Cache_UNKNOWN, 0 },
|
|
/* e1 */ {Cache_UNKNOWN, 0 },
|
|
/* e2 */ {Cache_UNKNOWN, 0 },
|
|
/* e3 */ {Cache_UNKNOWN, 0 },
|
|
/* e4 */ {Cache_UNKNOWN, 0 },
|
|
/* e5 */ {Cache_UNKNOWN, 0 },
|
|
/* e6 */ {Cache_UNKNOWN, 0 },
|
|
/* e7 */ {Cache_UNKNOWN, 0 },
|
|
/* e8 */ {Cache_UNKNOWN, 0 },
|
|
/* e9 */ {Cache_UNKNOWN, 0 },
|
|
/* ea */ {Cache_UNKNOWN, 0 },
|
|
/* eb */ {Cache_UNKNOWN, 0 },
|
|
/* ec */ {Cache_UNKNOWN, 0 },
|
|
/* ed */ {Cache_UNKNOWN, 0 },
|
|
/* ee */ {Cache_UNKNOWN, 0 },
|
|
/* ef */ {Cache_UNKNOWN, 0 },
|
|
/* f0 */ {Cache_UNKNOWN, 0 },
|
|
/* f1 */ {Cache_UNKNOWN, 0 },
|
|
/* f2 */ {Cache_UNKNOWN, 0 },
|
|
/* f3 */ {Cache_UNKNOWN, 0 },
|
|
/* f4 */ {Cache_UNKNOWN, 0 },
|
|
/* f5 */ {Cache_UNKNOWN, 0 },
|
|
/* f6 */ {Cache_UNKNOWN, 0 },
|
|
/* f7 */ {Cache_UNKNOWN, 0 },
|
|
/* f8 */ {Cache_UNKNOWN, 0 },
|
|
/* f9 */ {Cache_UNKNOWN, 0 },
|
|
/* fa */ {Cache_UNKNOWN, 0 },
|
|
/* fb */ {Cache_UNKNOWN, 0 },
|
|
/* fc */ {Cache_UNKNOWN, 0 },
|
|
/* fd */ {Cache_UNKNOWN, 0 },
|
|
/* fe */ {Cache_UNKNOWN, 0 },
|
|
/* ff */ {Cache_UNKNOWN, 0 }
|
|
};
|
|
|
|
|
|
/*
|
|
* use the above table to determine the CacheEntryLineSize.
|
|
*/
|
|
static void
|
|
getIntelCacheEntryLineSize(unsigned long val, int *level,
|
|
unsigned long *lineSize)
|
|
{
|
|
CacheType type;
|
|
|
|
type = CacheMap[val].type;
|
|
/* only interested in data caches */
|
|
/* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
|
|
* this data check has the side effect of rejecting that entry. If
|
|
* that wasn't the case, we could have to reject it explicitly */
|
|
if (CacheMap[val].lineSize == 0) {
|
|
return;
|
|
}
|
|
/* look at the caches, skip types we aren't interested in.
|
|
* if we already have a value for a lower level cache, skip the
|
|
* current entry */
|
|
if ((type == Cache_L1)|| (type == Cache_L1d)) {
|
|
*level = 1;
|
|
*lineSize = CacheMap[val].lineSize;
|
|
} else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
|
|
*level = 2;
|
|
*lineSize = CacheMap[val].lineSize;
|
|
} else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
|
|
*level = 3;
|
|
*lineSize = CacheMap[val].lineSize;
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each one, most significant byte first, to getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
|
|
|
|
/*
|
|
* returns '0' if no recognized cache is found, or if the cache
|
|
* information is not supported by this processor
|
|
*/
|
|
/*
 * Query cpuid command 2 and return the line size of the lowest-level
 * data (or unified) cache it describes. Returns 0 when cpuidLevel is
 * below 2 or when no recognized descriptor is reported.
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;              /* higher than any real level: nothing found yet */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* the count occupies all of 'al', i.e. the low eight bits */
    repeat = (int)(eax & 0xff);
    for (count = 0; count < repeat; count++) {
        if (count != 0) {
            /* first result was fetched above; re-issue for later rounds */
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
        if ((eax & 0x80000000) == 0) {
            /* low byte of eax is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
    }
    return lineSize;
}
|
|
|
|
/*
|
|
* returns '0' if the cache info is not supported by this processor.
|
|
* This is based on the AMD extended cache commands for cpuid.
|
|
* (see "AMD Processor Recognition Application Note" Publication 20734).
|
|
* Some other processors use the identical scheme.
|
|
* (see "Processor Recognition, Transmeta Corporation").
|
|
*/
|
|
/*
 * Return the L1 data cache line size reported by extended cpuid command
 * 0x80000005, or 0 when the processor does not report that command as
 * available. The incoming cpuidLevel value is not consulted; it is
 * replaced by the extended level read from command 0x80000000.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* get the Extended CPUID level */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff;      /* line Size, L1 Data Cache */
}
|
|
|
|
/*
 * CPUID vendor identification strings. Each macro is the index of the
 * string that follows it, so the macros must stay in step with the array
 * order; MAN_UNKNOWN is one past the last valid index.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "Sis Sis Sis ",
#define NATIONAL  9
    "Geode by NSC",
};

static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);

#define MAN_UNKNOWN 10
|
|
|
|
#if !defined(AMD_64)
|
|
#define SSE2_FLAG (1<<26)
|
|
unsigned long
|
|
s_mpi_is_sse2()
|
|
{
|
|
unsigned long eax, ebx, ecx, edx;
|
|
int manufacturer = MAN_UNKNOWN;
|
|
int i;
|
|
char string[13];
|
|
|
|
if (is386() || is486()) {
|
|
return 0;
|
|
}
|
|
freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
|
|
/* string holds the CPU's manufacturer ID string - a twelve
|
|
* character ASCII string stored in ebx, edx, ecx, and
|
|
* the 32-bit extended feature flags are in edx, ecx.
|
|
*/
|
|
*(int *)string = ebx;
|
|
*(int *)&string[4] = (int)edx;
|
|
*(int *)&string[8] = (int)ecx;
|
|
string[12] = 0;
|
|
|
|
/* has no SSE2 extensions */
|
|
if (eax == 0) {
|
|
return 0;
|
|
}
|
|
|
|
for (i=0; i < n_manufacturers; i++) {
|
|
if ( strcmp(manMap[i],string) == 0) {
|
|
manufacturer = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
freebl_cpuid(1,&eax,&ebx,&ecx,&edx);
|
|
return (edx & SSE2_FLAG) == SSE2_FLAG;
|
|
}
|
|
#endif
|
|
|
|
unsigned long
|
|
s_mpi_getProcessorLineSize()
|
|
{
|
|
unsigned long eax, ebx, ecx, edx;
|
|
unsigned long cpuidLevel;
|
|
unsigned long cacheLineSize = 0;
|
|
int manufacturer = MAN_UNKNOWN;
|
|
int i;
|
|
char string[65];
|
|
|
|
#if !defined(AMD_64)
|
|
if (is386()) {
|
|
return 0; /* 386 had no cache */
|
|
} if (is486()) {
|
|
return 32; /* really? need more info */
|
|
}
|
|
#endif
|
|
|
|
/* Pentium, cpuid command is available */
|
|
freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
|
|
cpuidLevel = eax;
|
|
/* string holds the CPU's manufacturer ID string - a twelve
|
|
* character ASCII string stored in ebx, edx, ecx, and
|
|
* the 32-bit extended feature flags are in edx, ecx.
|
|
*/
|
|
*(int *)string = ebx;
|
|
*(int *)&string[4] = (int)edx;
|
|
*(int *)&string[8] = (int)ecx;
|
|
string[12] = 0;
|
|
|
|
manufacturer = MAN_UNKNOWN;
|
|
for (i=0; i < n_manufacturers; i++) {
|
|
if ( strcmp(manMap[i],string) == 0) {
|
|
manufacturer = i;
|
|
}
|
|
}
|
|
|
|
if (manufacturer == INTEL) {
|
|
cacheLineSize = getIntelCacheLineSize(cpuidLevel);
|
|
} else {
|
|
cacheLineSize = getOtherCacheLineSize(cpuidLevel);
|
|
}
|
|
/* doesn't support cache info based on cpuid. This means
|
|
* an old pentium class processor, which have cache lines of
|
|
* 32. If we learn differently, we can use a switch based on
|
|
* the Manufacturer id */
|
|
if (cacheLineSize == 0) {
|
|
cacheLineSize = 32;
|
|
}
|
|
return cacheLineSize;
|
|
}
|
|
#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
|
|
#endif
|
|
|
|
#if defined(__ppc64__)
|
|
/*
|
|
* Sigh, The PPC has some really nice features to help us determine cache
|
|
* size, since it had lots of direct control functions to do so. The POWER
|
|
* processor even has an instruction to do this, but it was dropped in
|
|
* PowerPC. Unfortunately most of them are not available in user mode.
|
|
*
|
|
* The dcbz function would be a great way to determine cache line size except
|
|
* 1) it only works on write-back memory (it throws an exception otherwise),
|
|
* and 2) because so many mac programs 'knew' the processor cache size was
|
|
* 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
|
|
* G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
|
|
* these programs happy. dcbzl work if 64 bit instructions are supported.
|
|
* If you know 64 bit instructions are supported, and that stack is
|
|
* write-back, you can use this code.
|
|
*/
|
|
#include "memory.h"
|
|
|
|
/* clear the cache line that contains 'array' */
|
|
static inline void dcbzl(char *array)
|
|
{
|
|
register char *a asm("r2") = array;
|
|
__asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
|
|
}
|
|
|
|
|
|
/* Round pointer 'x' up to the next multiple of 'y' ('y' must be a power
 * of two); the result is a char pointer. */
#define PPC_DO_ALIGN(x,y) ((char *)\
                 ((((long long) (x))+((y)-1))&~((y)-1)))

/* Largest line size the probe below can detect, in bytes. */
#define PPC_MAX_LINE_SIZE 256
|
|
unsigned long
|
|
s_mpi_getProcessorLineSize()
|
|
{
|
|
char testArray[2*PPC_MAX_LINE_SIZE+1];
|
|
char *test;
|
|
int i;
|
|
|
|
/* align the array on a maximum line size boundary, so we
|
|
* know we are starting to clear from the first address */
|
|
test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
|
|
/* set all the values to 1's */
|
|
memset(test, 0xff, PPC_MAX_LINE_SIZE);
|
|
/* clear one cache block starting at 'test' */
|
|
dcbzl(test);
|
|
|
|
/* find the size of the cleared area, that's our block size */
|
|
for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) {
|
|
if (test[i-1] == 0) {
|
|
return i;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
|
|
#endif
|
|
|
|
|
|
/*
|
|
* put other processor and platform specific cache code here
|
|
* return the smallest cache line size in bytes on the processor
|
|
* (usually the L1 cache). If the OS has a call, this would be
|
|
* a great place to put it.
|
|
*
|
|
* If there is no cache, return 0;
|
|
*
|
|
* define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
|
|
* below aren't compiled.
|
|
*
|
|
*/
|
|
|
|
|
|
/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
|
|
* OS */
|
|
#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
|
|
|
|
unsigned long
|
|
s_mpi_getProcessorLineSize()
|
|
{
|
|
return MPI_CACHE_LINE_SIZE;
|
|
}
|
|
#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
|
|
#endif
|
|
|
|
|
|
/* If no way to get the processor cache line size has been defined, assume
|
|
* it's 32 bytes (most common value, does not significantly impact performance)
|
|
*/
|
|
#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
|
|
/*
 * Fallback implementation used when nothing above defined a
 * platform-specific one: assumes a 32-byte cache line.
 */
unsigned long
s_mpi_getProcessorLineSize(void)
{
    return 32;
}
|
|
#endif
|
|
|
|
#ifdef TEST_IT
|
|
#include <stdio.h>
|
|
|
|
/* Test driver (TEST_IT builds only): print the detected line size. */
int main(void)
{
    /* the function returns unsigned long, so print it with %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
|
|
#endif
|