| Index: openssl/crypto/x86cpuid.pl
|
| ===================================================================
|
| --- openssl/crypto/x86cpuid.pl (revision 105093)
|
| +++ openssl/crypto/x86cpuid.pl (working copy)
|
| @@ -1,6 +1,7 @@
|
| #!/usr/bin/env perl
|
|
|
| -push(@INC,"perlasm");
|
| +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # $dir = directory part of this script's path, including the trailing / or \
|
| +push(@INC, "${dir}perlasm", "perlasm"); # search perlasm/ next to this script, then perlasm/ relative to the current directory
|
| require "x86asm.pl";
|
|
|
| &asm_init($ARGV[0],"x86cpuid");
|
| @@ -22,38 +23,90 @@
|
| &jnc (&label("done"));
|
| &xor ("eax","eax");
|
| &cpuid ();
|
| + &mov ("edi","eax"); # max value for standard query level
|
| +
|
| &xor ("eax","eax");
|
| &cmp ("ebx",0x756e6547); # "Genu"
|
| - &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
|
| + &setne (&LB("eax"));
|
| &mov ("ebp","eax");
|
| &cmp ("edx",0x49656e69); # "ineI"
|
| - &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
|
| + &setne (&LB("eax"));
|
| &or ("ebp","eax");
|
| &cmp ("ecx",0x6c65746e); # "ntel"
|
| - &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
|
| - &or ("ebp","eax");
|
| + &setne (&LB("eax"));
|
| + &or ("ebp","eax"); # 0 indicates Intel CPU
|
| + &jz (&label("intel"));
|
| +
|
| + &cmp ("ebx",0x68747541); # "Auth"
|
| + &setne (&LB("eax"));
|
| + &mov ("esi","eax");
|
| + &cmp ("edx",0x69746E65); # "enti"
|
| + &setne (&LB("eax"));
|
| + &or ("esi","eax");
|
| + &cmp ("ecx",0x444D4163); # "cAMD"
|
| + &setne (&LB("eax"));
|
| + &or ("esi","eax"); # 0 indicates AMD CPU
|
| + &jnz (&label("intel"));
|
| +
|
| + # AMD specific
|
| + &mov ("eax",0x80000000);
|
| + &cpuid ();
|
| + &cmp ("eax",0x80000008);
|
| + &jb (&label("intel"));
|
| +
|
| + &mov ("eax",0x80000008);
|
| + &cpuid ();
|
| + &movz ("esi",&LB("ecx")); # number of cores - 1
|
| + &inc ("esi"); # number of cores
|
| +
|
| &mov ("eax",1);
|
| &cpuid ();
|
| + &bt ("edx",28);
|
| + &jnc (&label("done"));
|
| + &shr ("ebx",16);
|
| + &and ("ebx",0xff);
|
| + &cmp ("ebx","esi");
|
| + &ja (&label("done"));
|
| + &and ("edx",0xefffffff); # clear hyper-threading bit
|
| + &jmp (&label("done"));
|
| +
|
| +&set_label("intel");
|
| + &cmp ("edi",4);
|
| + &mov ("edi",-1);
|
| + &jb (&label("nocacheinfo"));
|
| +
|
| + &mov ("eax",4);
|
| + &mov ("ecx",0); # query L1D
|
| + &cpuid ();
|
| + &mov ("edi","eax");
|
| + &shr ("edi",14);
|
| + &and ("edi",0xfff); # number of cores -1 per L1D
|
| +
|
| +&set_label("nocacheinfo");
|
| + &mov ("eax",1);
|
| + &cpuid ();
|
| &cmp ("ebp",0);
|
| &jne (&label("notP4"));
|
| - &and ("eax",15<<8); # familiy ID
|
| - &cmp ("eax",15<<8); # P4?
|
| + &and (&HB("eax"),15); # family ID
|
| + &cmp (&HB("eax"),15); # P4?
|
| &jne (&label("notP4"));
|
| &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR
|
| &set_label("notP4");
|
| &bt ("edx",28); # test hyper-threading bit
|
| &jnc (&label("done"));
|
| + &and ("edx",0xefffffff);
|
| + &cmp ("edi",0);
|
| + &je (&label("done"));
|
| +
|
| + &or ("edx",0x10000000);
|
| &shr ("ebx",16);
|
| - &and ("ebx",0xff);
|
| - &cmp ("ebx",1); # see if cache is shared(*)
|
| + &cmp (&LB("ebx"),1);
|
| &ja (&label("done"));
|
| &and ("edx",0xefffffff); # clear hyper-threading bit if not
|
| &set_label("done");
|
| &mov ("eax","edx");
|
| &mov ("edx","ecx");
|
| &function_end("OPENSSL_ia32_cpuid");
|
| -# (*) on Core2 this value is set to 2 denoting the fact that L2
|
| -# cache is shared between cores.
|
|
|
| &external_label("OPENSSL_ia32cap_P");
|
|
|
| @@ -220,6 +273,40 @@
|
| }
|
| &function_end_B("OPENSSL_indirect_call");
|
|
|
| +&function_begin_B("OPENSSL_cleanse"); # zero-fills wparam(1) bytes starting at the address in wparam(0)
|
| + &mov ("edx",&wparam(0)); # edx = write pointer
|
| + &mov ("ecx",&wparam(1)); # ecx = number of bytes still to clear
|
| + &xor ("eax","eax"); # eax = 0, the fill value (al for byte stores, eax for dword stores)
|
| + &cmp ("ecx",7);
|
| + &jae (&label("lot")); # 7 or more bytes (unsigned compare): take the dword path
|
| + &cmp ("ecx",0);
|
| + &je (&label("ret")); # zero length: nothing to write
|
| +&set_label("little"); # byte-at-a-time loop; always entered with ecx != 0
|
| + &mov (&BP(0,"edx"),"al"); # store one zero byte
|
| + &sub ("ecx",1); # decrement count; sets ZF for the jnz below
|
| + &lea ("edx",&DWP(1,"edx")); # advance pointer; lea leaves the flags from sub intact
|
| + &jnz (&label("little")); # repeat until the count reaches 0
|
| +&set_label("ret");
|
| + &ret ();
|
| +
|
| +&set_label("lot",16); # NOTE(review): 16 is presumably the label alignment -- confirm in x86asm.pl
|
| + &test ("edx",3); # low two address bits clear => pointer is 4-byte aligned
|
| + &jz (&label("aligned"));
|
| + &mov (&BP(0,"edx"),"al"); # unaligned: store one zero byte
|
| + &lea ("ecx",&DWP(-1,"ecx")); # count -= 1
|
| + &lea ("edx",&DWP(1,"edx")); # pointer += 1
|
| + &jmp (&label("lot")); # re-check alignment
|
| +&set_label("aligned"); # dword loop; at most 3 bytes were used to align, so ecx >= 4 here
|
| + &mov (&DWP(0,"edx"),"eax"); # store four zero bytes
|
| + &lea ("ecx",&DWP(-4,"ecx")); # count -= 4
|
| + &test ("ecx",-4); # ZF set once fewer than 4 bytes remain
|
| + &lea ("edx",&DWP(4,"edx")); # pointer += 4; lea leaves the flags from test intact
|
| + &jnz (&label("aligned")); # repeat while at least 4 bytes remain
|
| + &cmp ("ecx",0);
|
| + &jne (&label("little")); # 1-3 trailing bytes: finish them in the byte loop
|
| + &ret ();
|
| +&function_end_B("OPENSSL_cleanse");
|
| +
|
| &initseg("OPENSSL_cpuid_setup");
|
|
|
| &asm_finish();
|
|
|