mirror of
https://github.com/QuasarApp/openssl.git
synced 2025-05-08 07:29:41 +00:00
x86_64: Don't assume 8-byte pointer size
Since a pointer in x32 is 4 bytes, add x86_64-support.pl to define pointer_size and pointer_register based on flavour, in order to support structures like: struct { void *ptr; int blocks; }. This fixes 90-test_sslapi.t on x32. Verified with "$ ./Configure shared linux-x86_64", "$ make", "$ make test" and with "$ ./Configure shared linux-x32", "$ make", "$ make test". Reviewed-by: Richard Levitte <levitte@openssl.org> Reviewed-by: Tomas Mraz <tmraz@fedoraproject.org> (Merged from https://github.com/openssl/openssl/pull/10988)
This commit is contained in:
parent
21542a48ab
commit
0d51cf3ccc
@ -54,6 +54,11 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|||||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||||
die "can't locate x86_64-xlate.pl";
|
die "can't locate x86_64-xlate.pl";
|
||||||
|
|
||||||
|
push(@INC,"${dir}","${dir}../../perlasm");
|
||||||
|
require "x86_64-support.pl";
|
||||||
|
|
||||||
|
$ptr_size=&pointer_size($flavour);
|
||||||
|
|
||||||
$avx=0;
|
$avx=0;
|
||||||
|
|
||||||
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
||||||
@ -88,6 +93,8 @@ $inp="%rdi"; # 1st arg
|
|||||||
$key="%rsi"; # 2nd arg
|
$key="%rsi"; # 2nd arg
|
||||||
$num="%edx";
|
$num="%edx";
|
||||||
|
|
||||||
|
$inp_elm_size=2*$ptr_size+8+16;
|
||||||
|
|
||||||
@inptr=map("%r$_",(8..11));
|
@inptr=map("%r$_",(8..11));
|
||||||
@outptr=map("%r$_",(12..15));
|
@outptr=map("%r$_",(12..15));
|
||||||
|
|
||||||
@ -163,21 +170,25 @@ $code.=<<___;
|
|||||||
.Lenc4x_body:
|
.Lenc4x_body:
|
||||||
movdqu ($key),$zero # 0-round key
|
movdqu ($key),$zero # 0-round key
|
||||||
lea 0x78($key),$key # size optimization
|
lea 0x78($key),$key # size optimization
|
||||||
lea 40*2($inp),$inp
|
lea $inp_elm_size*2($inp),$inp
|
||||||
|
|
||||||
.Lenc4x_loop_grande:
|
.Lenc4x_loop_grande:
|
||||||
mov $num,24(%rsp) # original $num
|
mov $num,24(%rsp) # original $num
|
||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<4;$i++) {
|
for($i=0;$i<4;$i++) {
|
||||||
|
$inptr_reg=&pointer_register($flavour,@inptr[$i]);
|
||||||
|
$outptr_reg=&pointer_register($flavour,@outptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `40*$i+16-40*2`($inp),$one # borrow $one for number of blocks
|
# borrow $one for number of blocks
|
||||||
mov `40*$i+0-40*2`($inp),@inptr[$i]
|
mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*2`($inp),$one
|
||||||
|
mov `$inp_elm_size*$i+0-$inp_elm_size*2`($inp),$inptr_reg
|
||||||
cmp $num,$one
|
cmp $num,$one
|
||||||
mov `40*$i+8-40*2`($inp),@outptr[$i]
|
mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*2`($inp),$outptr_reg
|
||||||
cmovg $one,$num # find maximum
|
cmovg $one,$num # find maximum
|
||||||
test $one,$one
|
test $one,$one
|
||||||
movdqu `40*$i+24-40*2`($inp),@out[$i] # load IV
|
# load IV
|
||||||
|
movdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*2`($inp),@out[$i]
|
||||||
mov $one,`32+4*$i`(%rsp) # initialize counters
|
mov $one,`32+4*$i`(%rsp) # initialize counters
|
||||||
cmovle %rsp,@inptr[$i] # cancel input
|
cmovle %rsp,@inptr[$i] # cancel input
|
||||||
___
|
___
|
||||||
@ -335,14 +346,15 @@ $code.=<<___;
|
|||||||
|
|
||||||
#pxor @inp[0],@out[0]
|
#pxor @inp[0],@out[0]
|
||||||
#pxor @inp[1],@out[1]
|
#pxor @inp[1],@out[1]
|
||||||
#movdqu @out[0],`40*0+24-40*2`($inp) # output iv FIX ME!
|
# output iv FIX ME!
|
||||||
|
#movdqu @out[0],`$inp_elm_size*0+2*$ptr_size+8-$inp_elm_size*2`($inp)
|
||||||
#pxor @inp[2],@out[2]
|
#pxor @inp[2],@out[2]
|
||||||
#movdqu @out[1],`40*1+24-40*2`($inp)
|
#movdqu @out[1],`$inp_elm_size*1+2*$ptr_size+8-$inp_elm_size*2`($inp)
|
||||||
#pxor @inp[3],@out[3]
|
#pxor @inp[3],@out[3]
|
||||||
#movdqu @out[2],`40*2+24-40*2`($inp) # won't fix, let caller
|
#movdqu @out[2],`$inp_elm_size*2+2*$ptr_size+8-$inp_elm_size*2`($inp) # won't fix, let caller
|
||||||
#movdqu @out[3],`40*3+24-40*2`($inp) # figure this out...
|
#movdqu @out[3],`$inp_elm_size*3+2*$ptr_size+8-$inp_elm_size*2`($inp) # figure this out...
|
||||||
|
|
||||||
lea `40*4`($inp),$inp
|
lea `$inp_elm_size*4`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Lenc4x_loop_grande
|
jnz .Lenc4x_loop_grande
|
||||||
|
|
||||||
@ -440,21 +452,25 @@ $code.=<<___;
|
|||||||
.Ldec4x_body:
|
.Ldec4x_body:
|
||||||
movdqu ($key),$zero # 0-round key
|
movdqu ($key),$zero # 0-round key
|
||||||
lea 0x78($key),$key # size optimization
|
lea 0x78($key),$key # size optimization
|
||||||
lea 40*2($inp),$inp
|
lea $inp_elm_size*2($inp),$inp
|
||||||
|
|
||||||
.Ldec4x_loop_grande:
|
.Ldec4x_loop_grande:
|
||||||
mov $num,24(%rsp) # original $num
|
mov $num,24(%rsp) # original $num
|
||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<4;$i++) {
|
for($i=0;$i<4;$i++) {
|
||||||
|
$inptr_reg=&pointer_register($flavour,@inptr[$i]);
|
||||||
|
$outptr_reg=&pointer_register($flavour,@outptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `40*$i+16-40*2`($inp),$one # borrow $one for number of blocks
|
# borrow $one for number of blocks
|
||||||
mov `40*$i+0-40*2`($inp),@inptr[$i]
|
mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*2`($inp),$one
|
||||||
|
mov `$inp_elm_size*$i+0-$inp_elm_size*2`($inp),$inptr_reg
|
||||||
cmp $num,$one
|
cmp $num,$one
|
||||||
mov `40*$i+8-40*2`($inp),@outptr[$i]
|
mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*2`($inp),$outptr_reg
|
||||||
cmovg $one,$num # find maximum
|
cmovg $one,$num # find maximum
|
||||||
test $one,$one
|
test $one,$one
|
||||||
movdqu `40*$i+24-40*2`($inp),@inp[$i] # load IV
|
# load IV
|
||||||
|
movdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*2`($inp),@inp[$i]
|
||||||
mov $one,`32+4*$i`(%rsp) # initialize counters
|
mov $one,`32+4*$i`(%rsp) # initialize counters
|
||||||
cmovle %rsp,@inptr[$i] # cancel input
|
cmovle %rsp,@inptr[$i] # cancel input
|
||||||
___
|
___
|
||||||
@ -610,7 +626,7 @@ $code.=<<___;
|
|||||||
.cfi_def_cfa %rax,8
|
.cfi_def_cfa %rax,8
|
||||||
mov 24(%rsp),$num
|
mov 24(%rsp),$num
|
||||||
|
|
||||||
lea `40*4`($inp),$inp
|
lea `$inp_elm_size*4`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Ldec4x_loop_grande
|
jnz .Ldec4x_loop_grande
|
||||||
|
|
||||||
@ -709,7 +725,7 @@ $code.=<<___;
|
|||||||
vzeroupper
|
vzeroupper
|
||||||
vmovdqu ($key),$zero # 0-round key
|
vmovdqu ($key),$zero # 0-round key
|
||||||
lea 0x78($key),$key # size optimization
|
lea 0x78($key),$key # size optimization
|
||||||
lea 40*4($inp),$inp
|
lea `$inp_elm_size*4`($inp),$inp
|
||||||
shr \$1,$num
|
shr \$1,$num
|
||||||
|
|
||||||
.Lenc8x_loop_grande:
|
.Lenc8x_loop_grande:
|
||||||
@ -718,14 +734,20 @@ $code.=<<___;
|
|||||||
___
|
___
|
||||||
for($i=0;$i<8;$i++) {
|
for($i=0;$i<8;$i++) {
|
||||||
my $temp = $i ? $offload : $offset;
|
my $temp = $i ? $offload : $offset;
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
|
$temp_reg=&pointer_register($flavour,$temp);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `40*$i+16-40*4`($inp),$one # borrow $one for number of blocks
|
# borrow $one for number of blocks
|
||||||
mov `40*$i+0-40*4`($inp),@ptr[$i] # input pointer
|
mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*4`($inp),$one
|
||||||
|
# input pointer
|
||||||
|
mov `$inp_elm_size*$i+0-$inp_elm_size*4`($inp),$ptr_reg
|
||||||
cmp $num,$one
|
cmp $num,$one
|
||||||
mov `40*$i+8-40*4`($inp),$temp # output pointer
|
# output pointer
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*4`($inp),$temp_reg
|
||||||
cmovg $one,$num # find maximum
|
cmovg $one,$num # find maximum
|
||||||
test $one,$one
|
test $one,$one
|
||||||
vmovdqu `40*$i+24-40*4`($inp),@out[$i] # load IV
|
# load IV
|
||||||
|
vmovdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*4`($inp),@out[$i]
|
||||||
mov $one,`32+4*$i`(%rsp) # initialize counters
|
mov $one,`32+4*$i`(%rsp) # initialize counters
|
||||||
cmovle %rsp,@ptr[$i] # cancel input
|
cmovle %rsp,@ptr[$i] # cancel input
|
||||||
sub @ptr[$i],$temp # distance between input and output
|
sub @ptr[$i],$temp # distance between input and output
|
||||||
@ -910,7 +932,7 @@ $code.=<<___;
|
|||||||
mov 16(%rsp),%rax # original %rsp
|
mov 16(%rsp),%rax # original %rsp
|
||||||
.cfi_def_cfa %rax,8
|
.cfi_def_cfa %rax,8
|
||||||
#mov 24(%rsp),$num
|
#mov 24(%rsp),$num
|
||||||
#lea `40*8`($inp),$inp
|
#lea `$inp_elm_size*8`($inp),$inp
|
||||||
#dec $num
|
#dec $num
|
||||||
#jnz .Lenc8x_loop_grande
|
#jnz .Lenc8x_loop_grande
|
||||||
|
|
||||||
@ -1002,7 +1024,7 @@ $code.=<<___;
|
|||||||
vzeroupper
|
vzeroupper
|
||||||
vmovdqu ($key),$zero # 0-round key
|
vmovdqu ($key),$zero # 0-round key
|
||||||
lea 0x78($key),$key # size optimization
|
lea 0x78($key),$key # size optimization
|
||||||
lea 40*4($inp),$inp
|
lea `$inp_elm_size*4`($inp),$inp
|
||||||
shr \$1,$num
|
shr \$1,$num
|
||||||
|
|
||||||
.Ldec8x_loop_grande:
|
.Ldec8x_loop_grande:
|
||||||
@ -1011,14 +1033,20 @@ $code.=<<___;
|
|||||||
___
|
___
|
||||||
for($i=0;$i<8;$i++) {
|
for($i=0;$i<8;$i++) {
|
||||||
my $temp = $i ? $offload : $offset;
|
my $temp = $i ? $offload : $offset;
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
|
$temp_reg=&pointer_register($flavour,$temp);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `40*$i+16-40*4`($inp),$one # borrow $one for number of blocks
|
# borrow $one for number of blocks
|
||||||
mov `40*$i+0-40*4`($inp),@ptr[$i] # input pointer
|
mov `$inp_elm_size*$i+2*$ptr_size-$inp_elm_size*4`($inp),$one
|
||||||
|
# input pointer
|
||||||
|
mov `$inp_elm_size*$i+0-$inp_elm_size*4`($inp),$ptr_reg
|
||||||
cmp $num,$one
|
cmp $num,$one
|
||||||
mov `40*$i+8-40*4`($inp),$temp # output pointer
|
# output pointer
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size-$inp_elm_size*4`($inp),$temp_reg
|
||||||
cmovg $one,$num # find maximum
|
cmovg $one,$num # find maximum
|
||||||
test $one,$one
|
test $one,$one
|
||||||
vmovdqu `40*$i+24-40*4`($inp),@out[$i] # load IV
|
# load IV
|
||||||
|
vmovdqu `$inp_elm_size*$i+2*$ptr_size+8-$inp_elm_size*4`($inp),@out[$i]
|
||||||
mov $one,`32+4*$i`(%rsp) # initialize counters
|
mov $one,`32+4*$i`(%rsp) # initialize counters
|
||||||
cmovle %rsp,@ptr[$i] # cancel input
|
cmovle %rsp,@ptr[$i] # cancel input
|
||||||
sub @ptr[$i],$temp # distance between input and output
|
sub @ptr[$i],$temp # distance between input and output
|
||||||
@ -1234,7 +1262,7 @@ $code.=<<___;
|
|||||||
mov 16(%rsp),%rax # original %rsp
|
mov 16(%rsp),%rax # original %rsp
|
||||||
.cfi_def_cfa %rax,8
|
.cfi_def_cfa %rax,8
|
||||||
#mov 24(%rsp),$num
|
#mov 24(%rsp),$num
|
||||||
#lea `40*8`($inp),$inp
|
#lea `$inp_elm_size*8`($inp),$inp
|
||||||
#dec $num
|
#dec $num
|
||||||
#jnz .Ldec8x_loop_grande
|
#jnz .Ldec8x_loop_grande
|
||||||
|
|
||||||
|
51
crypto/perlasm/x86_64-support.pl
Normal file
51
crypto/perlasm/x86_64-support.pl
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
#! /usr/bin/env perl
|
||||||
|
# Copyright 2020 The OpenSSL Project Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||||
|
# this file except in compliance with the License. You can obtain a copy
|
||||||
|
# in the file LICENSE in the source distribution or at
|
||||||
|
# https://www.openssl.org/source/license.html
|
||||||
|
|
||||||
|
|
||||||
|
package x86_64support;
|
||||||
|
|
||||||
|
# require "x86_64-support.pl";
|
||||||
|
# $ptr_size=&pointer_size($flavour);
|
||||||
|
# $ptr_reg=&pointer_register($flavour,$reg);
|
||||||
|
|
||||||
|
# pointer_size($flavour)
#   Returns the pointer size in bytes to assume for the given perlasm
#   flavour: 4 when $flavour is exactly "elf32", 8 for any other value.
#   Installed into the main package (note the leading "::") so callers
#   can invoke it as &pointer_size(...) after require-ing this file.
sub ::pointer_size
{
    my $flavour = shift;

    return ($flavour eq "elf32") ? 4 : 8;
}
|
||||||
|
|
||||||
|
# pointer_register($flavour,$reg)
#   Returns the register name to use when loading a pointer-sized value
#   into $reg for the given perlasm flavour.
#     - Any flavour other than "elf32": $reg is returned unchanged.
#     - Flavour "elf32": the eight legacy registers are mapped from their
#       "%r.." name to the matching "%e.." name; every other name gets a
#       "d" suffix appended (e.g. "%r8" becomes "%r8d").
#   Installed into the main package (note the leading "::") so callers
#   can invoke it as &pointer_register(...) after require-ing this file.
sub ::pointer_register
{
    my ($flavour, $reg) = @_;

    # Only the "elf32" flavour needs a narrower register name.
    return $reg unless ($flavour eq "elf32");

    # Legacy registers have dedicated 32-bit names rather than a suffix.
    my %legacy = (
        "%rax" => "%eax",
        "%rbx" => "%ebx",
        "%rcx" => "%ecx",
        "%rdx" => "%edx",
        "%rdi" => "%edi",
        "%rsi" => "%esi",
        "%rbp" => "%ebp",
        "%rsp" => "%esp",
    );
    return $legacy{$reg} if (exists $legacy{$reg});

    # Everything else is handled by appending "d" to the name as given.
    return $reg."d";
}
|
||||||
|
|
||||||
|
1;
|
@ -50,6 +50,11 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|||||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||||
die "can't locate x86_64-xlate.pl";
|
die "can't locate x86_64-xlate.pl";
|
||||||
|
|
||||||
|
push(@INC,"${dir}","${dir}../../perlasm");
|
||||||
|
require "x86_64-support.pl";
|
||||||
|
|
||||||
|
$ptr_size=&pointer_size($flavour);
|
||||||
|
|
||||||
$avx=0;
|
$avx=0;
|
||||||
|
|
||||||
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
||||||
@ -89,6 +94,7 @@ $inp="%rsi"; # 2nd arg
|
|||||||
$num="%edx";
|
$num="%edx";
|
||||||
@ptr=map("%r$_",(8..11));
|
@ptr=map("%r$_",(8..11));
|
||||||
$Tbl="%rbp";
|
$Tbl="%rbp";
|
||||||
|
$inp_elm_size=2*$ptr_size;
|
||||||
|
|
||||||
@V=($A,$B,$C,$D,$E)=map("%xmm$_",(0..4));
|
@V=($A,$B,$C,$D,$E)=map("%xmm$_",(0..4));
|
||||||
($t0,$t1,$t2,$t3,$tx)=map("%xmm$_",(5..9));
|
($t0,$t1,$t2,$t3,$tx)=map("%xmm$_",(5..9));
|
||||||
@ -409,9 +415,12 @@ $code.=<<___;
|
|||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<4;$i++) {
|
for($i=0;$i<4;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -488,7 +497,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
mov `$REG_SZ*17+8`(%rsp),$num
|
mov `$REG_SZ*17+8`(%rsp),$num
|
||||||
lea $REG_SZ($ctx),$ctx
|
lea $REG_SZ($ctx),$ctx
|
||||||
lea `16*$REG_SZ/4`($inp),$inp
|
lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Loop_grande
|
jnz .Loop_grande
|
||||||
|
|
||||||
@ -566,9 +575,12 @@ $code.=<<___;
|
|||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<2;$i++) {
|
for($i=0;$i<2;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -751,7 +763,7 @@ $code.=<<___;
|
|||||||
movq $E0,0x80-0x40($ctx) # e1.e0
|
movq $E0,0x80-0x40($ctx) # e1.e0
|
||||||
|
|
||||||
lea `$REG_SZ/2`($ctx),$ctx
|
lea `$REG_SZ/2`($ctx),$ctx
|
||||||
lea `16*2`($inp),$inp
|
lea `$inp_elm_size*2`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Loop_grande_shaext
|
jnz .Loop_grande_shaext
|
||||||
|
|
||||||
@ -1071,9 +1083,12 @@ $code.=<<___;
|
|||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<4;$i++) {
|
for($i=0;$i<4;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -1144,7 +1159,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
mov `$REG_SZ*17+8`(%rsp),$num
|
mov `$REG_SZ*17+8`(%rsp),$num
|
||||||
lea $REG_SZ($ctx),$ctx
|
lea $REG_SZ($ctx),$ctx
|
||||||
lea `16*$REG_SZ/4`($inp),$inp
|
lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Loop_grande_avx
|
jnz .Loop_grande_avx
|
||||||
|
|
||||||
@ -1240,9 +1255,12 @@ $code.=<<___;
|
|||||||
lea `$REG_SZ*16`(%rsp),%rbx
|
lea `$REG_SZ*16`(%rsp),%rbx
|
||||||
___
|
___
|
||||||
for($i=0;$i<8;$i++) {
|
for($i=0;$i<8;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -1313,7 +1331,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
#mov `$REG_SZ*17+8`(%rsp),$num
|
#mov `$REG_SZ*17+8`(%rsp),$num
|
||||||
#lea $REG_SZ($ctx),$ctx
|
#lea $REG_SZ($ctx),$ctx
|
||||||
#lea `16*$REG_SZ/4`($inp),$inp
|
#lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
|
||||||
#dec $num
|
#dec $num
|
||||||
#jnz .Loop_grande_avx2
|
#jnz .Loop_grande_avx2
|
||||||
|
|
||||||
|
@ -51,6 +51,11 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|||||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||||
die "can't locate x86_64-xlate.pl";
|
die "can't locate x86_64-xlate.pl";
|
||||||
|
|
||||||
|
push(@INC,"${dir}","${dir}../../perlasm");
|
||||||
|
require "x86_64-support.pl";
|
||||||
|
|
||||||
|
$ptr_size=&pointer_size($flavour);
|
||||||
|
|
||||||
$avx=0;
|
$avx=0;
|
||||||
|
|
||||||
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
|
||||||
@ -93,6 +98,7 @@ $inp="%rsi"; # 2nd arg
|
|||||||
$num="%edx"; # 3rd arg
|
$num="%edx"; # 3rd arg
|
||||||
@ptr=map("%r$_",(8..11));
|
@ptr=map("%r$_",(8..11));
|
||||||
$Tbl="%rbp";
|
$Tbl="%rbp";
|
||||||
|
$inp_elm_size=2*$ptr_size;
|
||||||
|
|
||||||
@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%xmm$_",(8..15));
|
@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("%xmm$_",(8..15));
|
||||||
($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%xmm$_",(0..7));
|
($t1,$t2,$t3,$axb,$bxc,$Xi,$Xn,$sigma)=map("%xmm$_",(0..7));
|
||||||
@ -291,9 +297,12 @@ $code.=<<___;
|
|||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<4;$i++) {
|
for($i=0;$i<4;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -392,7 +401,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
mov `$REG_SZ*17+8`(%rsp),$num
|
mov `$REG_SZ*17+8`(%rsp),$num
|
||||||
lea $REG_SZ($ctx),$ctx
|
lea $REG_SZ($ctx),$ctx
|
||||||
lea `16*$REG_SZ/4`($inp),$inp
|
lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Loop_grande
|
jnz .Loop_grande
|
||||||
|
|
||||||
@ -470,9 +479,12 @@ $code.=<<___;
|
|||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<2;$i++) {
|
for($i=0;$i<2;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -753,7 +765,7 @@ $code.=<<___;
|
|||||||
movq @MSG0[1],0xe0-0x80($ctx) # H1.H0
|
movq @MSG0[1],0xe0-0x80($ctx) # H1.H0
|
||||||
|
|
||||||
lea `$REG_SZ/2`($ctx),$ctx
|
lea `$REG_SZ/2`($ctx),$ctx
|
||||||
lea `16*2`($inp),$inp
|
lea `$inp_elm_size*2`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Loop_grande_shaext
|
jnz .Loop_grande_shaext
|
||||||
|
|
||||||
@ -990,9 +1002,12 @@ $code.=<<___;
|
|||||||
xor $num,$num
|
xor $num,$num
|
||||||
___
|
___
|
||||||
for($i=0;$i<4;$i++) {
|
for($i=0;$i<4;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -1089,7 +1104,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
mov `$REG_SZ*17+8`(%rsp),$num
|
mov `$REG_SZ*17+8`(%rsp),$num
|
||||||
lea $REG_SZ($ctx),$ctx
|
lea $REG_SZ($ctx),$ctx
|
||||||
lea `16*$REG_SZ/4`($inp),$inp
|
lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
|
||||||
dec $num
|
dec $num
|
||||||
jnz .Loop_grande_avx
|
jnz .Loop_grande_avx
|
||||||
|
|
||||||
@ -1180,9 +1195,12 @@ $code.=<<___;
|
|||||||
lea `$REG_SZ*16`(%rsp),%rbx
|
lea `$REG_SZ*16`(%rsp),%rbx
|
||||||
___
|
___
|
||||||
for($i=0;$i<8;$i++) {
|
for($i=0;$i<8;$i++) {
|
||||||
|
$ptr_reg=&pointer_register($flavour,@ptr[$i]);
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
mov `16*$i+0`($inp),@ptr[$i] # input pointer
|
# input pointer
|
||||||
mov `16*$i+8`($inp),%ecx # number of blocks
|
mov `$inp_elm_size*$i+0`($inp),$ptr_reg
|
||||||
|
# number of blocks
|
||||||
|
mov `$inp_elm_size*$i+$ptr_size`($inp),%ecx
|
||||||
cmp $num,%ecx
|
cmp $num,%ecx
|
||||||
cmovg %ecx,$num # find maximum
|
cmovg %ecx,$num # find maximum
|
||||||
test %ecx,%ecx
|
test %ecx,%ecx
|
||||||
@ -1279,7 +1297,7 @@ $code.=<<___;
|
|||||||
|
|
||||||
#mov `$REG_SZ*17+8`(%rsp),$num
|
#mov `$REG_SZ*17+8`(%rsp),$num
|
||||||
#lea $REG_SZ($ctx),$ctx
|
#lea $REG_SZ($ctx),$ctx
|
||||||
#lea `16*$REG_SZ/4`($inp),$inp
|
#lea `$inp_elm_size*$REG_SZ/4`($inp),$inp
|
||||||
#dec $num
|
#dec $num
|
||||||
#jnz .Loop_grande_avx2
|
#jnz .Loop_grande_avx2
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user