Unify all assembler file generators

They now generally conform to the following argument sequence:

    script.pl "$(PERLASM_SCHEME)" [ C preprocessor arguments ... ] \
              $(PROCESSOR) <output file>

However, in the spirit of being able to use these scripts manually,
they also allow for no argument, or for only the flavour, or for only
the output file.  This is done by only using the last argument as
output file if it's a file (it has an extension), and only using the
first argument as flavour if it isn't a file (it doesn't have an
extension).

While we're at it, we make all $xlate calls the same, i.e. the $output
argument is always quoted, and we always die on error when trying to
start $xlate.

There's a perl lesson in this, regarding operator priority...

This will always succeed, even when it fails:

    open FOO, "something" || die "ERR: $!";

The reason is that '||' has higher priority than list operators (a
function is essentially a list operator and gobbles up everything
following it that isn't lower priority), and since a non-empty string
is always true, so that ends up being exactly the same as:

    open FOO, "something";

This, however, will fail if "something" can't be opened:

    open FOO, "something" or die "ERR: $!";

The reason is that 'or' has lower priority that list operators,
i.e. it's performed after the 'open' call.

Reviewed-by: Matt Caswell <matt@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/9884)
master
Richard Levitte 3 years ago
parent a1c8befd66
commit 1aa89a7a3a

@ -198,9 +198,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open OUT,">$output";
*STDOUT=*OUT;
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");
&static_label("AES_Te");

@ -39,9 +39,10 @@
# Profiler-assisted and platform-specific optimization resulted in 16%
# improvement on Cortex A8 core and ~21.5 cycles per byte.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
@ -49,9 +50,10 @@ if ($flavour && $flavour ne "void") {
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
} else {
open STDOUT,">$output";
$output and open STDOUT,">$output";
}
$s0="r0";

@ -37,8 +37,7 @@
# cost of 8x increased pressure on L1D. 8x because you'd have
# to interleave both Te and Td tables...
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
($TEA,$TEB)=("A5","B5");
($KPA,$KPB)=("A3","B1");

@ -65,8 +65,12 @@
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$flavour ||= "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_LA="dla";
@ -95,17 +99,13 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
$big_endian=(`echo MIPSEB | $ENV{CC} -E -`=~/MIPSEB/)?0:1 if ($ENV{CC});
for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
open STDOUT,">$output";
if (!defined($big_endian))
{ $big_endian=(unpack('L',pack('N',1))==1); }
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
$output and open STDOUT,">$output";
$code.=<<___;
#include "mips_arch.h"

@ -28,9 +28,12 @@
#
# Special thanks to polarhome.com for providing HP-UX account.
$flavour = shift;
$output = shift;
open STDOUT,">$output";
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$output and open STDOUT,">$output";
if ($flavour =~ /64/) {
$LEVEL ="2.0W";

@ -36,7 +36,10 @@
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /64/) {
$SIZE_T =8;
@ -59,7 +62,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
$FRAME=32*$SIZE_T;

@ -89,7 +89,10 @@
# instructions, which deliver ~70% improvement at 8KB block size over
# vanilla km-based code, 37% - at most like 512-bytes block size.
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
@ -99,8 +102,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$output and open STDOUT,">$output";
$softonly=0; # allow hardware support

@ -37,8 +37,7 @@
# optimal decrypt procedure]. Compared to GNU C generated code both
# procedures are more than 60% faster:-)
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
$frame="STACK_FRAME";
$bias="STACK_BIAS";

@ -33,9 +33,10 @@
#
# (*) with hyper-threading off
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -44,7 +45,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$verticalspin=1; # unlike 32-bit version $verticalspin performs

@ -33,8 +33,7 @@
# instructions and improve single-block and short-input performance
# with misaligned data.
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
{
my ($inp,$out,$key,$rounds,$tmp,$mask) = map("%o$_",(0..5));

@ -42,9 +42,10 @@
# (*) Sandy/Ivy Bridge are known to handle high interleave factors
# suboptimally;
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -74,7 +75,8 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$avx = ($2>=3.0) + ($2>3.0);
}
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
# void aesni_multi_cbc_encrypt (

@ -88,9 +88,10 @@
# (**) Execution is fully dominated by integer code sequence and
# SIMD still hardly shows [in single-process benchmark;-]
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -114,7 +115,8 @@ $shaext=1; ### set to zero if compiling for 1.0.1
$stitched_decrypt=0;
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
# void aesni_cbc_sha1_enc(const void *inp,

@ -44,9 +44,10 @@
# -evp aes-256-cbc-hmac-sha256' will vary by percent or two;
# (***) these are SHAEXT results;
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -77,7 +78,8 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
$shaext=$avx; ### set to zero if compiling for 1.0.1
$avx=1 if (!$shaext && $avx);
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$func="aesni_cbc_sha256_enc";

@ -76,9 +76,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open OUT,">$output";
*STDOUT=*OUT;
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0]);

@ -192,9 +192,10 @@ $PREFIX="aesni"; # if $PREFIX is set to "AES", the script
# generates drop-in replacement for
# crypto/aes/asm/aes-x86_64.pl:-)
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -203,7 +204,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$movkey = $PREFIX eq "aesni" ? "movups" : "movups";

@ -43,7 +43,10 @@
# POWER9[le] 4.02/0.86 0.84 1.05
# POWER9[be] 3.99/0.78 0.79 0.97
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /64/) {
$SIZE_T =8;
@ -70,7 +73,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

@ -75,8 +75,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
$::evp=1; # if $evp is set to 0, script generates module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry

@ -56,15 +56,18 @@
# (**) numbers after slash are for 32-bit code, which is 3x-
# interleaved;
$flavour = shift;
$output = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$prefix="aes_v8";

@ -50,9 +50,10 @@
# April-August 2013
# Add CBC, CTR and XTS subroutines and adapt for kernel use; courtesy of Ard.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
@ -60,9 +61,10 @@ if ($flavour && $flavour ne "void") {
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
} else {
open STDOUT,">$output";
$output and open STDOUT,">$output";
}
my ($inp,$out,$len,$key)=("r0","r1","r2","r3");

@ -97,9 +97,10 @@
#
# <appro@openssl.org>
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -108,7 +109,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");

@ -38,15 +38,18 @@
# code, but it's constant-time and therefore preferred;
# (***) presented for reference/comparison purposes;
$flavour = shift;
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$code.=<<___;

@ -35,7 +35,10 @@
# (**) Inadequate POWER6 performance is due to astronomic AltiVec
# latency, 9 cycles per simple logical operation.
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /64/) {
$SIZE_T =8;
@ -61,7 +64,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour \"$output\""
|| die "can't call $xlate: $!";
$code.=<<___;
.machine "any"

@ -58,9 +58,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open OUT,">$output";
*STDOUT=*OUT;
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");

@ -54,9 +54,10 @@
#
# <appro@openssl.org>
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -65,7 +66,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$PREFIX="vpaes";

@ -7,8 +7,7 @@
# https://www.openssl.org/source/license.html
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
print <<'___';
.text

@ -7,15 +7,18 @@
# https://www.openssl.org/source/license.html
$flavour = shift;
$output = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$code.=<<___;

@ -7,15 +7,18 @@
# https://www.openssl.org/source/license.html
$flavour = shift;
$output = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
$code.=<<___;

@ -12,8 +12,7 @@ push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
require "cbc.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0],$ARGV[$#ARGV] eq "386");

@ -22,8 +22,7 @@
# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
# difference.
$output=pop;
open STDOUT,">$output";
$output=pop and open STDOUT,">$output";
# int bn_mul_mont(
$rp="a0"; # BN_ULONG *rp,

@ -39,9 +39,10 @@
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
@ -49,9 +50,10 @@ if ($flavour && $flavour ne "void") {
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $1";
} else {
open STDOUT,">$output";
$output and open STDOUT,">$output";
}
$code=<<___;

@ -54,9 +54,10 @@
# integer-only on Cortex-A8, ~10-210% on Cortex-A15, ~70-450% on
# Snapdragon S4.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
@ -64,9 +65,10 @@ if ($flavour && $flavour ne "void") {
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $1";
} else {
open STDOUT,">$output";
$output and open STDOUT,">$output";
}
$num="r0"; # starts as num argument, but holds &tp[num-1]

@ -40,15 +40,18 @@
# 50-70% improvement for RSA4096 sign. RSA2048 sign is ~25% faster
# on Cortex-A57 and ~60-100% faster on others.
$flavour = shift;
$output = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
or die "can't call $xlate: $1";
*STDOUT=*OUT;
($lo0,$hi0,$aj,$m0,$alo,$ahi,

@ -11,8 +11,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0]);

@ -23,8 +23,7 @@
# totally unfair, because this module utilizes Galois Field Multiply
# instruction.
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
($rp,$a1,$a0,$b1,$b0)=("A4","B4","A6","B6","A8"); # argument vector

@ -10,8 +10,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0]);

@ -67,7 +67,8 @@
# hereafter less for longer keys, while verify - by 74-13%.
# DSA performance improves by 115-30%.
$output=pop;
# $output is the last argument if it looks like a file (it has an extension)
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
if ($^O eq "hpux") {
$ADDP="addp4";

@ -52,8 +52,12 @@
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
# supported flavours are o32,n32,64,nubi32,nubi64, default is o32
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";
if ($flavour =~ /64|n32/i) {
$PTR_ADD="daddu"; # incidentally works even on n32
@ -74,8 +78,7 @@ $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
#
######################################################################
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$output and open STDOUT,">$output";
if ($flavour =~ /64|n32/i) {
$LD="ld";

@ -54,9 +54,10 @@
# has to content with 40-85% improvement depending on benchmark and
# key length, more for longer keys.
$flavour = shift || "o32";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : "o32";
if ($flavour =~ /64|n32/i) {
$LD="ld";
@ -91,6 +92,8 @@ if ($flavour =~ /64|n32/i) {
$code="#if !(defined (__mips_isa_rev) && (__mips_isa_rev >= 6))\n.set mips2\n#endif\n";
}
$output and open STDOUT,">$output";
# Below is N32/64 register layout used in the original module.
#
($zero,$at,$v0,$v1)=map("\$$_",(0..3));

@ -69,10 +69,12 @@
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
$flavour = shift;
$output = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
open STDOUT,">$output";
$output and open STDOUT,">$output";
if ($flavour =~ /64/) {
$LEVEL ="2.0W";

@ -41,7 +41,10 @@
# builds. On low-end 32-bit processors performance improvement turned
# to be marginal...
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /32/) {
$BITS= 32;
@ -94,7 +97,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
$sp="r1";
$toc="r2";

@ -103,7 +103,10 @@
# Performance increase of ~60%
# Based on submission from Suresh N. Chari of IBM
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /32/) {
$BITS= 32;
@ -159,7 +162,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
$data=<<EOF;
#--------------------------------------------------------------------

@ -80,7 +80,10 @@
# ppc-mont.pl, but improvement coefficient is not as impressive
# for longer keys...
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /32/) {
$SIZE_T=4;
@ -108,7 +111,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
open STDOUT,"| $^X $xlate $flavour \"$output\""
or die "can't call $xlate: $!";
$FRAME=64; # padded frame header
$TRANSFER=16*8;

@ -37,9 +37,10 @@
# (***) scalar AD*X code is faster than AVX2 and is preferred code
# path for Broadwell;
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -72,7 +73,8 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-
$addx = ($ver>=3.03);
}
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT = *OUT;
if ($avx>1) {{{

@ -52,9 +52,10 @@
# purposes;
# (**) MULX was attempted, but found to give only marginal improvement;
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -63,7 +64,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`

@ -32,7 +32,10 @@
# so that improvement coefficients can vary from one specific
# setup to another.
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
@ -42,8 +45,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$output and open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;

@ -51,7 +51,10 @@
# On z990 it was measured to perform 2.6-2.2 times better than
# compiler-generated code, less for longer keys...
$flavour = shift;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
@ -61,8 +64,7 @@ if ($flavour =~ /3[12]/) {
$g="g";
}
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$output and open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;

@ -83,8 +83,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
$code.=<<___;
#include "sparc_arch.h"

@ -25,8 +25,7 @@
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
$locals=16*8;

@ -49,8 +49,7 @@
# module still have hidden potential [see TODO list there], which is
# estimated to be larger than 20%...
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
# int bn_mul_mont(
$rp="%i0"; # BN_ULONG *rp,

@ -62,8 +62,10 @@
# key length, more for longer keys] on USI&II cores and 30-80% - on
# USIII&IV.
$output = pop;
open STDOUT,">$output";
# $output is the last argument if it looks like a file (it has an extension)
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$output and open STDOUT,">$output";
$fname="bn_mul_mont_fpu";

@ -88,8 +88,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0]);

@ -25,8 +25,7 @@
# for reference purposes, because T4 has dedicated Montgomery
# multiplication and squaring *instructions* that deliver even more.
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
$frame = "STACK_FRAME";
$bias = "STACK_BIAS";

@ -43,8 +43,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0],$x86only = $ARGV[$#ARGV] eq "386");

@ -37,8 +37,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
$output = pop;
open STDOUT,">$output";
$output = pop and open STDOUT,">$output";
&asm_init($ARGV[0]);

@ -27,9 +27,10 @@
# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not
# all CPU time is burnt in it...
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -38,7 +39,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;
($lo,$hi)=("%rax","%rdx"); $a=$lo;

@ -46,9 +46,10 @@
#
# Add MULX/ADOX/ADCX code path.
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
@ -57,7 +58,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
or die "can't call $xlate: $!";
*STDOUT=*OUT;