add speedtest script

parent 3b9aefd5
......@@ -11,7 +11,7 @@ LDFLAGS ?=
C_SRC := $(wildcard *.c)
OBJECTS := $(C_SRC:.c=.o)
#OBJECTS := rinterpolate.o rinterpolate-debug.o
CFLAGS := -fPIC -O3 -march=native -mtune=native -Wall -Wstrict-prototypes -ffast-math -D__RINTERPOLATE__ $(COPTFLAGS)
CFLAGS := -fPIC -O2 -march=native -mtune=native -Wall -Wstrict-prototypes -ffast-math -D__RINTERPOLATE__ $(COPTFLAGS)
ifeq ($(PREFIX),)
PREFIX := /usr/local
endif
......
......@@ -39,7 +39,8 @@ fi
export COPTFLAGS=$GENERATE
$CLEAN
$MAKE
$TEST
echo "Running quietly... please wait"
$TEST > /dev/null
$MID
......
......@@ -33,7 +33,6 @@ rinterpolate_Boolean_t rinterpolate_check_cache(
iloop++)
{
iline = iloop % table->cache_length;
/*
* Which is quicker, using MEMCMP or using a direct comparison?
* try them!
......@@ -57,13 +56,8 @@ rinterpolate_Boolean_t rinterpolate_check_cache(
Iprint("memcmp = %d\n",
memcmp(RINTERPOLATE_CACHE_PARAM(iline),x,table->n_float_sizeof));
}
Iprint("end1\n");
#endif//RINTERPOLATE_DEBUG
Iprint("end2\n");
match = (memcmp(RINTERPOLATE_CACHE_PARAM(iline),x,table->n_float_sizeof)==0) ? TRUE : FALSE;
Iprint("Match0 = %d\n",match);
#else // RINTERPOLATE_CACHE_USE_MEMCMP
rinterpolate_counter_t m;
rinterpolate_float_t * cacheline = RINTERPOLATE_CACHE_PARAM(iline);
......@@ -77,8 +71,6 @@ rinterpolate_Boolean_t rinterpolate_check_cache(
break;
}
}
Iprint("end of for loop\n");
#endif // RINTERPOLATE_CACHE_USE_MEMCMP
Iprint("Post Match = %d\n",match);
......@@ -100,7 +92,7 @@ rinterpolate_Boolean_t rinterpolate_check_cache(
* rather than the beginning of the cache
*/
table->cache_match_line = iline;
/* skip everything else */
goto cache_match;
}
......
Intel(R) Core(TM) i7-5960X CPU @ 3.00GHz
-------------
gcc-8 O3
Fixed input tests
Without cache : 57.091, maxdiff 0.0000 %
With cache : 13.743, maxdiff = 0.0000 %
Cache speed up : 315.422 %
Random input tests
Without cache : 109.624, maxdiff 0.0001 %
With cache : 121.301, maxdiff = 0.0001 %
Cache speed up : -10.6515 %
Spun input (n=5) tests
With cache_length 0 : 6.274, maxdiff = 0.0000 %
Cache speed up : 0.000 %
With cache_length 1 : 7.157, maxdiff = 0.0000 %
Cache speed up : -14.089 %
With cache_length 2 : 9.137, maxdiff = 0.0000 %
Cache speed up : -45.643 %
With cache_length 3 : 8.452, maxdiff = 0.0000 %
Cache speed up : -34.718 %
With cache_length 4 : 7.792, maxdiff = 0.0000 %
Cache speed up : -24.205 %
With cache_length 5 : 2.263, maxdiff = 0.0000 %
Cache speed up : 177.264 %
With cache_length 6 : 2.261, maxdiff = 0.0000 %
Cache speed up : 177.447 %
With cache_length 7 : 2.268, maxdiff = 0.0000 %
Cache speed up : 176.603 %
With cache_length 8 : 2.312, maxdiff = 0.0000 %
Cache speed up : 171.380 %
With cache_length 9 : 2.397, maxdiff = 0.0000 %
Cache speed up : 161.740 %
------------------------------------------------------------
gcc-8 O2
Fixed input tests
Without cache : 53.337, maxdiff 0.0000 %
With cache : 18.462, maxdiff = 0.0000 %
Cache speed up : 188.895 %
Random input tests
Without cache : 100.039, maxdiff 0.0001 %
With cache : 114.300, maxdiff = 0.0001 %
Cache speed up : -14.2549 %
Spun input (n=5) tests
With cache_length 0 : 5.578, maxdiff = 0.0001 %
Cache speed up : 0.000 %
With cache_length 1 : 6.560, maxdiff = 0.0001 %
Cache speed up : -17.617 %
With cache_length 2 : 7.073, maxdiff = 0.0001 %
Cache speed up : -26.809 %
With cache_length 3 : 6.929, maxdiff = 0.0001 %
Cache speed up : -24.221 %
With cache_length 4 : 6.882, maxdiff = 0.0001 %
Cache speed up : -23.386 %
With cache_length 5 : 2.110, maxdiff = 0.0001 %
Cache speed up : 164.369 %
With cache_length 6 : 2.137, maxdiff = 0.0001 %
Cache speed up : 160.997 %
With cache_length 7 : 2.162, maxdiff = 0.0001 %
Cache speed up : 157.960 %
With cache_length 8 : 2.166, maxdiff = 0.0001 %
Cache speed up : 157.461 %
With cache_length 9 : 2.213, maxdiff = 0.0001 %
Cache speed up : 151.992 %
------------------------------------------------------------
gcc-8 O3 pgo
Fixed input tests
Without cache : 36.120, maxdiff 0.0000 %
With cache : 19.510, maxdiff = 0.0000 %
Cache speed up : 85.139 %
Random input tests
Without cache : 109.246, maxdiff 0.0001 %
With cache : 119.688, maxdiff = 0.0001 %
Cache speed up : -9.5583 %
Spun input (n=5) tests
With cache_length 0 : 3.553, maxdiff = 0.0000 %
Cache speed up : 0.000 %
With cache_length 1 : 4.040, maxdiff = 0.0000 %
Cache speed up : -13.690 %
With cache_length 2 : 4.258, maxdiff = 0.0000 %
Cache speed up : -19.836 %
With cache_length 3 : 4.549, maxdiff = 0.0000 %
Cache speed up : -28.017 %
With cache_length 4 : 4.560, maxdiff = 0.0000 %
Cache speed up : -28.330 %
With cache_length 5 : 2.009, maxdiff = 0.0000 %
Cache speed up : 76.876 %
With cache_length 6 : 2.021, maxdiff = 0.0000 %
Cache speed up : 75.786 %
With cache_length 7 : 2.055, maxdiff = 0.0000 %
Cache speed up : 72.871 %
With cache_length 8 : 2.068, maxdiff = 0.0000 %
Cache speed up : 71.786 %
With cache_length 9 : 2.195, maxdiff = 0.0000 %
Cache speed up : 61.912 %
------------------------------------------------------------
gcc-8 O2 pgo
Fixed input tests
Without cache : 33.898, maxdiff 0.0000 %
With cache : 17.995, maxdiff = 0.0000 %
Cache speed up : 88.375 %
Random input tests
Without cache : 109.224, maxdiff 0.0001 %
With cache : 120.231, maxdiff = 0.0001 %
Cache speed up : -10.0769 %
Spun input (n=5) tests
With cache_length 0 : 3.420, maxdiff = 0.0000 %
Cache speed up : 0.000 %
With cache_length 1 : 4.021, maxdiff = 0.0000 %
Cache speed up : -17.584 %
With cache_length 2 : 4.480, maxdiff = 0.0000 %
Cache speed up : -31.014 %
With cache_length 3 : 4.568, maxdiff = 0.0000 %
Cache speed up : -33.572 %
With cache_length 4 : 4.658, maxdiff = 0.0000 %
Cache speed up : -36.221 %
With cache_length 5 : 1.983, maxdiff = 0.0000 %
Cache speed up : 72.447 %
With cache_length 6 : 1.953, maxdiff = 0.0000 %
Cache speed up : 75.095 %
With cache_length 7 : 1.988, maxdiff = 0.0000 %
Cache speed up : 71.997 %
With cache_length 8 : 2.005, maxdiff = 0.0000 %
Cache speed up : 70.530 %
With cache_length 9 : 2.068, maxdiff = 0.0000 %
Cache speed up : 65.337 %
------------------------------------------------------------
gcc-7 O3
Fixed input tests
Without cache : 57.091, maxdiff 0.0000 %
With cache : 13.743, maxdiff = 0.0000 %
Cache speed up : 315.422 %
Random input tests
Without cache : 109.624, maxdiff 0.0001 %
With cache : 121.301, maxdiff = 0.0001 %
Cache speed up : -10.6515 %
Spun input (n=5) tests
With cache_length 0 : 6.274, maxdiff = 0.0000 %
Cache speed up : 0.000 %
With cache_length 1 : 7.157, maxdiff = 0.0000 %
Cache speed up : -14.089 %
With cache_length 2 : 9.137, maxdiff = 0.0000 %
Cache speed up : -45.643 %
With cache_length 3 : 8.452, maxdiff = 0.0000 %
Cache speed up : -34.718 %
With cache_length 4 : 7.792, maxdiff = 0.0000 %
Cache speed up : -24.205 %
With cache_length 5 : 2.263, maxdiff = 0.0000 %
Cache speed up : 177.264 %
With cache_length 6 : 2.261, maxdiff = 0.0000 %
Cache speed up : 177.447 %
With cache_length 7 : 2.268, maxdiff = 0.0000 %
Cache speed up : 176.603 %
With cache_length 8 : 2.312, maxdiff = 0.0000 %
Cache speed up : 171.380 %
With cache_length 9 : 2.397, maxdiff = 0.0000 %
Cache speed up : 161.740 %
------------------------------------------------------------
gcc-7 O2
Fixed input tests
Without cache : 55.646, maxdiff 0.0000 %
With cache : 18.802, maxdiff = 0.0000 %
Cache speed up : 195.965 %
Random input tests
Without cache : 105.036, maxdiff 0.0001 %
With cache : 114.886, maxdiff = 0.0001 %
Cache speed up : -9.3774 %
Spun input (n=5) tests
With cache_length 0 : 5.989, maxdiff = 0.0000 %
Cache speed up : 0.000 %
With cache_length 1 : 6.856, maxdiff = 0.0000 %
Cache speed up : -14.488 %
With cache_length 2 : 7.013, maxdiff = 0.0000 %
Cache speed up : -17.113 %
With cache_length 3 : 7.124, maxdiff = 0.0000 %
Cache speed up : -18.957 %
With cache_length 4 : 7.221, maxdiff = 0.0000 %
Cache speed up : -20.576 %
With cache_length 5 : 2.241, maxdiff = 0.0000 %
Cache speed up : 167.250 %
With cache_length 6 : 2.258, maxdiff = 0.0000 %
Cache speed up : 165.269 %
With cache_length 7 : 2.297, maxdiff = 0.0000 %
Cache speed up : 160.664 %
With cache_length 8 : 2.344, maxdiff = 0.0000 %
Cache speed up : 155.445 %
With cache_length 9 : 2.399, maxdiff = 0.0000 %
Cache speed up : 149.615 %
#!/usr/bin/env perl
use strict;

# Speed test for the rinterpolate library.
#
# For every compiler returned by compilers(), for each optimisation flag
# and for both the plain test program and the ./pgo.sh wrapper, rebuild
# with make, run the executable and scan its output for lines of the form
#
#   Report: <test name> : <time>
#
# After all combinations have run, print for each test name the lowest
# time seen and the build configuration that produced it.

my %fastest; # test name => { time => lowest time seen, label => build label }
my $n=0;     # number of build/run combinations executed so far
my $vb = 0;  # set to 1 to trace every comparison

foreach my $compiler (compilers())
{
    foreach my $optflags ('-O2',
                          '-O3')
    {
        foreach my $executable ({cmd => './test_rinterpolate', label => 'STANDARD'},
                                {cmd => './pgo.sh', label => 'PGO'})
        {
            my $label = "CC=$compiler COPTFLAGS=$optflags : $executable->{label}";
            my $cmd = "make clean; CC=$compiler COPTFLAGS=$optflags make ; $executable->{cmd}";
            print $cmd,"\n";
            my $r = `$cmd 2>/dev/null`;
            $n++;

            # The name is captured non-greedily and the padding before the
            # second colon is skipped, so the hash key carries no trailing
            # whitespace.
            while($r=~/Report\:\s+([^\:]+?)\s*\:\s+(\S+)/g)
            {
                my ($test,$time) = ($1,$2);
                my $best = $fastest{$test};

                if($vb)
                {
                    my $current = defined $best ? "$best->{time} $best->{label}" : '';
                    print "CF $test ($time) to $current\n";
                }

                # Compare against the stored number itself rather than
                # against a "time label" string.
                if(!defined $best ||
                   $time < $best->{time})
                {
                    $fastest{$test} = { time => $time, label => $label };
                }

                print "NOW $fastest{$test}{time} $fastest{$test}{label}\n"if($vb);
            }
            #goto END if($n>2);
        }
    }
}

END:
print "\n\nFastest:\n\n\n";
foreach my $k (sort keys %fastest)
{
    printf "%20s : %s\n",$k,"$fastest{$k}{time} $fastest{$k}{label}";
}
exit;
############################################################x
sub compilers
{
    # Return the list of C compiler command names found on this system.
    #
    # Candidates are the lines of `dpkg --list` that mention 'C compiler';
    # the second whitespace-separated field of each line is used as the
    # command name. A candidate is kept when `which` finds it and its
    # `--version` banner contains a version number. Commands that resolve
    # (via readlink -e) to a binary already recorded with the same version
    # are skipped, so each real compiler is listed only once.
    my @c = `dpkg --list |grep 'C compiler'`;
    my %compilers;       # resolved binary path => version already recorded
    my @compilers_list;  # command names to return, in discovery order
    my $vb=0;            # set to 1 for verbose tracing

    foreach my $cc (@c)
    {
        print "CC $cc\n"if($vb);
        my @cc = split(/\s+/,$cc);

        my $which = `which $cc[1]`;chomp $which;
        next unless($which);

        my $fullpath = `readlink -e $which`; chomp $fullpath;
        my $v = `$cc[1] --version 2>\&1| head -1`;

        # Accept both two-part (X.Y) and three-part (X.Y.Z) versions: the
        # third component is optional.
        next unless($v=~/(\d+\.\d+(?:\.\d+)?)/);
        my $version = $1;
        chomp $v;
        print "Found compiler : $cc[1] (at $fullpath) -> $v -> $version\n"if($vb);

        if(defined $compilers{$fullpath} &&
           $compilers{$fullpath} eq $version)
        {
            print "Already in use\n"if($vb);
        }
        else
        {
            $compilers{$fullpath} = $version;
            push(@compilers_list,$cc[1]);
        }
    }
    return @compilers_list;
}
......@@ -14,6 +14,14 @@ ticks getticks(void);
/*
* Test program for librinterpolate
*/
#define FIXED_TESTS 1
#define RANDOM_TESTS 1
#define SPIN_TESTS 1
/*
 * Print one timing result as "Report: <test name> : <time>".
 * TEST is a string and TICKS a floating-point time.
 *
 * The macro expands to a single expression with no trailing semicolon,
 * so the caller supplies the semicolon and the macro can be used safely
 * as the body of an unbraced if/else.
 */
#define Report(TEST,TICKS)                                      \
    printf("Report: %20s : %8.3f\n",(TEST),(TICKS))
void f(const rinterpolate_float_t u,
const rinterpolate_float_t v,
rinterpolate_float_t * RESTRICT x);
......@@ -29,7 +37,9 @@ int main (int argc,
* 2 is more.
*/
const rinterpolate_counter_t vb = 1;
#define TICKSCALE 100000000.0
/*
* Resolution multiplier (>=2).
* NB errors are likely to be tiny when > 1000
......@@ -42,7 +52,15 @@ int main (int argc,
*/
#define NTESTS 10000000
//#define NTESTS 10
/*
* Number of tests when checking the cache
*/
#define NSPINTESTS 1000000
//#define NSPINTESTS 10
/*
* Use current time as seed for random generator
*/
......@@ -169,9 +187,9 @@ int main (int argc,
x[i] = _random_number;
}
ticks t_cache,t_nocache;
rinterpolate_float_t t_cache = 0, t_nocache = 0;
if(1)
if(FIXED_TESTS)
{
printf("\nFixed input tests\n");
/* without cache */
......@@ -192,8 +210,9 @@ int main (int argc,
diffmax = MAXDIFF;
COMPARE;
}
t_nocache = getticks() - tstart;
printf("Without cache : %lld, maxdiff %6.4f %%\n",t_nocache,100.0*diffmax);
t_nocache = (getticks() - tstart)/TICKSCALE;
printf("%7s cache : %8.3f, maxdiff %6.4f %%\n","Without",t_nocache,100.0*diffmax);
Report("Fixed nocache",t_nocache);
/* with cache */
tstart = getticks();
......@@ -213,67 +232,134 @@ int main (int argc,
diffmax = MAXDIFF;
COMPARE;
}
t_cache = getticks() - tstart;
printf("With cache : %lld, maxdiff = %6.4f %%\n",t_cache,100.0*diffmax);
t_cache = (getticks() - tstart)/TICKSCALE;
printf("%7s cache : %8.3f, maxdiff = %6.4f %%\n","With",t_cache,100.0*diffmax);
printf("Cache speed up : %5.3f %%\n",
100.0*((rinterpolate_float_t)t_nocache - (rinterpolate_float_t)t_cache)/
MIN((rinterpolate_float_t)t_cache,(rinterpolate_float_t)t_nocache));
Report("Fixed cache",t_cache);
}
printf("\nRandom input tests\n");
/* without cache */
diffmax = 0.0;
tstart = getticks();
for(i=0;i<NTESTS;i++)
if(RANDOM_TESTS)
{
for(j=0;j<N;j++)
printf("\nRandom input tests\n");
/* without cache */
diffmax = 0.0;
tstart = getticks();
for(i=0;i<NTESTS;i++)
{
for(j=0;j<N;j++)
{
x[j] = _random_number;
}
rinterpolate(table,
rinterpolate_data,
N,
D,
L,
x,
r,
0);
rinterpolate_float_t rr[D];
f(x[0],x[1],rr);
diffmax = MAXDIFF;
COMPARE;
}
t_nocache = (getticks() - tstart)/TICKSCALE;
printf("%7s cache : %8.3f, maxdiff %6.4f %%\n","Without",t_nocache,100.0*diffmax);
Report("Random nocache",t_nocache);
/* with cache */
diffmax = 0.0;
tstart = getticks();
for(i=0;i<NTESTS;i++)
{
x[j] = _random_number;
x[0] = _random_number;
x[1] = _random_number;
rinterpolate(table,
rinterpolate_data,
N,
D,
L,
x,
r,
5);
rinterpolate_float_t rr[D];
f(x[0],x[1],rr);
diffmax = MAXDIFF;
COMPARE;
}
rinterpolate(table,
rinterpolate_data,
N,
D,
L,
x,
r,
0);
rinterpolate_float_t rr[D];
f(x[0],x[1],rr);
diffmax = MAXDIFF;
COMPARE;
t_cache = (getticks() - tstart)/TICKSCALE;
Report("Random cache",t_cache);
printf("%7s cache : %8.3f, maxdiff = %6.4f %%\n","With",t_cache,100.0*diffmax);
printf("Cache speed up : %6.4f %%\n",
100.0*((rinterpolate_float_t)t_nocache - (rinterpolate_float_t)t_cache)/
MIN((rinterpolate_float_t)t_cache,(rinterpolate_float_t)t_nocache));
}
t_nocache = getticks() - tstart;
printf("Without cache : %lld, maxdiff %6.4f %%\n",t_nocache,100.0*diffmax);
/* with cache */
diffmax = 0.0;
tstart = getticks();
for(i=0;i<NTESTS;i++)
if(SPIN_TESTS)
{
x[0] = _random_number;
x[1] = _random_number;
rinterpolate(table,
rinterpolate_data,
N,
D,
L,
x,
r,
5);
rinterpolate_float_t rr[D];
f(x[0],x[1],rr);
diffmax = MAXDIFF;
COMPARE;
const rinterpolate_counter_t n = 5;
printf("Spun input (n=%d) tests \n",n);
double ** xx = malloc(n*sizeof(rinterpolate_float_t*));
for(i=0;i<n;i++)
{
int kk;
*(xx+i) = malloc(n*sizeof(rinterpolate_float_t));
for(kk=0;kk<N;kk++)
{
xx[i][kk] = _random_number;
}
}
rinterpolate_counter_t ncache;
const rinterpolate_counter_t maxncache = n*2;
for(ncache=0; ncache < maxncache; ncache++)
{
diffmax = 0.0;
tstart = getticks();
for(i=0;i<NSPINTESTS;i++)
{
memcpy(x,xx[i%n],sizeof(rinterpolate_float_t)*N);
rinterpolate(table,
rinterpolate_data,
N,
D,
L,
x,
r,
ncache);
rinterpolate_float_t rr[D];
f(x[0],x[1],rr);
diffmax = MAXDIFF;
COMPARE;
}
t_cache = (getticks() - tstart)/TICKSCALE;
if(ncache==0)
{
t_nocache = t_cache;
}
printf("%7s cache_length %2d : %8.3f, maxdiff = %6.4f %%\n","With",ncache,t_cache,100.0*diffmax);
printf(" Cache speed up : %5.3f %%\n",
100.0*((rinterpolate_float_t)t_nocache - (rinterpolate_float_t)t_cache)/
MIN((rinterpolate_float_t)t_cache,(rinterpolate_float_t)t_nocache));
char c[100];
sprintf(c,"Spun %d",ncache);
Report(c,t_cache);
}
for(i=0;i<n;i++)
{
free(xx[i]);
}
free(xx);
}
t_cache = getticks() - tstart;
printf("With cache : %lld, maxdiff = %6.4f %%\n",t_cache,100.0*diffmax);
printf("Cache speed up : %6.4f %%\n",
100.0*((rinterpolate_float_t)t_nocache - (rinterpolate_float_t)t_cache)/
MIN((rinterpolate_float_t)t_cache,(rinterpolate_float_t)t_nocache));
}
rinterpolate_free_data(rinterpolate_data);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment