Commit 7fc2f764 authored by Cédric Boutillier's avatar Cédric Boutillier

Merge tag 'upstream/20140914'

Upstream version 20140914

# gpg: Signature faite le mer. 15 oct. 2014 23:44:31 CEST avec la clef RSA d'identifiant 39CD217A
# gpg: Bonne signature de « Cédric Boutillier <boutil@debian.org> »
# gpg:                 alias « Cédric Boutillier <cedric.boutillier@upmc.fr> »
# gpg:                 alias « Cédric Boutillier <cedric.boutillier@polytechnique.org> »
# gpg:                 alias « Cédric Boutillier <cedric.boutillier@gmail.com> »
parents f99c2b50 dc978556
# use glob syntax.
syntax: glob
Makefile.in
aclocal.m4
config.guess
configure
config.sub
depcomp
install-sh
ltmain.sh
m4/libtool.m4
m4/ltoptions.m4
m4/ltsugar.m4
m4/ltversion.m4
m4/lt~obsolete.m4
missing
src/config.h.in~
src/cscope.out
src/cscope.files
.deps
autom4te.cache
Makefile
bench/Makefile
config.log
config.status
libtool
src/config.h
src/stamp-h1
doc
AUTOMAKE_OPTIONS = gnu
AUTOMAKE_OPTIONS = foreign subdir-objects
ACLOCAL_AMFLAGS = -I m4
AM_CFLAGS=${SIMD_FLAGS} ${OPENMP_CFLAGS} ${DEBUG_FLAGS} ${M4RIE_M4RI_CFLAGS} ${M4RI_CFLAGS}
......@@ -11,6 +11,9 @@ libm4rie_la_SOURCES = m4rie/gf2e.c \
m4rie/strassen.c \
m4rie/mzd_slice.c \
m4rie/mzd_poly.c \
m4rie/mzd_ptr.c \
m4rie/karatsuba.c \
m4rie/blm.c \
m4rie/trsm.c \
m4rie/ple.c \
m4rie/conversion.c \
......@@ -36,46 +39,47 @@ pkgincludesub_HEADERS = m4rie/gf2x.h \
m4rie/strassen.h \
m4rie/mzd_slice.h \
m4rie/mzd_poly.h \
m4rie/mzd_ptr.h \
m4rie/blm.h \
m4rie/trsm.h \
m4rie/ple.h \
m4rie/permutation.h \
m4rie/conversion.h \
gf2e_cxx/finite_field_givaro.h
m4rie/conversion.h
libm4rie_la_LDFLAGS = -release 0.0.$(RELEASE) -no-undefined ${M4RIE_M4RI_LDFLAGS}
libm4rie_la_LIBADD = -lm4ri
#testing
TESTCXXFLAGS = ${AM_CFLAGS} @CXXFLAGS@ ${M4RIE_M4RI_CFLAGS} ${M4RI_CFLAGS} -I./tests
TESTLIBADD = -lm4ri -lm4rie
TESTLDADD =
TESTLDFLAGS =
TESTCFLAGS = ${AM_CFLAGS} @CFLAGS@ ${M4RIE_M4RI_CFLAGS} ${M4RI_CFLAGS} -I./tests
TESTLIBADD = -lm4ri -lm4rie -lm
TESTLDADD =
TESTLDFLAGS = ${M4RIE_M4RI_LDFLAGS}
check_PROGRAMS = test_trsm test_elimination test_multiplication test_smallops test_ple
test_elimination_SOURCES = tests/test_elimination.cc tests/testing.h
test_elimination_SOURCES = tests/test_elimination.c tests/testing.h
test_elimination_LDADD = ${TESTLDADD} ${TESTLIBADD}
test_elimination_LDFLAGS = ${TESTLDFLAGS}
test_elimination_CXXFLAGS = ${TESTCXXFLAGS}
test_elimination_CFLAGS = ${TESTCFLAGS}
test_multiplication_SOURCES = tests/test_multiplication.cc tests/testing.h
test_multiplication_SOURCES = tests/test_multiplication.c tests/testing.h
test_multiplication_LDADD = ${TESTLDADD} ${TESTLIBADD}
test_multiplication_LDFLAGS = ${TESTLDFLAGS}
test_multiplication_CXXFLAGS = ${TESTCXXFLAGS}
test_multiplication_CFLAGS = ${TESTCFLAGS}
test_smallops_SOURCES = tests/test_smallops.cc tests/testing.h
test_smallops_SOURCES = tests/test_smallops.c tests/testing.h
test_smallops_LDADD = ${TESTLDADD} ${TESTLIBADD}
test_smallops_LDFLAGS = ${TESTLDFLAGS}
test_smallops_CXXFLAGS = ${TESTCXXFLAGS}
test_smallops_CFLAGS = ${TESTCFLAGS}
test_trsm_SOURCES = tests/test_trsm.cc tests/testing.h
test_trsm_SOURCES = tests/test_trsm.c tests/testing.h
test_trsm_LDADD = ${TESTLDADD} ${TESTLIBADD}
test_trsm_LDFLAGS = ${TESTLDFLAGS}
test_trsm_CXXFLAGS = ${TESTCXXFLAGS}
test_trsm_CFLAGS = ${TESTCFLAGS}
test_ple_SOURCES = tests/test_ple.cc tests/testing.h
test_ple_SOURCES = tests/test_ple.c tests/testing.h
test_ple_LDADD = ${TESTLDADD} ${TESTLIBADD}
test_ple_LDFLAGS = ${TESTLDFLAGS}
test_ple_CXXFLAGS = ${TESTCXXFLAGS}
test_ple_CFLAGS = ${TESTCFLAGS}
TESTS = test_trsm test_elimination test_multiplication test_smallops test_ple
......
This diff is collapsed.
This diff is collapsed.
CPUCYCLES_DIR=./cpucycles-20060326
BENCHCXXFLAGS = $(AM_CFLAGS) -I.. -I${CPUCYCLES_DIR} @CXXFLAGS@ -DNDEBUG
BENCHLIBADD = -lm4ri cpucycles.o
BENCHLDFLAGS = -Wl,-rpath,../.libs/ ../.libs/libm4rie.so -L${CPUCYCLES_DIR} -Wl,-rpath,${M4RIE_M4RI_PREFIX}/lib
BENCHCFLAGS = $(AM_CFLAGS) -I.. -I${CPUCYCLES_DIR} @CXXFLAGS@ -DNDEBUG ${M4RIE_M4RI_CFLAGS}
BENCHLIBADD = -lm4ri -lm cpucycles.o
BENCHLDFLAGS = -Wl,-rpath,../.libs/ ../.libs/libm4rie.so -L${CPUCYCLES_DIR} ${M4RIE_M4RI_LDFLAGS} -Wl,-rpath,${M4RIE_M4RI_PREFIX}/lib
EXTRA_PROGRAMS = bench_multiplication bench_elimination bench_smallops bench_trsm bench_ple
......@@ -17,24 +17,24 @@ clean-local:
-rm -f cpucycles.h
-rm -f $(EXTRA_PROGRAMS)
bench_elimination_SOURCES=bench_elimination.cc cpucycles.h benchmarking.cc
bench_elimination_CXXFLAGS=${BENCHCXXFLAGS}
bench_elimination_SOURCES=bench_elimination.c cpucycles.h benchmarking.c
bench_elimination_CFLAGS=${BENCHCFLAGS}
bench_elimination_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD}
bench_multiplication_SOURCES=bench_multiplication.cc cpucycles.h benchmarking.cc
bench_multiplication_CXXFLAGS=${BENCHCXXFLAGS}
bench_multiplication_SOURCES=bench_multiplication.c cpucycles.h benchmarking.c
bench_multiplication_CFLAGS=${BENCHCFLAGS}
bench_multiplication_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD}
bench_smallops_SOURCES=bench_smallops.cc cpucycles.h benchmarking.cc
bench_smallops_CXXFLAGS=${BENCHCXXFLAGS}
bench_smallops_SOURCES=bench_smallops.c cpucycles.h benchmarking.c
bench_smallops_CFLAGS=${BENCHCFLAGS}
bench_smallops_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD}
bench_trsm_SOURCES=bench_trsm.cc cpucycles.h benchmarking.cc
bench_trsm_CXXFLAGS=${BENCHCXXFLAGS}
bench_trsm_SOURCES=bench_trsm.c cpucycles.h benchmarking.c
bench_trsm_CFLAGS=${BENCHCFLAGS}
bench_trsm_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD}
bench_ple_SOURCES=bench_ple.cc cpucycles.h benchmarking.cc
bench_ple_CXXFLAGS=${BENCHCXXFLAGS}
bench_ple_SOURCES=bench_ple.c cpucycles.h benchmarking.c
bench_ple_CFLAGS=${BENCHCFLAGS}
bench_ple_LDFLAGS=${BENCHLDFLAGS} ${BENCHLIBADD}
bench: cpucycles.o ${EXTRA_PROGRAMS}
\ No newline at end of file
bench: cpucycles.o ${EXTRA_PROGRAMS}
This diff is collapsed.
......@@ -97,7 +97,7 @@ void print_help() {
printf(" newton-john -- Newton-John tables (mzed_t) \n");
printf(" strassen -- Strassen+Newton-John (mzed_t)\n");
printf(" karatsuba -- Karatsuba (mzed_t)\n");
printf(" type -- mzed_t or mzd_slice_t (default: mzed_t)\n");
printf(" type -- mzed_t or mzd_slice_t (default: mzd_slice_t)\n");
printf("\n");
bench_print_global_options(stdout);
}
......@@ -128,7 +128,7 @@ int main(int argc, char **argv) {
else
m4ri_die("unknown type '%s'\n",argv[5]);
} else {
params.type = 0;
params.type = 1;
}
......@@ -139,9 +139,11 @@ int main(int argc, char **argv) {
else
run_bench(run_mzd_slice, (void*)&params, data, 2);
double cc_per_op = ((double)data[1])/ ( (double)params.m * powl((double)params.n,1.807) );
double cc_per_op = ((double)data[1])/ ( (double)params.m * powl((double)params.n,1.807) * (double)params.k * log2((double)params.k));
printf("e: %2d, m: %5d, n: %5d, type: %d, algo: %10s, cpu cycles: %10llu, cc/(mn^1.807): %.5lf, wall time: %lf\n", params.k, params.m, params.n, params.type, params.algorithm, data[1], cc_per_op, data[0] / 1000000.0);
const char *complexity = (params.m == params.n) ? "n^2.807·d·log(d)" : "mn^1.807·d·log(d)";
printf("e: %2d, m: %5d, n: %5d, type: %d, algo: %10s, cpu cycles: %12llu, cc/(%s): %.5lf, wall time: %8.5lf\n", params.k, params.m, params.n, params.type, params.algorithm, data[1], complexity, cc_per_op, data[0] / 1000000.0);
}
/*
* benchmarking.cc
* benchmarking.c
*
* Benchmark engine.
*
......
#ifndef BENCHMARKETING_H
#define BENCHMARKETING_H
/*
 * Public interface of the benchmark engine.
 *
 * The header pulls in everything its own declarations need, so it can be
 * included first in any translation unit.
 */
#include <stdint.h>
#include <stdio.h> /* FILE, needed by bench_print_global_options() */
/*
 * Command line options. They are only declared here; their definitions and
 * documentation live with the benchmark engine implementation
 * (NOTE(review): the original comment pointed at benchmarking.h itself,
 * presumably benchmarking.c was meant -- confirm).
 */
extern int bench_quiet;
extern int bench_dump;
extern int bench_minimum;
extern int bench_maximum;
extern unsigned long long bench_maxtime;
extern double bench_accuracy;
extern int bench_confidence_index;
extern char const* progname;
extern uint64_t bench_count;
/* Timing helper taking a reference value t0 (semantics defined by the engine). */
unsigned long long walltime(unsigned long long t0);
/* Receives pointers to argc/argv so the engine can inspect (and possibly adjust) them. */
int global_options(int* argcp, char*** argvp);
/* Writes the description of the global options to the given stream. */
void bench_print_global_options(FILE*);
/*
 * Run the benchmark callback f.
 *
 *   f        - callback; receives params, the data array and a pointer to
 *              the number of entries in it
 *   params   - opaque pointer handed through to f unchanged
 *   data     - array of data_len result slots
 *   data_len - number of entries in data
 */
int run_bench(
int (*f)(void* params, unsigned long long* data, int *data_len),
void* params,
unsigned long long* data,
int data_len);
/* PAPI hardware-counter support, only declared when HAVE_LIBPAPI is defined. */
#ifdef HAVE_LIBPAPI
extern int papi_events[];
extern int papi_array_len;
char* papi_event_name(int event);
#endif
/* Numerically log2(7). */
#define __M4RIE_OMEGA 2.80735492205760
#endif //BENCHMARKETING_H
/*
cpucycles/alpha.c version 20060316
D. J. Bernstein
Public domain.
*/
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
/* Current time of day from gettimeofday(), expressed in microseconds. */
static long long tod(void)
{
  struct timeval now;
  long long usec;

  gettimeofday(&now,(struct timezone *) 0);
  usec = now.tv_sec * (long long) 1000000;
  usec += now.tv_usec;
  return usec;
}
/*
 * Execute the "rpcc" instruction and return the low 32 bits of the value
 * it produces; the upper bits are masked off before returning.
 */
static long long rpcc(void)
{
unsigned long long t;
asm volatile("rpcc %0" : "=r"(t));
return t & 0xffffffff;
}
static long long firstrpcc;
static long long firsttod;
static long long lastrpcc;
static long long lasttod;
static double mhz = 0;
/*
 * Calibrate mhz (counter ticks per microsecond): record a starting
 * counter/time pair, spin until at least 10000 microseconds have elapsed,
 * then divide the elapsed ticks (mod 2^32) by the elapsed microseconds.
 */
static void init(void)
{
  firstrpcc = rpcc();
  firsttod = tod();
  for (;;) {
    lastrpcc = rpcc();
    lasttod = tod();
    if (lasttod - firsttod >= 10000) break;
  }
  lastrpcc = (lastrpcc - firstrpcc) & 0xffffffff;
  lasttod -= firsttod;
  mhz = (double) lastrpcc / (double) lasttod;
}
/*
 * Return a cycle count wider than the 32 bits rpcc() delivers.
 * The number of 2^32 wrap-arounds since the first reading is estimated
 * from the elapsed wall-clock time and the current ticks-per-microsecond
 * estimate (mhz); mhz is then refreshed from the reconstructed total.
 */
long long cpucycles_alpha(void)
{
double x;
long long y;
/* mhz stays 0 until init() has run the calibration. */
if (!mhz) init();
lastrpcc = rpcc();
lasttod = tod();
/* Ticks since firstrpcc modulo 2^32, and microseconds since firsttod. */
lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff;
lasttod -= firsttod;
/* Number of cycles since firstrpcc is lastrpcc + 2^32 y for unknown y. */
/* Number of microseconds since firsttod is lasttod. */
/* Expected ticks minus observed low bits, scaled by 2^-32, estimates y. */
x = (lasttod * mhz - lastrpcc) * 0.00000000023283064365386962890625;
/* Truncate, then step y until it is within 0.5 of x. */
y = x;
while (x > y + 0.5) y += 1;
while (x < y - 0.5) y -= 1;
/* Convert the wrap count to ticks (times 2^32) and add the low 32 bits. */
y *= 4294967296ULL;
lastrpcc += y;
/* Refresh the rate estimate from the totals since the first reading. */
mhz = (double) lastrpcc / (double) lasttod;
return firstrpcc + lastrpcc;
}
/* Counter ticks per second: the ticks-per-microsecond estimate times 10^6. */
long long cpucycles_alpha_persecond(void)
{
  if (mhz == 0) {
    init();
  }
  return 1000000.0 * mhz;
}
/*
cpucycles alpha.h version 20060318
D. J. Bernstein
Public domain.
*/
#ifndef CPUCYCLES_alpha_h
#define CPUCYCLES_alpha_h
#ifdef __cplusplus
extern "C" {
#endif
extern long long cpucycles_alpha(void);
extern long long cpucycles_alpha_persecond(void);
#ifdef __cplusplus
}
#endif
#ifndef cpucycles_implementation
#define cpucycles_implementation "alpha"
#define cpucycles cpucycles_alpha
#define cpucycles_persecond cpucycles_alpha_persecond
#endif
#endif
#include <stdio.h>
#include <sys/types.h>
/*
 * Read the processor's time-stamp counter.
 * The two .byte directives emit 0x0F 0x31, the encoding of RDTSC, which
 * delivers the counter split across %edx (high half) and %eax (low half).
 * The shlq/orq pair merges both halves in %rax, which is bound to result;
 * %rdx is listed as clobbered.
 */
long long cpucycles_amd64cpuinfo(void)
{
unsigned long long result;
asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
: "=a" (result) :: "%rdx");
return result;
}
/*
 * Cycles per second taken from the first "cpu MHz" line of /proc/cpuinfo.
 * Returns 0 when the file cannot be opened or contains no such line.
 */
long long cpucycles_amd64cpuinfo_persecond(void)
{
  FILE *cpuinfo = fopen("/proc/cpuinfo","r");
  double mhz;
  int matched;

  if (cpuinfo == NULL) return 0;

  /* Try to parse the current line; on a mismatch discard it and retry. */
  while ((matched = fscanf(cpuinfo,"cpu MHz : %lf",&mhz)) <= 0) {
    if (matched == 0) matched = fscanf(cpuinfo,"%*[^\n]\n");
    if (matched < 0) {
      /* End of file or read error: no frequency available. */
      mhz = 0;
      break;
    }
  }

  fclose(cpuinfo);
  return 1000000.0 * mhz;
}
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>
/*
 * Read the processor's time-stamp counter.
 * The two .byte directives emit 0x0F 0x31, the encoding of RDTSC, which
 * delivers the counter split across %edx (high half) and %eax (low half).
 * The shlq/orq pair merges both halves in %rax, which is bound to result;
 * %rdx is listed as clobbered.
 */
long long cpucycles_amd64tscfreq(void)
{
unsigned long long result;
asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
: "=a" (result) :: "%rdx");
return result;
}
/*
 * Time-stamp counter frequency as reported by the "machdep.tsc_freq"
 * sysctl. Returns 0 when the sysctl cannot be read.
 *
 * sysctlbyname() must be declared (via <sys/sysctl.h>) before this call:
 * without a prototype the null "new value" argument would be passed as an
 * int rather than as a pointer.
 */
long long cpucycles_amd64tscfreq_persecond(void)
{
  long result = 0;
  size_t resultlen = sizeof result;

  /* Read-only query: no new value is supplied. */
  if (sysctlbyname("machdep.tsc_freq",&result,&resultlen,NULL,0) != 0)
    return 0;
  return result;
}
/*
cpucycles amd64tscfreq.h version 20060318
D. J. Bernstein
Public domain.
*/
#ifndef CPUCYCLES_amd64tscfreq_h
#define CPUCYCLES_amd64tscfreq_h
#ifdef __cplusplus
extern "C" {
#endif
extern long long cpucycles_amd64tscfreq(void);
extern long long cpucycles_amd64tscfreq_persecond(void);
#ifdef __cplusplus
}
#endif
#ifndef cpucycles_implementation
#define cpucycles_implementation "amd64tscfreq"
#define cpucycles cpucycles_amd64tscfreq
#define cpucycles_persecond cpucycles_amd64tscfreq_persecond
#endif
#endif
#include <time.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
static double cpufrequency = 0;
/*
 * Load cpufrequency from the "machdep.tsc_freq" sysctl. If the query
 * fails the zero-initialised value is stored, so cpufrequency stays 0.
 */
static void init(void)
{
  long tsc_freq = 0;
  size_t tsc_freq_len = sizeof(long);

  sysctlbyname("machdep.tsc_freq",&tsc_freq,&tsc_freq_len,0,0);
  cpufrequency = tsc_freq;
}
/*
 * Synthesise a cycle count from CLOCK_MONOTONIC: the clock reading in
 * seconds multiplied by cpufrequency.
 */
long long cpucycles_clockmonotonic(void)
{
  struct timespec now;
  double cycles;

  if (cpufrequency == 0) init();
  clock_gettime(CLOCK_MONOTONIC,&now);

  /* Same operation order as always: nanoseconds -> seconds, add whole seconds, scale. */
  cycles = now.tv_nsec;
  cycles *= 0.000000001;
  cycles += (double) now.tv_sec;
  cycles *= cpufrequency;
  return cycles;
}
/* Report cpufrequency, running init() first while it is still 0. */
long long cpucycles_clockmonotonic_persecond(void)
{
  if (cpufrequency == 0) {
    init();
  }
  return cpufrequency;
}
/*
cpucycles clockmonotonic.h version 20060318
D. J. Bernstein
Public domain.
*/
#ifndef CPUCYCLES_clockmonotonic_h
#define CPUCYCLES_clockmonotonic_h
#ifdef __cplusplus
extern "C" {
#endif
extern long long cpucycles_clockmonotonic(void);
extern long long cpucycles_clockmonotonic_persecond(void);
#ifdef __cplusplus
}
#endif
#ifndef cpucycles_implementation
#define cpucycles_implementation "clockmonotonic"
#define cpucycles cpucycles_clockmonotonic
#define cpucycles_persecond cpucycles_clockmonotonic_persecond
#endif
#endif
#!/bin/sh
# Choose the compiler driver and its 64-/32-bit selection flags from
# $COMPILER; any unrecognised value falls back to gcc.
case "$COMPILER" in
  suncc) driver=/opt/SUNWspro/bin/cc wide=-xarch=v9 narrow=-xarch=v8 ;;
  ibmcc) driver=xlc                  wide=-q64      narrow=-q32      ;;
  hpcc)  driver=/opt/ansic/bin/cc    wide=+DD64     narrow=+DD32     ;;
  *)     driver=gcc                  wide=-m64      narrow=-m32      ;;
esac

# Run the driver with -O2 and the caller's arguments, adding the word-size
# flag only when $ARCHITECTURE is exactly 64 or 32.
case "$ARCHITECTURE" in
  64) "$driver" "$wide" -O2 "$@" ;;
  32) "$driver" "$narrow" -O2 "$@" ;;
  *)  "$driver" -O2 "$@" ;;
esac
This diff is collapsed.
#!/bin/sh
# Probe candidate cpucycles implementations in a fixed order and keep the
# first one that compiles, links against test.c and passes ./test.
# Exit status: 0 on success, 111 when every candidate fails.

# Files this script leaves behind on success.
output="cpucycles.o cpucycles.h"
# Scratch files used while probing ("test" is the probe executable).
cleanup="test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c"
# From here on everything written to stderr ends up in do.notes.
exec 2>do.notes
rm -f $output $cleanup
# Left side of the pipe: one "<implementation> <compiler>" pair per line,
# listed in the order in which they are tried.
(
echo amd64tscfreq gcc
echo amd64cpuinfo gcc
echo x86tscfreq gcc
echo x86cpuinfo gcc
echo powerpclinux gcc
echo powerpcmacos gcc
echo powerpcaix gcc
echo powerpcaix ibmcc
echo sparcpsrinfo gcc
echo sparcpsrinfo suncc
echo sparc32psrinfo gcc
echo sparc32psrinfo suncc
echo hppapstat gcc
echo hppapstat hpcc
echo alpha gcc
echo clockmonotonic gcc
echo gettimeofday gcc
) | (
# Right side of the pipe: try each pair; any failing step skips to the next.
while read name compiler
do
echo ===== Trying $name.c with $compiler... >&2
rm -f $cleanup
# Work on copies under a fixed name so test.c can refer to cpucycles-impl.*.
cp $name.c cpucycles-impl.c || continue
cp $name.h cpucycles-impl.h || continue
env COMPILER=$compiler ./compile -c cpucycles-impl.c || continue
env COMPILER=$compiler ./compile -o test test.c cpucycles-impl.o || continue
./test || continue
echo ===== Success. Using $name.c. >&2
# Promote the working implementation to the final output names.
mv cpucycles-impl.o cpucycles.o
mv cpucycles-impl.h cpucycles.h
rm -f $cleanup
# Ends this subshell; as the last command of the pipeline its status
# becomes the script's exit status.
exit 0
done
# The candidate list ran out without a working implementation.
echo ===== Giving up. >&2
rm -f $output $cleanup
exit 111
)
===== Trying amd64tscfreq.c with gcc...
cpucycles-impl.o: In function `cpucycles_amd64tscfreq_persecond':
cpucycles-impl.c:(.text+0x3a): undefined reference to `sysctlbyname'
collect2: error: ld returned 1 exit status
===== Trying amd64cpuinfo.c with gcc...
===== Success. Using amd64cpuinfo.c.
#include <time.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/time.h>
static double cpufrequency = 0;
/*
 * Set cpufrequency (in Hz) from the first "cpu MHz" line of /proc/cpuinfo.
 * When the file cannot be opened cpufrequency is left untouched; when no
 * such line exists it is set to 0.
 */
static void init(void)
{
  FILE *cpuinfo = fopen("/proc/cpuinfo","r");
  double mhz;
  int matched;

  if (cpuinfo == NULL) return;

  /* Try to parse the current line; on a mismatch discard it and retry. */
  while ((matched = fscanf(cpuinfo,"cpu MHz : %lf",&mhz)) <= 0) {
    if (matched == 0) matched = fscanf(cpuinfo,"%*[^\n]\n");
    if (matched < 0) {
      /* End of file or read error: no frequency available. */
      mhz = 0;
      break;
    }
  }

  fclose(cpuinfo);
  cpufrequency = 1000000.0 * mhz;
}
/*
 * Synthesise a cycle count from gettimeofday(): the time of day in
 * seconds multiplied by cpufrequency.
 */
long long cpucycles_gettimeofday(void)
{
  struct timeval now;
  double cycles;

  if (cpufrequency == 0) init();
  gettimeofday(&now,(struct timezone *) 0);

  /* Same operation order as always: microseconds -> seconds, add whole seconds, scale. */
  cycles = now.tv_usec;
  cycles *= 0.000001;
  cycles += (double) now.tv_sec;
  cycles *= cpufrequency;
  return cycles;
}
/* Report cpufrequency, running init() first while it is still 0. */
long long cpucycles_gettimeofday_persecond(void)
{
  if (cpufrequency == 0) {
    init();
  }
  return cpufrequency;
}
/*
cpucycles gettimeofday.h version 20060318
D. J. Bernstein
Public domain.
*/
#ifndef CPUCYCLES_gettimeofday_h
#define CPUCYCLES_gettimeofday_h
#ifdef __cplusplus
extern "C" {
#endif
extern long long cpucycles_gettimeofday(void);
extern long long cpucycles_gettimeofday_persecond(void);
#ifdef __cplusplus
}
#endif
#ifndef cpucycles_implementation
#define cpucycles_implementation "gettimeofday"
#define cpucycles cpucycles_gettimeofday
#define cpucycles_persecond cpucycles_gettimeofday_persecond
#endif
#endif
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/pstat.h>
#include <machine/inline.h>
/*
 * Return the value that the _MFCTL macro stores into result.
 * NOTE(review): _MFCTL(16, ...) presumably reads control register 16,
 * the PA-RISC interval timer -- confirm against <machine/inline.h>.
 */
long long cpucycles_hppapstat(void)
{
register long long result;
_MFCTL(16,result);
return result;
}
long long cpucycles_hppapstat_persecond(void)
{
struct pst_processor pst;
union pstun pu;
double result;
pu.pst_processor = &pst;
if (pstat(PSTAT_PROCESSOR,pu,sizeof(pst),1,0) < 0) return 0;
result = pst.psp_iticksperclktick;
result *= (double) sysconf(_SC_CLK_TCK);
return result;
}
/*
cpucycles hppapstat.h version 20060319
D. J. Bernstein
Public domain.
*/