New upstream version 1.0.9

parents
Copyright (C) 2014-2017, New York University
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include README.rst
include LICENSE.txt
graft native
Metadata-Version: 1.1
Name: reprozip
Version: 1.0.9
Summary: Linux tool enabling reproducible experiments (packer)
Home-page: http://vida-nyu.github.io/reprozip/
Author: Remi Rampin
Author-email: remirampin@gmail.com
License: BSD
Description: ReproZip
========
`ReproZip <https://www.reprozip.org/>`__ is a tool aimed at simplifying the process of creating reproducible
experiments from command-line executions, a frequently-used common denominator
in computational science. It tracks operating system calls and creates a package
that contains all the binaries, files and dependencies required to run a given
command on the author's computational environment (packing step).
A reviewer can then extract the experiment in their own environment to reproduce the results (unpacking step).
reprozip
--------
This is the component responsible for the packing step on Linux distributions.
Please refer to `reprounzip <https://pypi.python.org/pypi/reprounzip>`_,
`reprounzip-vagrant <https://pypi.python.org/pypi/reprounzip-vagrant>`_,
and `reprounzip-docker <https://pypi.python.org/pypi/reprounzip-docker>`_
for other components and plugins.
Additional Information
----------------------
For more detailed information, please refer to our `website <https://www.reprozip.org/>`_, as well as to
our `documentation <https://reprozip.readthedocs.io/>`_.
ReproZip is currently being developed at `NYU <http://engineering.nyu.edu/>`_. The team includes:
* `Fernando Chirigati <https://vgc.poly.edu/~fchirigati/>`_
* `Juliana Freire <https://vgc.poly.edu/~juliana/>`_
* `Remi Rampin <https://remirampin.com/>`_
* `Dennis Shasha <http://cs.nyu.edu/shasha/>`_
* `Vicky Steeves <https://vickysteeves.com/>`_
Keywords: reprozip,reprounzip,reproducibility,provenance,vida,nyu
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: BSD License
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Operating System :: POSIX :: Linux
Classifier: Programming Language :: C
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: System :: Archiving
ReproZip
========
`ReproZip <https://www.reprozip.org/>`__ is a tool aimed at simplifying the process of creating reproducible
experiments from command-line executions, a frequently-used common denominator
in computational science. It tracks operating system calls and creates a package
that contains all the binaries, files and dependencies required to run a given
command on the author's computational environment (packing step).
A reviewer can then extract the experiment in their own environment to reproduce the results (unpacking step).
reprozip
--------
This is the component responsible for the packing step on Linux distributions.
Please refer to `reprounzip <https://pypi.python.org/pypi/reprounzip>`_,
`reprounzip-vagrant <https://pypi.python.org/pypi/reprounzip-vagrant>`_,
and `reprounzip-docker <https://pypi.python.org/pypi/reprounzip-docker>`_
for other components and plugins.
Additional Information
----------------------
For more detailed information, please refer to our `website <https://www.reprozip.org/>`_, as well as to
our `documentation <https://reprozip.readthedocs.io/>`_.
ReproZip is currently being developed at `NYU <http://engineering.nyu.edu/>`_. The team includes:
* `Fernando Chirigati <https://vgc.poly.edu/~fchirigati/>`_
* `Juliana Freire <https://vgc.poly.edu/~juliana/>`_
* `Remi Rampin <https://remirampin.com/>`_
* `Dennis Shasha <http://cs.nyu.edu/shasha/>`_
* `Vicky Steeves <https://vickysteeves.com/>`_
#ifndef CONFIG_H
#define CONFIG_H

/* Chunk size, in bytes, used by the ptrace helpers when they walk through the
 * tracee's memory. */
#define WORD_SIZE sizeof(int)

/*
 * Architecture selection.
 *
 * Auto-detection defines either X86_64 or I386 from the compiler's predefined
 * macros. It is skipped when the build already chose an architecture, i.e.
 * when I386 or X86_64 is defined. X86 is also still accepted, because the
 * previous version of this guard tested that name (it did not test I386, so a
 * build that predefined I386 was not recognised).
 */
#if !defined(I386) && !defined(X86) && !defined(X86_64)
#   if defined(__x86_64__) || defined(__x86_64)
#       define X86_64
#   elif defined(__i386__) || defined(__i386) || defined(_M_I86) || defined(_M_IX86)
#       define I386
#   else
#       error Unrecognized architecture!
#   endif
#endif

/*
 * Static assertion trick.
 *
 * Declares an anonymous enum whose single constant `name` is 1/(!!condition).
 * If `condition` is false the initializer divides by zero in a constant
 * expression and compilation fails; otherwise `name` is simply 1.
 */
#define STATIC_ASSERT(name, condition) \
    enum { name = 1/(!!( \
            condition \
        )) }

/* Pointers are stored in `long int` variables elsewhere (see register_type in
 * tracer.h), so make sure they fit. */
STATIC_ASSERT(ASSERT_POINTER_FITS_IN_LONG_INT,
              sizeof(long int) >= sizeof(void*));

#endif
This diff is collapsed.
#ifndef DATABASE_H
#define DATABASE_H
/*
 * Interface of the trace database.
 *
 * NOTE(review): only the declarations are visible here; the descriptions
 * below are derived from names and signatures and should be confirmed against
 * the implementation.
 */
/* Flags describing how a file was accessed. Each one is a distinct bit;
 * presumably they are OR-ed together in the `mode` argument of
 * db_add_file_open() -- confirm. */
#define FILE_READ 0x01
#define FILE_WRITE 0x02
#define FILE_WDIR 0x04 /* File is used as a process's working dir */
#define FILE_STAT 0x08 /* File is stat()d (only metadata is read) */
#define FILE_LINK 0x10 /* The link itself is accessed, no dereference */
/* Opens/initializes the database stored at `filename`.
 * NOTE(review): return convention (0 on success?) not visible here. */
int db_init(const char *filename);
/* Closes the database; `rollback` presumably selects whether pending changes
 * are discarded instead of committed -- confirm. */
int db_close(int rollback);
/* Records a new process (or thread, when `is_thread` is set) created by
 * `parent_id`, with its working directory. `id` is an output parameter,
 * presumably receiving the identifier of the new record. */
int db_add_process(unsigned int *id, unsigned int parent_id,
const char *working_dir, int is_thread);
/* Records that process `id` exited with `exitcode`. */
int db_add_exit(unsigned int id, int exitcode);
/* Records the initial (parent-less) process; `id` is an output parameter. */
int db_add_first_process(unsigned int *id, const char *working_dir);
/* Records that `process` accessed the file `name`; `mode` takes the FILE_*
 * flags above and `is_dir` tells whether the path is a directory. */
int db_add_file_open(unsigned int process,
const char *name, unsigned int mode,
int is_dir);
/* Records an exec performed by `process`: the binary, its argument and
 * environment arrays (presumably NULL-terminated, like execve()'s) and the
 * working directory at that time. */
int db_add_exec(unsigned int process, const char *binary,
const char *const *argv, const char *const *envp,
const char *workingdir);
#endif
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "log.h"
/* Verbosity threshold for messages echoed to stderr (see log_real_()); the
 * variable itself is defined in another translation unit. */
extern int trace_verbosity;
/* Stream opened by log_open_file(); NULL while no log file is open. */
static FILE *logfile = NULL;
/**
 * Opens `filename` in binary append mode and makes it the log file.
 *
 * No log file may already be open when this is called (checked with
 * assert()).
 *
 * Returns 0 on success. If the file cannot be opened, a critical message
 * carrying the strerror() text is logged and -1 is returned.
 */
int log_open_file(const char *filename)
{
    FILE *opened;

    assert(logfile == NULL);

    opened = fopen(filename, "ab");
    logfile = opened;
    if(opened != NULL)
        return 0;

    log_critical(0, "couldn't open log file: %s", strerror(errno));
    return -1;
}
/**
 * Closes the log file, if one is open, and forgets about it so that
 * log_open_file() can be called again. Does nothing when no file is open.
 */
void log_close_file(void)
{
    if(logfile == NULL)
        return;

    fclose(logfile);
    logfile = NULL;
}
void log_real_(pid_t tid, const char *tag, int lvl, const char *format, ...)
{
va_list args;
char datestr[13]; /* HH:MM:SS.mmm */
static char *buffer = NULL;
static size_t bufsize = 4096;
int length;
if(buffer == NULL)
buffer = malloc(bufsize);
{
struct timeval tv;
gettimeofday(&tv, NULL);
strftime(datestr, 13, "%H:%M:%S", localtime(&tv.tv_sec));
sprintf(datestr+8, ".%03u", (unsigned int)(tv.tv_usec / 1000));
}
va_start(args, format);
length = vsnprintf(buffer, bufsize, format, args);
va_end(args);
if(length >= bufsize)
{
while(length >= bufsize)
bufsize *= 2;
free(buffer);
buffer = malloc(bufsize);
va_start(args, format);
length = vsnprintf(buffer, bufsize, format, args);
va_end(args);
}
if(trace_verbosity >= lvl)
{
fprintf(stderr, "[REPROZIP] %s %s: ", datestr, tag);
if(tid > 0)
fprintf(stderr, "[%d] ", tid);
fwrite(buffer, length, 1, stderr);
}
if(logfile && lvl <= 2)
{
fprintf(logfile, "[REPROZIP] %s %s: ", datestr, tag);
if(tid > 0)
fprintf(logfile, "[%d] ", tid);
fwrite(buffer, length, 1, logfile);
fflush(logfile);
}
}
#ifndef LOG_H
#define LOG_H

#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>

/* Opens `filename` (append mode) as the log file; returns 0 on success and -1
 * on failure. */
int log_open_file(const char *filename);

/* Closes the log file if one is open. */
void log_close_file(void);

/* Backend of the macros below: formats a printf-style message and writes it,
 * prefixed with a timestamp, `tag` and (when positive) `tid`, to stderr when
 * trace_verbosity >= lvl and to the log file when lvl <= 2. Use the log_*()
 * macros rather than calling this directly. */
void log_real_(pid_t tid, const char *tag, int lvl, const char *format, ...);

/*
 * Logging macros: log_<level>(tid, "format", args...)
 *
 * The format must be a string literal, because a newline is appended to it
 * through literal concatenation. The log_<level>_() variants do not append
 * the newline.
 *
 * Level numbers passed to log_real_(): CRITICAL and ERROR 0, WARNING 1,
 * INFO 2, DEBUG 3.
 *
 * log_error() used to expand to log_critical_(), which tagged every error as
 * "CRITICAL"; it now uses log_error_() (same level, "ERROR" tag).
 */
#ifdef __GNUC__
/* GNU extension: `, ## __VA_ARGS__` drops the comma when no argument follows
 * the format string. */
#define log_critical(i, s, ...) log_critical_(i, s "\n", ## __VA_ARGS__)
#define log_error(i, s, ...) log_error_(i, s "\n", ## __VA_ARGS__)
#define log_warn(i, s, ...) log_warn_(i, s "\n", ## __VA_ARGS__)
#define log_info(i, s, ...) log_info_(i, s "\n", ## __VA_ARGS__)
#define log_debug(i, s, ...) log_debug_(i, s "\n", ## __VA_ARGS__)
#define log_critical_(i, s, ...) log_real_(i, "CRITICAL", 0, s, ## __VA_ARGS__)
#define log_error_(i, s, ...) log_real_(i, "ERROR", 0, s, ## __VA_ARGS__)
#define log_warn_(i, s, ...) log_real_(i, "WARNING", 1, s, ## __VA_ARGS__)
#define log_info_(i, s, ...) log_real_(i, "INFO", 2, s, ## __VA_ARGS__)
#define log_debug_(i, s, ...) log_real_(i, "DEBUG", 3, s, ## __VA_ARGS__)
#else
/* Portable fallback: at least one argument must follow the format string,
 * otherwise the expansion ends with a dangling comma. */
#define log_critical(i, s, ...) log_critical_(i, s "\n", __VA_ARGS__)
#define log_error(i, s, ...) log_error_(i, s "\n", __VA_ARGS__)
#define log_warn(i, s, ...) log_warn_(i, s "\n", __VA_ARGS__)
#define log_info(i, s, ...) log_info_(i, s "\n", __VA_ARGS__)
#define log_debug(i, s, ...) log_debug_(i, s "\n", __VA_ARGS__)
#define log_critical_(i, s, ...) log_real_(i, "CRITICAL", 0, s, __VA_ARGS__)
#define log_error_(i, s, ...) log_real_(i, "ERROR", 0, s, __VA_ARGS__)
#define log_warn_(i, s, ...) log_real_(i, "WARNING", 1, s, __VA_ARGS__)
#define log_info_(i, s, ...) log_real_(i, "INFO", 2, s, __VA_ARGS__)
#define log_debug_(i, s, ...) log_real_(i, "DEBUG", 3, s, __VA_ARGS__)
#endif

#endif
#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <unistd.h>
#include "config.h"
#include "log.h"
#include "ptrace_utils.h"
#include "tracer.h"
/**
 * Fetches one word from the memory of thread `tid` at address `addr` using
 * PTRACE_PEEKDATA.
 *
 * The ptrace() return value is the data itself, so failure is detected by
 * clearing errno before the call and testing it afterwards. On failure an
 * error is logged and 0 is returned.
 */
static long tracee_getword(pid_t tid, const void *addr)
{
    long word;

    errno = 0;
    word = ptrace(PTRACE_PEEKDATA, tid, addr, NULL);
    if(errno == 0)
        return word;

    /* LCOV_EXCL_START : We only do that on things that went through the
     * kernel successfully, and so should be valid. The exception is
     * execve(), which will dup arguments when entering the syscall */
    log_error(tid, "tracee_getword() failed: %s", strerror(errno));
    return 0;
    /* LCOV_EXCL_END */
}
/**
 * Reads a pointer stored at address `addr` in the memory of thread `tid`.
 *
 * The number of bytes read depends on `mode`: 4 for MODE_I386 (the value is
 * then zero-extended), 8 otherwise.
 */
void *tracee_getptr(int mode, pid_t tid, const void *addr)
{
    if(mode != MODE_I386)
    {
        /* mode == MODE_X86_64: 64-bit pointers */
        uint64_t wide;
        tracee_read(tid, (void*)&wide, addr, sizeof(wide));
        return (void*)wide;
    }
    else
    {
        /* 32-bit pointers */
        uint32_t narrow;
        tracee_read(tid, (void*)&narrow, addr, sizeof(narrow));
        return (void*)(uint64_t)narrow;
    }
}
/**
 * Reads a `long` stored at address `addr` in the memory of thread `tid`.
 *
 * A tracee in MODE_I386 has 32-bit longs, which are zero-extended to 64 bits;
 * in any other mode (MODE_X86_64) the full 64 bits are read.
 */
uint64_t tracee_getlong(int mode, pid_t tid, const void *addr)
{
    uint64_t wide;
    uint32_t narrow;

    if(mode != MODE_I386)
    {
        tracee_read(tid, (void*)&wide, addr, sizeof(wide));
        return wide;
    }

    tracee_read(tid, (void*)&narrow, addr, sizeof(narrow));
    return (uint64_t)narrow;
}
/**
 * Returns the size in bytes of a pointer in a tracee running in `mode`:
 * 4 for MODE_I386, 8 otherwise (MODE_X86_64).
 */
size_t tracee_getwordsize(int mode)
{
    return (mode == MODE_I386) ? 4 : 8;
}
/**
 * Computes the length of the NUL-terminated string located at `str` in the
 * memory of thread `tid`.
 *
 * Memory is fetched with tracee_getword() at addresses aligned on WORD_SIZE,
 * and WORD_SIZE bytes of each fetched word are examined. Byte `k` is taken
 * from bits 8k..8k+7, i.e. a little-endian layout is assumed (true on the x86
 * targets accepted by config.h). A failed fetch yields 0 and therefore ends
 * the string.
 */
size_t tracee_strlen(pid_t tid, const char *str)
{
    const uintptr_t start = (uintptr_t)str;
    size_t offset = start % WORD_SIZE;  /* position of `str` in first chunk */
    uintptr_t base = start - offset;    /* aligned address being fetched */
    size_t count = 0;

    for(;;)
    {
        unsigned long word = tracee_getword(tid, (const void*)base);

        while(offset < WORD_SIZE)
        {
            unsigned char byte = word >> (8 * offset);
            if(byte == 0)
                return count;
            ++count;
            ++offset;
        }

        /* Following chunks are scanned from their first byte */
        offset = 0;
        base += WORD_SIZE;
    }
}
/**
 * Copies `size` bytes from address `src` in the memory of thread `tid` into
 * the local buffer `dst`.
 *
 * Memory is fetched with tracee_getword() at addresses aligned on WORD_SIZE;
 * only the bytes that fall inside [src, src + size) are stored. Bytes of a
 * chunk whose fetch failed are copied as 0, since tracee_getword() returns 0
 * in that case.
 */
void tracee_read(pid_t tid, char *dst, const char *src, size_t size)
{
    const uintptr_t first = (uintptr_t)src;
    const uintptr_t limit = first + size;
    size_t offset = first % WORD_SIZE;  /* position of `src` in first chunk */
    uintptr_t base = first - offset;    /* aligned address being fetched */

    while(base < limit)
    {
        unsigned long word = tracee_getword(tid, (const void*)base);

        for(; offset < WORD_SIZE && base + offset < limit; ++offset)
            *dst++ = word >> (8 * offset);

        offset = 0;
        base += WORD_SIZE;
    }
}
/**
 * Duplicates the NUL-terminated string located at `str` in the memory of
 * thread `tid` into a freshly allocated local buffer.
 *
 * Returns the copy, which the caller must free(), or NULL if memory could not
 * be allocated.
 */
char *tracee_strdup(pid_t tid, const char *str)
{
    size_t length = tracee_strlen(tid, str);
    char *res = malloc(length + 1);

    if(res == NULL)
        return NULL;

    tracee_read(tid, res, str, length);
    res[length] = '\0';
    return res;
}
/**
 * Duplicates a NULL-terminated array of strings (argv or envp style) located
 * at `argv` in the memory of thread `tid`.
 *
 * Entries of the tracee's array are tracee-sized pointers, so the array is
 * walked with a stride of tracee_getwordsize(mode) bytes rather than the
 * tracer's own sizeof(char*); the two differ when a 32-bit process is traced
 * by a 64-bit tracer.
 *
 * Returns a NULL-terminated array to be released with free_strarray(), or
 * NULL if memory could not be allocated.
 */
char **tracee_strarraydup(int mode, pid_t tid, const char *const *argv)
{
    /* FIXME : This is probably broken on x32 */
    const size_t stride = tracee_getwordsize(mode);
    const uintptr_t base = (uintptr_t)argv;
    char **array;
    size_t nb_args = 0;
    size_t i;

    /* Reads number of pointers in pointer array */
    while(tracee_getptr(mode, tid,
                        (const void*)(base + nb_args * stride)) != NULL)
        ++nb_args;

    /* Allocs pointer array */
    array = malloc((nb_args + 1) * sizeof(char*));
    if(array == NULL)
        return NULL;

    /* Dups array elements. The loop is bounded by nb_args so that the local
     * array cannot be overrun if the tracee's memory changed since the
     * counting pass. */
    for(i = 0; i < nb_args; ++i)
    {
        /* xargv = argv[i] */
        const char *xargv = tracee_getptr(mode, tid,
                                          (const void*)(base + i * stride));
        if(xargv == NULL)
            break;

        array[i] = tracee_strdup(tid, xargv);
        if(array[i] == NULL)
        {
            /* Out of memory: release what was duplicated so far */
            while(i > 0)
                free(array[--i]);
            free(array);
            return NULL;
        }
    }
    array[i] = NULL;

    return array;
}
/**
 * Releases a NULL-terminated array of malloc()ed strings, such as the ones
 * returned by tracee_strarraydup(): every string is freed, then the array
 * itself. Like free(), does nothing when `array` is NULL.
 */
void free_strarray(char **array)
{
    char **ptr;

    if(array == NULL)
        return;

    for(ptr = array; *ptr != NULL; ++ptr)
        free(*ptr);
    free(array);
}
#ifndef PTRACE_UTILS_H
#define PTRACE_UTILS_H

/* Types used in the prototypes below, so that this header can be included on
 * its own: size_t, uint64_t and pid_t. */
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

/*
 * Helpers reading data from the memory of a traced thread `tid`.
 * `mode` is MODE_I386 for a 32-bit tracee; any other value is handled as
 * MODE_X86_64.
 */

/* Reads the pointer stored at `addr` in the tracee (4 or 8 bytes depending on
 * `mode`). */
void *tracee_getptr(int mode, pid_t tid, const void *addr);

/* Reads the `long` stored at `addr` in the tracee (4 or 8 bytes depending on
 * `mode`), zero-extended to 64 bits. */
uint64_t tracee_getlong(int mode, pid_t tid, const void *addr);

/* Size in bytes of a pointer in the tracee: 4 or 8 depending on `mode`. */
size_t tracee_getwordsize(int mode);

/* Length of the NUL-terminated string at `str` in the tracee. */
size_t tracee_strlen(pid_t tid, const char *str);

/* Copies `size` bytes from `src` in the tracee to the local buffer `dst`. */
void tracee_read(pid_t tid, char *dst, const char *src, size_t size);

/* Returns a malloc()ed copy of the string at `str` in the tracee; the caller
 * must free() it. */
char *tracee_strdup(pid_t tid, const char *str);

/* Returns a malloc()ed copy of the NULL-terminated string array at `argv` in
 * the tracee; release it with free_strarray(). */
char **tracee_strarraydup(int mode, pid_t tid, const char *const *argv);

/* Frees a NULL-terminated array of malloc()ed strings and the array itself. */
void free_strarray(char **array);

#endif
#include <Python.h>
#include "database.h"
#include "tracer.h"
/* Exception type raised by this module; created by the module init function,
 * which also exposes it as the module attribute `Error`. */
PyObject *Err_Base;
/**
 * Makes a C string from a Python unicode or bytes object.
 *
 * Unicode objects are encoded as UTF-8; bytes objects (str on Python 2) are
 * copied as they are. The copy stops at the first NUL byte, if any.
 *
 * If successful, the result is a string that the caller must free().
 * Else, returns NULL with a Python exception set (TypeError for an
 * unsupported object, MemoryError if the copy cannot be allocated, or
 * whatever the conversion raised).
 */
static char *get_string(PyObject *obj)
{
    PyObject *encoded = NULL; /* owned UTF-8 object, for unicode input only */
    const char *str;
    char *ret = NULL;

    if(PyUnicode_Check(obj))
    {
        encoded = PyUnicode_AsUTF8String(obj);
        if(encoded == NULL)
            return NULL;
        obj = encoded;
    }
#if PY_MAJOR_VERSION >= 3
    else if(!PyBytes_Check(obj))
#else
    else if(!PyString_Check(obj))
#endif
    {
        PyErr_SetString(PyExc_TypeError, "expected a str or bytes object");
        return NULL;
    }

#if PY_MAJOR_VERSION >= 3
    str = PyBytes_AsString(obj);
#else
    str = PyString_AsString(obj);
#endif
    if(str != NULL)
    {
        ret = strdup(str);
        if(ret == NULL)
            PyErr_NoMemory();
    }

    /* Released on every path; it used to leak when the conversion failed */
    Py_XDECREF(encoded);
    return ret;
}
/**
 * Python entry point: execute(binary, argv, databasepath, verbosity)
 *
 * Converts the arguments to C strings, sets trace_verbosity, and runs
 * fork_and_trace().
 *
 * Returns the traced program's exit status as a Python int, or NULL with an
 * exception set: Err_Base if verbosity is negative or tracing fails,
 * TypeError if binary, databasepath or an element of argv is neither str nor
 * bytes, MemoryError if an allocation fails.
 */
static PyObject *pytracer_execute(PyObject *self, PyObject *args)
{
    PyObject *ret = NULL;
    int exit_status;

    /* Reads arguments */
    char *binary = NULL, *databasepath = NULL;
    char **argv = NULL;
    size_t argv_len = 0;
    size_t i;
    int verbosity;
    PyObject *py_binary, *py_argv, *py_databasepath;

    if(!PyArg_ParseTuple(args, "OO!Oi",
                         &py_binary,
                         &PyList_Type, &py_argv,
                         &py_databasepath,
                         &verbosity))
        return NULL;

    if(verbosity < 0)
    {
        PyErr_SetString(Err_Base, "verbosity should be >= 0");
        return NULL;
    }
    trace_verbosity = verbosity;

    binary = get_string(py_binary);
    if(binary == NULL)
        goto done;
    databasepath = get_string(py_databasepath);
    if(databasepath == NULL)
        goto done;

    /* Converts argv from Python list to char[][]. The array starts out
     * zeroed, so it is NULL-terminated and can be released by the common
     * cleanup code no matter how many entries were filled in. */
    argv_len = PyList_Size(py_argv);
    argv = calloc(argv_len + 1, sizeof(char*));
    if(argv == NULL)
    {
        PyErr_NoMemory();
        goto done;
    }
    for(i = 0; i < argv_len; ++i)
    {
        argv[i] = get_string(PyList_GetItem(py_argv, i));
        if(argv[i] == NULL)
            goto done;
    }

    if(fork_and_trace(binary, (int)argv_len, argv,
                      databasepath, &exit_status) == 0)
        ret = PyLong_FromLong(exit_status);
    else
        PyErr_SetString(Err_Base, "Error occurred");

done:
    /* A NULL return must always come with an exception */
    if(ret == NULL && !PyErr_Occurred())
        PyErr_SetString(PyExc_TypeError,
                        "binary, databasepath and argv items must be str or "
                        "bytes");

    free(binary);
    free(databasepath);

    /* Deallocs argv */
    if(argv != NULL)
    {
        for(i = 0; i < argv_len; ++i)
            free(argv[i]);
        free(argv);
    }

    return ret;
}
/* Functions exported by the module: only execute(), implemented by
 * pytracer_execute(). The all-NULL entry terminates the table. */
static PyMethodDef methods[] = {
{"execute", pytracer_execute, METH_VARARGS,
"execute(binary, argv, databasepath, verbosity)\n"
"\n"
"Runs the specified binary with the argument list argv under trace and "
"writes\nthe captured events to SQLite3 database databasepath."},
{ NULL, NULL, 0, NULL }
};
#if PY_MAJOR_VERSION >= 3
/* Module definition handed to PyModule_Create() by the Python 3 init
 * function; Python 2 builds use Py_InitModule() instead and do not need it. */
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"reprozip._pytracer", /* m_name */
"C interface to tracer", /* m_doc */
-1, /* m_size */
methods, /* m_methods */
NULL, /* m_reload */
NULL, /* m_traverse */
NULL, /* m_clear */
NULL, /* m_free */
};
#endif
#if PY_MAJOR_VERSION >= 3
PyMODINIT_FUNC PyInit__pytracer(void)
#else
PyMODINIT_FUNC init_pytracer(void)
#endif
{
PyObject *mod;
#if PY_MAJOR_VERSION >= 3
mod = PyModule_Create(&moduledef);
#else
mod = Py_InitModule("reprozip._pytracer", methods);
#endif
if(mod == NULL)
{
#if PY_MAJOR_VERSION >= 3
return NULL;
#else
return;
#endif
}
Err_Base = PyErr_NewException("_pytracer.Error", NULL, NULL);
Py_INCREF(Err_Base);
PyModule_AddObject(mod, "Error", Err_Base);
#if PY_MAJOR_VERSION >= 3
return mod;
#endif
}
This diff is collapsed.
#ifndef SYSCALL_H
#define SYSCALL_H
/*
 * System call handling for traced processes.
 *
 * NOTE(review): the implementation is not visible here; the descriptions
 * below are derived from names and signatures only and should be confirmed.
 */
#include "tracer.h"
/* Builds the system call lookup table; presumably has to be called once
 * before syscall_handle() -- confirm. */
void syscall_build_table(void);
/* Handles the system call `process` is currently stopped on.
 * NOTE(review): return convention not visible here. */
int syscall_handle(struct Process *process);
/* Handles the ptrace event reported for an execve() by `process`. */
int syscall_execve_event(struct Process *process);
/* Handles a fork-like ptrace event reported for `process`; `event` is
 * presumably the PTRACE_EVENT_* code -- confirm. */
int syscall_fork_event(struct Process *process, unsigned int event);
#endif
This diff is collapsed.
#ifndef TRACER_H
#define TRACER_H
#include "config.h"
/* Runs `binary` with the `argc` arguments in `argv` under trace, recording
 * into the database at `database_path` (see the docstring of execute() in the
 * Python binding). Returns 0 on success, in which case the binding reads the
 * program's exit status from `*exit_status`; any other value is reported as
 * an error. */
int fork_and_trace(const char *binary, int argc, char **argv,
const char *database_path, int *exit_status);
/* Verbosity level set by the Python binding (never negative); the logger
 * echoes a message to stderr when this value is >= the message's level. */
extern int trace_verbosity;
/* One register value kept in three representations: signed integer, unsigned
 * integer and pointer.
 *
 * This is NOT a union because sign-extension rules depend on actual register
 * sizes. */
typedef struct S_register_type {
signed long int i;
unsigned long int u;
void *p;
} register_type;
/* NOTE(review): presumably the number of system call arguments stored per
 * process; its use is not visible in this part of the file -- confirm. */
#define PROCESS_ARGS 6
/* Binary path, argument array and environment array of an exec call.
 * NOTE(review): presumably local copies made with tracee_strdup() and
 * tracee_strarraydup(), to be freed by whoever owns the structure -- confirm
 * against the code using it. */
struct ExecveInfo {
char *binary;
char **argv;
char **envp;
};