Skip to content
Commits on Source (3)
The MIT License (MIT)
Copyright (c) 2015 Brent Pedersen and Aaron Quinlan
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
grabix (0.1.6-1) UNRELEASED; urgency=low
grabix (0.1.6+git20171023-1) unstable; urgency=low
[ Steffen Moeller ]
* Initial release.
* Initial release (Closes: #897037).
[ Andreas Tille ]
* debhelper 10
......
......@@ -5,7 +5,7 @@ Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
zlib1g-dev
Standards-Version: 4.1.1
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/grabix.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/grabix.git
Homepage: https://github.com/arq5x/grabix
......
......@@ -3,7 +3,7 @@
.\"
.TH GRABIX 1 "July 18, 2013"
.SH NAME
grabix \- program to do something
grabix \- random access on large compressed sequence data
.SH SYNOPSIS
.B grabix
.RI bedfile
......
Index: grabix/grabix.cpp
===================================================================
--- grabix.orig/grabix.cpp
+++ grabix/grabix.cpp
@@ -78,7 +78,7 @@ int create_grabix_index(string bgzf_file
int status;
kstring_t *line = new kstring_t;
- line->s = '\0';
+ line->s = (char *)0;
line->l = 0;
line->m = 0;
@@ -210,7 +210,7 @@ int grab(string bgzf_file, int64_t from_
// dump the header if there is one
int status;
kstring_t *line = new kstring_t;
- line->s = '\0';
+ line->s = (char *)0;
line->l = 0;
line->m = 0;
@@ -282,7 +282,7 @@ int random(string bgzf_file, uint64_t K)
vector<string> sample;
int status;
kstring_t *line = new kstring_t;
- line->s = '\0';
+ line->s = (char *)0;
line->l = 0;
line->m = 0;
Index: grabix/Makefile
===================================================================
--- grabix.orig/Makefile
+++ grabix/Makefile
@@ -1,5 +1,7 @@
LDFLAGS+= -lstdc++ -lz
CFLAGS+=-Wall
+CFLAGS+= -flto
+LDFLAGS+= -Wl,-flto
all: grabix
fix_assignment_of_char_to_pointer.patch
Hardening.patch
warnings.patch
tests.patch
introduceLTO.patch
Index: grabix/Makefile
===================================================================
--- grabix.orig/Makefile
+++ grabix/Makefile
@@ -1,9 +1,15 @@
LDFLAGS+= -lstdc++ -lz
CFLAGS+=-Wall
-all:
+all: grabix
+
+grabix:
gcc $(CFLAGS) -o grabix grabix_main.cpp grabix.cpp bgzf.c $(LDFLAGS)
+test:
+ bash ./test.sh
+
clean:
rm -f grabix
+ rm -f tests/empty.fastq.gz.gbi
Index: grabix/test.sh
===================================================================
--- grabix.orig/test.sh
+++ grabix/test.sh
@@ -1,5 +1,3 @@
-make
-
FQ=test.cnt.gz
rm -f ${FQ}{,.gbi}
--- a/grabix.cpp
+++ b/grabix.cpp
@@ -299,7 +299,7 @@ int random(string bgzf_file, uint64_t K)
Index: grabix/grabix.cpp
===================================================================
--- grabix.orig/grabix.cpp
+++ grabix/grabix.cpp
@@ -276,7 +276,6 @@ int random(string bgzf_file, uint64_t K)
srand(seed);
// reservoir sample
- uint64_t s = 0;
uint64_t N = 0;
uint64_t result_size = 0;
vector<string> sample;
@@ -307,7 +306,7 @@ int random(string bgzf_file, uint64_t K)
}
else
{
......
#!/usr/bin/make -f
# -*- makefile -*-
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
# Set default flags with dpkg-buildflags
export DEB_BUILD_MAINT_OPTIONS = hardening=+all
DPKG_EXPORT_BUILDFLAGS = 1
include /usr/share/dpkg/buildflags.mk
......
#include <cstdlib>
#include <iostream>
#include <stdio.h>
#include <fstream>
#include <vector>
#include <unistd.h>
......@@ -56,15 +57,21 @@ random access via the BGZF seek utility
*/
int create_grabix_index(string bgzf_file)
{
if (!bgzf_is_bgzf(bgzf_file.c_str()))
{
cerr << "[grabix] " << bgzf_file << " doesn't exist or wasn't compressed with bgzip" << endl;
exit (1);
}
BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
if (bgzf_fp == NULL)
{
cerr << "[grabix] could not open file:" << bgzf_file << endl;
cerr << "[grabix] could not open file: " << bgzf_file << endl;
exit (1);
}
// create an index for writing
string index_file_name = bgzf_file + ".gbi";
string index_file_name = bgzf_file + ".gbi.tmp";
ofstream index_file(index_file_name.c_str(), ios::out);
// add the offset for the end of the header to the index
......@@ -79,7 +86,7 @@ int create_grabix_index(string bgzf_file)
int64_t offset = 0;
while ((status = bgzf_getline(bgzf_fp, '\n', line)) >= 0)
{
offset = bgzf_tell (bgzf_fp);
offset = bgzf_tell(bgzf_fp);
if (line->s[0] != '#')
break;
prev_offset = offset;
......@@ -92,15 +99,15 @@ int create_grabix_index(string bgzf_file)
int64_t total_lines = 1;
vector<int64_t> chunk_positions;
chunk_positions.push_back (prev_offset);
bool eof = false;
int eof = 1;
while (true)
{
// grab the next line and store the offset
eof = bgzf_getline_counting(bgzf_fp);
offset = bgzf_tell (bgzf_fp);
eof = bgzf_getline(bgzf_fp, '\n', line);
offset = bgzf_tell(bgzf_fp);
chunk_count++;
// stop if we have encountered an empty line
if (eof)
if (eof < 0 || offset == prev_offset)
{
if (bgzf_check_EOF(bgzf_fp) == 1) {
if (offset > prev_offset) {
......@@ -109,6 +116,7 @@ int create_grabix_index(string bgzf_file)
}
break;
}
break;
}
// store the offset of this chunk start
else if (chunk_count == CHUNK_SIZE)
......@@ -129,7 +137,7 @@ int create_grabix_index(string bgzf_file)
}
index_file.close();
return EXIT_SUCCESS;
return std::rename((bgzf_file + ".gbi.tmp").c_str(), (bgzf_file + ".gbi").c_str());
}
/*
......@@ -195,7 +203,7 @@ int grab(string bgzf_file, int64_t from_line, int64_t to_line)
BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
if (bgzf_fp == NULL)
{
cerr << "[grabix] could not open file:" << bgzf_file << endl;
cerr << "[grabix] could not open file: " << bgzf_file << endl;
exit (1);
}
......@@ -206,7 +214,7 @@ int grab(string bgzf_file, int64_t from_line, int64_t to_line)
line->l = 0;
line->m = 0;
while ((status = bgzf_getline(bgzf_fp, '\n', line)) != 0)
while ((status = bgzf_getline(bgzf_fp, '\n', line)) > 0)
{
if (line->s[0] == '#')
printf("%s\n", line->s);
......@@ -259,7 +267,7 @@ int random(string bgzf_file, uint64_t K)
BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
if (bgzf_fp == NULL)
{
cerr << "[grabix] could not open file:" << bgzf_file << endl;
cerr << "[grabix] could not open file: " << bgzf_file << endl;
exit (1);
}
......
......@@ -7,7 +7,7 @@ using namespace std;
#include "bgzf.h"
#define VERSION "0.1.6"
#define VERSION "0.1.8"
// we only want to store the offset for every 10000th
// line. otherwise, were we to store the position of every
// line in the file, the index could become very large for
......
......@@ -3,17 +3,35 @@ make
FQ=test.cnt.gz
rm -f ${FQ}{,.gbi}
lines=50000
lines=500000
python tests/make-test-fastq.py $lines | bgzip -c > $FQ
./grabix index $FQ
echo "indexing"
time ./grabix index $FQ
echo "indexed"
python tests/test-fastq.py $FQ
a=$(grabix grab test.cnt.gz $(($lines * 4)))
a=$(./grabix grab test.cnt.gz $(($lines * 4)))
b=$(zless $FQ | tail -1)
if [[ "$a" != "$b" ]]; then
echo FAIL last record
exit 1
else
echo OK last record
fi
rm -f ${FQ}{,.gbi}
rm -f tests/empty.fastq.gz.gbi
./grabix index tests/empty.fastq.gz
a=$(cat tests/empty.fastq.gz.gbi | awk 'NR == 2')
if [[ "$a" != "16" ]]; then
echo FAIL index wrong size
exit 1
else
echo "OK index size"
fi
for V in \
test.PLs.vcf \
test.auto_dom.no_parents.2.vcf \
......