Skip to content
Commits on Source (8)
## 1.0.4 (in progress)
## 1.0.6
- Fix for the Python 3 fix made in 1.0.5 (cb_filter).
## 1.0.5
- Fix for cb_filter with python3.
## 1.0.4
- Enable cb_histogram to be used on samples without UMIs.
- Enable filtering of cells during `demultiplex_cells`.
- Fix incorrect pandas.read_csv call with header=-1.
## 1.0.3
- Python 3 support
......
umis (1.0.6-1) unstable; urgency=medium
* Team upload.
* New upstream version
* debhelper-compat 12 (routine-update)
* Standards-Version: 4.4.1 (routine-update)
* Set upstream metadata fields: Bug-Database, Bug-Submit, Repository,
Repository-Browse.
-- Steffen Moeller <moeller@debian.org> Sat, 18 Jan 2020 22:48:47 +0100
umis (1.0.3-2) unstable; urgency=medium
* Build-Depends-Arch: python3-pysam
......
......@@ -3,13 +3,13 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 12~),
Build-Depends: debhelper-compat (= 12),
dh-python,
cython3,
python3-dev,
python3-setuptools
Build-Depends-Arch: python3-pysam
Standards-Version: 4.3.0
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/umis
Vcs-Git: https://salsa.debian.org/med-team/umis.git
Homepage: https://github.com/vals/umis
......@@ -41,7 +41,7 @@ Description: tools for processing UMI RNA-tag data
Package: umis-examples
Architecture: all
Depends: ${shlibs:Depends},
${misc:Depends},
${misc:Depends}
Recommends: umis
Description: tools for processing UMI RNA-tag data (examples)
Umis provides tools for estimating expression in RNA-Seq data which
......
......@@ -18,3 +18,8 @@ override_dh_install:
mv debian/python3-$(PYBUILD_NAME)/usr debian/$(PYBUILD_NAME)
rmdir debian/python3-$(PYBUILD_NAME)
find debian -type d -name __pycache__ | xargs rm -rf
override_dh_auto_clean:
dh_auto_clean
rm -rf umis.egg-info
rm umis/utils.c
......@@ -3,7 +3,7 @@ Reference:
Valentine Svensson and Kedar Nath Natarajan and Lam-Ha Ly and Ricardo
J Miragaia and Charlotte Labalette and Iain C Macaulay and Ana Cvejic
and Sarah A Teichmann
Title: "Power analysis of single-cell RNA-sequencing experiments"
Title: Power analysis of single-cell RNA-sequencing experiments
Journal: Nature methods
Year: 2017
Volume: 14
......@@ -18,3 +18,7 @@ Registry:
Entry: OMICS_12783
- Name: bio.tools
Entry: NA
Bug-Database: https://github.com/vals/umis/issues
Bug-Submit: https://github.com/vals/umis/issues/new
Repository: https://github.com/vals/umis.git
Repository-Browse: https://github.com/vals/umis
......@@ -8,7 +8,7 @@ def read(fname):
setup(
name='umis',
version='1.0.3',
version='1.0.6',
description='Package for estimating UMI counts in Transcript Tag Counting data.',
packages=find_packages(),
install_requires=['click', 'pysam>=0.8.3', 'pandas', 'regex', 'scipy', 'toolz'],
......
......@@ -24,7 +24,7 @@ import numpy as np
import scipy.io, scipy.sparse
import click
VERSION = "1.0.3"
VERSION = "1.0.6"
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
......@@ -35,6 +35,9 @@ BARCODEINFO = {"sample": BarcodeInfo(bamtag="XS", readprefix="SAMPLE"),
"molecular": BarcodeInfo(bamtag="RX", readprefix="UMI")}
def open_gzipsafe(f):
    """Open ``f`` for reading, going through gzip when its name ends in ".gz".

    A path ending in ".gz" is opened with ``gzip.open``: in text mode ("rt")
    when ``is_python3()`` is true, and with gzip's default mode otherwise.
    Any other path is handed to the builtin ``open``.
    """
    # Plain files take the same route regardless of interpreter version.
    if not f.endswith(".gz"):
        return open(f)
    # Request text mode explicitly on Python 3 so callers iterate over str
    # lines rather than bytes.
    if is_python3():
        return gzip.open(f, mode="rt")
    return gzip.open(f)
def safe_makedir(dname):
......@@ -75,7 +78,7 @@ def read_fastq(filename):
if filename == "-":
filename_fh = sys.stdin
elif filename.endswith('gz'):
if is_python3:
if is_python3():
filename_fh = gzip.open(filename, mode='rt')
else:
filename_fh = BufferedReader(gzip.open(filename, mode='rt'))
......@@ -485,7 +488,7 @@ def tagcount(sam, out, genemap, output_evidence_table, positional, minevidence,
cb_hist = None
filter_cb = False
if cb_histogram:
cb_hist = pd.read_csv(cb_histogram, index_col=0, header=-1, squeeze=True, sep="\t")
cb_hist = pd.read_csv(cb_histogram, index_col=0, header=None, squeeze=True, sep="\t")
total_num_cbs = cb_hist.shape[0]
cb_hist = cb_hist[cb_hist > cb_cutoff]
logger.info('Keeping {} out of {} cellular barcodes.'.format(cb_hist.shape[0], total_num_cbs))
......@@ -758,7 +761,7 @@ def fasttagcount(sam, out, genemap, positional, minevidence, cb_histogram,
cb_hist = None
filter_cb = False
if cb_histogram:
cb_hist = pd.read_csv(cb_histogram, index_col=0, header=-1, squeeze=True, sep="\t")
cb_hist = pd.read_csv(cb_histogram, index_col=0, header=None, squeeze=True, sep="\t")
total_num_cbs = cb_hist.shape[0]
cb_hist = cb_hist[cb_hist > cb_cutoff]
logger.info('Keeping {} out of {} cellular barcodes.'.format(cb_hist.shape[0], total_num_cbs))
......@@ -971,9 +974,9 @@ def cb_histogram(fastq, umi_histogram):
for read in read_fastq(fastq):
match = parser_re.search(read).groupdict()
cb = match['CB']
umi = match['MB']
cb_counter[cb] += 1
if umi_histogram:
umi = match['MB']
umi_counter[(cb, umi)] += 1
for bc, count in cb_counter.most_common():
......@@ -1054,9 +1057,9 @@ def cb_filter(fastq, bc1, bc2, bc3, cores, nedit):
''' Filters reads with non-matching barcodes
Expects formatted fastq files.
'''
with open_gzipsafe(bc1) as bc1_fh:
bc1 = set(cb.strip() for cb in bc1_fh)
if bc2:
with open_gzipsafe(bc2) as bc2_fh:
bc2 = set(cb.strip() for cb in bc2_fh)
......@@ -1312,7 +1315,10 @@ def is_python3():
@click.option('--out_dir', default=".")
@click.option('--readnumber', default="")
@click.option('--prefix', default="")
def demultiplex_cells(fastq, out_dir, readnumber, prefix=""):
@click.option('--cb_histogram', default=None)
@click.option('--cb_cutoff', default=0)
def demultiplex_cells(fastq, out_dir, readnumber, prefix, cb_histogram,
cb_cutoff):
''' Demultiplex a fastqtransformed FASTQ file into a FASTQ file for
each cell.
'''
......@@ -1321,7 +1327,9 @@ def demultiplex_cells(fastq, out_dir, readnumber, prefix=""):
parser_re = re.compile(re_string)
readstring = "" if not readnumber else "_R{}".format(readnumber)
filestring = "{prefix}{sample}{readstring}.fq"
cb_set = set()
if cb_histogram:
cb_set = get_cb_depth_set(cb_histogram, cb_cutoff)
sample_set = set()
batch = collections.defaultdict(list)
parsed = 0
......@@ -1330,6 +1338,8 @@ def demultiplex_cells(fastq, out_dir, readnumber, prefix=""):
parsed += 1
match = parser_re.search(read).groupdict()
sample = match['CB']
if cb_set and sample not in cb_set:
continue
sample_set.add(sample)
batch[sample].append(read)
# write in batches to avoid opening up file handles repeatedly
......