Skip to content
Commits on Source (6)
......@@ -5,6 +5,8 @@ __pycache__/
# C extensions
*.so
#atom files
*.nfs*
# Distribution / packaging
.Python
env/
......
language: python
matrix:
include:
- os: linux
dist: trusty
sudo: required
python: 2.7
before_install: "sudo apt-get install -y -f python python-dev python-pip pkg-config python-wxgtk2.8 libpng-dev libjpeg8-dev libfreetype6-dev"
install: "pip install --upgrade pip setuptools numpy scipy pillow matplotlib pytest"
DISPLAY: 0.0
notifications:
email:
on_success: change # default: change
on_failure: change # default: always
before_script: cd /home/travis/build/mad-lab/transit/tests
script: travis_wait 30 pytest
......@@ -2,6 +2,25 @@
All notable changes to this project will be documented in this file.
## Version 2.1.0 - 2017-06-23
- TRANSIT:
- Added tooltips next to most parameters to explain their functionality.
- Added Quality Control window, with choice for normalization method.
- Added more normalization options to the HMM method.
- Added LOESS correction functionality back to TRANSIT
- Added ability to scale Track View based on mean-count of the window.
- Added ability to scale individual tracks in Track View.
- Added ability to add tracks of features to Track View.
- New documentation on normalization.
- TPP:
- TPP can now accept empty primer prefix (in case reads have been trimmed).
- TPP can now process reads obtained using Mme1 enzyme and protocol.
- TPP can now pass flags to BWA.
## Version 2.0.2 - 2016-08-19
- TRANSIT:
......
# TRANSIT 2.0
**New in Version 2.0+**
- Support for Tn5 datasets.
- New analysis methods.
- New way to export normalized datasets.
# TRANSIT 2.1.0
[![Build Status](https://travis-ci.org/mad-lab/transit.svg?branch=master)](https://travis-ci.org/mad-lab/transit) [![Documentation Status](https://readthedocs.org/projects/transit/badge/?version=latest)](http://transit.readthedocs.io/en/latest/?badge=latest)
**Version 2.1.0 changes (June, 2017)**
- Added tooltips next to most parameters to explain their functionality.
- Added Quality Control window, with choice for normalization method.
- Added more normalization options to the HMM method.
- Added LOESS correction functionality back to TRANSIT
- Added ability to scale Track View based on mean-count of the window.
- Added ability to scale individual tracks in Track View.
- Added ability to add tracks of features to Track View.
- Better status messages for TrackView
- TPP can now accept empty primer prefix (in case reads have been trimmed).
- TPP can now process reads obtained using Mme1 enzyme and protocol.
- TPP can now pass flags to BWA.
- Lots of bug fixes.
**Version 2.0.2 changes (August, 2016)**
- Added support for custom primers in TPP.
......@@ -14,6 +32,12 @@
- **New [mailing list](https://groups.google.com/forum/#!forum/tnseq-transit/join)**
**New in Version 2.0+**
- Support for Tn5 datasets.
- New analysis methods.
- New way to export normalized datasets.
Welcome! This is the distribution for the TRANSIT and TPP tools developed by the Ioerger Lab.
......
version: 2.0.1-58-g0a3b-mod
version: 2.1.0-18-g4192-mod
tnseq-transit (2.1.1-1) unstable; urgency=medium
* New upstream version
* Standards-Version: 4.1.3
* debhelper 11
* d/rules: do not parse d/changelog
-- Andreas Tille <tille@debian.org> Mon, 19 Feb 2018 11:40:06 +0100
tnseq-transit (2.0.2-1) unstable; urgency=medium
* Initial release (Closes: #856661)
......
......@@ -3,10 +3,10 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging@lists.alioth.debian.
Uploaders: Andreas Tille <tille@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10),
Build-Depends: debhelper (>= 11~),
python-all-dev,
python-setuptools
Standards-Version: 3.9.8
Standards-Version: 4.1.3
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/tnseq-transit.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/tnseq-transit.git
Homepage: http://pythonhosted.org/tnseq-transit/transit_overview.html
......@@ -20,9 +20,9 @@ Depends: ${python:Depends},
python-pillow,
python-matplotlib,
python-wxgtk3.0
Breaks: transit
Provides: transit
Replaces: transit
Breaks: transit
Description: statistical calculations of essentiality of genes or genomic regions
This is a software that can be used to analyze Tn-Seq datasets. It
includes various statistical calculations of essentiality of genes or
......
......@@ -3,11 +3,11 @@
# DH_VERBOSE := 1
export LC_ALL=C.UTF-8
DEBPKGNAME := $(shell dpkg-parsechangelog | awk '/^Source:/ {print $$2}')
include /usr/share/dpkg/default.mk
%:
dh $@ --with python2
override_dh_install:
dh_install
mv debian/$(DEBPKGNAME)/usr/bin/tpp debian/$(DEBPKGNAME)/usr/bin/transit-tpp
mv debian/$(DEB_SOURCE)/usr/bin/tpp debian/$(DEB_SOURCE)/usr/bin/transit-tpp
......@@ -78,7 +78,7 @@ setup(
# your project is installed. For an analysis of "install_requires" vs pip's
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=['setuptools', 'numpy', 'scipy', 'pillow', 'matplotlib>1.2.0,<1.5.0'],
install_requires=['setuptools', 'numpy', 'scipy', 'pillow', 'matplotlib'],
#dependency_links = [
# "git+https://github.com/wxWidgets/wxPython.git#egg=wxPython"
......
......@@ -34,13 +34,11 @@ from tpp_tools import *
from tpp_gui import *
def main():
# if -nowin is command-line arg, skip the GUI and set filenames in vars
def main(arguments=[]):
vars = Globals()
#vars.version = "$Revision: 1.5 $".split()[1]
if len(sys.argv) <= 1 and hasWx:
if len(arguments) <= 1 and hasWx:
app = wx.App(False)
form = MyForm(vars)
form.update_dataset_list()
......@@ -57,70 +55,59 @@ def main():
else: msg = 'running pre-processing on %s and %s' % (vars.fq1,vars.fq2)
message(msg)
message("transposon type: %s" % vars.transposon)
message("protocol: %s" % vars.protocol)
save_config(vars)
driver(vars)
else:
pass
elif len(sys.argv) <= 1 and not hasWx:
elif len(arguments) <= 1 and not hasWx:
print "Please install wxPython to run in GUI Mode."
print "To run in Console Mode please follow these instructions:"
print ""
show_help()
else:
flag = False
initialize_globals(vars)
i,n = 1,len(sys.argv)
while i<n:
if sys.argv[i] == '-help':
(args, kwargs) = cleanargs(arguments)
# Show help if needed
if "help" in kwargs or "-help" in kwargs:
show_help()
sys.exit()
elif sys.argv[i] == '-tn5':
vars.transposon = 'Tn5'
i += 1
elif sys.argv[i] == '-himar1':
vars.transposon = 'Himar1'
i += 1
elif sys.argv[i] == '-primer':
vars.prefix = sys.argv[i+1]
i += 2
elif sys.argv[i] == '-reads1':
vars.fq1 = sys.argv[i+1]
i += 2
elif sys.argv[i] == '-reads2':
flag = True
vars.fq2 = sys.argv[i+1]
i += 2
elif sys.argv[i] == '-bwa':
vars.bwa = sys.argv[i+1]
i += 2
elif sys.argv[i] == '-ref':
vars.ref = sys.argv[i+1]
i += 2
elif sys.argv[i] == '-maxreads':
vars.maxreads = int(sys.argv[i+1])
i += 2
elif sys.argv[i] == '-output':
vars.base = sys.argv[i+1]
i += 2
elif sys.argv[i] == '-mismatches':
vars.mm1 = int(sys.argv[i+1])
i += 2
else:
print "error: unrecognized flag:",sys.argv[i]
# Check for strange flags
known_flags = set(["tn5", "help", "himar1", "protocol", "primer", "reads1",
"reads2", "bwa", "ref", "maxreads", "output", "mismatches", "flags"])
unknown_flags = set(kwargs.keys()) - known_flags
if unknown_flags:
print "error: unrecognized flags:", ", ".join(unknown_flags)
show_help()
sys.exit()
if flag==False: vars.fq2 = ""
if vars.fq2=="": msg = 'running pre-processing on %s' % (vars.fq1)
else: msg = 'running pre-processing on %s and %s' % (vars.fq1,vars.fq2)
# Initialize variables
initialize_globals(vars, args, kwargs)
# Check inputs make sense
verify_inputs(vars)
# Print some messages
if vars.fq2:
msg = 'running pre-processing on %s' % (vars.fq1)
else:
msg = 'running pre-processing on %s and %s' % (vars.fq1, vars.fq2)
message(msg)
message("protocol: %s" % vars.protocol)
message("transposon type: %s" % vars.transposon)
verify_inputs(vars)
# Save configuration file
save_config(vars)
# Run TPP
driver(vars)
if __name__ == "__main__":
main()
main(sys.argv[1:])
This diff is collapsed.
......@@ -26,6 +26,23 @@ import platform
import gzip
import subprocess
def _is_number(token):
    """Return True when *token* parses as a number (e.g. "-1", "0.5", "3")."""
    try:
        float(token)
    except (TypeError, ValueError):
        return False
    return True


def cleanargs(rawargs):
    """Split a raw argument list into positional and flag/value arguments.

    A token starting with "-" is a flag; only that first dash is stripped to
    form the key.  The token after a flag is taken as the flag's value when it:

      * does not start with "-", or
      * contains a space (a quoted string such as "-k 2 -n 3"), or
      * parses as a number, so negative values such as "-maxreads -1" are
        kept as values instead of being mistaken for another flag.

    A flag with no value is stored as True.  Every other token is positional.

    Arguments:
        rawargs: sequence of argument strings (e.g. sys.argv[1:]).

    Returns:
        (args, kwargs): list of positional arguments and dict mapping flag
        names to their string value or to True.
    """
    args = []
    kwargs = {}
    total = len(rawargs)
    count = 0
    while count < total:
        current = rawargs[count]
        if current.startswith("-"):
            name = current[1:]
            has_next = count + 1 < total
            if has_next:
                following = rawargs[count + 1]
                next_is_value = (
                    not following.startswith("-")
                    or len(following.split(" ")) > 1
                    or _is_number(following)
                )
            else:
                next_is_value = False

            if next_is_value:
                kwargs[name] = rawargs[count + 1]
                # Skip over the value that was just consumed.
                count += 1
            else:
                kwargs[name] = True
        else:
            args.append(current)
        count += 1
    return (args, kwargs)
def analyze_dataset(wigfile):
......@@ -76,8 +93,8 @@ def fastq2reads(infile,outfile,maxreads):
def fix_paired_headers_for_bwa(reads1,reads2):
a = open(reads1)
b = open(reads2)
temp1 = "temp."+reads1
temp2 = "temp."+reads2
temp1 = reads1+".temp"
temp2 = reads2+".temp"
c = open(temp1,"w")
d = open(temp2,"w")
tot = 0
......@@ -126,7 +143,11 @@ def fix_paired_headers_for_bwa(reads1,reads2):
os.system("mv %s %s" % (temp2, reads2))
'''
# find index of H[1..m] in G[1..n] with up to max mismatches
def mmfind(G,n,H,m,max): # lengths; assume n>m
a = G[:n].find(H[:m])
if a!=-1: return a # shortcut for perfect matches
for i in range(0,n-m):
cnt = 0
for k in range(m):
......@@ -149,8 +170,11 @@ def extract_staggered(infile,outfile,vars):
vars.truncated_reads = 0
output = open(outfile,"w")
tot = 0
#print infile
for line in open(infile):
#print line
line = line.rstrip()
if not line: continue
if line[0]=='>': header = line; continue
tot += 1
if tot%1000000==0: message("%s reads processed" % tot)
......@@ -160,7 +184,8 @@ def extract_staggered(infile,outfile,vars):
if a>=P and a<=Q:
gstart,gend = a+lenTn,readlen
if b!=-1: gend = b; vars.truncated_reads += 1
if gend-gstart<20: continue # too short
#if gend-gstart<20: continue # too short
if gend-gstart<5: continue # too short
output.write(header+"\n")
output.write(line[gstart:gend]+"\n")
vars.tot_tgtta += 1
......@@ -314,7 +339,7 @@ def template_counts(ref,sam,bcfile,vars):
sites = []
for i in range(len(genome)-1):
if genome[i:i+2]=="TA":
if genome[i:i+2].upper()=="TA":
pos = i+1
h = hits.get(pos,[])
f = filter(lambda x: x[0]=='F',h)
......@@ -335,8 +360,25 @@ def template_counts(ref,sam,bcfile,vars):
# pretend that all reads count as unique templates
def increase_counts(pos,sites, strand):
    """Tally one more read at site `pos` in the `sites` table.

    Each row of `sites` is indexed as columns 1-2 (forward), 3-4 (reverse)
    and 5-6 (total).  A strand of "F" bumps columns 1 and 2, a strand of "R"
    bumps columns 3 and 4, and columns 5 and 6 are bumped on every call.
    The row is modified in place; nothing is returned.
    """
    row = sites[pos]
    if strand == "F":
        # forward-strand columns
        row[1] += 1
        row[2] += 1
    elif strand == "R":
        # reverse-strand columns
        row[3] += 1
        row[4] += 1
    # total columns are incremented regardless of strand
    row[5] += 1
    row[6] += 1
def read_counts(ref,sam,vars):
genome = read_genome(ref)
sites = {}
for i in range(len(genome)-1):
if genome[i:i+2]=="TA" or vars.transposon=='Tn5':
pos = i+1
sites[pos] = [pos,0,0,0,0,0,0]
hits = {}
vars.tot_tgtta,vars.mapped = 0,0
vars.r1 = vars.r2 = 0
......@@ -351,28 +393,34 @@ def read_counts(ref,sam,vars):
vars.mapped += 1
readlen = len(w[9])
pos = int(w[3])
if vars.protocol.lower() == "mme1":
strand,delta = 'F',readlen
if code[4]=="1": strand,delta = 'R',1
site1 = pos + delta - 2 #if on + strand, take column 3 position and add 1bp,
site2 = pos + delta - 1 #check one off just in case it enzyme chewed too much
if site1 in sites:
increase_counts(site1, sites, strand)
if site2 in sites:
increase_counts(site2, sites, strand)
else:
strand,delta = 'F',-2
if code[4]=="1": strand,delta = 'R',readlen
pos += delta
if pos not in hits: hits[pos] = []
hits[pos].append(strand)
site1 = pos + delta #if on + strand, take column 3 position and add 1bp)
if site1 in sites:
increase_counts(site1, sites, strand)
results = []
for key in sorted(sites.keys()):
results.append(sites[key])
return results # (coord, Fwd_Rd_Ct, Fwd_Templ_Ct, Rev_Rd_Ct, Rev_Templ_Ct, Tot_Rd_Ct, Tot_Templ_Ct)
sites = []
for i in range(len(genome)-1):
if genome[i:i+2]=="TA" or vars.transposon=='Tn5':
pos = i+1
h = hits.get(pos,[])
lenf,lenr = h.count('F'),h.count('R')
data = [pos,lenf,lenf,lenr,lenr,lenf+lenr,lenf+lenr]
sites.append(data)
return sites # (coord, Fwd_Rd_Ct, Fwd_Templ_Ct, Rev_Rd_Ct, Rev_Templ_Ct, Tot_Rd_Ct, Tot_Templ_Ct)
def driver(vars):
vars.reads1 = vars.base+".reads1"
vars.reads2 = vars.base+".reads2"
vars.tgtta1 = vars.base+".tgtta1"
vars.tgtta2 = vars.base+".tgtta2"
vars.trimmed1 = vars.base+".trimmed1"
vars.trimmed2 = vars.base+".trimmed2"
vars.barcodes1 = vars.base+".barcodes1"
vars.barcodes2 = vars.base+".barcodes2"
vars.genomic2 = vars.base+".genomic2"
......@@ -383,9 +431,10 @@ def driver(vars):
vars.wig = vars.base+".wig"
vars.stats = vars.base+".tn_stats"
if vars.prefix==None:
if not vars.prefix:
if vars.transposon=="Tn5": vars.prefix = "TAAGAGACAG"
elif vars.transposon=="Himar1": vars.prefix = "ACTTATCAGCCAACCTGTTA"
else: vars.prefix = ""
try:
extract_reads(vars)
......@@ -446,8 +495,8 @@ def extract_reads(vars):
if vars.single_end==True:
message("assuming single-ended reads")
message("creating %s" % vars.tgtta1)
extract_staggered(vars.reads1,vars.tgtta1,vars)
message("creating %s" % vars.trimmed1)
extract_staggered(vars.reads1,vars.trimmed1,vars)
return
......@@ -462,23 +511,23 @@ def extract_reads(vars):
message("extracting barcodes and genomic parts of reads...")
message("creating %s" % vars.tgtta1)
extract_staggered(vars.reads1,vars.tgtta1,vars)
message("creating %s" % vars.trimmed1)
extract_staggered(vars.reads1,vars.trimmed1,vars)
message("creating %s" % vars.tgtta2)
select_reads(vars.tgtta1,vars.reads2,vars.tgtta2)
message("creating %s" % vars.trimmed2)
select_reads(vars.trimmed1,vars.reads2,vars.trimmed2)
#message("creating %s" % vars.barcodes2)
#select_cycles(vars.tgtta2,22,30,vars.barcodes2)
#select_cycles(vars.trimmed2,22,30,vars.barcodes2)
#message("creating %s" % vars.genomic2)
#select_cycles(vars.tgtta2,43,-1,vars.genomic2)
#select_cycles(vars.trimmed2,43,-1,vars.genomic2)
# instead of using select_cycles, do these both in one shot by looking for constant seqs
message("creating %s" % vars.barcodes2)
message("creating %s" % vars.genomic2)
extract_barcodes(vars.tgtta2,vars.barcodes2,vars.genomic2, vars.mm1)
extract_barcodes(vars.trimmed2,vars.barcodes2,vars.genomic2, vars.mm1)
message("creating %s" % vars.barcodes1)
replace_ids(vars.tgtta1,vars.barcodes2,vars.barcodes1)
replace_ids(vars.trimmed1,vars.barcodes2,vars.barcodes1)
# pattern for read 2...
# TAGTGGATGATGGCCGGTGGATTTGTG GTAATTACCA TGGTCGTGGTAT CCCAGCGCGACTTCTTCGGCGCACACACC TAACAGGTTGGCTGATAAGTCCCCG?AGAT AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGT
......@@ -548,6 +597,8 @@ def bwa_subprocess(command, outfile):
for line in iter(process.stderr.readline, ''):
if "Permission denied" in line:
raise IOError("Error: BWA encountered a permissions error: \n\n%s" % line)
if "invalid option" in line:
raise ValueError("Error: Unrecognized flag for BWA: %s" % (line.split()[-1]))
sys.stderr.write("%s\n" % line.strip())
......@@ -561,22 +612,29 @@ def run_bwa(vars):
bwa_subprocess(cmd, sys.stdout)
cmd = [vars.bwa, "aln", vars.ref, vars.tgtta1]
cmd = [vars.bwa, "aln"]
if vars.flags.strip():
cmd.extend( vars.flags.split(" "))
cmd.extend([vars.ref, vars.trimmed1])
outfile = open(vars.sai1, "w")
bwa_subprocess(cmd, outfile)
if vars.single_end==True:
cmd = [vars.bwa, "samse", vars.ref, vars.sai1, vars.tgtta1]
cmd = [vars.bwa, "samse", vars.ref, vars.sai1, vars.trimmed1]
outfile = open(vars.sam, "w")
bwa_subprocess(cmd, outfile)
else:
cmd = [vars.bwa, "aln", vars.ref, vars.genomic2]
cmd = [vars.bwa, "aln"]
if vars.flags.strip():
cmd.extend(vars.flags.split(" "))
cmd.extend([vars.ref, vars.genomic2])
outfile = open(vars.sai2, "w")
bwa_subprocess(cmd, outfile)
cmd = [vars.bwa, "sampe", vars.ref, vars.sai1, vars.sai2, vars.tgtta1, vars.genomic2]
cmd = [vars.bwa, "sampe", vars.ref, vars.sai1, vars.sai2, vars.trimmed1, vars.genomic2]
outfile = open(vars.sam, "w")
bwa_subprocess(cmd, outfile)
......@@ -667,7 +725,7 @@ def generate_output(vars):
rcounts = [x[5] for x in counts]
tcounts = [x[6] for x in counts]
rc,tc = sum(rcounts),sum(tcounts)
ratio = rc/float(tc)
ratio = rc/float(tc) if (rc != 0 and tc !=0) else 0
ta_sites = len(rcounts)
tas_hit = len(filter(lambda x: x>0,rcounts))
density = tas_hit/float(ta_sites)
......@@ -687,7 +745,7 @@ def generate_output(vars):
read_length = get_read_length(vars.base + ".reads1")
mean_r1_genomic = get_genomic_portion(vars.base + ".tgtta1")
mean_r1_genomic = get_genomic_portion(vars.base + ".trimmed1")
if vars.single_end==False: mean_r2_genomic = get_genomic_portion(vars.base + ".genomic2")
output = open(vars.stats,"w")
......@@ -698,12 +756,14 @@ def generate_output(vars):
output.write("# command: python ")
output.write(' '.join(sys.argv)+"\n")
output.write('# transposon type: %s\n' % vars.transposon)
output.write('# protocol type: %s\n' % vars.protocol)
output.write('# bwa flags: %s\n' % vars.flags)
output.write('# read1: %s\n' % vars.fq1)
output.write('# read2: %s\n' % vars.fq2)
output.write('# ref_genome: %s\n' % vars.ref)
output.write("# total_reads %s (or read pairs)\n" % tot_reads)
#output.write("# truncated_reads %s (fragments shorter than the read length; ADAP2 appears in read1)\n" % vars.truncated_reads)
output.write("# TGTTA_reads %s (reads with valid Tn prefix, and insert size>20bp)\n" % vars.tot_tgtta)
output.write("# trimmed_reads %s (reads with valid Tn prefix, and insert size>20bp)\n" % vars.tot_tgtta)
output.write("# reads1_mapped %s\n" % vars.r1)
output.write("# reads2_mapped %s\n" % vars.r2)
output.write("# mapped_reads %s (both R1 and R2 map into genome)\n" % vars.mapped)
......@@ -746,22 +806,140 @@ def error(s):
print "error:",s
sys.exit(0)
def warning(s):
    """Write `s` to standard output on one line, prefixed with "warning:"."""
    sys.stdout.write("warning: %s\n" % (s,))
def set_defaults(vars, protocol):
    """Apply the default settings for `protocol` to `vars`.

    "mme1" and "tn5" select their own default sets; "sassetti" and any
    other value use the Sassetti defaults.
    """
    if protocol == "mme1":
        initializer = set_mme1_defaults
    elif protocol == "tn5":
        initializer = set_tn5_defaults
    else:
        # "sassetti" and every unrecognized name share the same defaults
        initializer = set_sassetti_defaults
    initializer(vars)
def set_attributes(vars, attributes_list, override=False):
    """Assign each (name, value) pair in `attributes_list` onto `vars`.

    With override=False (the default) an attribute that already exists on
    `vars` is left untouched; with override=True every pair is assigned.
    """
    for name, default in attributes_list:
        if override or not hasattr(vars, name):
            setattr(vars, name, default)
def set_sassetti_defaults(vars):
    """Fill in the Sassetti-protocol defaults on `vars` via set_attributes.

    Called without override, so set_attributes only assigns attributes that
    `vars` does not already have.
    """
    set_attributes(vars, [
        ("transposon", "Himar1"),
        ("protocol", "Sassetti"),
        ("prefix", "ACTTATCAGCCAACCTGTTA"),
        ("maxreads", -1),
        ("mm1", 100),
    ])
def set_mme1_defaults(vars):
    """Fill in the Mme1-protocol defaults on `vars` via set_attributes.

    Called without override, so set_attributes only assigns attributes that
    `vars` does not already have.
    """
    set_attributes(vars, [
        ("transposon", "Himar1"),
        ("protocol", "Mme1"),
        ("prefix", ""),
        ("maxreads", -1),
        ("mm1", 2),
    ])
def set_tn5_defaults(vars):
    """Fill in the Tn5-protocol defaults on `vars` via set_attributes.

    Called without override, so set_attributes only assigns attributes that
    `vars` does not already have.
    """
    set_attributes(vars, [
        ("transposon", "Tn5"),
        ("protocol", "Tn5"),
        ("prefix", ""),
        ("maxreads", -1),
        ("mm1", 2),
    ])
def verify_inputs(vars):
if not os.path.exists(vars.fq1): error("file not found: "+vars.fq1)
if not os.path.exists(vars.fq1): error("reads1 file not found: "+vars.fq1)
vars.single_end = False
if vars.fq2=="": vars.single_end = True
elif not os.path.exists(vars.fq2): error("file not found: "+vars.fq2)
if not os.path.exists(vars.ref): error("file not found: "+vars.ref)
elif not os.path.exists(vars.fq2): error("reads2 file not found: "+vars.fq2)
if not os.path.exists(vars.ref): error("reference file not found: "+vars.ref)
if vars.base == '': error("prefix cannot be empty")
if vars.fq1 == vars.fq2: error('fastq files cannot be identical')
def initialize_globals(vars):
# If Mme1 protocol, warn that we don't use read2 file
if vars.protocol.lower() == "mme1" and not vars.single_end:
warning("Ignoring Read 2 file. TPP assumes Mme1 protocol runs in single-end mode.")
vars.single_end = True
vars.fq2 = ""
if os.path.isdir(vars.bwa):
bwaexec_unix = os.path.join(vars.bwa, "bwa")
bwaexec_win = os.path.join(vars.bwa, "bwa.exe")
if os.path.exists(bwaexec_unix) and not os.path.isdir(bwaexec_unix):
warning("did not include BWA executable name. Assuming BWA executable is named 'bwa'")
vars.bwa = bwaexec_unix
elif os.path.exists(bwaexec_win) and not os.path.isdir(bwaexec_win):
warning("did not include BWA executable name. Assuming BWA executable is named 'bwa.exe'")
vars.bwa = bwaexec_win
else:
error('cannot find BWA executable. Please include the full executable name as well as its directory.')
elif not os.path.exists(vars.bwa):
error('cannot find BWA executable. Please include the full executable name as well as its directory.')
def initialize_globals(vars, args=[], kwargs={}):
vars.fq1,vars.fq2,vars.ref,vars.bwa,vars.base,vars.maxreads = "","","","","temp",-1
vars.mm1 = 1 # mismatches allowed in Tn prefix
vars.transposon = 'Himar1'
vars.prefix = None
vars.protocol = "Sassetti"
vars.prefix = "ACTTATCAGCCAACCTGTTA"
vars.flags = ""
# Update defaults
protocol = kwargs.get("protocol", "").lower()
if protocol:
set_protocol_defaults(vars, protocol)
elif not kwargs:
read_config(vars)
# If running in console mode with flags
if "protocol" in kwargs:
vars.protocol = kwargs["protocol"]
if "himar1" in kwargs:
vars.transposon = "Himar1"
if "tn5" in kwargs:
vars.transposon = "Tn5"
if "protocol" in kwargs:
vars.protocol = kwargs["protocol"]
if "primer" in kwargs:
vars.prefix = kwargs["primer"]
if "reads1" in kwargs:
vars.fq1 = kwargs["reads1"]
if "reads2" in kwargs:
vars.fq2 = kwargs["reads2"]
if "bwa" in kwargs:
vars.bwa = kwargs["bwa"]
if "ref" in kwargs:
vars.ref = kwargs["ref"]
if "maxreads" in kwargs:
vars.maxreads = int(kwargs["maxreads"])
if "output" in kwargs:
vars.base = kwargs["output"]
if "mismatches" in kwargs:
vars.mm1 = int(kwargs["mismatches"])
if "flags" in kwargs:
vars.flags = kwargs["flags"]
def read_config(vars):
if not os.path.exists("tpp.cfg"): return
for line in open("tpp.cfg"):
......@@ -773,6 +951,10 @@ def read_config(vars):
if len(w)>=2 and w[0]=='prefix': vars.base = w[1]
if len(w)>=2 and w[0]=='mismatches1': vars.mm1 = int(w[1])
if len(w)>=2 and w[0]=='transposon': vars.transposon = w[1]
if len(w)>=2 and w[0]=='protocol': vars.protocol = " ".join(w[1:])
if len(w)>=2 and w[0]=='primer': vars.prefix = w[1]
if len(w)>=2 and w[0]=='flags': vars.flags = " ".join(w[1:])
def save_config(vars):
f = open("tpp.cfg","w")
......@@ -783,10 +965,13 @@ def save_config(vars):
f.write("prefix %s\n" % vars.base)
f.write("mismatches1 %s\n" % vars.mm1)
f.write("transposon %s\n" % vars.transposon)
f.write("protocol %s\n" % vars.protocol)
f.write("primer %s\n" % vars.prefix)
f.write("flags %s\n" % vars.flags)
f.close()
def show_help():
print 'usage: python PATH/src/tpp.pyc -bwa <PATH_TO_EXECUTABLE> -ref <REF_SEQ> -reads1 <FASTQ_OR_FASTA_FILE> [-reads2 <FASTQ_OR_FASTA_FILE>] -output <BASE_FILENAME> [-maxreads <N>] [-mismatches <N>] [-tn5|-himar1] [-primer <seq>]'
print 'usage: python PATH/src/tpp.py -bwa <PATH_TO_EXECUTABLE> -ref <REF_SEQ> -reads1 <FASTQ_OR_FASTA_FILE> [-reads2 <FASTQ_OR_FASTA_FILE>] -output <BASE_FILENAME> [-maxreads <N>] [-mismatches <N>] [-flags "<STRING>"] [-tn5|-himar1] [-primer <seq>]'
class Globals:
pass
......
__all__ = []
#__all__ = ["transit_tools", "tnseq_tools", "norm_tools", "stat_tools"]
__all__ = ["transit_tools", "tnseq_tools", "norm_tools", "stat_tools"]
__version__ = "v2.0.2"
__version__ = "v2.1.1"
prefix = "[TRANSIT]"
......@@ -20,7 +20,10 @@ import pytransit.analysis
method_wrap_width = 250
methods = pytransit.analysis.methods
export_methods = pytransit.analysis.export_methods
all_methods = {}
all_methods.update(methods)
all_methods.update(export_methods)
wildcard = "Python source (*.py)|*.py|" \
"All files (*.*)|*.*"
......@@ -29,20 +32,26 @@ transit_prefix = "[TRANSIT]"
def main(args=None):
#If no arguments, show GUI:
if len(sys.argv) == 1 and hasWx:
import pytransit.transit_gui as transit_gui
DEBUG = "--debug" in sys.argv
if DEBUG:
sys.argv.remove("--debug")
# Check if running in GUI Mode
if len(sys.argv) == 1 and hasWx:
import pytransit.transit_gui as transit_gui
transit_tools.transit_message("Running in GUI Mode")
app = wx.App(False)
#create an object of CalcFrame
frame = transit_gui.TnSeekFrame(None)
frame = transit_gui.TnSeekFrame(None, DEBUG)
#show the frame
frame.Show(True)
#start the applications
app.MainLoop()
# Tried GUI mode but has no wxPython
elif len(sys.argv) == 1 and not hasWx:
print "Please install wxPython to run in GUI Mode."
print "To run in Console Mode please follow these instructions:"
......@@ -51,16 +60,17 @@ def main(args=None):
print "List of known methods:"
for m in methods:
print "\t - %s" % m
# Running in Console mode
else:
method_name = sys.argv[1]
if method_name not in methods:
if method_name not in all_methods:
print "Error: The '%s' method is unknown." % method_name
print "Please use one of the known methods (or see documentation to add a new one):"
for m in methods:
for m in all_methods:
print "\t - %s" % m
print "Usage: python %s <method>" % sys.argv[0]
else:
methodobj = methods[method_name].method.fromconsole()
methodobj = all_methods[method_name].method.fromconsole()
methodobj.Run()
......
......@@ -8,7 +8,7 @@ __all__ = [ basename(f)[:-3] for f in modules if isfile(f)]
import base
import gumbel
#import example
import example
import tn5gaps
import binomial
import griffin
......@@ -18,7 +18,7 @@ import rankproduct
methods = {}
#methods["example"] = example.ExampleAnalysis()
methods["example"] = example.ExampleAnalysis()
methods["gumbel"] = gumbel.GumbelAnalysis()
methods["binomial"] = binomial.BinomialAnalysis()
methods["griffin"] = griffin.GriffinAnalysis()
......@@ -26,6 +26,15 @@ methods["hmm"] = hmm.HMMAnalysis()
methods["resampling"] = resampling.ResamplingAnalysis()
methods["tn5gaps"] = tn5gaps.Tn5GapsAnalysis()
methods["rankproduct"] = rankproduct.RankProductAnalysis()
#methods["mcce"] = mcce.MCCEAnalysis()
#methods["mcce2"] = mcce2.MCCE2Analysis()
#methods["motifhmm"] = motifhmm.MotifHMMAnalysis()
# EXPORT METHODS
import norm
export_methods = {}
export_methods["norm"] = norm.NormAnalysis()
......@@ -22,11 +22,75 @@ import pytransit.transit_tools as transit_tools
file_prefix = "[FileDisplay]"
class TransitFile:
class InvalidArgumentException(Exception):
    """Exception carrying a single message, caught by AnalysisMethod.fromconsole."""

    def __init__(self, message):
        # Hand the message to the base Exception so str(e) returns it.
        Exception.__init__(self, message)
if hasWx:
    class InfoIcon(wx.StaticBitmap):
        """Static bitmap that shows `tooltip` as its tool tip.

        When no bitmap is supplied, the 16x16 wx.ART_INFORMATION bitmap from
        wx.ArtProvider is used.  Only defined when hasWx is true.
        """

        def __init__(self, panel, flag, bmp=None, tooltip=""):
            bmp = bmp or wx.ArtProvider.GetBitmap(wx.ART_INFORMATION, wx.ART_OTHER, (16, 16))
            wx.StaticBitmap.__init__(self, panel, flag, bmp)
            self.SetToolTip(wx.ToolTip(tooltip))
class TransitGUIBase:
    """Shared console / status-bar messaging helpers.

    `wxobj` is None until a GUI object is attached; while it is falsy, only
    the console output is produced and no wx calls are made.
    """

    def __init__(self):
        self.wxobj = None
        self.short_name = "TRANSIT"
        self.long_name = "TRANSIT"

    def status_message(self, text, time=-1):
        """Publish (short_name, text, time) on the "status" topic when a GUI is attached."""
        if not self.wxobj:
            return
        payload = (self.short_name, text, time)
        if newWx:
            # newer pubsub API takes the message as a keyword argument
            wx.CallAfter(pub.sendMessage, "status", msg=payload)
        else:
            wx.CallAfter(pub.sendMessage, "status", payload)
        wx.Yield()

    def console_message(self, text):
        """Write `text` to stdout on its own line, prefixed with [short_name]."""
        sys.stdout.write("[%s] %s\n" % (self.short_name, text))

    def console_message_inplace(self, text):
        """Write `text` to stdout ending in a carriage return, then flush."""
        out = sys.stdout
        out.write("[%s] %s \r" % (self.short_name, text) )
        out.flush()

    def transit_message(self, text):
        """Send `text` to both the console and the status bar."""
        self.console_message(text)
        self.status_message(text)

    def transit_message_inplace(self, text):
        """Send `text` to the console in place and to the status bar."""
        self.console_message_inplace(text)
        self.status_message(text)

    def transit_error(self,text):
        """Report `text` as a message and, with a GUI attached, call transit_tools.ShowError."""
        self.transit_message(text)
        if not self.wxobj:
            return
        transit_tools.ShowError(text)

    def transit_warning(self,text):
        """Report `text` as a message and, with a GUI attached, call transit_tools.ShowWarning."""
        self.transit_message(text)
        if not self.wxobj:
            return
        transit_tools.ShowWarning(text)
class TransitFile (TransitGUIBase):
#TODO write docstring
def __init__(self, identifier="#Unknown", colnames=[]):
#TODO write docstring
TransitGUIBase.__init__(self)
self.identifier = identifier
self.colnames = colnames
......@@ -34,11 +98,20 @@ class TransitFile:
#TODO write docstring
row = 0
data = []
shownError = False
for line in open(path):
if line.startswith("#"): continue
tmp = line.split("\t")
tmp[-1] = tmp[-1].strip()
#print colnames
#print len(colnames), len(tmp)
try:
rowdict = dict([(colnames[i], tmp[i]) for i in range(len(colnames))])
except Exception as e:
if not shownError:
self.transit_warning("Error reading data! This may be caused by trying to load a old results file, when the format has changed.")
shownError = True
rowdict = dict([(colnames[i], tmp[i]) for i in range(min(len(colnames), len(tmp)))])
data.append((row, rowdict))
row+=1
return data
......@@ -69,6 +142,8 @@ class AnalysisGUI:
def __init__(self):
self.wxobj = None
self.panel = None
self.LABELSIZE = (100,-1)
self.WIDGETSIZE = (100,-1)
def Hide(self):
self.panel.Hide()
......@@ -107,6 +182,41 @@ class AnalysisGUI:
Button.Bind( wx.EVT_BUTTON, self.wxobj.RunMethod )
self.panel = wPanel
def defineTextBox(self, panel, labelText="", widgetText="", tooltipText="", labSize=None, widgetSize=None):
    """Build a horizontal row holding a label, a text box and an info icon.

    `labSize` / `widgetSize` fall back to self.LABELSIZE / self.WIDGETSIZE
    when not given.  Returns (label, textBox, sizer).
    """
    labSize = labSize or self.LABELSIZE
    widgetSize = widgetSize or self.WIDGETSIZE

    label = wx.StaticText(panel, wx.ID_ANY, labelText, wx.DefaultPosition, labSize, 0)
    label.Wrap(-1)
    textBox = wx.TextCtrl(panel, wx.ID_ANY, widgetText, wx.DefaultPosition, widgetSize, 0)
    infoIcon = InfoIcon(panel, wx.ID_ANY, tooltip=tooltipText)

    # All three widgets share the same sizer settings.
    sizer = wx.BoxSizer(wx.HORIZONTAL)
    for widget in (label, textBox, infoIcon):
        sizer.Add(widget, 0, wx.ALIGN_CENTER_VERTICAL, 5)
    return (label, textBox, sizer)
def defineChoiceBox(self, panel, labelText="", widgetChoice=[""], tooltipText="", labSize=None, widgetSize=None):
    """Build a horizontal row holding a label, a drop-down and an info icon.

    The drop-down lists `widgetChoice` with its first entry selected.
    `labSize` / `widgetSize` fall back to self.LABELSIZE / self.WIDGETSIZE
    when not given.  Returns (label, choiceBox, sizer).
    """
    labSize = labSize or self.LABELSIZE
    widgetSize = widgetSize or self.WIDGETSIZE

    label = wx.StaticText(panel, wx.ID_ANY, labelText, wx.DefaultPosition, labSize, 0)
    label.Wrap(-1)
    choiceBox = wx.Choice(panel, wx.ID_ANY, wx.DefaultPosition, widgetSize, widgetChoice, 0)
    choiceBox.SetSelection(0)
    infoIcon = InfoIcon(panel, wx.ID_ANY, tooltip=tooltipText)

    # All three widgets share the same sizer settings.
    sizer = wx.BoxSizer(wx.HORIZONTAL)
    for widget in (label, choiceBox, infoIcon):
        sizer.Add(widget, 0, wx.ALIGN_CENTER_VERTICAL, 5)
    return (label, choiceBox, sizer)
def defineCheckBox(self, panel, labelText="", widgetCheck=False, tooltipText="", widgetSize=None):
    """Build a horizontal row holding a labelled check box and an info icon.

    The check box starts with the value `widgetCheck`.  `widgetSize` falls
    back to self.WIDGETSIZE when not given.  Returns (checkBox, sizer).
    """
    widgetSize = widgetSize or self.WIDGETSIZE

    checkBox = wx.CheckBox(panel, label = labelText, size=widgetSize)
    checkBox.SetValue(widgetCheck)
    infoIcon = InfoIcon(panel, wx.ID_ANY, tooltip=tooltipText)

    # Both widgets share the same sizer settings.
    sizer = wx.BoxSizer(wx.HORIZONTAL)
    for widget in (checkBox, infoIcon):
        sizer.Add(widget, 0, wx.ALIGN_CENTER_VERTICAL, 5)
    return (checkBox, sizer)
class AnalysisMethod:
......@@ -140,8 +250,11 @@ class AnalysisMethod:
#TODO: write docstring
try:
return self.fromargs(sys.argv[2:])
except InvalidArgumentException as e:
print "Error: %s" % str(e)
print self.usage_string()
except IndexError as e:
traceback.print_exc()
print "Error: %s" % str(e)
print self.usage_string()
except TypeError as e:
print "Error: %s" % str(e)
......@@ -218,13 +331,13 @@ class AnalysisMethod:
def status_message(self, text):
def status_message(self, text, time=-1):
#TODO: write docstring
if self.wxobj:
if newWx:
wx.CallAfter(pub.sendMessage, "status", msg=(self.short_name, text))
wx.CallAfter(pub.sendMessage, "status", msg=(self.short_name, text, time))
else:
wx.CallAfter(pub.sendMessage, "status", (self.short_name, text))
wx.CallAfter(pub.sendMessage, "status", (self.short_name, text, time))
wx.Yield()
def console_message(self, text):
......@@ -328,7 +441,7 @@ class TransitAnalysis:
elif len(self.transposons) == 1:
return "Intended for %s only" % self.transposons[0]
elif len(self.transposons) == 2:
return "Intended for %s && %s" % tuple(self.transposons)
return "Intended for %s or %s" % tuple(self.transposons)
else:
return "Intended for " + ", ".join(self.transposons[:-1]) + ", and " + self.transposons[-1]
......
......@@ -349,6 +349,7 @@ class BinomialMethod(base.SingleConditionMethod):
kp1c_std = 1.1
numpy.seterr(divide='ignore')
for i in range(1, sample_size):
i0 = Z[:,i-1] == 0; n0 = numpy.sum(i0);
......@@ -405,6 +406,7 @@ class BinomialMethod(base.SingleConditionMethod):
if Kp1_c <= 0: Kp1[i] = Kp1[i-1]
else:
fc = numpy.log(scipy.stats.gamma.pdf(Kp1_c, self.a1, self.b1));
f1 = numpy.log(scipy.stats.gamma.pdf(Kp1[i-1], self.a1, self.b1));
fc += numpy.sum(numpy.log(scipy.stats.beta.pdf(theta[i1,i], Kp1_c*rho1[i], Kp1_c*(1-rho1[i]))))
......@@ -440,10 +442,11 @@ class BinomialMethod(base.SingleConditionMethod):
#Update progress
text = "Running Gumbel Method... %2.0f%%" % (100.0*(i+1)/(sample_size))
text = "Running Binomial Method... %2.0f%%" % (100.0*(i+1)/(sample_size))
self.progress_update(text, i)
self.transit_message_inplace(text)
numpy.seterr(divide='warn')
z_bar = numpy.apply_along_axis(numpy.mean, 1, Z[:, self.burnin:])
theta_bar = numpy.apply_along_axis(numpy.mean, 1, theta[:, self.burnin:])
......@@ -457,7 +460,7 @@ class BinomialMethod(base.SingleConditionMethod):
memberstr = ""
for m in members:
memberstr += "%s = %s, " % (m, getattr(self, m))
self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s, samples=%s, burnin=%s\n" % (",".join(self.ctrldata), self.annotation_path, self.output.name, self.samples, self.burnin))
self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s, samples=%s, burnin=%s\n" % (",".join(self.ctrldata).encode('utf-8'), self.annotation_path.encode('utf-8'), self.output.name.encode('utf-8'), self.samples, self.burnin))
else:
self.output.write("#Console: python %s\n" % " ".join(sys.argv))
......
......@@ -167,7 +167,7 @@ class ExampleMethod(base.SingleConditionMethod):
#Get orf data
self.transit_message("Getting Data")
G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, norm="TTR", ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
data = []
N = len(G)
......@@ -175,7 +175,11 @@ class ExampleMethod(base.SingleConditionMethod):
self.progress_range(N)
for gene in G:
count+=1
if gene.n == 0:
mean = 0.0
else:
mean = numpy.mean(gene.reads)
if gene.k == 0:
nzmean = 0.0
else:
......@@ -194,12 +198,12 @@ class ExampleMethod(base.SingleConditionMethod):
memberstr = ""
for m in members:
memberstr += "%s = %s, " % (m, getattr(self, m))
self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s\n" % (",".join(self.ctrldata), self.annotation_path, self.output))
self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s\n" % (",".join(self.ctrldata).encode('utf-8'), self.annotation_path.encode('utf-8'), self.output.name.encode('utf-8')))
else:
self.output.write("#Console: python %s\n" % " ".join(sys.argv))
self.output.write("#Data: %s\n" % (",".join(self.ctrldata)))
self.output.write("#Annotation path: %s\n" % (",".join(self.ctrldata)))
self.output.write("#Data: %s\n" % (",".join(self.ctrldata).encode('utf-8')))
self.output.write("#Annotation path: %s\n" % self.annotation_path.encode('utf-8'))
self.output.write("#Time: %s\n" % (time.time() - start_time))
self.output.write("#%s\n" % "\t".join(columns))
......
......@@ -118,6 +118,7 @@ class GriffinMethod(base.SingleConditionMethod):
ctrldata,
annotation_path,
output_file,
minread=1,
replicates="Sum",
normalization=None,
LOESS=False,
......@@ -125,7 +126,8 @@ class GriffinMethod(base.SingleConditionMethod):
NTerminus=0.0,
CTerminus=0.0, wxobj=None):
base.SingleConditionMethod.__init__(self, short_name, long_name, description, ctrldata, annotation_path, output_file, replicates=replicates, normalization=normalization, LOESS=LOESS, NTerminus=NTerminus, CTerminus=CTerminus, wxobj=wxobj)
base.SingleConditionMethod.__init__(self, short_name, long_name, description, ctrldata, annotation_path, output_file, replicates=replicates, normalization=normalization, LOESS=LOESS, ignoreCodon=ignoreCodon, NTerminus=NTerminus, CTerminus=CTerminus, wxobj=wxobj)
self.minread = minread
@classmethod
......@@ -147,6 +149,9 @@ class GriffinMethod(base.SingleConditionMethod):
return None
#
minread = 1
#Read the parameters from the wxPython widgets
ignoreCodon = True
NTerminus = float(wxobj.globalNTerminusText.GetValue())
......@@ -168,6 +173,7 @@ class GriffinMethod(base.SingleConditionMethod):
return self(ctrldata,
annotationPath,
output_file,
minread,
replicates,
normalization,
LOESS,
......@@ -185,16 +191,19 @@ class GriffinMethod(base.SingleConditionMethod):
outpath = args[2]
output_file = open(outpath, "w")
replicates = "Sum"
minread = int(kwargs.get("m", 1))
replicates = kwargs.get("r", "Sum")
normalization = None
LOESS = False
ignoreCodon = True
NTerminus = 0.0
CTerminus = 0.0
ignoreCodon = not kwargs.get("sC", False)
NTerminus = float(kwargs.get("iN", 0.0))
CTerminus = float(kwargs.get("iC", 0.0))
return self(ctrldata,
annotationPath,
output_file,
minread,
replicates,
normalization,
LOESS,
......@@ -210,7 +219,7 @@ class GriffinMethod(base.SingleConditionMethod):
#Get orf data
self.transit_message("Getting Data")
G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=self.minread, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
N = len(G)
self.progress_range(N)
......@@ -246,12 +255,12 @@ class GriffinMethod(base.SingleConditionMethod):
memberstr = ""
for m in members:
memberstr += "%s = %s, " % (m, getattr(self, m))
self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s\n" % (",".join(self.ctrldata), self.annotation_path, self.output.name))
self.output.write("#GUI with: ctrldata=%s, annotation=%s, output=%s\n" % (",".join(self.ctrldata).encode('utf-8'), self.annotation_path.encode('utf-8'), self.output.name.encode('utf-8')))
else:
self.output.write("#Console: python %s\n" % " ".join(sys.argv))
self.output.write("#Data: %s\n" % (",".join(self.ctrldata)))
self.output.write("#Annotation path: %s\n" % (",".join(self.ctrldata)))
self.output.write("#Data: %s\n" % (",".join(self.ctrldata).encode('utf-8')))
self.output.write("#Annotation path: %s\n" % self.annotation_path.encode('utf-8'))
self.output.write("#Time: %s\n" % (time.time() - start_time))
self.output.write("#%s\n" % "\t".join(columns))
......@@ -268,7 +277,15 @@ class GriffinMethod(base.SingleConditionMethod):
@classmethod
def usage_string(self):
    """Return the command-line usage text for the griffin method.

    The text starts with the invoking script name (sys.argv[0]), lists the
    three required positional arguments and then the optional flags
    (-m, -r, -sC, -iN, -iC) with their defaults.
    """
    return """python %s griffin <comma-separated .wig files> <annotation .prot_table> <output file> [Optional Arguments]
Optional Arguments:
-m <integer> := Smallest read-count to consider. Default: -m 1
-r <string> := How to handle replicates. Sum or Mean. Default: -r Sum
-sC := Include stop-codon (default is to ignore).
-iN <float> := Ignore TAs occuring at given fraction of the N terminus. Default: -iN 0.0
-iC <float> := Ignore TAs occuring at given fraction of the C terminus. Default: -iC 0.0
""" % (sys.argv[0])
if __name__ == "__main__":
......