Commit f5161553 authored by Michael R. Crusoe's avatar Michael R. Crusoe

Imported Upstream version 1.0.0

parents
*.gem
*.rbc
*.so
.bundle
.config
Gemfile.lock
coverage
test_data
InstalledFiles
lib/bundler/man
pkg
rdoc
spec/reports
test/tmp
test/version_tmp
tmp
*~
.#*
\#*
*so
dryrun
# YARD artifacts
.yardoc
_yardoc
doc/
.ruby-version
# large test files not for repo
dryrun
*.blast
*.phr
*.pin
*.psq
*.nhr
*.nin
*.nsq
*.bt2
*.fai
*.bam
*.csv
*.coverage
# c extension build artefacts
Makefile
transrate.bundle
transrate.o
.RUBYARCHDIR.time
## Fix Trinity Fastq Output
Trinity outputs fastq files that have been 'corrected'.
Unfortunately, these files are no longer properly paired and therefore can't be used with most aligners, such as bowtie2 and snap.
This script attempts to fix that by producing two properly paired fastq files and a third fastq file containing the unpaired (single) reads.
version 1.0
Author: Chris Boursnell
cmb211@cam.ac.uk
\ No newline at end of file
require 'rake/testtask'
# Register the test task and put the 'test' directory on the load path so
# that test files can require their helper by bare name.
Rake::TestTask.new { |test_task| test_task.libs.push('test') }

# Running plain `rake` runs the test suite.
desc "Run tests"
task default: :test
#!/usr/bin/env ruby
# Re-pair the 'corrected' fastq files written by Trinity
require 'trollop'
require 'fix-trinity-output'
# With no arguments at all, behave as if --help had been requested.
ARGV[0] = "--help" if ARGV.empty?

# Command-line definition: all three options are mandatory strings.
opts = Trollop::options do
  banner <<-EOS
fix-trinity-output v1.0
Trinity outputs fastq files that have been 'corrected'
Unfortunately these files are no longer properly paired and therefore
can't be used in most aligners such as bowtie2 and snap
This script attempts to fix that and produce 2 properly paired fastq
files and a fastq file containing single reads
Options:
  EOS
  opt :left, "fastq file of left reads", type: String, required: true
  opt :right, "fastq file of right reads", type: String, required: true
  opt :output, "output prefix", type: String, required: true
end

# Resolve both inputs to absolute paths, left first.
left = File.expand_path(opts.left)
right = File.expand_path(opts.right)

# Abort with a usage error on the first input file that does not exist.
[left, right].each do |path|
  Trollop::die "#{path} not found" unless File.exist?(path)
end

Fixer.new.run(left, right, opts.output)
\ No newline at end of file
# Gem specification for fix-trinity-output.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = "fix-trinity-output"
  spec.version     = "1.0.0"
  spec.date        = "2014-10-23"
  spec.summary     = "Fix output fastq files from Trinity"
  spec.description = "See readme"
  spec.authors     = ["Chris Boursnell"]
  spec.email       = "cmb211@cam.ac.uk"
  spec.homepage    = "http://rubygems.org/gems/fix-trinity-output"
  spec.license     = "MIT"

  # Contents: every file tracked by git, one path per output line.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = ["fix-trinity-output"]
  spec.require_paths = ["lib"]

  # Runtime dependency (command-line option parsing in the executable).
  spec.add_dependency "trollop", "~> 2.0"

  # Development-only dependencies.
  spec.add_development_dependency "rake", "~> 10.3", ">= 10.3.2"
  spec.add_development_dependency "turn", "~> 0.9", ">= 0.9.7"
  spec.add_development_dependency "simplecov", "~> 0.8", ">= 0.8.2"
  spec.add_development_dependency "shoulda-context", "~> 1.2", ">= 1.2.1"
end
require 'fix-trinity-output/fix-trinity-output.rb'
\ No newline at end of file
#!/usr/bin/env ruby
# Re-pairs a left and a right fastq file whose records are no longer in
# matching order.
#
# Two records are considered mates when their name lines are equal after the
# last two characters have been removed (for example a "/1" or "/2" suffix;
# the suffix itself is not inspected).
class Fixer
  def initialize
  end

  # Reads both inputs in lock-step, one record from each per iteration, and
  # writes three files:
  #
  # * "<prefix>-fixed.1.fastq"      - left reads that have a mate
  # * "<prefix>-fixed.2.fastq"      - the mates, in the same order
  # * "<prefix>-fixed.single.fastq" - every read that was never paired
  #
  # The third line of each record is always written as a bare "+".
  #
  # A read whose name repeats on the same side while the earlier copy is
  # still waiting for its mate cannot be paired; it is written to the single
  # file immediately and the earlier copy keeps waiting.
  #
  # @param left [String] path of the fastq file holding the left reads
  # @param right [String] path of the fastq file holding the right reads
  # @param prefix [String] path prefix for the three output files
  # @return [Hash] the reads left unpaired at the end, keyed by shortened
  #   name; each value is a two-slot array [left_text, right_text] with
  #   exactly one slot filled
  def run(left, right, prefix)
    pending = {}
    handles = []
    begin
      handles << (left_in = File.open(left))
      handles << (right_in = File.open(right))
      handles << (left_out = File.open("#{prefix}-fixed.1.fastq", "w"))
      handles << (right_out = File.open("#{prefix}-fixed.2.fastq", "w"))
      handles << (single_out = File.open("#{prefix}-fixed.single.fastq", "w"))
      paired_out = [left_out, right_out]

      loop do
        left_record = read_record(left_in)
        right_record = read_record(right_in)
        # Keep going until BOTH inputs are exhausted; they may differ in length.
        break unless left_record || right_record

        # The left record is placed first so that two mates read in the same
        # iteration are paired straight away.
        place(left_record, 0, pending, paired_out, single_out) if left_record
        place(right_record, 1, pending, paired_out, single_out) if right_record
      end

      # Whatever is still waiting for a mate never found one.
      pending.each_value do |entry|
        entry.each { |text| single_out.write(text) if text }
      end
    ensure
      # Close every handle that was opened, even when an error interrupts us.
      handles.each(&:close)
    end
    pending
  end

  private

  # Reads the next four-line record from +io+.
  #
  # @return [Array(String, String), nil] the chomped name line and the full
  #   record text, or nil once +io+ has no more lines
  def read_record(io)
    name = io.gets
    return nil unless name

    name = name.chomp
    seq = io.gets
    io.gets # separator line; it is replaced by a bare "+" on output
    quals = io.gets
    [name, "#{name}\n#{terminate(seq)}+\n#{terminate(quals)}"]
  end

  # Ensures a line ends in a newline so that a record taken from the very
  # end of a file without a trailing newline does not run into the record
  # written after it. nil (a missing line) is passed through unchanged.
  def terminate(line)
    return line if line.nil? || line.end_with?("\n")

    "#{line}\n"
  end

  # Pairs +record+ with its waiting mate, or parks it until the mate shows up.
  #
  # @param record [Array(String, String)] name and text from #read_record
  # @param side [Integer] 0 for a left read, 1 for a right read
  # @param pending [Hash] reads waiting for a mate, keyed by shortened name
  # @param paired_out [Array<IO>] the left and right paired output files
  # @param single_out [IO] the output file for unpaired reads
  def place(record, side, pending, paired_out, single_out)
    name, text = record
    key = name[0..-3] # drop the two-character mate suffix
    mate = 1 - side
    entry = pending[key]

    if entry.nil?
      waiting = []
      waiting[side] = text
      pending[key] = waiting
    elsif entry[mate]
      paired_out[side].write(text)
      paired_out[mate].write(entry[mate])
      pending.delete(key)
    else
      # Same name seen again on the same side with no mate available.
      single_out.write(text)
    end
  end
end
\ No newline at end of file
--- !ruby/object:Gem::Specification
name: fix-trinity-output
version: !ruby/object:Gem::Version
version: 1.0.0
platform: ruby
authors:
- Chris Boursnell
autorequire:
bindir: bin
cert_chain: []
date: 2014-10-23 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
name: trollop
requirement: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '2.0'
type: :runtime
prerelease: false
version_requirements: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '2.0'
- !ruby/object:Gem::Dependency
name: rake
requirement: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '10.3'
- - ">="
- !ruby/object:Gem::Version
version: 10.3.2
type: :development
prerelease: false
version_requirements: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '10.3'
- - ">="
- !ruby/object:Gem::Version
version: 10.3.2
- !ruby/object:Gem::Dependency
name: turn
requirement: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '0.9'
- - ">="
- !ruby/object:Gem::Version
version: 0.9.7
type: :development
prerelease: false
version_requirements: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '0.9'
- - ">="
- !ruby/object:Gem::Version
version: 0.9.7
- !ruby/object:Gem::Dependency
name: simplecov
requirement: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '0.8'
- - ">="
- !ruby/object:Gem::Version
version: 0.8.2
type: :development
prerelease: false
version_requirements: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '0.8'
- - ">="
- !ruby/object:Gem::Version
version: 0.8.2
- !ruby/object:Gem::Dependency
name: shoulda-context
requirement: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '1.2'
- - ">="
- !ruby/object:Gem::Version
version: 1.2.1
type: :development
prerelease: false
version_requirements: !ruby/object:Gem::Requirement
requirements:
- - "~>"
- !ruby/object:Gem::Version
version: '1.2'
- - ">="
- !ruby/object:Gem::Version
version: 1.2.1
description: See readme
email: cmb211@cam.ac.uk
executables:
- fix-trinity-output
extensions: []
extra_rdoc_files: []
files:
- ".gitignore"
- Gemfile
- README.md
- Rakefile
- bin/fix-trinity-output
- fix-trinity-output.gemspec
- lib/fix-trinity-output.rb
- lib/fix-trinity-output/fix-trinity-output.rb
- test/helper.rb
- test/left.fq
- test/right.fq
- test/test_test.rb
homepage: http://rubygems.org/gems/fix-trinity-output
licenses:
- MIT
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
requirements:
- - ">="
- !ruby/object:Gem::Version
version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
requirements:
- - ">="
- !ruby/object:Gem::Version
version: '0'
requirements: []
rubyforge_project:
rubygems_version: 2.2.2
signing_key:
specification_version: 4
summary: Fix output fastq files from Trinity
test_files: []
require 'simplecov'
require 'test/unit'
begin; require 'turn/autorun'; rescue LoadError; end
require 'shoulda-context'
require 'fix-trinity-output'
# turn is loaded on a best-effort basis (its LoadError is rescued when it is
# required), so only configure it when the constant actually exists;
# otherwise the suite would die with a NameError before any test runs.
if defined?(Turn)
  Turn.config.format = :pretty
  Turn.config.trace = 5
end
This diff is collapsed.
This diff is collapsed.
#!/usr/bin/env ruby
require 'helper'
require 'fileutils'
require 'tmpdir'
# End-to-end test: runs Fixer on the bundled left.fq / right.fq fixtures and
# checks the size of the three files it produces.
class TestFixer < Test::Unit::TestCase
  context 'fixer' do
    setup do
      @fixer = Fixer.new
      # Fresh scratch directory per test so runs never see stale output.
      @tmpdir = Dir.mktmpdir
    end

    teardown do
      # Remove the scratch directory without going through a shell, so paths
      # containing spaces or shell metacharacters are handled correctly.
      FileUtils.rm_rf(@tmpdir)
    end

    should 'fix pair of fastq files' do
      left = File.join(File.dirname(__FILE__), 'left.fq')
      right = File.join(File.dirname(__FILE__), 'right.fq')
      prefix = "#{@tmpdir}/test"
      @fixer.run left, right, prefix

      paired_left = "#{@tmpdir}/test-fixed.1.fastq"
      paired_right = "#{@tmpdir}/test-fixed.2.fastq"
      singles = "#{@tmpdir}/test-fixed.single.fastq"

      assert File.exist?(paired_left)
      assert File.exist?(paired_right)
      assert File.exist?(singles)

      # Count newline characters, which is what `wc -l` reports, without
      # depending on an external command being installed.
      assert_equal 876, File.read(paired_left).count("\n")
      assert_equal 876, File.read(paired_right).count("\n")
      assert_equal 762, File.size(singles)
    end
  end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment