Skip to content
Commits on Source (6)
......@@ -218,9 +218,18 @@
proteinortho6.pl replaced chomp with s/[\r\n]+$//
proteinortho_clustering.cpp fix bug that only uses lapack if -pld is set, regardless of the value.
11. Sept (uid: 3813)
updated shebang of ffadj such that python2.7 is used directly (ffadj fails if called with higher version of python)
-p=blastp is now alias of blastp+ and legacy blast is now -p=blastp_legacy (blastn is equivalent)
Makefile: static now includes -lquadmath
updated shebang of ffadj such that python2.7 is used directly (ffadj fails if called with higher version of python)
-p=blastp is now alias of blastp+ and legacy blast is now -p=blastp_legacy (blastn is equivalent)
Makefile: static now includes -lquadmath
25. Sept (uid: 3899)
synteny update to python3 (but the code looks fishy, the -synteny option now gets a deprecated warning)
proteinortho now only print html for <10 files automatically and otherwise only gives the option
synteny update to python3 (but the code looks fishy, the -synteny option now gets a deprecated warning)
proteinortho now only print html for <10 files automatically and otherwise only gives the option
4. Nov (uid: 4020)
FIXED: sometimes the python3 version produces one additional edge (global definition of ALPHA). Special thanks for this update go to Daniel Doerr for fixing this.
25. Nov (uid: 4030)
added proteinortho_history
the synteny option ffadj is now not deprecated anymore
10. Dec (uid: 4196)
improved proteinortho_history
removed the new diamond spam
+ added proteinortho_summary.pl for a summary of proteinortho-graph on species level.
......@@ -77,7 +77,7 @@ endif
dir_guard=@if [ ! -d $(BUILDDIR) ]; then echo "Creating build directory ..."; mkdir -p $(BUILDDIR); fi
.PHONY: all
all:$(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_grab_proteins.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_treeBuilderCore
all:$(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_grab_proteins.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_summary.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_history.pl $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_treeBuilderCore
@echo "[100%] $(GREEN)Everything is compiled with no errors.$(NC)"
$(BUILDDIR)/proteinortho_extract_from_graph.pl: src/proteinortho_extract_from_graph.pl
......@@ -120,6 +120,14 @@ $(BUILDDIR)/proteinortho_ffadj_mcs.py: src/proteinortho_ffadj_mcs.py
$(dir_guard)
@cp $< $@
$(BUILDDIR)/proteinortho_history.pl: src/proteinortho_history.pl
$(dir_guard)
@cp $< $@
$(BUILDDIR)/proteinortho_summary.pl: src/proteinortho_summary.pl
$(dir_guard)
@cp $< $@
echoENV:
@echo -n "CC = "
@echo $(CC)
......@@ -224,7 +232,7 @@ else
endif
.PHONY: install
install: proteinortho6.pl proteinortho $(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_treeBuilderCore $(BUILDDIR)/proteinortho_grab_proteins.pl
# Prerequisites for 'install': every script/binary that gets copied to INSTALLDIR.
# Note: proteinortho_history.pl was listed twice; the duplicate is removed here
# (GNU make's $^ deduplicates anyway, but the redundancy was confusing).
install: proteinortho6.pl proteinortho $(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho_history.pl $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_treeBuilderCore $(BUILDDIR)/proteinortho_grab_proteins.pl $(BUILDDIR)/proteinortho_summary.pl
@echo "INSTALLING everything to $(INSTALLDIR)"
@install -v $^ $(INSTALLDIR);
@echo "$(GREEN)Everything installed successfully to $(INSTALLDIR).$(NC)"
......@@ -246,15 +254,15 @@ test_step2: proteinortho6.pl
echo "$(GREEN)passed$(NC)"; \
fi
# @echo -n " [2/12] -p=blastp+ synteny (PoFF) test: "
# @if [ "$(shell which blastp)" = "" ]; then\
# echo "$(ORANGE)blastp missing, skipping...$(NC)"; \
# else \
# ./proteinortho6.pl -silent -force -project=test_synteny -synteny -singles -p=blastp+ test/*.faa; \
# set -e ; ./src/chk_test.pl test_synteny.proteinortho.tsv; \
# set -e ; ./src/chk_test.pl test_synteny.poff.tsv; \
# echo "$(GREEN)passed$(NC)"; \
# fi
@echo -n " [2/12] -p=blastp+ synteny (PoFF) test: "
@if [ "$(shell which blastp)" = "" ]; then\
echo "$(ORANGE)blastp missing, skipping...$(NC)"; \
else \
./proteinortho6.pl -silent -force -project=test_synteny -synteny -singles -p=blastp+ test/*.faa; \
set -e ; ./src/chk_test.pl test_synteny.proteinortho.tsv; \
set -e ; ./src/chk_test.pl test_synteny.poff.tsv; \
echo "$(GREEN)passed$(NC)"; \
fi
@echo -n " [3/12] -p=diamond test: "
@if [ "$(shell which diamond)" = "" ]; then\
......
# Proteinortho
Proteinortho is a tool to detect orthologous genes within different species. For doing so, it compares similarities of given gene sequences and clusters them to find significant groups. The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once. Details can be found in <a href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-124">Lechner et al., BMC Bioinformatics. 2011 Apr 28;12:124.</a>
To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (doi:10.1371/journal.pone.0105015), is already built into Proteinortho. The general workflow of proteinortho is depicted [![here](https://www.dropbox.com/s/7ubl1ginn3fmf8k/proteinortho_workflow.jpg?dl=0)].
Proteinortho is a tool to detect orthologous genes within different species.
For doing so, it compares similarities of given gene sequences and clusters them to find significant groups.
The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once.
Details can be found in ([doi:10.1186/1471-2105-12-124](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-124)).
To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF ([doi:10.1371/journal.pone.0105015](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0105015)), is already built into Proteinortho. The general workflow of proteinortho:
# New Features of Proteinortho Version 6!
<img src="https://www.uni-marburg.de/de/fb16/ipc/ag-lechner/graph.png/@@images/image/unimr_lead_image_sd" alt="proteinortho.workflow.png" height="250">
Input: multiple fasta files (orange box) with many proteins/genes (circles).
First an initial all vs. all comparison between all proteins of all species is performed to determine protein similarities (upper right image).
The second stage is the clustering of similar genes to meaningful co-orthologous groups (lower right image). Connected components within this graph can be considered as putative co-orthologous groups in theory and are returned in the output (lower left image).
# New Features of Proteinortho Version 6
- Implementation of various Blast alternatives for step (for -step=2 the -p= options): Diamond, MMseqs2, Last, Topaz, Rapsearch2, Blat, Ublast and Usearch
- Multithreading support for the clustering step (-step=3)
- Integration of the LAPACK Fortran Library for a faster clustering step (-step=3)
- Integration of the bitscore weights in the connectivity calculation for more data-dependent splits (-step=3)
- Continuous Integration [![pipeline status](https://gitlab.com/paulklemm_PHD/proteinortho/badges/master/pipeline.svg)](https://gitlab.com/paulklemm_PHD/proteinortho/pipelines)
<details>
<summary>Minor features: (Click to expand)</summary>
<summary>Minor new features: (Click to expand)</summary>
- Output now supports OrthoXML (-xml) and HTML.
- [proteinortho_history.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs) a new tool for tracking proteins (or pairs of proteins) in the workflow of proteinortho.
- [proteinortho_summary.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs)
- Various test routines (make test).
- New heuristics for connectivity calculation (-step=3).
</details>
<details>
<summary>6.0.12: (Click to expand)</summary>
- removed the diamond spam
- improved [proteinortho_history.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs) : now the program is "smarter" in detecting files automatically
- added [proteinortho_summary.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs) : a tool for summarizing the proteinortho-graph on species level. With the output it is easy to identify weak connected species.
</details>
# Continuous Integration
supports
The badge
[![pipeline status](https://gitlab.com/paulklemm_PHD/proteinortho/badges/master/pipeline.svg)](https://gitlab.com/paulklemm_PHD/proteinortho/commits/master) indicates the current status of the continuous integration (CI) among various platforms (ubuntu, centos, debian, fedora) and GNU c++ versions (5, 6, latest)
The whole git repository gets deployed on a clean docker image (gcc:latest,gcc:5,ubuntu:latest,fedora:latest,debian:latest,centos:latest) and compiled (make all) and tested (make test). The badge is green only if all tests pass. For more information see [Continuous Integration (proteinortho wiki)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Continuous%20Integration).
A more detailed list of all changes: [CHANGELOG](https://gitlab.com/paulklemm_PHD/proteinortho/blob/master/CHANGELOG)
# Table of Contents
1. [Installation](#installation)
......@@ -30,12 +48,18 @@ The whole git repository gets deployed on a clean docker imager (gcc:latest,gcc:
4. [PoFF synteny extension](#poff)
5. [Output description](#output)
6. [Examples](#examples)
7. [Error Codes and Troubleshooting](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error-Codes) <- look here if you cannot compile/run (proteinortho wiki)
8. [Large compute jobs example](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Large-compute-jobs-(the--jobs-option)) (proteinortho wiki)
9. [Biological example](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/biological-example) (proteinortho wiki)
Bug reports: See chapter 7. or send a mail to incoming+paulklemm-phd-proteinortho-7278443-issue-@incoming.gitlab.com (Please include the 'Parameter-vector' that is printed for all errors)
You can also send a mail to lechner@staff.uni-marburg.de.
# [Proteinortho-Wiki](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/) Table of Contents
1. [Tools and additional programs](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs)
2. [Error Codes and Troubleshooting](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error-Codes) <- look here if you cannot compile/run proteinortho
3. [Large compute jobs example](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Large-compute-jobs-(the--jobs-option))
4. [FAQ](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/FAQ) <br>
[(...)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/)
Bug reports: Please have a look at chapter [2.](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error-Codes) first or send a mail to incoming+paulklemm-phd-proteinortho-7278443-issue-@incoming.gitlab.com. (please include the 'parameter-vector' that is printed for all errors)
You can also send mails to lechner@staff.uni-marburg.de. Any suggestions, feedback and comments are welcome!
# Installation
......@@ -88,7 +112,7 @@ Afterwards the deb package can be installed with `sudo dpkg -i proteinortho*deb`
<br>
#### 1. Prerequisites
#### Prerequisites for compiling proteinortho from source
Proteinortho uses standard software which is often installed already or is part of the package repositories and can thus easily be installed. The sources come with a precompiled version of Proteinortho for 64bit Linux.
......@@ -126,7 +150,7 @@ Proteinortho uses standard software which is often installed already or is part
<br>
#### 2. Building and installing proteinortho from source (linux and osx)
#### Building and installing proteinortho from source (linux and osx)
Here you can use a working lapack library, check this with 'dpkg --get-selections | grep lapack'. Install lapack e.g. with 'apt-get install libatlas3-base' or liblapack3.
......@@ -179,7 +203,7 @@ OR(!) specify the new g++ in 'make CXX=/usr/local/bin/g++-7 all'
[100%] Everything is compiled with no errors.
</pre>
The compilation of proteinortho_clustering has multiple fall-back routines. If everything fails please look here [Troubleshooting (proteinortho wiki)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error%20Codes).
The compilation of proteinortho\_clustering has multiple fall-back routines. If everything fails please look here [Troubleshooting (proteinortho wiki)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error%20Codes).
</details>
......@@ -214,21 +238,13 @@ If you have problems compiling/running the program go to [Troubleshooting (prote
<br>
# SYNOPSIS
> **proteinortho6.pl [options] \<fasta file(s)\>** (one fasta for each species, at least 2)
OR
> **proteinortho [options] \<fasta file(s)\>**
one fasta for each species; at least 2
# DESCRIPTION
**proteinortho** is a tool to detect orthologous genes within different
species. For doing so, it compares similarities of given gene sequences
and clusters them to find significant groups. The algorithm was designed
to handle large-scale data and can be applied to hundreds of species at
one. Details can be found in Lechner et al., BMC Bioinformatics. 2011 Apr
28;12:124. To enhance the prediction accuracy, the relative order of genes
(synteny) can be used as additional feature for the discrimination of
orthologs. The corresponding extension, namely PoFF (doi:10.1371/journal.pone.0105015), is already build in Proteinortho.
species.
Proteinortho assumes, that you have all your gene sequences in FASTA
format either represented as amino acids or as nucleotides. The source
......@@ -302,6 +318,7 @@ Open `proteinorthoHelper.html` in your favorite browser or visit [lechnerlab.de/
- diamond : Only for protein files! standard diamond procedure and for
genes/proteins of length >40 with the additional --sensitive flag
Warning: Please use version 0.9.29 or later to avoid this known bug: https://gitlab.com/paulklemm_PHD/proteinortho/issues/24
- lastn,lastp : lastal. -n : dna files, -p protein files (BLOSUM62
scoring matrix)!
......@@ -343,7 +360,6 @@ Open `proteinorthoHelper.html` in your favorite browser or visit [lechnerlab.de/
<br>
**Synteny options (optional, step 2)**
(This option is deprecated)
(output: <myproject>.ffadj-graph, <myproject>.poff.tsv (tab separated file)-graph)
<details>
......@@ -537,6 +553,9 @@ Open `proteinorthoHelper.html` in your favorite browser or visit [lechnerlab.de/
</details>
<br>
[myproject.proteinortho-graph.summary](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools-and-additional-programs#proteinortho-graphblast-graph-species-summary-table)
<br>
<details>
<summary> myproject.proteinortho.html (Click to expand)</summary>
The html version of the myproject.proteinortho.tsv file
......
proteinortho (6.0.12+dfsg-1) unstable; urgency=medium
* Team upload.
* New upstream version
* Standards-Version: 4.4.1
* debian/copyright: use spaces rather than tabs to start continuation
lines.
-- Steffen Moeller <moeller@debian.org> Fri, 13 Dec 2019 00:17:49 +0100
proteinortho (6.0.8+dfsg-1) unstable; urgency=medium
* New upstream version ported to Python3
......
......@@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 12),
ncbi-blast+,
liblapack-dev | libatlas-base-dev | liblapack.so,
diamond-aligner
Standards-Version: 4.4.0
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/proteinortho
Vcs-Git: https://salsa.debian.org/med-team/proteinortho.git
Homepage: https://gitlab.com/paulklemm_PHD/proteinortho
......@@ -16,10 +16,10 @@ Package: proteinortho
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
${python3:Depends},
ncbi-blast+,
diamond-aligner,
liblapack3,
python3
liblapack3
Description: Detection of (Co-)orthologs in large-scale protein analysis
Proteinortho is a stand-alone tool that is geared towards large datasets
and makes use of distributed computing techniques when run on multi-core
......
......@@ -3,7 +3,7 @@ Upstream-Name: Proteinortho
Upstream-Contact: Marcus Lechner <lechner@staff.uni-marburg.de>
Source: https://www.bioinf.uni-leipzig.de/Software/proteinortho/
Files-Excluded: */BUILD
*/lapack-*.tar.gz
*/lapack-*.tar.gz
Files: *
Copyright: 2009-2014 Marcus Lechner <lechner@staff.uni-marburg.de>
......
......@@ -21,3 +21,7 @@ override_dh_install:
for pl in `grep -Rl '#!/usr/bin/env[[:space:]]\+perl' debian/*/usr/*` ; do \
sed -i '1s?^#!/usr/bin/env[[:space:]]\+perl?#!/usr/bin/perl?' $${pl} ; \
done
override_dh_auto_clean:
dh_auto_clean
rm -f remove.graph test_blastp.blast-graph test_blastp.info test_blastp.proteinortho-graph test_blastp.proteinortho-graph.summary test_blastp.proteinortho.html test_blastp.proteinortho.tsv test_lastp.blast-graph test_lastp.info test_lastp.proteinortho-graph test_lastp.proteinortho-graph.summary test_lastp.proteinortho.html test_lastp.proteinortho.tsv test_synteny.blast-graph test_synteny.ffadj-graph test_synteny.info test_synteny.poff-graph test_synteny.poff.html test_synteny.poff.tsv test_synteny.proteinortho-graph test_synteny.proteinortho-graph.summary test_synteny.proteinortho.html test_synteny.proteinortho.tsv test_synteny.poff-graph.summary
proteinortho source: python3-depends-but-no-python3-helper proteinortho
This diff is collapsed.
......@@ -2,7 +2,7 @@ variables:
PROJECT_NAME: "Proteinortho"
before_script:
- echo "starting yml for Proteinortho"
- apt-get update && apt-get -y install cmake diffutils wget ncbi-blast+ time git
- apt-get update && apt-get -y install cmake diffutils wget ncbi-blast+ time git python3
stages:
- codequality
- test-precompiled-bins
......@@ -17,7 +17,7 @@ gcc-latest-alloptions:
- tar xzf diamond-linux64.tar.gz
- mkdir ~/bin
- cp diamond ~/bin
- perl proteinortho*pl -project=testasd -cpus=1 -ram=100 -verbose=2 -selfblast -silent -force -desc -checkfasta -cleanblast -debug -binpath=~/bin -tmp='~/' -e=0.000001 -sim=0.9 -identity=20 -cov=30 -subparaBlast='--more-sensitive' -synteny -dups=1 -cs=4 -alpha=0.4 -conn=0.01 -purity=0.00001 -minspecies=2 -subparaCluster='-cpus 1 -seed 1' -nograph -singles -xml -exactstep3 test/*faa >/dev/null 2>&1 && rm testasd*poff* && rm testasd*fadj* && rm testasd*info* && export LC_NUMERIC="C" && export LC_ALL="C" && for f in testasd.*; do sort $f >$f.testasd; done; sha256sum -b *testasd | tr -d '\n' | awk '{if($0 == "eb88ba29afd4f2dba16d3dbf97a5b0d2ab7686654a854f8502f0e778628e7f56 *testasd.descriptions.testasdf80df4c1a951bfb55b02300a273f6395694f01e8ae908e296d9c14a847d432ac *testasd.proteinortho.html.testasdfa18e9a0530f5a5754f045cfe97deaf818bdb5eb725619952633f1da0641cf7b *testasd.proteinortho.tsv.testasdc598b8c43e48e06614ec19e2f6b870e2737a7117a50ab2b1613880764d0884b2 *testasd.proteinortho.tsv.xml.testasd"){print $0." -> OK"; exit 0}else{print $0." -> failed"; exit 1}}'
- perl proteinortho*pl -project=testasd -cpus=1 -ram=100 -verbose=2 -selfblast -silent -force -desc -checkfasta -cleanblast -debug -binpath=~/bin -tmp='~/' -e=0.000001 -sim=0.9 -identity=20 -cov=30 -subparaBlast='--more-sensitive' -synteny -dups=1 -cs=4 -alpha=0.4 -conn=0.01 -purity=0.00001 -minspecies=2 -subparaCluster='-cpus 1 -seed 1' -nograph -singles -xml -exactstep3 test/*faa >/dev/null 2>&1 && rm testasd*poff* && rm testasd*fadj* && rm testasd*info* && export LC_NUMERIC="C" && export LC_ALL="C" && for f in testasd.*; do sort $f >$f.testasd; done; sha256sum -b *testasd | tr -d '\n' | awk '{if($0 == "eb88ba29afd4f2dba16d3dbf97a5b0d2ab7686654a854f8502f0e778628e7f56 *testasd.descriptions.testasde3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 *testasd.proteinortho-graph.summary.testasdf80df4c1a951bfb55b02300a273f6395694f01e8ae908e296d9c14a847d432ac *testasd.proteinortho.html.testasdfa18e9a0530f5a5754f045cfe97deaf818bdb5eb725619952633f1da0641cf7b *testasd.proteinortho.tsv.testasdc598b8c43e48e06614ec19e2f6b870e2737a7117a50ab2b1613880764d0884b2 *testasd.proteinortho.tsv.xml.testasd"){print $0." -> OK"; exit 0}else{print $0." -> failed"; exit 1}}'
gcc-latest-all-p:
image: gcc
......@@ -96,7 +96,7 @@ ubuntu-latest0:
image: ubuntu
stage: test-precompiled-bins
script:
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++ && apt-get -y install python3
- echo "installing topaz"
- git clone https://github.com/ajm/topaz
- cd topaz/src
......@@ -115,7 +115,7 @@ ubuntu-latest:
image: ubuntu
stage: recompile-and-test
script:
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++ && apt-get -y install python3
- echo "installing topaz"
- git clone https://github.com/ajm/topaz
- cd topaz/src
......@@ -136,7 +136,7 @@ debian-latest:
image: debian
stage: recompile-and-test
script:
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++ && apt-get -y install python3
- echo "installing topaz"
- git clone https://github.com/ajm/topaz
- cd topaz/src
......@@ -153,34 +153,33 @@ debian-latest:
- make all
- make test
fedora-latest:
image: fedora
stage: test-precompiled-bins
script:
- yum -y groupinstall "Development Tools"
- yum -y install gcc-c++
- yum -y install cmake
- yum -y install make
- yum -y install tar
- yum -y install which
- yum -y install wget
- yum -y install libstdc++-static
- yum -y install lapack-static
- yum -y install cpan
- yum -y install python
- yum -y install ncbi-blast+
- cpan Thread::Queue
- wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast*-x64-linux.tar.gz
- tar -xzvf ncbi-blast*-x64-linux.tar.gz
- cp ncbi-blast*/bin/blastp $HOME
- cp ncbi-blast*/bin/makeblastdb $HOME
- echo "installing diamond"
- wget http://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
- tar xzf diamond-linux64.tar.gz
- cp diamond $HOME
- export PATH="$PATH:$HOME"
- echo "start proteinortho tests"
- make test
#fedora-latest:
# image: fedora
# stage: test-precompiled-bins
# script:
# - yum -y groupinstall "Development Tools"
# - yum -y install gcc-c++
# - yum -y install cmake
# - yum -y install make
# - yum -y install tar
# - yum -y install which
# - yum -y install wget
# - yum -y install libstdc++-static
# - yum -y install lapack-static
# - yum -y install cpan
# - yum -y install python
# - yum -y install ncbi-blast+
# - cpan Thread::Queue
# - wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast*-x64-linux.tar.gz
# - tar -xzvf ncbi-blast*-x64-linux.tar.gz
# - cp ncbi-blast*/bin/blastp $HOME
# - cp ncbi-blast*/bin/makeblastdb $HOME
# - echo "installing diamond"
# - wget http://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
# - tar xzf diamond-linux64.tar.gz
# - cp diamond $HOME
# - export PATH="$PATH:$HOME"
# - echo "start proteinortho tests"
centos-latest:
image: centos
......@@ -193,6 +192,7 @@ centos-latest:
- yum -y install tar
- yum -y install which
- yum -y install wget
- yum -y install gcc-gfortran
- wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast*-x64-linux.tar.gz
- tar -xzvf ncbi-blast*-x64-linux.tar.gz
- cp ncbi-blast*/bin/blastp $HOME
......@@ -205,7 +205,6 @@ centos-latest:
- echo "start proteinortho tests"
- make clean
- make
- make test
code_quality:
image: docker:stable
......
#!/usr/bin/python3
#!/usr/bin/env python
from sys import stderr, exit, argv, maxsize
from copy import deepcopy
......@@ -8,9 +8,6 @@ from random import randint
from math import ceil
import logging as log
ALPHA = 1
class BothStrands:
def __eq__(self, x):
......@@ -59,9 +56,9 @@ class Run:
return len(self.weight)
def __str__(self):
return 'G1:%s-%s G2:%s-%s %s (%.5f)' % (self.startG1, self.endG1,
return 'G1:%s-%s G2:%s-%s %s' % (self.startG1, self.endG1,
self.startG2, self.endG2,
self.direction, self.getWeight(ALPHA))
self.direction)
def readDistsAndOrder(dist_file, edgeThreshold):
......@@ -133,11 +130,11 @@ def sort_genome(chrom_pos):
return telomeres, g
def insertIntoRunList(runs, runList):
keys = [x.getWeight(ALPHA) for x in runList]
def insertIntoRunList(runs, runList, alpha):
keys = [x.getWeight(alpha) for x in runList]
for run in runs:
i = bisect(keys, run.getWeight(ALPHA))
keys.insert(i, run.getWeight(ALPHA))
i = bisect(keys, run.getWeight(alpha))
keys.insert(i, run.getWeight(alpha))
runList.insert(i, run)
......@@ -363,7 +360,7 @@ def replaceByNew(g1_runs, g2_runs, i, j, r_old, r_new):
break
def doMatching(g1, g2, g1_runs, g2_runs, m, runList):
def doMatching(g1, g2, g1_runs, g2_runs, m, runList, alpha):
g1pos = dict(zip(g1, range(len(g1))))
g2pos = dict(zip(g2, range(len(g2))))
newRuns = set()
......@@ -455,6 +452,7 @@ def doMatching(g1, g2, g1_runs, g2_runs, m, runList):
g1_runs[g1pos[r.endG1]].remove(r)
r.startG2 = g2[j]
log.info('Divided overlapping run in %s and %s' % (r_new, r))
replaceByNew(g1_runs, g2_runs, g1pos[r_new.startG1],
g2pos[r_new.startG2], r, r_new)
newRuns.add(r_new)
......@@ -480,10 +478,10 @@ def doMatching(g1, g2, g1_runs, g2_runs, m, runList):
newRuns.add(r)
elif r in newRuns:
newRuns.remove(r)
insertIntoRunList(newRuns, runList)
insertIntoRunList(newRuns, runList, alpha)
def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched, alpha):
g1pos = dict(zip(g1, range(len(g1))))
g2pos = dict(zip(g2, range(len(g2))))
......@@ -504,9 +502,9 @@ def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
# points (mod_g1) can be processed.
for r1, r2 in product(sorted(g1_runs[i].difference(g1_runs[i+1]),
key=lambda x: x.getWeight(ALPHA), reverse=True),
key=lambda x: x.getWeight(alpha), reverse=True),
sorted(g1_runs[i+1].difference(g1_runs[i]),
key=lambda x: x.getWeight(ALPHA), reverse=True)):
key=lambda x: x.getWeight(alpha), reverse=True)):
if r1.endG1 == g1[i] and r2.startG1 == g1[i+1] and \
r1.direction == r2.direction and \
r1.endG1[0] == r2.startG1[0] and \
......@@ -538,7 +536,7 @@ def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
if r1 in alreadyMatched:
alreadyMatched.remove(r1)
# redo matching in case r1 xor r2 were not in matching before
insertIntoRunList(newRuns, runList)
insertIntoRunList(newRuns, runList, alpha)
return r2, set(mod_g1[x+1:])
if r2 in alreadyMatched:
# actually, both are already matched
......@@ -548,7 +546,7 @@ def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
# none is matched
newRuns.add(r2)
insertIntoRunList(newRuns, runList)
insertIntoRunList(newRuns, runList, alpha)
return None, []
......@@ -567,7 +565,7 @@ def removeSingleGenes(genome, genome_runs):
return del_res, mod_res
def findRandomRunSequence(g1, g2, dists, topXperCent):
def findRandomRunSequence(g1, g2, dists, topXperCent, alpha):
g2dists = dict()
for g1i, x in list(dists.items()):
for g2j, d in list(x.items()):
......@@ -585,11 +583,12 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
g1_runs, g2_runs, runs = getAllRuns(g1, g2, dists)
log.info('Found %s runs.' % len(runs))
# sort
runList = sorted(runs, key=lambda x: x.getWeight(ALPHA))
runList = sorted(runs, key=lambda x: x.getWeight(alpha))
res = set()
while runList:
noOfAdjacencies = len([x for x in runList if x.getWeight(ALPHA) and x.getWeight(ALPHA) or 0])
noOfAdjacencies = len([x for x in runList if x.getWeight(alpha) and
x.getWeight(alpha) or 0])
if noOfAdjacencies:
randPos = randint(1, ceil(noOfAdjacencies * topXperCent))
else:
......@@ -601,7 +600,7 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
while mx:
res.add(mx)
# update run list
doMatching(g1, g2, g1_runs, g2_runs, mx, runList)
doMatching(g1, g2, g1_runs, g2_runs, mx, runList, alpha)
del_g1, new_mod_g1 = removeSingleGenes(g1, g1_runs)
if del_g1:
log.info('Zombie genes removed from G1: %s' % ', '.join(map(str, del_g1)))
......@@ -631,7 +630,7 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
mod_g1.add(g1[g1pos[g1i]-1])
# merge runs
mx, mod_g1 = mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs,
runList, res)
runList, res, alpha)
if res:
log.info('Matching finished. Longest run size is %s.' % (max(list(map(len, res)))))
......@@ -642,7 +641,7 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
def repeatMatching(g1, g2, g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching,
minCsSize, topXperCent):
minCsSize, topXperCent, alpha):
g1_mod_res = g1_mod
g2_mod_res = g2_mod
......@@ -680,7 +679,7 @@ def repeatMatching(g1, g2, g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching,
(noReps-repMatching+2))
break
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1, g2, dists, topXperCent)
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1, g2, dists, topXperCent, alpha)
checkMatching(g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns, dists)
log.info('Obtained %s adjacencies in matching of size %s from iteration %s.' %
......@@ -793,7 +792,6 @@ if __name__ == '__main__':
cli.add_argument('-a', '--alpha', type=float, metavar='F', default=0.5)
cli.add_argument('dist_file')
args = cli.parse_args()
AlPHA = args.alpha
repMatching = args.repeat_matching
if repMatching > 0:
repMatching -= 1
......@@ -802,14 +800,16 @@ if __name__ == '__main__':
format="%(levelname)s\t%(asctime)s\t++ %(message)s")
multiChrom, g1, g2, dists = readDistsAndOrder(args.dist_file, args.edge_weight_threshold)
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1, g2, dists, args.greedy)
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1,
g2, dists, args.greedy, args.alpha)
checkMatching(g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns, dists)
# calculate number of breakpoints only from result of the first matching
bkp = len(selectedRuns) - 1
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns_new = repeatMatching(g1, g2,
g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching, args.min_cs_size, args.greedy)
g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching,
args.min_cs_size, args.greedy, args.alpha)
selectedRuns.update(selectedRuns_new)
......@@ -833,3 +833,4 @@ if __name__ == '__main__':
print('#bkp\t#edg\tadj\tedg')
print('%s\t%s\t%.6f\t%.6f' % (bkp, edg, wAdj, wEdg))
This diff is collapsed.
#!/usr/bin/env perl
#pk
##########################################################################################
# This file is part of proteinortho.
# (C) 2009 Marcus Lechner
#
# proteinortho is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2, or (at your
# option) any later version.
#
# proteinortho is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with proteinortho; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
##########################################################################################
##########################################################################################
# About
##########################################################################################
#
# @author Paul Klemm
# @email klemmp@staff.uni-marburg.de
# @company Bioinformatics, University of Leipzig
# @version 1
# @date 11-12-2019
#
##########################################################################################
use POSIX;

# Help/usage text. Printed verbatim for -h/--help and prepended to error
# output on bad invocation. NOTE: this is a runtime string literal — its
# exact bytes are user-visible output.
my $usage = "
proteinortho_summary.pl produces a summary on species level.
SYNOPSIS
proteinortho_summary.pl (options) GRAPH (GRAPH2)
GRAPH Path to the *.proteinortho-graph or *.blast-graph file generated by proteinortho.
GRAPH2 (optional) If you provide a blast-graph AND a proteinortho-graph, the difference is calculated (GRAPH - GRAPH2)
Note: The *.proteinortho.tsv file does not work here (use the proteinortho-graph file)
OPTIONS
-format,-f enables the table formatting instead of the plain csv output.
";
# ---- command-line parsing --------------------------------------------------
# Flags may be written -x or --x; the first two non-flag arguments are taken
# as the input graph files (in order).
my $graphfilenameA="";
my $graphfilenameB="";
my $notableformat=1; # 1 = plain csv output (default), 0 = formatted table (-f)
foreach my $arg (@ARGV){
	if($arg =~ m/^--?(help|h)$/){
		$help=1;
	}elsif($arg =~ m/^--?(format|f)$/){
		$notableformat=0;
	}elsif($arg =~ m/^-.+/){
		# unknown option: show usage, report on STDERR, abort
		print $usage;
		print STDERR "ERROR: invalid option ".$arg."!\n\n";
		exit(1);
	}elsif($graphfilenameA eq ""){
		$graphfilenameA = $arg;
	}elsif($graphfilenameB eq ""){
		$graphfilenameB = $arg;
	}
}
if($help){
	print $usage;
	exit(0);
}
# The first graph file is mandatory.
my $fail="";
if($graphfilenameA eq ""){
	$fail.="ERROR: GRAPH not provided!\n";
}
if($fail ne ""){
	print $usage.$fail;
	exit(1);
}
# ---- shared state for the table printer (processLine) ----------------------
# Terminal width; fall back to 160 columns when `tput cols` yields nothing
# usable (e.g. output is not a terminal).
our $maxNumOfCharsInOneLine = `tput cols`;
chomp($maxNumOfCharsInOneLine);
if($maxNumOfCharsInOneLine < 10){ $maxNumOfCharsInOneLine = 160; }
our $split_delim = "[:\t]"; # cell separator pattern: ':' or tab
our @spl_header;            # formatted cells of the current table header
our @spl;                   # cells of the line currently being formatted
our $last_isHeaderLine = 0; # whether the previously printed line was a header
$isHeaderLine = 1;          # the first line of each table is its header
our $noheader = 0;
# Accumulators: symmetric edge counts per species pair, and the derived
# squared (2-path) matrix.
my %species_matrix;
my %species_matrix_pow2;
my $currentSpeciesA;
my $currentSpeciesB;
# ---- read the first graph file ---------------------------------------------
# The graph file alternates species-pair header lines ("# fileA<TAB>fileB")
# with edge lines (6 tab-separated columns). Every edge line increments the
# symmetric edge counter for the species pair announced by the most recent
# header line.
open(my $FH,"<",$graphfilenameA) || die $!;
while(<$FH>){
	if($_ eq ""){next;}
	chomp;
	# skip the two known column-description comment lines
	if($_ eq "# file_a file_b" || $_ eq "# a b evalue_ab bitscore_ab evalue_ba bitscore_ba"){next;}
	my @arr=split("\t",$_);
	if(substr($_,0,1) eq "#" && scalar @arr == 2){
		# species-pair header: the following edges belong to these two files
		$currentSpeciesA=$arr[0];
		$currentSpeciesB=$arr[1];
		$currentSpeciesA=~s/^# ?//g;
	}elsif(substr($_,0,1) ne "#" && scalar @arr == 6){
		# edge line: count it symmetrically for both species
		if(!exists $species_matrix{$currentSpeciesA}{$currentSpeciesB}){
			$species_matrix{$currentSpeciesA}{$currentSpeciesB} = 1;
			$species_matrix{$currentSpeciesB}{$currentSpeciesA} = 1;
			$species_matrix_pow2{$currentSpeciesA}{$currentSpeciesB} = 0;
			$species_matrix_pow2{$currentSpeciesB}{$currentSpeciesA} = 0;
		}else{
			$species_matrix{$currentSpeciesA}{$currentSpeciesB} ++;
			$species_matrix{$currentSpeciesB}{$currentSpeciesA} ++;
		}
	}elsif( !(substr($_,0,1) eq "#" && scalar @arr == 4) ){
		# anything else (except 4-column comment lines) is not a valid graph file
		# FIX: corrected misspelled error message ("fromat" -> "format")
		print STDERR "[STDERR] Error: wrong format... Please make sure you only provide *.blast-graph or *.proteinortho-graph files as input...\n";die;
	}
}
close($FH);
# ---- optionally subtract a second graph ------------------------------------
# When a second graph file is supplied, its edges decrement the counters,
# so the reported numbers are the difference GRAPH - GRAPH2.
if($graphfilenameB ne ""){
	open(my $FHB,"<",$graphfilenameB) || die $!;
	while(my $line = <$FHB>){
		next if $line eq "";
		chomp $line;
		my @cols = split("\t",$line);
		if(substr($line,0,1) eq "#" && scalar @cols == 2){
			# species-pair header line
			($currentSpeciesA,$currentSpeciesB) = @cols;
			$currentSpeciesA =~ s/^# ?//g;
		}elsif(substr($line,0,1) ne "#"){
			# edge line: decrement (or create) the symmetric pair counter
			if(!exists $species_matrix{$currentSpeciesA}{$currentSpeciesB}){
				$species_matrix{$currentSpeciesA}{$currentSpeciesB} = 1;
				$species_matrix{$currentSpeciesB}{$currentSpeciesA} = 1;
				$species_matrix_pow2{$currentSpeciesA}{$currentSpeciesB} = 0;
				$species_matrix_pow2{$currentSpeciesB}{$currentSpeciesA} = 0;
			}else{
				$species_matrix{$currentSpeciesA}{$currentSpeciesB} --;
				$species_matrix{$currentSpeciesB}{$currentSpeciesA} --;
			}
		}
	}
	close($FHB);
}
# ---- section 1: adjacency matrix -------------------------------------------
# Number of edges between every pair of species. With more than 10 species
# and table formatting enabled, columns are labelled by index "(i)" instead
# of the full file name.
my @keys=sort keys %species_matrix;
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
print STDERR "\n";
my $ret="# The adjacency matrix, the number of edges between 2 species\n";
processLine($ret);
my $useIndexLabels = (scalar @keys > 10 && !$notableformat);
$ret="# file\t";
for my $i (0 .. $#keys){
	$ret .= $useIndexLabels ? "($i)\t" : $keys[$i]."\t";
}
$ret .= "\n";
processLine($ret);
for my $i (0 .. $#keys){
	$ret = $useIndexLabels ? $keys[$i]."($i)\t" : $keys[$i]."\t";
	my @cells;
	for my $j (0 .. $#keys){
		# the diagonal (self edges) is forced to zero
		$species_matrix{$keys[$i]}{$keys[$j]} = 0 if $i == $j;
		push(@cells, $species_matrix{$keys[$i]}{$keys[$j]});
	}
	$ret .= join("\t",@cells)."\n";
	processLine($ret);
}
# ---- section 2: average number of edges per species ------------------------
# Two-column table: species name and its mean edge count over all species.
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
$maxNumOfCharsInOneLine=`tput cols`;
chomp($maxNumOfCharsInOneLine);$maxNumOfCharsInOneLine/=2; # only 2 columns -> use half the width
if($maxNumOfCharsInOneLine<10){$maxNumOfCharsInOneLine=160;}
print STDERR "\n";
$ret= "# file\taverage number of edges\n";
processLine($ret);
for(my $i = 0 ; $i < scalar @keys; $i++){
	$ret= $keys[$i]."\t";
	my $sum=0;
	for(my $j = 0 ; $j < scalar @keys; $j++){
		$sum+=$species_matrix{$keys[$i]}{$keys[$j]};
	}
	$ret.= $sum/scalar @keys;
	# FIX: removed stray copy-paste line `if($j<scalar @keys -1){$ret.= "\t";}`
	# here: $j is out of scope after the inner loop (undef without strict),
	# so the test always passed and appended a spurious trailing tab.
	$ret.= "\n";
	processLine($ret);
}
# ---- section 3: the 2-path matrix (adjacency matrix squared) ---------------
# Entry (i,j) is the number of paths of length 2 between species i and j,
# i.e. sum over k of edges(i,k)*edges(k,j).
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
$maxNumOfCharsInOneLine=`tput cols`;
chomp($maxNumOfCharsInOneLine);
if($maxNumOfCharsInOneLine<10){$maxNumOfCharsInOneLine=160;}
print STDERR "\n";
$ret= "# The 2-path matrix, the number of paths between 2 species of length 2\n";
processLine($ret);
$ret= "# file\t";
for(my $i = 0 ; $i < scalar @keys; $i++){
	if(scalar @keys>10 && !$notableformat){$ret.= "($i)\t";}
	else{$ret.=$keys[$i]."\t";}
}
$ret.= "\n";
processLine($ret);
for(my $i = 0 ; $i < scalar @keys; $i++){
	# FIX: label rows like the adjacency-matrix section above (index suffix
	# only for wide formatted tables) instead of always appending "($i)".
	if(scalar @keys >10 && !$notableformat){
		$ret=$keys[$i]."($i)\t";
	}else{
		$ret=$keys[$i]."\t";
	}
	for(my $j = 0 ; $j < scalar @keys; $j++){
		# FIX: was `if($i<$i+1)` (always true), which re-accumulated the
		# already-mirrored lower triangle and doubled those entries.
		# Compute each pair once (upper triangle incl. diagonal) and
		# mirror the result.
		if($i<=$j){
			for(my $k = 0 ; $k < scalar @keys; $k++){
				$species_matrix_pow2{$keys[$i]}{$keys[$j]}+=$species_matrix{$keys[$i]}{$keys[$k]}*$species_matrix{$keys[$k]}{$keys[$j]};
			}
			$species_matrix_pow2{$keys[$j]}{$keys[$i]}=$species_matrix_pow2{$keys[$i]}{$keys[$j]};
		}
		$ret.= $species_matrix_pow2{$keys[$i]}{$keys[$j]};
		if($j<scalar @keys -1){$ret.= "\t";}
	}
	$ret.= "\n";
	processLine($ret);
}
# ---- section 4: average number of 2-paths per species ----------------------
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
$maxNumOfCharsInOneLine=`tput cols`;
chomp($maxNumOfCharsInOneLine);
$maxNumOfCharsInOneLine/=2; # two-column table -> use half the terminal width
if($maxNumOfCharsInOneLine<10){$maxNumOfCharsInOneLine=160;}
print STDERR "\n";
processLine("# file\taverage number of 2-paths\n");
my $numSpecies = scalar @keys;
for my $idx (0 .. $#keys){
	my $rowTotal = 0;
	$rowTotal += $species_matrix_pow2{$keys[$idx]}{$_} for @keys;
	processLine($keys[$idx]."($idx)\t".($rowTotal/$numSpecies)."\n");
}
# processLine(LINE)
# Prints one report line. In plain mode ($notableformat == 1) the line is
# echoed verbatim. In table mode the line is split into cells (pattern
# $split_delim: ':' or tab) and each cell is padded/truncated so the row
# fits the width in $maxNumOfCharsInOneLine; cells are joined with '|' and
# header rows are framed with '+---+' rule lines.
# Operates on file-level globals: $notableformat, $split_delim,
# $maxNumOfCharsInOneLine, @spl, @spl_header, $isHeaderLine,
# $last_isHeaderLine. Returns 1 on the early exits; otherwise the return
# value is unused by callers.
sub processLine{
	$_=shift;chomp;
	# Plain csv mode: print unchanged and stop.
	if($notableformat == 1){print "$_\n";return 1;}
	if(length($_)<1){return 1;}
	@spl=split($split_delim,$_);
	# A line that does not split into at least two cells is printed as-is.
	if(scalar @spl <2){print "$_\n";return 1;}
	@spl_backup=@spl;
	# A column count different from the current header starts a new table.
	if(scalar @spl_header > 0 && scalar @spl != scalar @spl_header){$isHeaderLine=1;}
	if(scalar @spl < 2 ){return 1;}
	# Strip a leading "# " comment marker from the first cell.
	if(substr($spl[0],0,1) eq "#"){$spl[0]=~s/^# ?//g;}
	# Widen the table if needed so every column gets at least one character.
	if(scalar(@spl)*2-1>$maxNumOfCharsInOneLine){$maxNumOfCharsInOneLine= -1+2*scalar @spl;print STDERR "Corrected minimum table width: -w=$maxNumOfCharsInOneLine such that at least 1 character per column is displayed.\n";}
	$sumOfCharsLine=length(join("",@spl));
	if($isHeaderLine){ # is a header row
		# Shrink: repeatedly shorten the widest cell(s) until the row fits,
		# marking truncated cells with a trailing "...".
		while(($sumOfCharsLine + scalar @spl-1) > $maxNumOfCharsInOneLine){ # shave of chars from widest cell
			$max_l=0;
			@max_l_is;
			for (my $i = 0; $i < scalar @spl; $i++) {
				if($max_l < length $spl[$i]){$max_l=length $spl[$i];@max_l_is=();push(@max_l_is,$i)}elsif($max_l == length $spl[$i]){push(@max_l_is,$i)}
			}
			for (my $i = 0; $i < scalar @max_l_is; $i++) {
				if(length $spl[$max_l_is[$i]] > 8 && substr($spl[$max_l_is[$i]],-3) ne "..." ){
					$spl[$max_l_is[$i]]=substr($spl[$max_l_is[$i]],0,length($spl[$max_l_is[$i]])-3-1)."..."
				}
				else{
					# cell too short for "...": shave one char off the original text
					$spl[$max_l_is[$i]]=substr($spl_backup[$max_l_is[$i]],0,length($spl[$max_l_is[$i]])-1)
				}
			}
			$sumOfCharsLine=length(join("",@spl));
		}
		# Grow: pad the narrowest cell(s), alternating sides, until the row
		# spans the full table width.
		while(($sumOfCharsLine + scalar @spl-1) < $maxNumOfCharsInOneLine ){ # add of chars to smallest cell
			$min_l=$maxNumOfCharsInOneLine*10;
			@min_l_is;
			for (my $i = 0; $i < scalar @spl; $i++) {
				if($min_l > length $spl[$i]){$min_l=length $spl[$i];@min_l_is=();push(@min_l_is,$i)}
			}
			for (my $i = 0; $i < scalar @min_l_is; $i++) {
				$leftPad=0;
				$rightPad=0;
				if($spl[$min_l_is[$i]]=~m/( +)$/){$rightPad=length $1}
				if($spl[$min_l_is[$i]]=~m/^( +)/){$leftPad=length $1}
				if( $leftPad < $rightPad ){
					$spl[$min_l_is[$i]]=" ".$spl[$min_l_is[$i]];
				}else{
					$spl[$min_l_is[$i]]=$spl[$min_l_is[$i]]." ";
				}
			}
			$sumOfCharsLine=length(join("",@spl));
		}
		# Remember the header geometry; data rows are fitted to it below.
		@spl_header=@spl;
	}else{ # is not headerline -> do the same as in headerline
		# Drop surplus cells, then pad/trim each cell to its header width.
		while(scalar @spl > scalar @spl_header){pop @spl;}
		for (my $i = 0; $i < scalar @spl; $i++) {
			while(length $spl[$i]< length $spl_header[$i]){ # add pads
				$leftPad=0;
				$rightPad=0;
				if($spl[$i]=~m/( +)$/){$rightPad=length $1}
				if($spl[$i]=~m/^( +)/){$leftPad=length $1}
				if( $leftPad < $rightPad ){
					$spl[$i]=" ".$spl[$i];
				}else{
					$spl[$i]=$spl[$i]." ";
				}
			}
			while(length $spl[$i]>length $spl_header[$i]){ # trim
				if(length $spl[$i] > 5 && substr($spl[$i],-3) ne "..." ){
					$spl[$i]=substr($spl[$i],0,length($spl[$i])-3-1)."..."
				}
				else{
					# too short for "...": mark the truncation with a '#'
					$spl[$i]=substr($spl_backup[$i],0,length($spl[$i])-2)."#"
				}
			}
		}
	}
	# Frame header rows with '+----+' rule lines above and below.
	if($isHeaderLine && !$last_isHeaderLine ){$tmp=join("|",@spl);$tmp=~s/\|/+/g;$tmp=~s/[^+]/-/g; print "$tmp\n";}
	print join("|",@spl);
	if($isHeaderLine ){print "\n";$tmp=join("|",@spl);$tmp=~s/\|/+/g;$tmp=~s/[^+]/-/g; print "$tmp";}
	print "\n";
	$last_isHeaderLine=$isHeaderLine;
	$isHeaderLine=0;
}