Skip to content
Commits on Source (6)
......@@ -218,9 +218,18 @@
proteinortho6.pl replaced chomp with s/[\r\n]+$//
proteinortho_clustering.cpp fix bug that only uses lapack if -pld is set, regardless of the value.
11. Sept (uid: 3813)
updated shebang of ffadj such that python2.7 is used directly (ffadj fails if called with higher version of python)
-p=blastp is now alias of blastp+ and legacy blast is now -p=blastp_legacy (blastn is equivalent)
Makefile: static now includes -lquadmath
updated shebang of ffadj such that python2.7 is used directly (ffadj fails if called with higher version of python)
-p=blastp is now alias of blastp+ and legacy blast is now -p=blastp_legacy (blastn is equivalent)
Makefile: static now includes -lquadmath
25. Sept (uid: 3899)
synteny update to python3 (but the code looks fishy, the -synteny option now gets a deprecated warning)
proteinortho now only print html for <10 files automatically and otherwise only gives the option
synteny update to python3 (but the code looks fishy, the -synteny option now gets a deprecated warning)
proteinortho now only print html for <10 files automatically and otherwise only gives the option
4. Nov (uid: 4020)
FIXED: sometimes the python3 version produces one additional edge (global definition of ALPHA). Special thanks for this update go to Daniel Doerr for fixing this.
25. Nov (uid: 4030)
added proteinortho_history
the synteny option ffadj is now not deprecated anymore
10. Dec (uid: 4196)
improved proteinortho_history
removed the new diamond spam
+ added proteinortho_summary.pl for a summary of proteinortho-graph on species level.
......@@ -77,7 +77,7 @@ endif
dir_guard=@if [ ! -d $(BUILDDIR) ]; then echo "Creating build directory ..."; mkdir -p $(BUILDDIR); fi
.PHONY: all
all:$(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_grab_proteins.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_treeBuilderCore
all:$(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_grab_proteins.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_summary.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_history.pl $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_treeBuilderCore
@echo "[100%] $(GREEN)Everything is compiled with no errors.$(NC)"
$(BUILDDIR)/proteinortho_extract_from_graph.pl: src/proteinortho_extract_from_graph.pl
......@@ -120,6 +120,14 @@ $(BUILDDIR)/proteinortho_ffadj_mcs.py: src/proteinortho_ffadj_mcs.py
$(dir_guard)
@cp $< $@
$(BUILDDIR)/proteinortho_history.pl: src/proteinortho_history.pl
$(dir_guard)
@cp $< $@
$(BUILDDIR)/proteinortho_summary.pl: src/proteinortho_summary.pl
$(dir_guard)
@cp $< $@
echoENV:
@echo -n "CC = "
@echo $(CC)
......@@ -224,7 +232,7 @@ else
endif
.PHONY: install
install: proteinortho6.pl proteinortho $(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_treeBuilderCore $(BUILDDIR)/proteinortho_grab_proteins.pl
# Prerequisites for 'install': every script/binary that gets copied to INSTALLDIR.
# Note: proteinortho_history.pl was listed twice; the duplicate is removed here
# (GNU make's $^ deduplicates anyway, but the redundancy was confusing).
install: proteinortho6.pl proteinortho $(BUILDDIR)/proteinortho_extract_from_graph.pl $(BUILDDIR)/proteinortho_formatUsearch.pl $(BUILDDIR)/proteinortho_compareProteinorthoGraphs.pl $(BUILDDIR)/proteinortho_do_mcl.pl $(BUILDDIR)/proteinortho2html.pl $(BUILDDIR)/proteinortho2xml.pl $(BUILDDIR)/proteinortho_clustering $(BUILDDIR)/proteinortho_singletons.pl $(BUILDDIR)/proteinortho_ffadj_mcs.py $(BUILDDIR)/proteinortho2tree.pl $(BUILDDIR)/proteinortho_history.pl $(BUILDDIR)/proteinortho_cleanupblastgraph $(BUILDDIR)/proteinortho_graphMinusRemovegraph $(BUILDDIR)/proteinortho_treeBuilderCore $(BUILDDIR)/proteinortho_grab_proteins.pl $(BUILDDIR)/proteinortho_summary.pl
@echo "INSTALLING everything to $(INSTALLDIR)"
@install -v $^ $(INSTALLDIR);
@echo "$(GREEN)Everything installed successfully to $(INSTALLDIR).$(NC)"
......@@ -246,15 +254,15 @@ test_step2: proteinortho6.pl
echo "$(GREEN)passed$(NC)"; \
fi
# @echo -n " [2/12] -p=blastp+ synteny (PoFF) test: "
# @if [ "$(shell which blastp)" = "" ]; then\
# echo "$(ORANGE)blastp missing, skipping...$(NC)"; \
# else \
# ./proteinortho6.pl -silent -force -project=test_synteny -synteny -singles -p=blastp+ test/*.faa; \
# set -e ; ./src/chk_test.pl test_synteny.proteinortho.tsv; \
# set -e ; ./src/chk_test.pl test_synteny.poff.tsv; \
# echo "$(GREEN)passed$(NC)"; \
# fi
@echo -n " [2/12] -p=blastp+ synteny (PoFF) test: "
@if [ "$(shell which blastp)" = "" ]; then\
echo "$(ORANGE)blastp missing, skipping...$(NC)"; \
else \
./proteinortho6.pl -silent -force -project=test_synteny -synteny -singles -p=blastp+ test/*.faa; \
set -e ; ./src/chk_test.pl test_synteny.proteinortho.tsv; \
set -e ; ./src/chk_test.pl test_synteny.poff.tsv; \
echo "$(GREEN)passed$(NC)"; \
fi
@echo -n " [3/12] -p=diamond test: "
@if [ "$(shell which diamond)" = "" ]; then\
......
# Proteinortho
Proteinortho is a tool to detect orthologous genes within different species. For doing so, it compares similarities of given gene sequences and clusters them to find significant groups. The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once. Details can be found in <a href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-124">Lechner et al., BMC Bioinformatics. 2011 Apr 28;12:124.</a>
To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF (doi:10.1371/journal.pone.0105015), is already built into Proteinortho. The general workflow of proteinortho is depicted [![here](https://www.dropbox.com/s/7ubl1ginn3fmf8k/proteinortho_workflow.jpg?dl=0)].
Proteinortho is a tool to detect orthologous genes within different species.
For doing so, it compares similarities of given gene sequences and clusters them to find significant groups.
The algorithm was designed to handle large-scale data and can be applied to hundreds of species at once.
Details can be found in ([doi:10.1186/1471-2105-12-124](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-124)).
To enhance the prediction accuracy, the relative order of genes (synteny) can be used as an additional feature for the discrimination of orthologs. The corresponding extension, namely PoFF ([doi:10.1371/journal.pone.0105015](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0105015)), is already built into Proteinortho. The general workflow of proteinortho:
# New Features of Proteinortho Version 6!
<img src="https://www.uni-marburg.de/de/fb16/ipc/ag-lechner/graph.png/@@images/image/unimr_lead_image_sd" alt="proteinortho.workflow.png" height="250">
Input: multiple fasta files (orange box) with many proteins/genes (circles).
First an initial all vs. all comparison between all proteins of all species is performed to determine protein similarities (upper right image).
The second stage is the clustering of similar genes to meaningful co-orthologous groups (lower right image). Connected components within this graph can be considered as putative co-orthologous groups in theory and are returned in the output (lower left image).
# New Features of Proteinortho Version 6
- Implementation of various Blast alternatives for step (for -step=2 the -p= options): Diamond, MMseqs2, Last, Topaz, Rapsearch2, Blat, Ublast and Usearch
- Multithreading support for the clustering step (-step=3)
- Integration of the LAPACK Fortran Library for a faster clustering step (-step=3)
- Integration of the bitscore weights in the connectivity calculation for more data-dependent splits (-step=3)
- Continuous Integration [![pipeline status](https://gitlab.com/paulklemm_PHD/proteinortho/badges/master/pipeline.svg)](https://gitlab.com/paulklemm_PHD/proteinortho/pipelines)
<details>
<summary>Minor features: (Click to expand)</summary>
<summary>Minor new features: (Click to expand)</summary>
- Output now supports OrthoXML (-xml) and HTML.
- [proteinortho_history.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs) a new tool for tracking proteins (or pairs of proteins) in the workflow of proteinortho.
- [proteinortho_summary.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs)
- Various test routines (make test).
- New heuristics for connectivity calculation (-step=3).
</details>
<details>
<summary>6.0.12: (Click to expand)</summary>
- removed the diamond spam
- improved [proteinortho_history.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs) : now the program is "smarter" in detecting files automatically
- added [proteinortho_summary.pl](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs) : a tool for summarizing the proteinortho-graph on species level. With the output it is easy to identify weak connected species.
</details>
# Continuous Integration
supports
The badge
[![pipeline status](https://gitlab.com/paulklemm_PHD/proteinortho/badges/master/pipeline.svg)](https://gitlab.com/paulklemm_PHD/proteinortho/commits/master) indicates the current status of the continuous integration (CI) among various platforms (ubuntu, centos, debian, fedora) and GNU c++ versions (5, 6, latest)
The whole git repository gets deployed on a clean docker image (gcc:latest,gcc:5,ubuntu:latest,fedora:latest,debian:latest,centos:latest) and compiled (make all) and tested (make test). The badge is green only if all tests pass. For more information see [Continuous Integration (proteinortho wiki)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Continuous%20Integration).
A more detailed list of all changes: [CHANGELOG](https://gitlab.com/paulklemm_PHD/proteinortho/blob/master/CHANGELOG)
# Table of Contents
1. [Installation](#installation)
......@@ -30,12 +48,18 @@ The whole git repository gets deployed on a clean docker imager (gcc:latest,gcc:
4. [PoFF synteny extension](#poff)
5. [Output description](#output)
6. [Examples](#examples)
7. [Error Codes and Troubleshooting](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error-Codes) <- look here if you cannot compile/run (proteinortho wiki)
8. [Large compute jobs example](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Large-compute-jobs-(the--jobs-option)) (proteinortho wiki)
9. [Biological example](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/biological-example) (proteinortho wiki)
Bug reports: See chapter 7. or send a mail to incoming+paulklemm-phd-proteinortho-7278443-issue-@incoming.gitlab.com (Please include the 'Parameter-vector' that is printed for all errors)
You can also send a mail to lechner@staff.uni-marburg.de.
# [Proteinortho-Wiki](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/) Table of Contents
1. [Tools and additional programs](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools%20and%20additional%20programs)
2. [Error Codes and Troubleshooting](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error-Codes) <- look here if you cannot compile/run proteinortho
3. [Large compute jobs example](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Large-compute-jobs-(the--jobs-option))
4. [FAQ](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/FAQ) <br>
[(...)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/)
Bug reports: Please have a look at chapter [2.](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error-Codes) first or send a mail to incoming+paulklemm-phd-proteinortho-7278443-issue-@incoming.gitlab.com. (please include the 'parameter-vector' that is printed for all errors)
You can also send mails to lechner@staff.uni-marburg.de. Any suggestions, feedback and comments are welcome!
# Installation
......@@ -88,7 +112,7 @@ Afterwards the deb package can be installed with `sudo dpkg -i proteinortho*deb`
<br>
#### 1. Prerequisites
#### Prerequisites for compiling proteinortho from source
Proteinortho uses standard software which is often installed already or is part of the package repositories and can thus easily be installed. The sources come with a precompiled version of Proteinortho for 64bit Linux.
......@@ -126,7 +150,7 @@ Proteinortho uses standard software which is often installed already or is part
<br>
#### 2. Building and installing proteinortho from source (linux and osx)
#### Building and installing proteinortho from source (linux and osx)
Here you can use a working lapack library, check this with 'dpkg --get-selections | grep lapack'. Install lapack e.g. with 'apt-get install libatlas3-base' or liblapack3.
......@@ -179,7 +203,7 @@ OR(!) specify the new g++ in 'make CXX=/usr/local/bin/g++-7 all'
[100%] Everything is compiled with no errors.
</pre>
The compilation of proteinortho_clustering has multiple fall-back routines. If everything fails please look here [Troubleshooting (proteinortho wiki)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error%20Codes).
The compilation of proteinortho\_clustering has multiple fall-back routines. If everything fails please look here [Troubleshooting (proteinortho wiki)](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Error%20Codes).
</details>
......@@ -214,21 +238,13 @@ If you have problems compiling/running the program go to [Troubleshooting (prote
<br>
# SYNOPSIS
> **proteinortho6.pl [options] \<fasta file(s)\>** (one fasta for each species, at least 2)
OR
> **proteinortho [options] \<fasta file(s)\>**
one fasta for each species; at least 2
# DESCRIPTION
**proteinortho** is a tool to detect orthologous genes within different
species. For doing so, it compares similarities of given gene sequences
and clusters them to find significant groups. The algorithm was designed
to handle large-scale data and can be applied to hundreds of species at
one. Details can be found in Lechner et al., BMC Bioinformatics. 2011 Apr
28;12:124. To enhance the prediction accuracy, the relative order of genes
(synteny) can be used as additional feature for the discrimination of
orthologs. The corresponding extension, namely PoFF (doi:10.1371/journal.pone.0105015), is already build in Proteinortho.
species.
Proteinortho assumes, that you have all your gene sequences in FASTA
format either represented as amino acids or as nucleotides. The source
......@@ -302,6 +318,7 @@ Open `proteinorthoHelper.html` in your favorite browser or visit [lechnerlab.de/
- diamond : Only for protein files! standard diamond procedure and for
genes/proteins of length >40 with the additional --sensitive flag
Warning: Please use version 0.9.29 or later to avoid this known bug: https://gitlab.com/paulklemm_PHD/proteinortho/issues/24
- lastn,lastp : lastal. -n : dna files, -p protein files (BLOSUM62
scoring matrix)!
......@@ -343,7 +360,6 @@ Open `proteinorthoHelper.html` in your favorite browser or visit [lechnerlab.de/
<br>
**Synteny options (optional, step 2)**
(This option is deprecated)
(output: <myproject>.ffadj-graph, <myproject>.poff.tsv (tab separated file)-graph)
<details>
......@@ -537,6 +553,9 @@ Open `proteinorthoHelper.html` in your favorite browser or visit [lechnerlab.de/
</details>
<br>
[myproject.proteinortho-graph.summary](https://gitlab.com/paulklemm_PHD/proteinortho/wikis/Tools-and-additional-programs#proteinortho-graphblast-graph-species-summary-table)
<br>
<details>
<summary> myproject.proteinortho.html (Click to expand)</summary>
The html version of the myproject.proteinortho.tsv file
......
proteinortho (6.0.12+dfsg-1) unstable; urgency=medium
* Team upload.
* New upstream version
* Standards-Version: 4.4.1
* debian/copyright: use spaces rather than tabs to start continuation
lines.
-- Steffen Moeller <moeller@debian.org> Fri, 13 Dec 2019 00:17:49 +0100
proteinortho (6.0.8+dfsg-1) unstable; urgency=medium
* New upstream version ported to Python3
......
......@@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 12),
ncbi-blast+,
liblapack-dev | libatlas-base-dev | liblapack.so,
diamond-aligner
Standards-Version: 4.4.0
Standards-Version: 4.4.1
Vcs-Browser: https://salsa.debian.org/med-team/proteinortho
Vcs-Git: https://salsa.debian.org/med-team/proteinortho.git
Homepage: https://gitlab.com/paulklemm_PHD/proteinortho
......@@ -16,10 +16,10 @@ Package: proteinortho
Architecture: any
Depends: ${shlibs:Depends},
${misc:Depends},
${python3:Depends},
ncbi-blast+,
diamond-aligner,
liblapack3,
python3
liblapack3
Description: Detection of (Co-)orthologs in large-scale protein analysis
Proteinortho is a stand-alone tool that is geared towards large datasets
and makes use of distributed computing techniques when run on multi-core
......
......@@ -3,7 +3,7 @@ Upstream-Name: Proteinortho
Upstream-Contact: Marcus Lechner <lechner@staff.uni-marburg.de>
Source: https://www.bioinf.uni-leipzig.de/Software/proteinortho/
Files-Excluded: */BUILD
*/lapack-*.tar.gz
*/lapack-*.tar.gz
Files: *
Copyright: 2009-2014 Marcus Lechner <lechner@staff.uni-marburg.de>
......
......@@ -21,3 +21,7 @@ override_dh_install:
for pl in `grep -Rl '#!/usr/bin/env[[:space:]]\+perl' debian/*/usr/*` ; do \
sed -i '1s?^#!/usr/bin/env[[:space:]]\+perl?#!/usr/bin/perl?' $${pl} ; \
done
override_dh_auto_clean:
dh_auto_clean
rm -f remove.graph test_blastp.blast-graph test_blastp.info test_blastp.proteinortho-graph test_blastp.proteinortho-graph.summary test_blastp.proteinortho.html test_blastp.proteinortho.tsv test_lastp.blast-graph test_lastp.info test_lastp.proteinortho-graph test_lastp.proteinortho-graph.summary test_lastp.proteinortho.html test_lastp.proteinortho.tsv test_synteny.blast-graph test_synteny.ffadj-graph test_synteny.info test_synteny.poff-graph test_synteny.poff.html test_synteny.poff.tsv test_synteny.proteinortho-graph test_synteny.proteinortho-graph.summary test_synteny.proteinortho.html test_synteny.proteinortho.tsv test_synteny.poff-graph.summary
proteinortho source: python3-depends-but-no-python3-helper proteinortho
This diff is collapsed.
......@@ -2,7 +2,7 @@ variables:
PROJECT_NAME: "Proteinortho"
before_script:
- echo "starting yml for Proteinortho"
- apt-get update && apt-get -y install cmake diffutils wget ncbi-blast+ time git
- apt-get update && apt-get -y install cmake diffutils wget ncbi-blast+ time git python3
stages:
- codequality
- test-precompiled-bins
......@@ -17,7 +17,7 @@ gcc-latest-alloptions:
- tar xzf diamond-linux64.tar.gz
- mkdir ~/bin
- cp diamond ~/bin
- perl proteinortho*pl -project=testasd -cpus=1 -ram=100 -verbose=2 -selfblast -silent -force -desc -checkfasta -cleanblast -debug -binpath=~/bin -tmp='~/' -e=0.000001 -sim=0.9 -identity=20 -cov=30 -subparaBlast='--more-sensitive' -synteny -dups=1 -cs=4 -alpha=0.4 -conn=0.01 -purity=0.00001 -minspecies=2 -subparaCluster='-cpus 1 -seed 1' -nograph -singles -xml -exactstep3 test/*faa >/dev/null 2>&1 && rm testasd*poff* && rm testasd*fadj* && rm testasd*info* && export LC_NUMERIC="C" && export LC_ALL="C" && for f in testasd.*; do sort $f >$f.testasd; done; sha256sum -b *testasd | tr -d '\n' | awk '{if($0 == "eb88ba29afd4f2dba16d3dbf97a5b0d2ab7686654a854f8502f0e778628e7f56 *testasd.descriptions.testasdf80df4c1a951bfb55b02300a273f6395694f01e8ae908e296d9c14a847d432ac *testasd.proteinortho.html.testasdfa18e9a0530f5a5754f045cfe97deaf818bdb5eb725619952633f1da0641cf7b *testasd.proteinortho.tsv.testasdc598b8c43e48e06614ec19e2f6b870e2737a7117a50ab2b1613880764d0884b2 *testasd.proteinortho.tsv.xml.testasd"){print $0." -> OK"; exit 0}else{print $0." -> failed"; exit 1}}'
- perl proteinortho*pl -project=testasd -cpus=1 -ram=100 -verbose=2 -selfblast -silent -force -desc -checkfasta -cleanblast -debug -binpath=~/bin -tmp='~/' -e=0.000001 -sim=0.9 -identity=20 -cov=30 -subparaBlast='--more-sensitive' -synteny -dups=1 -cs=4 -alpha=0.4 -conn=0.01 -purity=0.00001 -minspecies=2 -subparaCluster='-cpus 1 -seed 1' -nograph -singles -xml -exactstep3 test/*faa >/dev/null 2>&1 && rm testasd*poff* && rm testasd*fadj* && rm testasd*info* && export LC_NUMERIC="C" && export LC_ALL="C" && for f in testasd.*; do sort $f >$f.testasd; done; sha256sum -b *testasd | tr -d '\n' | awk '{if($0 == "eb88ba29afd4f2dba16d3dbf97a5b0d2ab7686654a854f8502f0e778628e7f56 *testasd.descriptions.testasde3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 *testasd.proteinortho-graph.summary.testasdf80df4c1a951bfb55b02300a273f6395694f01e8ae908e296d9c14a847d432ac *testasd.proteinortho.html.testasdfa18e9a0530f5a5754f045cfe97deaf818bdb5eb725619952633f1da0641cf7b *testasd.proteinortho.tsv.testasdc598b8c43e48e06614ec19e2f6b870e2737a7117a50ab2b1613880764d0884b2 *testasd.proteinortho.tsv.xml.testasd"){print $0." -> OK"; exit 0}else{print $0." -> failed"; exit 1}}'
gcc-latest-all-p:
image: gcc
......@@ -96,7 +96,7 @@ ubuntu-latest0:
image: ubuntu
stage: test-precompiled-bins
script:
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++ && apt-get -y install python3
- echo "installing topaz"
- git clone https://github.com/ajm/topaz
- cd topaz/src
......@@ -115,7 +115,7 @@ ubuntu-latest:
image: ubuntu
stage: recompile-and-test
script:
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++ && apt-get -y install python3
- echo "installing topaz"
- git clone https://github.com/ajm/topaz
- cd topaz/src
......@@ -136,7 +136,7 @@ debian-latest:
image: debian
stage: recompile-and-test
script:
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++
- apt-get -y update && apt-get -y install gcc && apt-get -y install gfortran && apt-get -y install build-essential g++ && apt-get -y install python3
- echo "installing topaz"
- git clone https://github.com/ajm/topaz
- cd topaz/src
......@@ -153,34 +153,33 @@ debian-latest:
- make all
- make test
fedora-latest:
image: fedora
stage: test-precompiled-bins
script:
- yum -y groupinstall "Development Tools"
- yum -y install gcc-c++
- yum -y install cmake
- yum -y install make
- yum -y install tar
- yum -y install which
- yum -y install wget
- yum -y install libstdc++-static
- yum -y install lapack-static
- yum -y install cpan
- yum -y install python
- yum -y install ncbi-blast+
- cpan Thread::Queue
- wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast*-x64-linux.tar.gz
- tar -xzvf ncbi-blast*-x64-linux.tar.gz
- cp ncbi-blast*/bin/blastp $HOME
- cp ncbi-blast*/bin/makeblastdb $HOME
- echo "installing diamond"
- wget http://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
- tar xzf diamond-linux64.tar.gz
- cp diamond $HOME
- export PATH="$PATH:$HOME"
- echo "start proteinortho tests"
- make test
#fedora-latest:
# image: fedora
# stage: test-precompiled-bins
# script:
# - yum -y groupinstall "Development Tools"
# - yum -y install gcc-c++
# - yum -y install cmake
# - yum -y install make
# - yum -y install tar
# - yum -y install which
# - yum -y install wget
# - yum -y install libstdc++-static
# - yum -y install lapack-static
# - yum -y install cpan
# - yum -y install python
# - yum -y install ncbi-blast+
# - cpan Thread::Queue
# - wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast*-x64-linux.tar.gz
# - tar -xzvf ncbi-blast*-x64-linux.tar.gz
# - cp ncbi-blast*/bin/blastp $HOME
# - cp ncbi-blast*/bin/makeblastdb $HOME
# - echo "installing diamond"
# - wget http://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
# - tar xzf diamond-linux64.tar.gz
# - cp diamond $HOME
# - export PATH="$PATH:$HOME"
# - echo "start proteinortho tests"
centos-latest:
image: centos
......@@ -193,6 +192,7 @@ centos-latest:
- yum -y install tar
- yum -y install which
- yum -y install wget
- yum -y install gcc-gfortran
- wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ncbi-blast*-x64-linux.tar.gz
- tar -xzvf ncbi-blast*-x64-linux.tar.gz
- cp ncbi-blast*/bin/blastp $HOME
......@@ -205,7 +205,6 @@ centos-latest:
- echo "start proteinortho tests"
- make clean
- make
- make test
code_quality:
image: docker:stable
......
#!/usr/bin/python3
#!/usr/bin/env python
from sys import stderr, exit, argv, maxsize
from copy import deepcopy
......@@ -8,9 +8,6 @@ from random import randint
from math import ceil
import logging as log
ALPHA = 1
class BothStrands:
def __eq__(self, x):
......@@ -59,9 +56,9 @@ class Run:
return len(self.weight)
def __str__(self):
return 'G1:%s-%s G2:%s-%s %s (%.5f)' % (self.startG1, self.endG1,
return 'G1:%s-%s G2:%s-%s %s' % (self.startG1, self.endG1,
self.startG2, self.endG2,
self.direction, self.getWeight(ALPHA))
self.direction)
def readDistsAndOrder(dist_file, edgeThreshold):
......@@ -133,11 +130,11 @@ def sort_genome(chrom_pos):
return telomeres, g
def insertIntoRunList(runs, runList):
keys = [x.getWeight(ALPHA) for x in runList]
def insertIntoRunList(runs, runList, alpha):
keys = [x.getWeight(alpha) for x in runList]
for run in runs:
i = bisect(keys, run.getWeight(ALPHA))
keys.insert(i, run.getWeight(ALPHA))
i = bisect(keys, run.getWeight(alpha))
keys.insert(i, run.getWeight(alpha))
runList.insert(i, run)
......@@ -363,7 +360,7 @@ def replaceByNew(g1_runs, g2_runs, i, j, r_old, r_new):
break
def doMatching(g1, g2, g1_runs, g2_runs, m, runList):
def doMatching(g1, g2, g1_runs, g2_runs, m, runList, alpha):
g1pos = dict(zip(g1, range(len(g1))))
g2pos = dict(zip(g2, range(len(g2))))
newRuns = set()
......@@ -455,6 +452,7 @@ def doMatching(g1, g2, g1_runs, g2_runs, m, runList):
g1_runs[g1pos[r.endG1]].remove(r)
r.startG2 = g2[j]
log.info('Divided overlapping run in %s and %s' % (r_new, r))
replaceByNew(g1_runs, g2_runs, g1pos[r_new.startG1],
g2pos[r_new.startG2], r, r_new)
newRuns.add(r_new)
......@@ -480,10 +478,10 @@ def doMatching(g1, g2, g1_runs, g2_runs, m, runList):
newRuns.add(r)
elif r in newRuns:
newRuns.remove(r)
insertIntoRunList(newRuns, runList)
insertIntoRunList(newRuns, runList, alpha)
def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched, alpha):
g1pos = dict(zip(g1, range(len(g1))))
g2pos = dict(zip(g2, range(len(g2))))
......@@ -504,9 +502,9 @@ def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
# points (mod_g1) can be processed.
for r1, r2 in product(sorted(g1_runs[i].difference(g1_runs[i+1]),
key=lambda x: x.getWeight(ALPHA), reverse=True),
key=lambda x: x.getWeight(alpha), reverse=True),
sorted(g1_runs[i+1].difference(g1_runs[i]),
key=lambda x: x.getWeight(ALPHA), reverse=True)):
key=lambda x: x.getWeight(alpha), reverse=True)):
if r1.endG1 == g1[i] and r2.startG1 == g1[i+1] and \
r1.direction == r2.direction and \
r1.endG1[0] == r2.startG1[0] and \
......@@ -538,7 +536,7 @@ def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
if r1 in alreadyMatched:
alreadyMatched.remove(r1)
# redo matching in case r1 xor r2 were not in matching before
insertIntoRunList(newRuns, runList)
insertIntoRunList(newRuns, runList, alpha)
return r2, set(mod_g1[x+1:])
if r2 in alreadyMatched:
# actually, both are already matched
......@@ -548,7 +546,7 @@ def mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs, runList, alreadyMatched):
# none is matched
newRuns.add(r2)
insertIntoRunList(newRuns, runList)
insertIntoRunList(newRuns, runList, alpha)
return None, []
......@@ -567,7 +565,7 @@ def removeSingleGenes(genome, genome_runs):
return del_res, mod_res
def findRandomRunSequence(g1, g2, dists, topXperCent):
def findRandomRunSequence(g1, g2, dists, topXperCent, alpha):
g2dists = dict()
for g1i, x in list(dists.items()):
for g2j, d in list(x.items()):
......@@ -585,11 +583,12 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
g1_runs, g2_runs, runs = getAllRuns(g1, g2, dists)
log.info('Found %s runs.' % len(runs))
# sort
runList = sorted(runs, key=lambda x: x.getWeight(ALPHA))
runList = sorted(runs, key=lambda x: x.getWeight(alpha))
res = set()
while runList:
noOfAdjacencies = len([x for x in runList if x.getWeight(ALPHA) and x.getWeight(ALPHA) or 0])
noOfAdjacencies = len([x for x in runList if x.getWeight(alpha) and
x.getWeight(alpha) or 0])
if noOfAdjacencies:
randPos = randint(1, ceil(noOfAdjacencies * topXperCent))
else:
......@@ -601,7 +600,7 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
while mx:
res.add(mx)
# update run list
doMatching(g1, g2, g1_runs, g2_runs, mx, runList)
doMatching(g1, g2, g1_runs, g2_runs, mx, runList, alpha)
del_g1, new_mod_g1 = removeSingleGenes(g1, g1_runs)
if del_g1:
log.info('Zombie genes removed from G1: %s' % ', '.join(map(str, del_g1)))
......@@ -631,7 +630,7 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
mod_g1.add(g1[g1pos[g1i]-1])
# merge runs
mx, mod_g1 = mergeRuns(mod_g1, g1, g2, g1_runs, g2_runs,
runList, res)
runList, res, alpha)
if res:
log.info('Matching finished. Longest run size is %s.' % (max(list(map(len, res)))))
......@@ -642,7 +641,7 @@ def findRandomRunSequence(g1, g2, dists, topXperCent):
def repeatMatching(g1, g2, g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching,
minCsSize, topXperCent):
minCsSize, topXperCent, alpha):
g1_mod_res = g1_mod
g2_mod_res = g2_mod
......@@ -680,7 +679,7 @@ def repeatMatching(g1, g2, g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching,
(noReps-repMatching+2))
break
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1, g2, dists, topXperCent)
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1, g2, dists, topXperCent, alpha)
checkMatching(g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns, dists)
log.info('Obtained %s adjacencies in matching of size %s from iteration %s.' %
......@@ -793,7 +792,6 @@ if __name__ == '__main__':
cli.add_argument('-a', '--alpha', type=float, metavar='F', default=0.5)
cli.add_argument('dist_file')
args = cli.parse_args()
AlPHA = args.alpha
repMatching = args.repeat_matching
if repMatching > 0:
repMatching -= 1
......@@ -802,14 +800,16 @@ if __name__ == '__main__':
format="%(levelname)s\t%(asctime)s\t++ %(message)s")
multiChrom, g1, g2, dists = readDistsAndOrder(args.dist_file, args.edge_weight_threshold)
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1, g2, dists, args.greedy)
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns = findRandomRunSequence(g1,
g2, dists, args.greedy, args.alpha)
checkMatching(g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns, dists)
# calculate number of breakpoints only from result of the first matching
bkp = len(selectedRuns) - 1
g1_mod, g2_mod, g1_runs, g2_runs, selectedRuns_new = repeatMatching(g1, g2,
g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching, args.min_cs_size, args.greedy)
g1_mod, g2_mod, g1_runs, g2_runs, dists, repMatching,
args.min_cs_size, args.greedy, args.alpha)
selectedRuns.update(selectedRuns_new)
......@@ -833,3 +833,4 @@ if __name__ == '__main__':
print('#bkp\t#edg\tadj\tedg')
print('%s\t%s\t%.6f\t%.6f' % (bkp, edg, wAdj, wEdg))
This diff is collapsed.
#!/usr/bin/env perl
#pk
##########################################################################################
# This file is part of proteinortho.
# (C) 2009 Marcus Lechner
#
# proteinortho is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published
# by the Free Software Foundation; either version 2, or (at your
# option) any later version.
#
# proteinortho is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with proteinortho; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
##########################################################################################
##########################################################################################
# About
##########################################################################################
#
# @author Paul Klemm
# @email klemmp@staff.uni-marburg.de
# @company Bioinformatics, University of Leipzig
# @version 1
# @date 11-12-2019
#
##########################################################################################
use POSIX;

# Help/usage text. Printed verbatim for -h/--help and prepended to error
# output on bad invocation. NOTE: this is a runtime string literal — its
# exact bytes are user-visible output.
my $usage = "
proteinortho_summary.pl produces a summary on species level.
SYNOPSIS
proteinortho_summary.pl (options) GRAPH (GRAPH2)
GRAPH Path to the *.proteinortho-graph or *.blast-graph file generated by proteinortho.
GRAPH2 (optional) If you provide a blast-graph AND a proteinortho-graph, the difference is calculated (GRAPH - GRAPH2)
Note: The *.proteinortho.tsv file does not work here (use the proteinortho-graph file)
OPTIONS
-format,-f enables the table formatting instead of the plain csv output.
";
# ---- command-line parsing --------------------------------------------------
# Flags may be written -x or --x; the first two non-flag arguments are taken
# as the input graph files (in order).
my $graphfilenameA="";
my $graphfilenameB="";
my $notableformat=1; # 1 = plain csv output (default), 0 = formatted table (-f)
foreach my $arg (@ARGV){
	if($arg =~ m/^--?(help|h)$/){
		$help=1;
	}elsif($arg =~ m/^--?(format|f)$/){
		$notableformat=0;
	}elsif($arg =~ m/^-.+/){
		# unknown option: show usage, report on STDERR, abort
		print $usage;
		print STDERR "ERROR: invalid option ".$arg."!\n\n";
		exit(1);
	}elsif($graphfilenameA eq ""){
		$graphfilenameA = $arg;
	}elsif($graphfilenameB eq ""){
		$graphfilenameB = $arg;
	}
}
if($help){
	print $usage;
	exit(0);
}
# The first graph file is mandatory.
my $fail="";
if($graphfilenameA eq ""){
	$fail.="ERROR: GRAPH not provided!\n";
}
if($fail ne ""){
	print $usage.$fail;
	exit(1);
}
# ---- shared state for the table printer (processLine) ----------------------
# Terminal width; fall back to 160 columns when `tput cols` yields nothing
# usable (e.g. output is not a terminal).
our $maxNumOfCharsInOneLine = `tput cols`;
chomp($maxNumOfCharsInOneLine);
if($maxNumOfCharsInOneLine < 10){ $maxNumOfCharsInOneLine = 160; }
our $split_delim = "[:\t]"; # cell separator pattern: ':' or tab
our @spl_header;            # formatted cells of the current table header
our @spl;                   # cells of the line currently being formatted
our $last_isHeaderLine = 0; # whether the previously printed line was a header
$isHeaderLine = 1;          # the first line of each table is its header
our $noheader = 0;
# Accumulators: symmetric edge counts per species pair, and the derived
# squared (2-path) matrix.
my %species_matrix;
my %species_matrix_pow2;
my $currentSpeciesA;
my $currentSpeciesB;
# ---- read the first graph file ---------------------------------------------
# The graph file alternates species-pair header lines ("# fileA<TAB>fileB")
# with edge lines (6 tab-separated columns). Every edge line increments the
# symmetric edge counter for the species pair announced by the most recent
# header line.
open(my $FH,"<",$graphfilenameA) || die $!;
while(<$FH>){
	if($_ eq ""){next;}
	chomp;
	# skip the two known column-description comment lines
	if($_ eq "# file_a file_b" || $_ eq "# a b evalue_ab bitscore_ab evalue_ba bitscore_ba"){next;}
	my @arr=split("\t",$_);
	if(substr($_,0,1) eq "#" && scalar @arr == 2){
		# species-pair header: the following edges belong to these two files
		$currentSpeciesA=$arr[0];
		$currentSpeciesB=$arr[1];
		$currentSpeciesA=~s/^# ?//g;
	}elsif(substr($_,0,1) ne "#" && scalar @arr == 6){
		# edge line: count it symmetrically for both species
		if(!exists $species_matrix{$currentSpeciesA}{$currentSpeciesB}){
			$species_matrix{$currentSpeciesA}{$currentSpeciesB} = 1;
			$species_matrix{$currentSpeciesB}{$currentSpeciesA} = 1;
			$species_matrix_pow2{$currentSpeciesA}{$currentSpeciesB} = 0;
			$species_matrix_pow2{$currentSpeciesB}{$currentSpeciesA} = 0;
		}else{
			$species_matrix{$currentSpeciesA}{$currentSpeciesB} ++;
			$species_matrix{$currentSpeciesB}{$currentSpeciesA} ++;
		}
	}elsif( !(substr($_,0,1) eq "#" && scalar @arr == 4) ){
		# anything else (except 4-column comment lines) is not a valid graph file
		# FIX: corrected misspelled error message ("fromat" -> "format")
		print STDERR "[STDERR] Error: wrong format... Please make sure you only provide *.blast-graph or *.proteinortho-graph files as input...\n";die;
	}
}
close($FH);
# ---- optionally subtract a second graph ------------------------------------
# When a second graph file is supplied, its edges decrement the counters,
# so the reported numbers are the difference GRAPH - GRAPH2.
if($graphfilenameB ne ""){
	open(my $FHB,"<",$graphfilenameB) || die $!;
	while(my $line = <$FHB>){
		next if $line eq "";
		chomp $line;
		my @cols = split("\t",$line);
		if(substr($line,0,1) eq "#" && scalar @cols == 2){
			# species-pair header line
			($currentSpeciesA,$currentSpeciesB) = @cols;
			$currentSpeciesA =~ s/^# ?//g;
		}elsif(substr($line,0,1) ne "#"){
			# edge line: decrement (or create) the symmetric pair counter
			if(!exists $species_matrix{$currentSpeciesA}{$currentSpeciesB}){
				$species_matrix{$currentSpeciesA}{$currentSpeciesB} = 1;
				$species_matrix{$currentSpeciesB}{$currentSpeciesA} = 1;
				$species_matrix_pow2{$currentSpeciesA}{$currentSpeciesB} = 0;
				$species_matrix_pow2{$currentSpeciesB}{$currentSpeciesA} = 0;
			}else{
				$species_matrix{$currentSpeciesA}{$currentSpeciesB} --;
				$species_matrix{$currentSpeciesB}{$currentSpeciesA} --;
			}
		}
	}
	close($FHB);
}
# ---- section 1: adjacency matrix -------------------------------------------
# Number of edges between every pair of species. With more than 10 species
# and table formatting enabled, columns are labelled by index "(i)" instead
# of the full file name.
my @keys=sort keys %species_matrix;
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
print STDERR "\n";
my $ret="# The adjacency matrix, the number of edges between 2 species\n";
processLine($ret);
my $useIndexLabels = (scalar @keys > 10 && !$notableformat);
$ret="# file\t";
for my $i (0 .. $#keys){
	$ret .= $useIndexLabels ? "($i)\t" : $keys[$i]."\t";
}
$ret .= "\n";
processLine($ret);
for my $i (0 .. $#keys){
	$ret = $useIndexLabels ? $keys[$i]."($i)\t" : $keys[$i]."\t";
	my @cells;
	for my $j (0 .. $#keys){
		# the diagonal (self edges) is forced to zero
		$species_matrix{$keys[$i]}{$keys[$j]} = 0 if $i == $j;
		push(@cells, $species_matrix{$keys[$i]}{$keys[$j]});
	}
	$ret .= join("\t",@cells)."\n";
	processLine($ret);
}
# ---- section 2: average number of edges per species ------------------------
# Two-column table: species name and its mean edge count over all species.
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
$maxNumOfCharsInOneLine=`tput cols`;
chomp($maxNumOfCharsInOneLine);$maxNumOfCharsInOneLine/=2; # only 2 columns -> use half the width
if($maxNumOfCharsInOneLine<10){$maxNumOfCharsInOneLine=160;}
print STDERR "\n";
$ret= "# file\taverage number of edges\n";
processLine($ret);
for(my $i = 0 ; $i < scalar @keys; $i++){
	$ret= $keys[$i]."\t";
	my $sum=0;
	for(my $j = 0 ; $j < scalar @keys; $j++){
		$sum+=$species_matrix{$keys[$i]}{$keys[$j]};
	}
	$ret.= $sum/scalar @keys;
	# FIX: removed stray copy-paste line `if($j<scalar @keys -1){$ret.= "\t";}`
	# here: $j is out of scope after the inner loop (undef without strict),
	# so the test always passed and appended a spurious trailing tab.
	$ret.= "\n";
	processLine($ret);
}
# ---- section 3: the 2-path matrix (adjacency matrix squared) ---------------
# Entry (i,j) is the number of paths of length 2 between species i and j,
# i.e. sum over k of edges(i,k)*edges(k,j).
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
$maxNumOfCharsInOneLine=`tput cols`;
chomp($maxNumOfCharsInOneLine);
if($maxNumOfCharsInOneLine<10){$maxNumOfCharsInOneLine=160;}
print STDERR "\n";
$ret= "# The 2-path matrix, the number of paths between 2 species of length 2\n";
processLine($ret);
$ret= "# file\t";
for(my $i = 0 ; $i < scalar @keys; $i++){
	if(scalar @keys>10 && !$notableformat){$ret.= "($i)\t";}
	else{$ret.=$keys[$i]."\t";}
}
$ret.= "\n";
processLine($ret);
for(my $i = 0 ; $i < scalar @keys; $i++){
	# FIX: label rows like the adjacency-matrix section above (index suffix
	# only for wide formatted tables) instead of always appending "($i)".
	if(scalar @keys >10 && !$notableformat){
		$ret=$keys[$i]."($i)\t";
	}else{
		$ret=$keys[$i]."\t";
	}
	for(my $j = 0 ; $j < scalar @keys; $j++){
		# FIX: was `if($i<$i+1)` (always true), which re-accumulated the
		# already-mirrored lower triangle and doubled those entries.
		# Compute each pair once (upper triangle incl. diagonal) and
		# mirror the result.
		if($i<=$j){
			for(my $k = 0 ; $k < scalar @keys; $k++){
				$species_matrix_pow2{$keys[$i]}{$keys[$j]}+=$species_matrix{$keys[$i]}{$keys[$k]}*$species_matrix{$keys[$k]}{$keys[$j]};
			}
			$species_matrix_pow2{$keys[$j]}{$keys[$i]}=$species_matrix_pow2{$keys[$i]}{$keys[$j]};
		}
		$ret.= $species_matrix_pow2{$keys[$i]}{$keys[$j]};
		if($j<scalar @keys -1){$ret.= "\t";}
	}
	$ret.= "\n";
	processLine($ret);
}
# ---- section 4: average number of 2-paths per species ----------------------
$noheader=0;$last_isHeaderLine=0;$isHeaderLine=1;@spl_header=();@spl=();
$maxNumOfCharsInOneLine=`tput cols`;
chomp($maxNumOfCharsInOneLine);
$maxNumOfCharsInOneLine/=2; # two-column table -> use half the terminal width
if($maxNumOfCharsInOneLine<10){$maxNumOfCharsInOneLine=160;}
print STDERR "\n";
processLine("# file\taverage number of 2-paths\n");
my $numSpecies = scalar @keys;
for my $idx (0 .. $#keys){
	my $rowTotal = 0;
	$rowTotal += $species_matrix_pow2{$keys[$idx]}{$_} for @keys;
	processLine($keys[$idx]."($idx)\t".($rowTotal/$numSpecies)."\n");
}
# processLine(LINE)
# Prints one report line. In plain mode ($notableformat == 1) the line is
# echoed verbatim. In table mode the line is split into cells (pattern
# $split_delim: ':' or tab) and each cell is padded/truncated so the row
# fits the width in $maxNumOfCharsInOneLine; cells are joined with '|' and
# header rows are framed with '+---+' rule lines.
# Operates on file-level globals: $notableformat, $split_delim,
# $maxNumOfCharsInOneLine, @spl, @spl_header, $isHeaderLine,
# $last_isHeaderLine. Returns 1 on the early exits; otherwise the return
# value is unused by callers.
sub processLine{
	$_=shift;chomp;
	# Plain csv mode: print unchanged and stop.
	if($notableformat == 1){print "$_\n";return 1;}
	if(length($_)<1){return 1;}
	@spl=split($split_delim,$_);
	# A line that does not split into at least two cells is printed as-is.
	if(scalar @spl <2){print "$_\n";return 1;}
	@spl_backup=@spl;
	# A column count different from the current header starts a new table.
	if(scalar @spl_header > 0 && scalar @spl != scalar @spl_header){$isHeaderLine=1;}
	if(scalar @spl < 2 ){return 1;}
	# Strip a leading "# " comment marker from the first cell.
	if(substr($spl[0],0,1) eq "#"){$spl[0]=~s/^# ?//g;}
	# Widen the table if needed so every column gets at least one character.
	if(scalar(@spl)*2-1>$maxNumOfCharsInOneLine){$maxNumOfCharsInOneLine= -1+2*scalar @spl;print STDERR "Corrected minimum table width: -w=$maxNumOfCharsInOneLine such that at least 1 character per column is displayed.\n";}
	$sumOfCharsLine=length(join("",@spl));
	if($isHeaderLine){ # is a header row
		# Shrink: repeatedly shorten the widest cell(s) until the row fits,
		# marking truncated cells with a trailing "...".
		while(($sumOfCharsLine + scalar @spl-1) > $maxNumOfCharsInOneLine){ # shave of chars from widest cell
			$max_l=0;
			@max_l_is;
			for (my $i = 0; $i < scalar @spl; $i++) {
				if($max_l < length $spl[$i]){$max_l=length $spl[$i];@max_l_is=();push(@max_l_is,$i)}elsif($max_l == length $spl[$i]){push(@max_l_is,$i)}
			}
			for (my $i = 0; $i < scalar @max_l_is; $i++) {
				if(length $spl[$max_l_is[$i]] > 8 && substr($spl[$max_l_is[$i]],-3) ne "..." ){
					$spl[$max_l_is[$i]]=substr($spl[$max_l_is[$i]],0,length($spl[$max_l_is[$i]])-3-1)."..."
				}
				else{
					# cell too short for "...": shave one char off the original text
					$spl[$max_l_is[$i]]=substr($spl_backup[$max_l_is[$i]],0,length($spl[$max_l_is[$i]])-1)
				}
			}
			$sumOfCharsLine=length(join("",@spl));
		}
		# Grow: pad the narrowest cell(s), alternating sides, until the row
		# spans the full table width.
		while(($sumOfCharsLine + scalar @spl-1) < $maxNumOfCharsInOneLine ){ # add of chars to smallest cell
			$min_l=$maxNumOfCharsInOneLine*10;
			@min_l_is;
			for (my $i = 0; $i < scalar @spl; $i++) {
				if($min_l > length $spl[$i]){$min_l=length $spl[$i];@min_l_is=();push(@min_l_is,$i)}
			}
			for (my $i = 0; $i < scalar @min_l_is; $i++) {
				$leftPad=0;
				$rightPad=0;
				if($spl[$min_l_is[$i]]=~m/( +)$/){$rightPad=length $1}
				if($spl[$min_l_is[$i]]=~m/^( +)/){$leftPad=length $1}
				if( $leftPad < $rightPad ){
					$spl[$min_l_is[$i]]=" ".$spl[$min_l_is[$i]];
				}else{
					$spl[$min_l_is[$i]]=$spl[$min_l_is[$i]]." ";
				}
			}
			$sumOfCharsLine=length(join("",@spl));
		}
		# Remember the header geometry; data rows are fitted to it below.
		@spl_header=@spl;
	}else{ # is not headerline -> do the same as in headerline
		# Drop surplus cells, then pad/trim each cell to its header width.
		while(scalar @spl > scalar @spl_header){pop @spl;}
		for (my $i = 0; $i < scalar @spl; $i++) {
			while(length $spl[$i]< length $spl_header[$i]){ # add pads
				$leftPad=0;
				$rightPad=0;
				if($spl[$i]=~m/( +)$/){$rightPad=length $1}
				if($spl[$i]=~m/^( +)/){$leftPad=length $1}
				if( $leftPad < $rightPad ){
					$spl[$i]=" ".$spl[$i];
				}else{
					$spl[$i]=$spl[$i]." ";
				}
			}
			while(length $spl[$i]>length $spl_header[$i]){ # trim
				if(length $spl[$i] > 5 && substr($spl[$i],-3) ne "..." ){
					$spl[$i]=substr($spl[$i],0,length($spl[$i])-3-1)."..."
				}
				else{
					# too short for "...": mark the truncation with a '#'
					$spl[$i]=substr($spl_backup[$i],0,length($spl[$i])-2)."#"
				}
			}
		}
	}
	# Frame header rows with '+----+' rule lines above and below.
	if($isHeaderLine && !$last_isHeaderLine ){$tmp=join("|",@spl);$tmp=~s/\|/+/g;$tmp=~s/[^+]/-/g; print "$tmp\n";}
	print join("|",@spl);
	if($isHeaderLine ){print "\n";$tmp=join("|",@spl);$tmp=~s/\|/+/g;$tmp=~s/[^+]/-/g; print "$tmp";}
	print "\n";
	$last_isHeaderLine=$isHeaderLine;
	$isHeaderLine=0;
}