// Output filters and flags for this command, followed by the shared sketch
// options. Option arguments appear to be (type, short identifier, help
// category, description, default argument[, min, max]) -- confirm against the
// Option constructor, which is not visible in this part of the file.
//addOption("log", Option(Option::Boolean, "L", "Output", "Log scale distances and divide by k-mer size to provide a better analog to phylogenetic distance. The special case of zero shared min-hashes will result in a distance of 1.", ""));
addOption("pvalue",Option(Option::Number,"v","Output","Maximum p-value to report.","1.0",0.,1.));
addOption("distance",Option(Option::Number,"d","Output","Maximum distance to report.","1.0",0.,1.));
// Boolean flag: registered with an empty default and no numeric bounds, the
// same way the Boolean options of the other commands in this file are
// declared. A "1.0" default with a [0,1] range belongs only on Number options.
addOption("comment",Option(Option::Boolean,"C","Output","Show comment fields with reference/query names (denoted with ':').",""));
useSketchOptions();
}
...
...
@@ -51,6 +52,7 @@ int CommandDistance::run() const
// Help text for this command: a one-line summary plus the long description.
// Each member is assigned exactly once, using the <mixture> terminology
// throughout, so no superseded wording is written and then overwritten.
summary="Determine whether query sequences are within a larger mixture of sequences.";
description="Determine how well query sequences are contained within a mixture of sequences. The queries must be formatted as a single Mash sketch file (.msh), created with the `mash sketch` command. The <mixture> files can be contigs or reads, in fasta or fastq, gzipped or not, and \"-\" can be given for <mixture> to read from standard input. The <mixture> sequences are assumed to be nucleotides, and will be 6-frame translated if the <queries> are amino acids. The output fields are [identity, shared-hashes, median-multiplicity, p-value, query-ID, query-comment], where median-multiplicity is computed for shared hashes, based on the number of observations of those hashes within the mixture.";
addOption("list",Option(Option::Boolean,"l","Input","List input. Lines in each <query> specify paths to sequence files, one per line. The reference file is not affected.",""));
addOption("comment",Option(Option::Boolean,"C","Output","Use comment fields for sequence names instead of IDs.",""));
addOption("edge",Option(Option::Boolean,"E","Output","Output edge list instead of Phylip matrix, with fields [seq1, seq2, dist, p-val, shared-hashes].",""));
addOption("pvalue",Option(Option::Number,"v","Output","Maximum p-value to report in edge list. Implies -"+getOption("edge").identifier+".","1.0",0.,1.));
addOption("distance",Option(Option::Number,"d","Output","Maximum distance to report in edge list. Implies -"+getOption("edge").identifier+".","1.0",0.,1.));
//addOption("log", Option(Option::Boolean, "L", "Output", "Log scale distances and divide by k-mer size to provide a better analog to phylogenetic distance. The special case of zero shared min-hashes will result in a distance of 1.", ""));
useSketchOptions();
}
...
...
@@ -50,8 +53,16 @@ int CommandTriangle::run() const