Skip to content
Commits on Source (4)
ncbi-entrez-direct (10.9.20190219+ds-1) unstable; urgency=medium
* New upstream point release.
* debian/man/xtract.1: Update accordingly (adding -is-within).
-- Aaron M. Ucko <ucko@debian.org> Tue, 26 Feb 2019 21:28:41 -0500
ncbi-entrez-direct (10.9.20190205+ds-1) unstable; urgency=medium
* New upstream release.
......
.TH XTRACT 1 2019-02-06 NCBI "NCBI Entrez Direct User's Manual"
.TH XTRACT 1 2019-02-26 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
xtract \- convert XML into a table of data values
.SH SYNOPSIS
......@@ -25,6 +25,7 @@ xtract \- convert XML into a table of data values
[\|\fB\-select\fP\ \fIcondition\fP\|]
[\|\fB\-equals\fP\ \fIstr\fP\|]
[\|\fB\-contains\fP\ \fIstr\fP\|]
[\|\fB\-is\-within\fP\ \fIstr\fP\|]
[\|\fB\-starts\-with\fP\ \fIstr\fP\|]
[\|\fB\-ends\-with\fP\ \fIstr\fP\|]
[\|\fB\-is\-not\fP\ \fIstr\fP\|]
......@@ -191,6 +192,9 @@ String must match exactly.
\fB\-contains\fP\ \fIstr\fP
Substring must be present.
.TP
\fB\-is\-within\fP\ \fIstr\fP
Element value must be present within \fIstr\fP.
.TP
\fB\-starts\-with\fP\ \fIstr\fP
Substring must be at beginning.
.TP
......
......@@ -5,17 +5,10 @@ Description: <short summary of the patch>
information below has been extracted from the changelog. Adjust it or drop
it.
.
ncbi-entrez-direct (10.9.20190205+ds-1) unstable; urgency=medium
ncbi-entrez-direct (10.9.20190219+ds-1) unstable; urgency=medium
.
* New upstream release.
* debian/man/{archive-pubmed,download-pubmed,edirect,efilter,
entrez-phrase-search,esearch,espell,fetch-pubmed,index-pubmed,
phrase-search,rchive,stream-pubmed,transmute,xtract}.1: Update
accordingly.
* debian/man/{local-phrase-search,pm-{clean,current,erase,log,repack,uids,
verify}}.1: Retire per the corresponding scripts.
* debian/rules: Stop installing retired scripts local-phrase-search and
(implicitly) pm-{clean,current,erase,log,repack,uids,verify}.
* New upstream point release.
* debian/man/xtract.1: Update accordingly (adding -is-within).
Author: Aaron M. Ucko <ucko@debian.org>
---
......@@ -29,10 +22,10 @@ Bug-Debian: https://bugs.debian.org/<bugnumber>
Bug-Ubuntu: https://launchpad.net/bugs/<bugnumber>
Forwarded: <no|not-needed|url proving that it has been forwarded>
Reviewed-By: <name and email of someone who approved the patch>
Last-Update: 2019-02-07
Last-Update: 2019-02-27
--- /dev/null
+++ ncbi-entrez-direct-10.9.20190205+ds/.gitignore
+++ ncbi-entrez-direct-10.9.20190219+ds/.gitignore
@@ -0,0 +1,3 @@
+.pc
+bin
......
......@@ -84,6 +84,7 @@ sub clearflags {
$debug = false;
$http = "";
$j2x = false;
$x2j = false;
$output = "";
}
......@@ -492,12 +493,12 @@ BioThings Queries
EDirect Expansion
ExtractIDs() {
xtract -pattern BIO_THINGS -block Id -tab "\\n" -element "Id"
xtract -pattern BIO_THINGS -block Id -tab "\\n" -element Id
}
WrapIDs() {
xtract -wrp BIO_THINGS -pattern opt -wrp "Type" -lbl "\$1" \\
-wrp "Count" -num "\$2" -block "\$2" -wrp "Id" -element "\$3" |
xtract -wrp BIO_THINGS -pattern opt -wrp Type -lbl "\$1" \\
-wrp Count -num "\$2" -block "\$2" -wrp Id -element "\$3" |
xtract -format
}
......@@ -584,6 +585,7 @@ sub nquire {
'-gene' => 'http://mygene.info/v3',
'-variant' => 'http://myvariant.info/v1',
'-chem' => 'http://mychem.info/v1',
'-disease' => 'http://mydisease.info/v1',
'-drug' => 'http://c.biothings.io/v1',
'-taxon' => 'http://t.biothings.io/v1',
);
......@@ -695,6 +697,19 @@ sub nquire {
}
}
# if present, -j2x or -x2j must be next argument (undocumented)
if ( $i < $max ) {
$pat = $args[$i];
if ( $pat eq "-j2x" ) {
$i++;
$j2x = true;
} elsif ( $pat eq "-x2j" ) {
$i++;
$x2j = true;
}
}
# read file of keyword shortcuts for URL expansion
if ( $i < $max ) {
......@@ -920,6 +935,16 @@ sub nquire {
binmode(STDOUT, ":utf8");
}
if ( $x2j ) {
my $xc = new XML::Simple(KeepRoot => 1);
my $conv = $xc->XMLin($output);
convert_bools($conv);
my $jc = JSON::PP->new->ascii->pretty->allow_nonref;
my $result = $jc->encode($conv);
$output = "$result";
}
print "$output";
}
......
......@@ -112,6 +112,7 @@ String Constraints
-equals String must match exactly
-contains Substring must be present
-is-within String must be present
-starts-with Substring must be at beginning
-ends-with Substring must be at end
-is-not String must not match
......@@ -1675,6 +1676,7 @@ const (
OR
EQUALS
CONTAINS
ISWITHIN
STARTSWITH
ENDSWITH
ISNOT
......@@ -1778,6 +1780,7 @@ var argTypeIs = map[string]ArgumentType{
"-or": CONDITIONAL,
"-equals": CONDITIONAL,
"-contains": CONDITIONAL,
"-is-within": CONDITIONAL,
"-starts-with": CONDITIONAL,
"-ends-with": CONDITIONAL,
"-is-not": CONDITIONAL,
......@@ -1893,6 +1896,7 @@ var opTypeIs = map[string]OpType{
"-or": OR,
"-equals": EQUALS,
"-contains": CONTAINS,
"-is-within": ISWITHIN,
"-starts-with": STARTSWITH,
"-ends-with": ENDSWITH,
"-is-not": ISNOT,
......@@ -2586,7 +2590,7 @@ func ParseArguments(cmdargs []string, pttrn string) *Block {
cond = append(cond, op)
parseStep(op, elementColonValue)
status = UNSET
case EQUALS, CONTAINS, STARTSWITH, ENDSWITH, ISNOT:
case EQUALS, CONTAINS, ISWITHIN, STARTSWITH, ENDSWITH, ISNOT:
if op != nil {
if len(str) > 1 && str[0] == '\\' {
// first character may be backslash protecting dash (undocumented)
......@@ -4729,7 +4733,7 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
stat := constraint.Type
switch stat {
case EQUALS, CONTAINS, STARTSWITH, ENDSWITH, ISNOT:
case EQUALS, CONTAINS, ISWITHIN, STARTSWITH, ENDSWITH, ISNOT:
// substring test on element values
str = strings.ToUpper(str)
val = strings.ToUpper(val)
......@@ -4743,6 +4747,10 @@ func ConditionsAreSatisfied(conditions []*Operation, curr *Node, mask string, in
if strings.Contains(str, val) {
return true
}
case ISWITHIN:
if strings.Contains(val, str) {
return true
}
case STARTSWITH:
if strings.HasPrefix(str, val) {
return true
......@@ -6039,6 +6047,70 @@ func ProcessINSD(args []string, isPipe, addDash, doIndex bool) []string {
return acc
}
// BIOTHINGS EXTRACTION COMMAND GENERATOR
// ProcessBiopath generates extraction commands for BioThings resources (undocumented).
//
// args[0] is a dotted exploration path such as
// "clinvar.rcv.conditions.identifiers.omim". Every path component except the
// last becomes one exploration level (-division, -group, -branch, -block,
// -section, -subset, -unit, in that order), and the last component becomes the
// -element argument, requested both as an element and as an attribute
// ("name,@name"). Any arguments after args[0] are ignored.
//
// When isPipe is true the result is meant to be executed directly, so values
// are left unquoted; otherwise they are wrapped in double quotes so the
// command can be printed and pasted into a shell.
//
// The function prints an error to stderr and exits with status 1 if no path is
// supplied, if the path is empty, or if the path has more intermediate
// components than there are exploration levels.
func ProcessBiopath(args []string, isPipe bool) []string {

	// nquire -variant variant "chr6:g.26093141G>A" | xtract -biopath "clinvar.rcv.conditions.identifiers.omim"

	// xtract -pattern opt -division clinvar -group rcv -branch conditions -block identifiers -sep "\n" -tab "\n" -element "omim,@omim"

	// strings.Split never returns an empty slice for a non-empty separator, so
	// a missing path has to be detected on the argument itself
	if len(args) < 1 || args[0] == "" {
		fmt.Fprintf(os.Stderr, "\nERROR: Insufficient command-line arguments supplied to xtract -biopath\n")
		os.Exit(1)
	}

	// exploration levels, outermost first, assigned to successive path components
	expname := []string{
		"-division",
		"-group",
		"-branch",
		"-block",
		"-section",
		"-subset",
		"-unit",
	}

	dirs := strings.Split(args[0], ".")

	// index of the final component, which is also the number of intermediate levels
	last := len(dirs) - 1
	if last > len(expname) {
		fmt.Fprintf(os.Stderr, "\nERROR: Too many nodes in argument supplied to xtract -biopath\n")
		os.Exit(1)
	}

	acc := make([]string, 0, 2*last+8)
	acc = append(acc, "-pattern", "opt")

	for i, dir := range dirs[:last] {
		acc = append(acc, expname[i], dir)
	}

	leaf := dirs[last]
	newline := "\\n"
	element := leaf + ",@" + leaf
	if !isPipe {
		// output will be printed as a shell command, so protect the values with quotes
		newline = "\"" + newline + "\""
		element = "\"" + element + "\""
	}

	acc = append(acc, "-sep", newline, "-tab", newline, "-element", element)

	return acc
}
// HYDRA CITATION MATCHER COMMAND GENERATOR
// ProcessHydra generates extraction commands for NCBI's in-house citation matcher (undocumented)
......@@ -8471,6 +8543,29 @@ func main() {
args = hydra
}
// BIOTHINGS EXTRACTION COMMAND GENERATOR
// -biopath takes a dotted exploration path for BioThings resources (undocumented)
if args[0] == "-biopath" {
args = args[1:]
biopath := ProcessBiopath(args, isPipe || usingFile)
if !isPipe && !usingFile {
// no piped input, so write output instructions
fmt.Printf("xtract")
for _, str := range biopath {
fmt.Printf(" %s", str)
}
fmt.Printf("\n")
return
}
// data in pipe, so replace arguments, execute dynamically
args = biopath
}
// ENTREZ2INDEX COMMAND GENERATOR
// -e2index shortcut for experimental indexing code (documented in rchive.go)
......