Skip to content
Commits on Source (192)
#!/bin/sh
# Populate a local PubMed archive from scratch.
#
# Usage: [-path] MASTER_DIR [[-temp|-work|-working] WORKING_DIR]
#
# Environment:
#   EDIRECT_PUBMED_MASTER   archive directory used when no MASTER_DIR argument
#                           is given (mandatory in that case)
#   EDIRECT_PUBMED_WORKING  directory for intermediate files when no
#                           WORKING_DIR argument is given; falls back to the
#                           master directory when unset
#
# Exits with status 1 on an unrecognized option, a missing master path, or
# when a required directory cannot be entered or created.

# Skip an optional -path flag; any other dash-prefixed argument is rejected.
while [ $# -gt 0 ]
do
  case "$1" in
    -path )
      shift
      ;;
    -* )
      exec >&2
      echo "$0: Unrecognized option $1"
      exit 1
      ;;
    * )
      break
      ;;
  esac
done

if [ "$#" -gt 0 ]
then
  target="$1"
  # Resolve to an absolute path.  Abort when the directory cannot be entered:
  # otherwise MASTER would be empty and the mkdir calls below would operate
  # on "/Archive" and "/Postings".  CDPATH is cleared so cd never echoes a
  # path of its own into the captured output.
  MASTER=$(CDPATH= cd -- "$target" && pwd) || {
    echo "$0: Unable to access directory $target" >&2
    exit 1
  }
  shift
else
  if [ -z "${EDIRECT_PUBMED_MASTER}" ]
  then
    # Diagnostics belong on stderr, like the option errors above.
    echo "Must supply path to master archive area or set EDIRECT_PUBMED_MASTER environment variable" >&2
    exit 1
  else
    MASTER="${EDIRECT_PUBMED_MASTER}"
    # Drop a single trailing slash so joined paths do not contain "//".
    MASTER=${MASTER%/}
  fi
fi

# Skip an optional -temp / -work / -working flag ahead of the working path.
while [ $# -gt 0 ]
do
  case "$1" in
    -temp | -work | -working )
      shift
      ;;
    -* )
      exec >&2
      echo "$0: Unrecognized option $1"
      exit 1
      ;;
    * )
      break
      ;;
  esac
done

if [ "$#" -gt 0 ]
then
  working="$1"
  # Same absolute-path resolution and failure handling as for MASTER.
  WORKING=$(CDPATH= cd -- "$working" && pwd) || {
    echo "$0: Unable to access directory $working" >&2
    exit 1
  }
  shift
else
  if [ -z "${EDIRECT_PUBMED_WORKING}" ]
  then
    # No separate working area requested: share the master directory.
    WORKING=${MASTER}
  else
    WORKING="${EDIRECT_PUBMED_WORKING}"
    WORKING=${WORKING%/}
  fi
fi

# Create the directory layout; stop if any directory cannot be created.
for dir in Archive Postings
do
  mkdir -p "$MASTER/$dir" || exit 1
done

for dir in Current Indexed Inverted Merged Pubmed
do
  mkdir -p "$WORKING/$dir" || exit 1
done

# Run pm-prepare only when the archive has no CACHEDIR.TAG marker file yet.
if [ ! -f "$MASTER/Archive/CACHEDIR.TAG" ]
then
  pm-prepare "$MASTER/Archive"
fi

echo "Downloading PubMed Files"
# download-pubmed is run from inside the Pubmed directory; never let it run
# from an unrelated directory if the cd fails.
cd "$WORKING/Pubmed" || exit 1
download-pubmed baseline updatefiles
echo ""

echo "Populating PubMed Archive"
pm-stash "$MASTER/Archive"
echo ""

echo "Refreshing Versioned Records"
pm-refresh "$MASTER/Archive"
echo ""

# Smoke test: look up one fixed UID in the new archive and print the xtract
# result prefixed with "Archive is ".
echo 18810966 |
fetch-pubmed -path "$MASTER/Archive" |
xtract -pattern Author -if Affiliation -contains Medicine \
  -pfx "Archive is " -element Initials
echo ""
#!/usr/bin/perl -w
# Usage: asp-ls PATH
#!/usr/bin/env perl
use warnings;
use strict;
use Net::FTP;
# Usage: asp-ls PATH
my $server = "ftp.ncbi.nlm.nih.gov";
my $dir = shift;
my $ftp = new Net::FTP($server, Passive => 1)
......
#!/bin/sh
# Fetch the third-party Go packages used by the xtract sources, then build
# the xtract and rchive binaries inside $GOPATH/src/xtract.
#
# Environment:
#   GOPATH  Go workspace root; must be set and non-empty.

# Abort early when GOPATH is unset or empty: the second cd would otherwise
# target "/src/xtract" and the go commands would run in the wrong directory.
: "${GOPATH:?GOPATH must be set}"

cd "$GOPATH" || exit 1

go get -u github.com/fatih/color
go get -u github.com/fiam/gounidecode/unidecode
go get -u github.com/klauspost/cpuid
go get -u github.com/pbnjay/memory
go get -u github.com/surgebase/porter2
go get -u golang.org/x/text/runes
go get -u golang.org/x/text/transform
go get -u golang.org/x/text/unicode/norm

# Both binaries are built together with the common.go source file.
cd "$GOPATH/src/xtract" || exit 1
go build -o xtract xtract.go common.go
go build -o rchive rchive.go common.go
This diff is collapsed.
debhelper-build-stamp
files
gopath
ncbi-entrez-direct.debhelper.log
ncbi-entrez-direct.substvars
ncbi-entrez-direct/
ncbi-entrez-direct (10.5.20181204+ds-1) unstable; urgency=medium
* New upstream release.
* (debian/).gitignore: Update per GOPATH change.
* debian/control:
- Build-depend on dh-golang, golang-github-dataence-porter2-dev,
golang-github-fatih-color-dev, golang-github-klauspost-cpuid-dev,
golang-github-rainycape-unidecode-dev, and
golang-github-shirou-gopsutil-dev.
- Set Built-Using (with the help of dh-golang).
- Bump minimum Go version to 1.10 for strings.Builder.
- Depend on libxml-simple-perl.
- Repoint Vcs-{Browser,Git} at salsa.debian.org.
- Rules-Requires-Root: no (confirmed safe).
- Standards-Version: 4.2.1 (compliant at this point).
* debian/examples: Remove local storage/indexing scripts (considered
official enough for /usr/bin at this point), leaving only asp-* and
has-asp, which promote non-free software; add test-pubmed-index.
* debian/man/{archive-pubmed,download-pubmed,download-sequence,eblast,
fetch-pubmed,index-pubmed,intersect-uid-lists,(local-)phrase-search,
pm-clean,pm-erase,pm-invert,pm-log,pm-merge,pm-prepare,pm-promote,
pm-refresh,pm-repack,pm-stash,pm-uids,pm-verify,protein-neighbors,rchive,
stream-pubmed,transmute}.1: Document newly added tools.
* debian/man/{edirect,nquire,xtract}.1: Fix old documentation bugs.
* debian/man/{efetch,efilter,elink,entrez-phrase-search,ftp-cp,ftp-ls,
nquire,xtract}.1: Update for new release.
* debian/rules:
- Change (autopopulated) GOPATH to meet dh-golang's expectations.
- Belatedly run dh --with=golang to populate (misc:)Built-Using.
- Clean up remaining cruft from when Go support was optional.
- Pull github.com/surgebase/porter2 (aka gh.c/dataence/porter2),
github.com/fatih/color (along with its dependencies
github.com/mattn/go-{colorable,isatty}), github.com/klauspost/cpuid,
github.com/rainycape/unidecode (aliased, per rchive.go's expectations,
to github.com/fiam/gounidecode/unidecode), github.com/shirou/gopsutil
(along with its dependency golang.org/x/sys/unix), and our ersatz
github.com/pbnjay/memory into the path. (The rainycape version of
unidecode has the key advantages of being fresher and already packaged
for Debian.)
- Build rchive.go in addition to xtract.go; build both against a
common.go factored out of xtract.go.
- Install run-ncbi-converter (accidentally left off earlier).
- Flip Perl scripts back to running via /usr/bin/perl (from
/usr/bin/env perl).
- Enforce use of Go version 1.10 or newer, even if the default is older.
- Install local storage/indexing scripts to /usr/bin. (Closes: #901028.)
- Install new eblast wrapper (corresponding to new -blast mode) and
protein-neighbors script.
* debian/vendor/github.com/pbnjay/memory/memory.go: New wrapper around
github.com/shirou/gopsutil/mem, filling in for a different third-party
library that lacks a declared license.
-- Aaron M. Ucko <ucko@debian.org> Wed, 05 Dec 2018 21:19:35 -0500
ncbi-entrez-direct (7.40.20170928+ds-1) unstable; urgency=medium
* New upstream release.
......
......@@ -4,20 +4,28 @@ Uploaders: Aaron M. Ucko <ucko@debian.org>
Section: science
Priority: optional
Build-Depends: debhelper (>= 10~),
golang-any (>= 2:1.8~2~) | golang-1.8-go,
golang-any (>= 2:1.10~) | golang-1.10-go,
golang-github-dataence-porter2-dev,
golang-github-fatih-color-dev,
golang-github-klauspost-cpuid-dev,
golang-github-rainycape-unidecode-dev,
golang-github-shirou-gopsutil-dev,
golang-golang-x-text-dev
Standards-Version: 4.1.1
Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/ncbi-entrez-direct.git
Vcs-Git: https://anonscm.debian.org/git/debian-med/ncbi-entrez-direct.git
Standards-Version: 4.2.1
Vcs-Browser: https://salsa.debian.org/med-team/ncbi-entrez-direct
Vcs-Git: https://salsa.debian.org/med-team/ncbi-entrez-direct.git
Homepage: http://www.ncbi.nlm.nih.gov/books/NBK179288
Rules-Requires-Root: no
Package: ncbi-entrez-direct
Architecture: any
Multi-Arch: foreign
Depends: libwww-perl,
libxml-simple-perl,
${misc:Depends},
${perl:Depends},
${shlibs:Depends}
Built-Using: ${misc:Built-Using}
Description: NCBI Entrez utilities on the command line
Entrez Direct (EDirect) is an advanced method for accessing NCBI's set
of interconnected databases (publication, sequence, structure, gene,
......
asp-cp
asp-ls
download-pubmed
download-sequence
erase-pubmed
fetch-pubmed
has-asp
stash-pubmed
test-pubmed-index
.TH ARCHIVE-PUBMED 1 2018-10-08 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
archive\-pubmed \- populate a local NCBI PubMed archive from scratch
.SH SYNOPSIS
.B archive\-pubmed
[\|[\|\fB\-path\fP\|]\ \fIdir\fP
[\|[\|\fB\-temp\fP|\fB\-work\fP|\fB\-working\fP\|]\ \fIdir\fP\|]\|]
.SH DESCRIPTION
\fBarchive\-pubmed\fP prepares a directory
to hold a local PubMed archive,
then downloads a full database dump from NCBI's servers,
organizes it into an \fBrchive\fP(1) trie,
and refreshes the versioned records.
.SH OPTIONS
.TP
[\|\fB\-path\fP\|]\ \fIdir\fP
Place the local archive in \fIdir\fP.
.TP
[\|\fB\-temp\fP|\fB\-work\fP|\fB\-working\fP\|]\ \fIdir\fP
Place intermediate subtrees in \fIdir\fP.
.SH ENVIRONMENT
.TP
.B EDIRECT_PUBMED_MASTER
Local archive directory to use in the absence of \fB\-path\fP.
Expected to hold an absolute path;
mandatory when not supplying a path on the command line.
.TP
.B EDIRECT_PUBMED_WORKING
Directory to hold intermediate subtrees
in the absence of \fB\-temp\fP|\fB\-work\fP|\fB\-working\fP.
Expected to hold an absolute path.
Defaults to the primary local archive directory when not set.
.SH SEE ALSO
.BR download\-pubmed (1),
.BR fetch\-pubmed (1),
.BR index\-pubmed (1),
.BR local\-phrase\-search (1),
.BR pm\-prepare (1),
.BR pm\-refresh (1),
.BR pm\-stash (1),
.BR rchive (1).
.TH DOWNLOAD-PUBMED 1 2018-11-12 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
download\-pubmed \- download an NCBI PubMed archive dump
.SH SYNOPSIS
.B download\-pubmed
[\|\fIsection...\fP\|]
.SH DESCRIPTION
\fBdownload\-pubmed\fP downloads one or more sections
of NCBI's PubMed archive to the current directory.
When run without arguments, it downloads the
\fBbaseline\fP and \fBupdatefiles\fP sections.
.SH OPTIONS
.TP
\fIsection...\fP
Archive section(s) to download.
.SH SEE ALSO
.BR archive\-pubmed (1),
.BR download\-sequence (1),
.BR fetch\-pubmed (1),
.BR ftp\-cp (1),
.BR ftp\-ls (1),
.BR index\-pubmed (1),
.BR local\-phrase\-search (1),
.BR pm\-clean (1),
.BR pm\-log (1),
.BR pm\-prepare (1),
.BR pm\-refresh (1),
.BR pm\-stash (1),
.BR pm\-verify (1),
.BR rchive (1),
.BR transmute (1).
.TH DOWNLOAD-SEQUENCE 1 2018-09-16 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
download\-sequence \- download NCBI ASN.1 biological sequence batches
.SH SYNOPSIS
.B download\-sequence
\fIdivision...\fP
.SH DESCRIPTION
\fBdownload\-sequence\fP downloads one or more divisions
of NCBI's (GenBank, GenPept, and the like)
biological sequence archive to the current directory.
.SH OPTIONS
.TP
\fIdivision...\fP
Archive division(s) to download.
.SH SEE ALSO
.BR download\-pubmed (1),
.BR ftp\-cp (1),
.BR ftp\-ls (1).
.TH EBLAST 1 2018-11-18 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
eblast \- perform a remote NCBI BLAST protein query
.SH SYNOPSIS
\fBeblast\fP (\fBedirect\ \-blast\fP)
.SH DESCRIPTION
\fBeblast\fP reads one or more FASTA\-formatted protein sequences
from standard input
and performs an online search for similar sequences
in NCBI's \fBnr\fP database,
yielding BLAST XML output on standard output.
.SH SEE ALSO
.BR edirect (1),
.BR efetch (1),
.BR protein\-neighbors (1),
.BR xtract (1).
.TH EDIRECT 1 2017-01-24 NCBI "NCBI Entrez Direct User's Manual"
.TH EDIRECT 1 2018-10-08 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
edirect \- access NCBI Entrez from the command line
.SH SYNOPSIS
......@@ -29,6 +29,8 @@ edirect \- access NCBI Entrez from the command line
\fBenotify\fP (\fBedirect\ \-notify\fP) \fIoptions\fP
\fBeaddress\fP (\fBedirect\ \-address\fP) \fIoptions\fP
\fBeblast\fP (\fBedirect\ \-blast\fP) \fIoptions\fP
.SH DESCRIPTION
\fBedirect\fP (Entrez Direct) is a command\-line utility
for consulting NCBI's set of interconnected databases
......@@ -67,8 +69,15 @@ Print the internal URL query and XML results of each step.
.TP
\fB\-base\fP\ \fIURL\fP
Specify a particular server for quality assurance testing.
.SH ENVIRONMENT
.TP
.B NCBI_API_KEY
NCBI E\-Utilities API key,
allowing for a higher request rate
at the cost of some anonymity.
.SH SEE ALSO
.BR eaddress (1),
.BR eblast (1),
.BR ecitmatch (1),
.BR econtact (1),
.BR efetch (1),
......
.TH EFETCH 1 2017-01-24 NCBI "NCBI Entrez Direct User's Manual"
.TH EFETCH 1 2018-11-14 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
efetch, esummary \- retrieve results from an NCBI Entrez search
.SH SYNOPSIS
......@@ -6,6 +6,7 @@ efetch, esummary \- retrieve results from an NCBI Entrez search
[\|\fB\-help\fP\|]
[\|\fB\-format\fP\ \fIfmt\fP\|]
[\|\fB\-mode\fP\ \fImode\fP\|]
[\|\fB\-style\fP\ \fIstyle\fP\|]
[\|\fB\-db\fP\ \fIname\fP\|]
[\|\fB\-id\fP\ \fIID\fP\|]
[\|\fB\-seq_start\fP\ \fIN\fP\|]
......@@ -16,10 +17,13 @@ efetch, esummary \- retrieve results from an NCBI Entrez search
[\|\fB\-complexity\fP\ \fIN\fP\|]
[\|\fB\-extend\fP\ \fIN\fP\|]
[\|\fB\-extrafeat\fP\ \fIN\fP\|]
[\|\fB\-raw\fP\|]
[\|\fB\-json\fP\|]
\fBesummary\fP (\fBedirect\ \-summary\fP)
[\|\fB\-help\fP\|]
[\|\fB\-mode\fP\ \fImode\fP\|]
[\|\fB\-style\fP\ \fIstyle\fP\|]
[\|\fB\-db\fP\ \fIname\fP\|]
[\|\fB\-id\fP\ \fIID\fP\|]
[\|\fB\-seq_start\fP\ \fIN\fP\|]
......@@ -30,6 +34,8 @@ efetch, esummary \- retrieve results from an NCBI Entrez search
[\|\fB\-complexity\fP\ \fIN\fP\|]
[\|\fB\-extend\fP\ \fIN\fP\|]
[\|\fB\-extrafeat\fP\ \fIN\fP\|]
[\|\fB\-raw\fP\|]
[\|\fB\-json\fP\|]
.SH DESCRIPTION
\fBefetch\fP and \fBesummary\fP retrieve results
from either an \fBedirect\fP(1) pipeline
......@@ -58,6 +64,9 @@ Format of record or report.
.BR asn.1 ,
or
.BR json .
.TP
\fB\-style\fP\ \fIstyle\fP
\fBwithparts\fP or \fBconwithfeat\fP.
.SS Direct Record Selection
.TP
\fB\-db\fP\ \fIname\fP
......@@ -83,12 +92,7 @@ First sequence position to retrieve (0\-based).
.TP
\fB\-chr_stop\fP\ \fIN\fP
Last sequence position to retrieve (1\-based).
.SS Miscellaneous
.TP
\fB\-help\fP
Print usage information, complete with examples of notable
.BR \-db / \-format / \-mode
combinations.
.SS Sequence Flags
.TP
\fB\-complexity\fP\ \fIN\fP
How much context to fetch:
......@@ -108,8 +112,23 @@ Extend sequence retrieval by \fIN\fP residues in both directions.
.TP
\fB\-extrafeat\fP\ \fIN\fP
Bit flag specifying extra features.
.SS Miscellaneous
.TP
\fB\-raw\fP
Skip database\-specific XML modifications.
.TP
\fB\-json\fP
Convert adjusted XML output to JSON.
.TP
\fB\-help\fP
Print usage information, complete with examples of notable
.BR \-db / \-format / \-mode
combinations.
.SH SEE ALSO
.BR eblast (1),
.BR edirect (1),
.BR efetch.acedb (1),
.BR esearch (1),
.BR rchive (1),
.BR transmute (1),
.BR xtract (1).
.TH EFILTER 1 2017-10-05 NCBI "NCBI Entrez Direct User's Manual"
.TH EFILTER 1 2018-11-18 NCBI "NCBI Entrez Direct User's Manual"
NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
efilter \- filter and/or sort NCBI Entrez search results
......@@ -15,6 +15,7 @@ efilter \- filter and/or sort NCBI Entrez search results
[\|\fB\-pairs\fP\ \fIfield\fP\|]
[\|\fB\-spell\fP\|]
[\|\fB\-pub\fP\ \fItype\fP\|]
[\|\fB\-country\fP\ \fIname\fP\|]
[\|\fB\-feature\fP\ \fItype\fP\|]
[\|\fB\-location\fP\ \fItype\fP\|]
[\|\fB\-molecule\fP\ \fItype\fP\|]
......@@ -76,6 +77,12 @@ Correct misspellings in query.
.BR structured .
.SS Sequence Filters
.TP
\fB\-country\fP\ \fIname\fP
.BR usa:minnesota ,
.BR united_kingdom ,
.BR \(dqpacific\ ocean\(dq ,
\&...
.TP
\fB\-feature\fP\ \fItype\fP
.BR gene ,
.BR mrna ,
......@@ -109,7 +116,8 @@ Correct misspellings in query.
.BR prokaryotes ,
.BR protists ,
.BR rodents ,
.BR viruses .
.BR viruses ,
\&...
.TP
\fB\-source\fP\ \fItype\fP
.BR genbank ,
......
.TH ELINK 1 2017-01-24 NCBI "NCBI Entrez Direct User's Manual"
.TH ELINK 1 2018-11-18 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
elink \- look up related NCBI Entrez records
.SH SYNOPSIS
......@@ -29,7 +29,9 @@ Follow links to another database.
.TP
\fB\-name\fP\ \fIname\fP
Follow an explicit link name such as
.BR pubmed_protein_refseq .
.BR pubmed_protein_refseq ,
.BR pubmed_pubmed_citedin ", or"
.BR pubmed_pubmed_refs .
.SS Direct Record Selection
.TP
\fB\-db\fP\ \fIname\fP
......@@ -82,4 +84,6 @@ Alias for query step.
.SH SEE ALSO
.BR edirect (1),
.BR einfo (1),
.BR epost (1).
.BR epost (1),
.BR protein\-neighbors (1).
.TH ENTREZ-PHRASE-SEARCH 1 2017-01-24 NCBI "NCBI Entrez Direct User's Manual"
.TH ENTREZ-PHRASE-SEARCH 1 2018-09-16 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
entrez\-phrase\-search \- search NCBI Entrez for phrases
.SH SYNOPSIS
......@@ -44,4 +44,5 @@ Phrase to search for.
.SH SEE ALSO
.BR edirect (1),
.BR esearch (1),
.BR filter\-stop\-words (1).
.BR filter\-stop\-words (1),
.BR local\-phrase\-search (1).
.TH FETCH-PUBMED 1 2018-10-08 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
fetch\-pubmed \- fetch records from a local NCBI PubMed archive by UID
.SH SYNOPSIS
.B fetch\-pubmed
[\|\fB\-strict\fP|\fB\-mixed\fP\|]
[\|[\|\fB\-path\fP\|]\ \fIdir\fP\|]
.SH DESCRIPTION
\fBfetch\-pubmed\fP reads a list of record IDs from standard input
and prints to standard output the corresponding portions of
a local NCBI PubMed archive produced by \fBarchive\-pubmed\fP(1),
typically for further processing by \fBxtract\fP(1) or the like.
.SH OPTIONS
.TP
[\|\fB\-strict\fP|\fB\-mixed\fP\|]
Run \fBrchive\fP(1) with \fB\-flag\ strict\fP
or \fB\-flag\ mixed\fP, respectively.
.TP
[\|\fB\-path\fP\|]\ \fIdir\fP
Read the local archive from \fIdir\fP.
.SH ENVIRONMENT
.TP
.B EDIRECT_PUBMED_MASTER
Local archive directory to use in the absence of \fB\-path\fP.
Expected to hold an absolute path;
mandatory when not supplying a path on the command line.
.SH SEE ALSO
.BR archive\-pubmed (1),
.BR local\-phrase\-search (1),
.BR rchive (1),
.BR stream\-pubmed (1),
.BR xtract (1).
.TH FTP-CP 1 2017-07-05 NCBI "NCBI Entrez Direct User's Manual"
.TH FTP-CP 1 2018-09-16 NCBI "NCBI Entrez Direct User's Manual"
.SH NAME
ftp\-cp \- download files from an FTP server
.SH SYNOPSIS
......@@ -12,4 +12,9 @@ on a specified file server.
If no file names appear on the command line,
it will read the list of names to use from standard input.
.SH SEE ALSO
.BR ftp-ls (1).
.BR archive\-pubmed (1),
.BR download\-pubmed (1),
.BR download\-sequence (1),
.BR ftp\-ls (1),
.BR index\-pubmed (1),
.BR rchive (1).