Commit 00d7a8e3 authored by Dylan Aïssi

New upstream version 0.9.0+dfsg

parent aed0c669
......@@ -5,7 +5,7 @@ Description: A programmatic interface to the Web Service methods provided by
Functions include methods for searching for sequences by taxonomic names,
ids, collectors, and institutions; as well as a function for searching
for specimens, and downloading trace files.
Version: 0.8.6
Version: 0.9.0
License: MIT + file LICENSE
Authors@R: c(person("Scott", "Chamberlain",
role = c("aut", "cre"),
......@@ -19,14 +19,14 @@ Encoding: UTF-8
Imports: xml2, crul (>= 0.3.8), stringr, jsonlite, reshape, plyr,
data.table, tibble
Suggests: roxygen2 (>= 6.1.1), sangerseqR, knitr, testthat, vcr (>=
0.2.0)
0.2.2)
RoxygenNote: 6.1.1
X-schema.org-applicationCategory: Data Access
X-schema.org-keywords: biodiversity, barcode, DNA, sequences, fasta
X-schema.org-isPartOf: https://ropensci.org
NeedsCompilation: no
Packaged: 2018-12-14 22:21:32 UTC; sckott
Packaged: 2019-06-27 18:13:58 UTC; sckott
Author: Scott Chamberlain [aut, cre] (<https://orcid.org/0000-0003-1444-9135>)
Maintainer: Scott Chamberlain <myrmecocystus@gmail.com>
Repository: CRAN
Date/Publication: 2018-12-14 22:40:03 UTC
Date/Publication: 2019-06-27 18:40:03 UTC
YEAR: 2018
YEAR: 2019
COPYRIGHT HOLDER: Scott Chamberlain
c333ebd7275835baa28d7647083c20dd *DESCRIPTION
1b96a74f7e95cf508e814ad0c04525e5 *LICENSE
55f8bc680a7200e098deb2d31168de58 *DESCRIPTION
de9532c5aeb8082a5fc9cbee02186d6f *LICENSE
19dc97bc26681640044166206b3e8b26 *NAMESPACE
999899ca91da6b7489d1c7d3c4b79fc9 *NEWS.md
d1656e9f84d212f933b89ef565e1a265 *NEWS.md
bca22309792f9e1bef76921cb712c956 *R/bold-package.R
86af3511a9fb16be45bd4a9aa65ec5bf *R/bold_filter.R
e7d58a273f516616cd5d5798c8adb42b *R/bold_identify.R
8d7b5bee1c00e3c45ce70b5dbbbca0a9 *R/bold_identify_parents.R
814d93da962957e8ae22f70a54f9f87c *R/bold_filter.R
3eb280a0f9a7fb0e711033387695cbdf *R/bold_identify.R
c2b8623056e517ec7871eac77969e02f *R/bold_identify_parents.R
30d45d1fb1ddbff6c81935b9dea5d483 *R/bold_seq.R
34757525c62643946a13bbdc88a2c499 *R/bold_seqspec.R
59a3960974f8c92df825b23408a147a1 *R/bold_specimens.R
fb4a5c2495a64b58d9dbb930953926aa *R/bold_stats.R
fbc4bad140d07d82ad505348bab99e5b *R/bold_specimens.R
90414c4a4414d7e5de2a75442dc70f54 *R/bold_stats.R
5b1e9f8a6f26df5e328a32033c276ed2 *R/bold_tax_id.R
cbf8ecafcc3b5d2943be20908afa6b16 *R/bold_tax_name.R
e5edff89902bf58e6c205a44d1cfcfe2 *R/bold_tax_name.R
3725c7e5034c9054eba152b29dab6a67 *R/bold_trace.R
da4c3be636c3217c00eab199f9190795 *R/zzz.R
0e255073e276a183f9ee8d8de8f2afae *README.md
a41720639252e2a6dba1d1efd98bc87a *build/vignette.rds
617c26e0d62df89b58c28430bf626085 *R/zzz.R
a23732686b405ae7df3ee53d50e2cf2b *README.md
8ca69cf7c17a65d059e0595a45250130 *build/vignette.rds
bb64a460c31e2e6821ac53870b09c38e *data/sequences.RData
1453679dfb1fd31a85fa5d2baba39545 *inst/doc/bold_vignette.Rmd
7fa8251dbc29cde4c399eabd8ed5645a *inst/doc/bold_vignette.html
67c9ac7f3486ce313bf6f3907e1e1c50 *inst/doc/bold.Rmd
2047a39aa373fa79d46a5795e931e050 *inst/doc/bold.html
1fc13c735a05dc20167294991f30e259 *man/bold-package.Rd
7025c073016140b45f74341a683a2c90 *man/bold_filter.Rd
06f89090411c32456a741d6c5e413bc4 *man/bold_filter.Rd
22d10484e000d4513ea2191ed5266d64 *man/bold_identify.Rd
baa059b290861d9ead4fe02c2d555b72 *man/bold_identify_parents.Rd
d8d8ab5c50ec26127bcfabf1dff7d369 *man/bold_identify_parents.Rd
8e88d9fb4747ba91498276889f99cede *man/bold_seq.Rd
1dc544ccf138e04ce2f470e6d340a521 *man/bold_seqspec.Rd
888ad897d0ba35e6a680c4fde08d6fa8 *man/bold_specimens.Rd
ecdb0aa85bd247e984a4ca58b8530c42 *man/bold_stats.Rd
523f4cb708eeeb6598589822974dd99d *man/bold_stats.Rd
53b9c4a880cfa10f17aeb5e04f644d07 *man/bold_tax_id.Rd
ae740ce75ebf5f8118c443e6c1715a76 *man/bold_tax_name.Rd
9743893655290dd97eed35fe56121c5c *man/bold_trace.Rd
8b6eac4da649615fee64522edaf3bf0b *man/sequences.Rd
594faed0f22f54e58ea511e0ddbfbde0 *tests/fixtures/bold_filter.yml
7b5116a4fddb40ac2d896ce3a53b81a6 *tests/fixtures/bold_identify_ampersands.yml
0d830abed439769157fb4ef5babe0bdf *tests/fixtures/bold_identify_db_param.yml
095858b38b8d1956537a2e6d32c95e7c *tests/fixtures/bold_identify_parents.yml
9f485a15102c54c8f92788546f4e5e8d *tests/fixtures/bold_identify_parents_wide.yml
44d90a7fc2950fdd2da928f43853fe03 *tests/fixtures/bold_identify_parents_wrong_type.yml
9ad453ba58eeec6f262433e2bc7e3e2b *tests/fixtures/bold_identify_response_param.yml
35b408a5259f8cd98bfdf3680a0db890 *tests/fixtures/bold_identify_works.yml
664c37ab8c51c54499e318295f303dd4 *tests/fixtures/bold_seq_works_bin.yml
......@@ -41,6 +46,9 @@ ae740ce75ebf5f8118c443e6c1715a76 *man/bold_tax_name.Rd
a1452c998f2ac8e6e108d035a2b9e416 *tests/fixtures/bold_seqspec_three.yml
4ac4776a727f6ff084018e4bb836a9a9 *tests/fixtures/bold_seqspec_two.yml
24df385752b8626506f02c1f3d7a8128 *tests/fixtures/bold_specimens_response.yml
bca4831249c994b9f6d14fb9053e8fef *tests/fixtures/bold_stats.yml
3adfd9e625a12938b6957097c8efb81d *tests/fixtures/bold_stats_many_taxa.yml
238a8d9176019432f0a8be77b8117c14 *tests/fixtures/bold_stats_response_true.yml
c71caf4ba8da20e9bba66d5b1e0f4c0f *tests/fixtures/bold_tax_id1.yml
2a131837326c6eb9b2598d2254b4e263 *tests/fixtures/bold_tax_id2.yml
af64047a6df1a5e4427fc5de2c5b5f7c *tests/fixtures/bold_tax_id_datatypes_param_basic.yml
......@@ -49,17 +57,20 @@ f36f31d56b60c20905182300062f6fef *tests/fixtures/bold_tax_id_datatypes_param_geo
579a9ccb2ebb0536a28e3fe15378eef8 *tests/fixtures/bold_tax_id_datatypes_param_sequencinglabs.yml
62b5c47b3f8a755badb7eabe3518899a *tests/fixtures/bold_tax_id_datatypes_param_stats1.yml
13e386be7f4d763d36a681a431099474 *tests/fixtures/bold_tax_id_datatypes_param_stats2.yml
176ad849e3e7e3cddd6d253bcd1fd12a *tests/fixtures/bold_tax_id_includetree_param_false.yml
a574237249e8a3f543a99f684efbb033 *tests/fixtures/bold_tax_id_includetree_param_true.yml
ddf57cd5689efff1d34ac661c2606d42 *tests/fixtures/bold_tax_id_multiple_ids.yml
84bbbed186e5e3b5098536e8b647f70f *tests/fixtures/bold_tax_name.yml
f2acf4f63b1c9be6a79d8c9f9cf13d38 *tests/fixtures/bold_tax_name_fuzzy.yml
4bdc017183766f5d9a7a65048f0f287f *tests/fixtures/bold_tax_id_includetree_param.yml
863d1cd0b415526964aa1e65c8ee1954 *tests/fixtures/bold_tax_id_multiple_ids.yml
30237469b2407ced38dfc2946f566698 *tests/fixtures/bold_tax_name.yml
8973c5e6f510cb9ed2ad97733420422b *tests/fixtures/bold_tax_name_fuzzy.yml
d9066883a8fecb16e80ceeef8323edac *tests/test-all.R
c0b5c58f6e2fe58bccb9a0e463e1437d *tests/testthat/bold_identify_list.rda
a0e0c0de2101ce6ca74246dc1a710eeb *tests/testthat/helper-bold.R
b8ebc7d03c680b6358f7b021bdee5ecf *tests/testthat/test-bold_identify.R
db596466435efcf56b15f8802ba00ec2 *tests/testthat/test-bold_seq.R
8e06e7b782416d2e17f19a3ecdd0950c *tests/testthat/test-bold_seqspec.R
83050133bd7f8191ad0d7adb6dd87b08 *tests/testthat/test-bold_specimens.R
3028c2844f2899e3aed06a2ead8f7916 *tests/testthat/test-bold_tax_id.R
54715cb499aef22697bcc5ce13708da7 *tests/testthat/test-bold_tax_name.R
1453679dfb1fd31a85fa5d2baba39545 *vignettes/bold_vignette.Rmd
b7a311e01031126c8ee5af7d7f6be76b *tests/testthat/test-bold_filter.R
75b3e6376fff2886d5ec2b8c354db6e8 *tests/testthat/test-bold_identify.R
bbd8deff90efb38f51abb67b494ee14b *tests/testthat/test-bold_identify_parents.R
b52af57cd83f8cc882486d9e2a663b68 *tests/testthat/test-bold_seq.R
fc46a405dba47527402cf268bf80d056 *tests/testthat/test-bold_seqspec.R
7f2bf3178b95256d1a79dbd6f093b8db *tests/testthat/test-bold_specimens.R
c6faf6b5917bbc53f29f91f483c1f535 *tests/testthat/test-bold_stats.R
f4ab16dcd0e51160b590979081d5710e *tests/testthat/test-bold_tax_id.R
a0ef4f4c55d65b4c8f9f3ac133af954c *tests/testthat/test-bold_tax_name.R
67c9ac7f3486ce313bf6f3907e1e1c50 *vignettes/bold.Rmd
bold 0.9.0
==========
### MINOR IMPROVEMENTS
* improved test coverage (#58)
* allow curl options to be passed into `bold_identify_parents()` (#64)
* fix instructions in README for package `sangerseqR` - instructions depend on which version of R is being used (#65) thanks @KevCaz
### BUG FIXES
* fixes in package for `_R_CHECK_LENGTH_1_LOGIC2_` (#57)
* `bold_identify()` fix: ampersands needed to be escaped (#62) thanks @devonorourke
bold 0.8.6
==========
......
#' Get BOLD specimen + sequence data.
#' Filter BOLD specimen + sequence data (output of bold_seqspec)
#'
#' Picks either shortest or longest sequences, for a given grouping variable
#' (e.g., species name)
#'
#' @export
#' @param x (data.frame) a data.frame, as returned from
......@@ -23,22 +26,13 @@
#' vapply(minn$nucleotides, nchar, 1, USE.NAMES = FALSE)
#' }
bold_filter <- function(x, by, how = "max") {
  # Keep one row per level of `by`: the row whose `nucleotides` string is
  # shortest (how = "min") or longest (how = "max") once gap characters
  # ("-") have been removed. The result is returned as a tibble.
  if (!inherits(x, "data.frame")) stop("'x' must be a data.frame")
  # `how` and `by` must be scalars; testing the length first keeps each `if`
  # condition length-1 even when a vector (or NULL) is passed by mistake.
  if (length(how) != 1L || !how %in% c("min", "max")) {
    stop("'how' must be one of 'min' or 'max'")
  }
  if (length(by) != 1L || !by %in% names(x)) {
    stop(sprintf("'%s' is not a valid column in 'x'",
      paste(by, collapse = ", ")))
  }
  # Look the selector up directly instead of building and evaluating code
  # with eval(parse(text = ...)).
  pick <- switch(how, min = which.min, max = which.max)
  xsp <- split(x, x[[by]])
  tibble::as_tibble(setrbind(lapply(xsp, function(z) {
    # sequence length per row, ignoring gap/alignment characters
    lgts <- vapply(z$nucleotides, function(w) nchar(gsub("-", "", w)), 1,
      USE.NAMES = FALSE)
    z[pick(lgts), ]
  })))
}
setrbind <- function(x) {
  # Row-bind a list of data.frames, matching columns by name and filling
  # columns that are missing from some elements, then convert the result
  # to a plain data.frame and return it visibly.
  bound <- data.table::setDF(
    data.table::rbindlist(x, use.names = TRUE, fill = TRUE)
  )
  bound
}
......@@ -69,6 +69,7 @@ bold_identify <- function(sequences, db = 'COX1', response=FALSE, ...) {
out
} else {
tt <- out$parse('UTF-8')
tt <- gsub("&", "&amp;", tt)
xml <- xml2::read_xml(tt)
nodes <- xml2::xml_find_all(xml, "//match")
toget <- c("ID","sequencedescription","database",
......
......@@ -20,6 +20,8 @@
#' below.
#' @param specimenrecords (character) A specimenrecords name. Optional.
#' See `Filtering` below.
#' @param ... Further args passed on to \code{\link[crul]{verb-GET}}, main
#' purpose being curl debugging
#'
#' @details This function gets unique set of taxonomic names from the input
#' data.frame, then queries \code{\link{bold_tax_name}} to get the
......@@ -66,31 +68,43 @@
#' out <- bold_identify_parents(df, wide = TRUE)
#' str(out)
#' head(out[[1]])
#'
#' x <- bold_seq(taxon = "Satyrium")
#' out <- bold_identify(c(x[[1]]$sequence, x[[13]]$sequence))
#' res <- bold_identify_parents(out)
#' res
#'
#' x <- bold_seq(taxon = 'Diplura')
#' out <- bold_identify(vapply(x, "[[", "", "sequence")[1:20])
#' res <- bold_identify_parents(out)
#' }
bold_identify_parents <- function(x, wide = FALSE, taxid = NULL,
taxon = NULL, tax_rank = NULL, tax_division = NULL, parentid = NULL,
parentname = NULL, taxonrep = NULL, specimenrecords = NULL) {
parentname = NULL, taxonrep = NULL, specimenrecords = NULL, ...) {
# S3 generic: hand off to the method that matches the class of `x`.
UseMethod("bold_identify_parents")
}
#' @export
bold_identify_parents.default <- function(x, wide = FALSE, taxid = NULL,
taxon = NULL, tax_rank = NULL, tax_division = NULL, parentid = NULL,
parentname = NULL, taxonrep = NULL, specimenrecords = NULL) {
stop("no 'bold_identify_parents' method for ", class(x), call. = FALSE)
parentname = NULL, taxonrep = NULL, specimenrecords = NULL, ...) {
# Fallback method: always signals an error. class(x)[1L] limits the message
# to a single class name when `x` carries several classes.
stop("no 'bold_identify_parents' method for ", class(x)[1L], call. = FALSE)
}
#' @export
bold_identify_parents.data.frame <- function(x, wide = FALSE, taxid = NULL,
  taxon = NULL, tax_rank = NULL, tax_division = NULL, parentid = NULL,
  parentname = NULL, taxonrep = NULL, specimenrecords = NULL, ...) {
  # Wrap the single data.frame in a list and re-dispatch, so that the call
  # reaches the list method. Every filter argument is forwarded, and so is
  # `...` -- without it, curl options supplied by the caller would be dropped
  # for data.frame input.
  bold_identify_parents(list(x), wide, taxid, taxon, tax_rank,
    tax_division, parentid, parentname, taxonrep, specimenrecords, ...)
}
#' @export
bold_identify_parents.list <- function(x, wide = FALSE, taxid = NULL,
taxon = NULL, tax_rank = NULL, tax_division = NULL, parentid = NULL,
parentname = NULL, taxonrep = NULL, specimenrecords = NULL) {
parentname = NULL, taxonrep = NULL, specimenrecords = NULL, ...) {
assert(wide, "logical")
# get unique set of names
uniqnms <-
......@@ -101,7 +115,7 @@ bold_identify_parents.list <- function(x, wide = FALSE, taxid = NULL,
# get parent names via bold_tax_name and bold_tax_id
out <- stats::setNames(lapply(uniqnms, function(w) {
tmp <- bold_tax_name(w)
tmp <- bold_tax_name(w, ...)
# if length(tmp) > 1, user decides which one
if (NROW(tmp) > 1) {
tmp <- filt(tmp, "taxid", taxid)
......@@ -114,7 +128,7 @@ bold_identify_parents.list <- function(x, wide = FALSE, taxid = NULL,
tmp <- filt(tmp, "specimenrecords", specimenrecords)
}
if (!is.null(tmp$taxid)) {
tmp2 <- bold_tax_id(tmp$taxid, includeTree = TRUE)
tmp2 <- bold_tax_id(tmp$taxid, includeTree = TRUE, ...)
tmp2$input <- NULL
return(tmp2)
} else {
......@@ -126,13 +140,14 @@ bold_identify_parents.list <- function(x, wide = FALSE, taxid = NULL,
# apply parent names to input data
lapply(x, function(z) {
if (is.null(z)) return(NULL)
if (wide) {
# replace each data.frame with a wide version with just
# taxid and taxon name (with col names with rank name)
out <- lapply(out, function(h) do.call("cbind", (apply(h, 1, function(x) {
tmp <- as.list(x[c('taxid', 'taxon')])
tmp <- as.list(x[c("taxid", "taxon")])
tmp$taxid <- as.numeric(tmp$taxid)
data.frame(stats::setNames(tmp, paste0(x['tax_rank'], c('_id', ''))),
data.frame(stats::setNames(tmp, paste0(x["tax_rank"], c("_id", ""))),
stringsAsFactors = FALSE)
}))))
}
......@@ -149,11 +164,12 @@ bold_identify_parents.list <- function(x, wide = FALSE, taxid = NULL,
# function to help filter get_*() functions for a rank name or rank itself ---
filt <- function(df, col, z) {
assert_param(z, deparse(substitute(z)), "character")
if (NROW(df) == 0) {
df
} else {
if (is.null(z)) return(df)
mtch <- grep(sprintf("%s", tolower(z)), tolower(df[,col]))
mtch <- grep(sprintf("%s", tolower(z)), tolower(df[, col]))
if (length(mtch) != 0) {
df[mtch, ]
} else {
......
......@@ -35,7 +35,7 @@ bold_specimens <- function(taxon = NULL, ids = NULL, bin = NULL,
container = NULL, institutions = NULL, researchers = NULL, geo = NULL,
response=FALSE, format = 'tsv', ...) {
format <- match.arg(format, choices = c('xml', 'tsv'))
format <- match.arg(format, choices = c("xml", "tsv"))
args <- bc(list(taxon = pipeornull(taxon), geo = pipeornull(geo),
ids = pipeornull(ids), bin = pipeornull(bin),
container = pipeornull(container),
......
......@@ -34,7 +34,7 @@
#' bold_stats(taxon=c('Coelioxys','Osmia'))
#'
#' ## curl debugging
#' ### These examples below take a long time, so you can set a timeout so that
#' ### These examples below take a long time, so you can set a timeout so that
#' ### it stops by X sec
#' bold_stats(taxon='Osmia', verbose = TRUE)
#' # bold_stats(geo='Costa Rica', timeout_ms = 6)
......
......@@ -49,8 +49,9 @@ bold_tax_name <- function(name, fuzzy = FALSE, response = FALSE, ...) {
process_tax_name <- function(x, y) {
tt <- rawToChar(x$content)
out <- if (x$status_code > 202) "stop" else jsonlite::fromJSON(tt, flatten = TRUE)
if ( length(out) == 0 || identical(out[[1]], list()) || out == "stop" ) {
out <- if (x$status_code > 202) "stop" else
jsonlite::fromJSON(tt, flatten = TRUE)
if (length(out) == 0 || identical(out[[1]], list()) || all(out == "stop")) {
data.frame(input = y, stringsAsFactors = FALSE)
} else {
data.frame(out$top_matched_names, input = y, stringsAsFactors = FALSE)
......
......@@ -39,16 +39,16 @@ check_args_given_nonempty <- function(arguments, x){
process_response <- function(x, y, z, w){
tt <- rawToChar(x$content)
out <- if (x$status_code > 202) "stop" else jsonlite::fromJSON(tt)
if ( length(out) == 0 || identical(out[[1]], list()) || out == "stop" ) {
if ( length(out) == 0 || identical(out[[1]], list()) || any(out == "stop") ) {
data.frame(input = y, stringsAsFactors = FALSE)
} else {
if (w %in% c("stats",'images','geo','sequencinglabs','depository')) out <- out[[1]]
trynames <- tryCatch(as.numeric(names(out)), warning = function(w) w)
if (!inherits(trynames, "simpleWarning")) names(out) <- NULL
if (any(vapply(out, function(x) is.list(x) && length(x) > 0, logical(1)))) {
out <- lapply(out, function(x) Filter(length, x))
out <- lapply(out, function(x) Filter(length, x))
} else {
out <- Filter(length, out)
out <- Filter(length, out)
}
if (!is.null(names(out))) {
df <- data.frame(out, stringsAsFactors = FALSE)
......@@ -87,3 +87,27 @@ strextract <- function(str, pattern) {
strdrop <- function(str, pattern) {
  # Find the first match of `pattern` in each element of `str` ...
  first_hit <- regexpr(pattern, str)
  # ... and return the text on either side of it (invert = TRUE keeps the
  # non-matching pieces rather than the match itself).
  regmatches(str, first_hit, invert = TRUE)
}
assert <- function(x, y) {
  # Class check for an argument. NULL counts as "not supplied" and always
  # passes; otherwise `x` must inherit from at least one class in `y`.
  # The error message names the expression the caller passed as `x`.
  wrong_class <- !is.null(x) && !inherits(x, y)
  if (wrong_class) {
    stop(deparse(substitute(x)), " must be of class ",
      paste0(y, collapse = ", "), call. = FALSE)
  }
}
assert_param <- function(x, param, y) {
  # Same check as a plain class assertion, but the caller supplies the name
  # (`param`) to show in the error message. NULL always passes.
  if (is.null(x) || inherits(x, y)) return(invisible(NULL))
  stop(param, " must be of class ",
    paste0(y, collapse = ", "), call. = FALSE)
}
setrbind <- function(x) {
  # Row-bind a list of data.frames, matching columns by name and filling
  # columns that are missing from some elements, then convert the result
  # to a plain data.frame and return it visibly.
  bound <- data.table::setDF(
    data.table::rbindlist(x, use.names = TRUE, fill = TRUE)
  )
  bound
}
......@@ -15,7 +15,7 @@ This package retrieves data from the BOLD database of barcode clusters, and allo
[Documentation for the BOLD API](http://v4.boldsystems.org/index.php/api_home).
See also the taxize book for more options for taxonomic workflows with BOLD: <https://ropensci.github.io/taxize-book/>
See also the taxize book for more options for taxonomic workflows with BOLD: <https://ropenscilabs.github.io/taxize-book/>
## Package status and installation
......@@ -35,7 +35,9 @@ install.packages("bold")
__Development Version__
Install `sangerseqR` first
Install `sangerseqR` first (used in function `bold::bold_trace()` only)
For R < 3.5
```r
......@@ -43,7 +45,15 @@ source("http://bioconductor.org/biocLite.R")
biocLite("sangerseqR")
```
Then `bold`
For R >= 3.5
```r
install.packages('BiocManager')
BiocManager::install('sangerseqR')
```
Then install `bold`
```r
......@@ -66,16 +76,16 @@ Default is to get a list back
```r
bold_seq(taxon='Coelioxys')[[1]]
#> $id
#> [1] "ABEE013-17"
#> [1] "ABEE117-17"
#>
#> $name
#> [1] "Coelioxys afra"
#> [1] "Coelioxys elongata"
#>
#> $gene
#> [1] "ABEE013-17"
#> [1] "ABEE117-17"
#>
#> $sequence
#> [1] "AATATTATATATAATTTTTGCAATTTGATCAGGTATAATTGGATCTTCATTAAGAATAATTATTCGAATAGAATTAAGAACTCCAGGAAGATGAATCAACAACGATCAAATTTATAATTCTTTTATTACAGCTCATGCATTTTTAATAATTTTTTTTTTAGTAATACCATTTTTAATTGGAGGATTTGGAAATTGATTAGTACCTTTAATACTAGGAGCCCCCGATATAGCTTTTCCACGAATAAATAATGTAAGATTTTGACTATTACCTCCCTCAATTTTCTTATTATTATCAAGAACCCTAATTAACCCAAGAGCTGGTACTGGATGAACTGTATATCCTCCTTTATCCTTATATACATTTCATGCCTCACCTTCCGTTGATTTAGCAATTTTTTCACTTCATTTATCAGGAATTTCATCAATTATTGGATCAATAAATTTTATTGTTACAATCTTAATAATAAAAAATTTTTCTTTAAATTATAGACAAATACCATTATTTTCATGATCAGTTTTAATTACTACAATTTTACTTTTATTATCACTACCAATTTTAGCTGGAGCAATTACTATACTCCTATTTGATCGAAATTTAAATACCTCATTCTTTGACCCAATAGGAGGAGGAGATCCAATTTTATATCAACATTTATTT-----------------"
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTATCATTATATACATATCATCCTTCCCCATCAGTTGATTTAGCAATTTTTTYTTTACATTTATCAGGAATTTYTTYTATTATCGGATCAATAAATTTTATTGTAACAATTTTAATAATAAAAAATTATTCAATAAATTATAATCAAATACCTTTATTTCCATGATCAATTTTAATTACTACAATTTTATTATTATTATCATTACCTGTATTAGCAGGAGCTATTACAATATTATTATTTGATCGTAATTTAAATTCATCATTTTTTGACCCAATAGGAGGAGGAGATCCTATTTTATATCAACATTTATTTTG------------------------------------"
```
You can optionally get back the `crul` response object
......@@ -88,7 +98,7 @@ res$response_headers
#> [1] "HTTP/1.1 200 OK"
#>
#> $date
#> [1] "Wed, 14 Nov 2018 00:49:40 GMT"
#> [1] "Thu, 27 Jun 2019 17:28:47 GMT"
#>
#> $server
#> [1] "Apache/2.2.15 (Red Hat)"
......@@ -117,27 +127,20 @@ By default you download `tsv` format data, which is given back to you as a `data
```r
res <- bold_specimens(taxon='Osmia')
head(res[,1:8])
#> processid sampleid recordID catalognum
#> 1 ABEE151-17 NHMW-HYM 773 8362250
#> 2 ABEE184-17 NHMW-HYM 2142 8362283 NHMW-HYM 2142
#> 3 ABEE185-17 NHMW-HYM 2144 8362284 NHMW-HYM 2144
#> 4 ABEE188-17 NHMW-HYM 2242 8362287 NHMW-HYM 2242
#> 5 ABEE190-17 NHMW-HYM 2259 8362289 NHMW-HYM 2259
#> 6 GBAH3878-08 EU726629 856409 EU726629
#> fieldnum
#> 1
#> 2 NBH2 Zimmermann 2017.04.03 Oesterreich WienAUTWienZimmermann
#> 3 NBH2 Zimmermann 2017.04.03 Oesterreich WienAUTWienZimmermann
#> 4 NHB3 Schoder 2017.05.30 Oesterreich WienAUTWienSchoder
#> 5 NBH9 Schoder 2017.06.15 Oesterreich WienAUTWienSchoder
#> 6
#> institution_storing collection_code bin_uri
#> 1 Naturhistorisches Museum Wien NA BOLD:AAE5409
#> 2 Naturhistorisches Museum Wien NA BOLD:AAE5409
#> 3 Naturhistorisches Museum Wien NA BOLD:ADJ1069
#> 4 Naturhistorisches Museum Wien NA BOLD:AAF2155
#> 5 Naturhistorisches Museum Wien NA BOLD:AAD0313
#> 6 Mined from GenBank, NCBI NA BOLD:AAA4494
#> processid sampleid recordID catalognum fieldnum
#> 1 ASGCB255-13 BIOUG07489-F04 3955532 BIOUG07489-F04
#> 2 BEECA122-06 04-ON-0122 281152 04-ON-0122
#> 3 BEECA186-06 03-BC-0186 281216 03-BC-0186
#> 4 BEECA373-06 05-NT-0373 514740 05-NT-0373
#> 5 BEECA501-06 06-ON-0501 514868 06-ON-0501
#> 6 BEECA601-06 06-YT-0601 516953 06-YT-0601
#> institution_storing collection_code bin_uri
#> 1 Biodiversity Institute of Ontario NA BOLD:ABZ2181
#> 2 York University, Packer Collection NA BOLD:AAC8510
#> 3 York University, Packer Collection NA BOLD:AAC2237
#> 4 York University, Packer Collection NA BOLD:AAI2013
#> 5 York University, Packer Collection NA BOLD:AAC5789
#> 6 York University, Packer Collection NA BOLD:AAB4644
```
### Search for specimen plus sequence data
......@@ -148,11 +151,11 @@ By default you download `tsv` format data, which is given back to you as a `data
```r
res <- bold_seqspec(taxon='Osmia', sepfasta=TRUE)
res$fasta[1:2]
#> $`ABEE151-17`
#> [1] "----------------------------------------------------------TTTTTGCTATATGATCAGGTACAGTAGGTTCAGCTATAAGAATTATTATTCGAATAGAACTTAGAGTTCCAGGATCATGAATTTCTAATGACCAAATTTATAATACTTTAGTAACTGCTCATGCTTTTTTAATAATTTTCTTTCTTGTAATACCATTTCTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCCTTTCCACGAATAAATAATATTAGATTTTGACTTTTACCACCTTCTTTAATATTATTAATATTAAGAAATTTTATAAATCCAAGTCCAGGAACTGGATGAACTGTTTATCCTCCTCTTTCATCTTATATATTTCATTCTTCCCCATCAGTAGATTTAGCAATTTTTTCATTACATATTTCCGGATTATCCTCTATTATAGGTTCATTAAATTTTATTGTCACAATTATTATAATAAAAAATATTTCATTAAAACATACTCAATTACCCTTATTTTCTTGATCTGTATTTATTACTACTATTTTATTACTTTTCTCTCTCCCAGTTTTAGCTGGAGCTATTACTATACTTTTATTTGATCGAAATTTTAACACCTCATTTTTTGACCCGACGGGAGGTGGAGATCCAATTTTATACCAACATTTATTTTGATTTTTTGGACAT-----------------------"
#> [[1]]
#> NULL
#>
#> $`ABEE184-17`
#> [1] "-------CTCACTATAGGGATTCAACCAATCATAAAGATATTGGAATTCTTTATATAATTTTTGCTATATGATCAGGTACAGTAGGTTCAGCTATAAGAATTATTATTCGAATAGAACTTAGAGTTCCAGGATCATGAATTTCTAATGACCAAATTTATAATACTTTAGTAACTGCTCATGCTTTTTTAATAATTTTCTTTCTTGTAATACCATTTCTAATTGGAGGATTTGGAAATTGATTAATTCCTTTAATATTAGGAATTCCAGATATAGCCTTTCCACGAATAAATAATATTAGATTTTGACTTTTACCACCTTCTTTAATATTATTAATATTAAGAAATTTTATAAATCCAAGTCCAGGAACTGGATGAACTGTTTATCCTCCTCTTTCATCTTATATATTTCATTCTTCCCCATCAGTAGATTTAGCAATTTTTTCATTACATATTTCCGGATTATCCTCTATTATAGGTTCATTAAATTTTATTGTCACAATTATTATAATAAAAAATATTTCATTAAAACATACTCAATTACCCTTATTTTCTTGATCTGTATTTATTACTACTATTTTATTACTTTTCTCTCTCCCAGTTTTAGCTGGAGCTATTACTATACTTTTATTTGATCGAAATTTTAACACCTCATTTTTTGACC-------------------------------------------------------------------------------"
#> [[2]]
#> NULL
```
Or you can index to a specific sequence like
......@@ -160,8 +163,8 @@ Or you can index to a specific sequence like
```r
res$fasta['GBAH0293-06']
#> $`GBAH0293-06`
#> [1] "------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------TTAATGTTAGGGATTCCAGATATAGCTTTTCCACGAATAAATAATATTAGATTTTGACTGTTACCTCCATCTTTAATATTATTACTTTTAAGAAATTTTTTAAATCCAAGTCCTGGAACAGGATGAACAGTTTATCCTCCTTTATCATCAAATTTATTTCATTCTTCTCCTTCAGTTGATTTAGCAATTTTTTCTTTACATATTTCAGGTTTATCTTCTATTATAGGTTCATTAAATTTTATTGTTACAATTATTATAATAAAAAATATTTCTTTAAAATATATTCAATTACCTTTATTTTCTTGATCTGTATTTATTACTACTATTCTTTTATTATTTTCTTTACCTGTATTAGCTGGAGCTATTACTATATTATTATTTGATCGAAATTTTAATACATCTTTTTTTGATCCAACAGGAGGGGGAGATCCAATTCTTTATCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGATTAATTTCTCAAATTATTTCTAATGAAAGAGGAAAAAAAGAAACTTTTGGAAATATTGGTATAATTTATGCTATATTAAGAATTGGACTTTTAGGTTTTATTGTT---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------"
#> [[1]]
#> NULL
```
### Get trace files
......@@ -201,6 +204,13 @@ Using `taxize::downstream` get children of _Arthropoda_
```r
x <- downstream("Arthropoda", db = "ncbi", downto = "class")
#> ══ 1 queries ═══════════════
#> ✔ Found: Arthropoda
#> ══ Results ═════════════════
#>
#> ● Total: 1
#> ● Found: 1
#> ● Not Found: 0
nms <- x$Arthropoda$childtaxa_name
```
......@@ -250,6 +260,8 @@ Get citation information for `bold` in R by running: `citation(package = 'bold')
* Please [report any issues or bugs](https://github.com/ropensci/bold/issues)
* License: MIT
* Get citation information for `bold` in R by running `citation(package = 'bold')`
* Please note that this project is released with a [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms.
* Please note that this project is released with a [Contributor Code of Conduct][coc]. By participating in this project you agree to abide by its terms.
[![ropensci_footer](https://ropensci.org/public_images/github_footer.png)](https://ropensci.org)
[coc]: https://github.com/ropensci/bold/blob/master/CODE_OF_CONDUCT.md
No preview for this file type
......@@ -13,7 +13,7 @@
+ [BOLD home page](http://boldsystems.org/)
+ [BOLD API docs](http://v4.boldsystems.org/index.php/api_home)
See also the taxize book for more options for taxonomic workflows with BOLD: <https://ropensci.github.io/taxize-book/>
See also the taxize book for more options for taxonomic workflows with BOLD: <https://ropenscilabs.github.io/taxize-book/>
### Using bold
......
......@@ -2,7 +2,7 @@
% Please edit documentation in R/bold_filter.R
\name{bold_filter}
\alias{bold_filter}
\title{Get BOLD specimen + sequence data.}
\title{Filter BOLD specimen + sequence data (output of bold_seqspec)}
\usage{
bold_filter(x, by, how = "max")
}
......@@ -26,7 +26,8 @@ sequence, respectively. Note that we remove gap/alignment characters
a tibble/data.frame
}
\description{
Get BOLD specimen + sequence data.
Picks either shortest or longest sequences, for a given grouping variable
(e.g., species name)
}
\examples{
\dontrun{
......
......@@ -6,7 +6,7 @@
\usage{
bold_identify_parents(x, wide = FALSE, taxid = NULL, taxon = NULL,
tax_rank = NULL, tax_division = NULL, parentid = NULL,
parentname = NULL, taxonrep = NULL, specimenrecords = NULL)
parentname = NULL, taxonrep = NULL, specimenrecords = NULL, ...)
}
\arguments{
\item{x}{(data.frame/list) list of data.frames - the output from a call to
......@@ -37,6 +37,9 @@ below.}
\item{specimenrecords}{(character) A specimenrecords name. Optional.
See `Filtering` below.}
\item{...}{Further args passed on to \code{\link[crul]{verb-GET}}, main
purpose being curl debugging}
}
\value{
a list of the same length as the input
......@@ -93,5 +96,14 @@ head(out[[1]])
out <- bold_identify_parents(df, wide = TRUE)
str(out)
head(out[[1]])
x <- bold_seq(taxon = "Satyrium")
out <- bold_identify(c(x[[1]]$sequence, x[[13]]$sequence))
res <- bold_identify_parents(out)
res
x <- bold_seq(taxon = 'Diplura')
out <- bold_identify(vapply(x, "[[", "", "sequence")[1:20])
res <- bold_identify_parents(out)
}
}
......@@ -72,7 +72,7 @@ res$response_headers
bold_stats(taxon=c('Coelioxys','Osmia'))
## curl debugging
### These examples below take a long time, so you can set a timeout so that
### These examples below take a long time, so you can set a timeout so that
### it stops by X sec
bold_stats(taxon='Osmia', verbose = TRUE)
# bold_stats(geo='Costa Rica', timeout_ms = 6)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
http_interactions:
- request:
method: get
uri: http://v4.boldsystems.org/index.php/API_Public/stats?taxon=Osmia&dataType=drill_down&format=json
body:
encoding: ''
string: ''
headers:
User-Agent: libcurl/7.54.0 r-curl/3.3 crul/0.7.0.9100
Accept-Encoding: gzip, deflate
Accept: application/json, text/xml, application/xml, */*
response:
status:
status_code: '200'
message: OK
explanation: Request fulfilled, document follows
headers:
status: HTTP/1.1 200 OK
date: Mon, 28 Jan 2019 23:47:49 GMT
server: Apache/2.2.15 (Red Hat)
x-powered-by: PHP/5.3.15
content-disposition: attachment; filename=bold_data.json
content-length: '800'
connection: close
content-type: application/x-download
body:
encoding: UTF-8
string: '{"total_records":904,"records_with_species_name":875,"bins":{"count":151,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"countries":{"count":20,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"depositories":{"count":18,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"order":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"family":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"genus":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"species":{"count":130,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}}}'
recorded_at: 2019-01-28 23:47:49 GMT
recorded_with: vcr/0.2.2, webmockr/0.3.0
http_interactions:
- request:
method: get
uri: http://v4.boldsystems.org/index.php/API_Public/stats?taxon=Coelioxys%7COsmia&dataType=drill_down&format=json
body:
encoding: ''
string: ''
headers:
User-Agent: libcurl/7.54.0 r-curl/3.3 crul/0.7.0.9100
Accept-Encoding: gzip, deflate
Accept: application/json, text/xml, application/xml, */*
response:
status:
status_code: '200'
message: OK
explanation: Request fulfilled, document follows
headers:
status: HTTP/1.1 200 OK
date: Mon, 28 Jan 2019 23:47:50 GMT
server: Apache/2.2.15 (Red Hat)
x-powered-by: PHP/5.3.15
content-disposition: attachment; filename=bold_data.json
content-length: '802'
connection: close
content-type: application/x-download
body:
encoding: UTF-8
string: '{"total_records":1219,"records_with_species_name":1184,"bins":{"count":211,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"countries":{"count":31,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"depositories":{"count":21,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"order":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"family":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"genus":{"count":2,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"species":{"count":183,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}}}'
recorded_at: 2019-01-28 23:47:51 GMT
recorded_with: vcr/0.2.2, webmockr/0.3.0
http_interactions:
- request:
method: get
uri: http://v4.boldsystems.org/index.php/API_Public/stats?taxon=Osmia&dataType=drill_down&format=json
body:
encoding: ''
string: ''
headers:
User-Agent: libcurl/7.54.0 r-curl/3.3 crul/0.7.0.9100
Accept-Encoding: gzip, deflate
Accept: application/json, text/xml, application/xml, */*
response:
status:
status_code: '200'
message: OK
explanation: Request fulfilled, document follows
headers:
status: HTTP/1.1 200 OK
date: Mon, 28 Jan 2019 23:47:49 GMT
server: Apache/2.2.15 (Red Hat)
x-powered-by: PHP/5.3.15
content-disposition: attachment; filename=bold_data.json
content-length: '800'
connection: close
content-type: application/x-download
body:
encoding: UTF-8
string: '{"total_records":904,"records_with_species_name":875,"bins":{"count":151,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"countries":{"count":20,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"depositories":{"count":18,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"order":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"family":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"genus":{"count":1,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}},"species":{"count":130,"drill_down":{"entity":[{"name":null,"records":null},{"name":null,"records":null}]}}}'
recorded_at: 2019-01-28 23:47:50 GMT
recorded_with: vcr/0.2.2, webmockr/0.3.0
http_interactions:
- request:
method: get
uri: http://v4.boldsystems.org/index.php/API_Tax/TaxonData?taxId=88899&dataTypes=basic
body:
encoding: ''
string: ''