test-IO.R 17.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
################################################################################
# Use testthat to test file import and resulting class (and values)
################################################################################
library("phyloseq"); library("testthat")
# # # # TESTS!

################################################################################
# import_mothur tests
mothlist  <- system.file("extdata", "esophagus.fn.list.gz", package="phyloseq")
mothgroup <- system.file("extdata", "esophagus.good.groups.gz", package="phyloseq")
mothtree  <- system.file("extdata", "esophagus.tree.gz", package="phyloseq")
cutoff    <- "0.10"
esophman  <- import_mothur(mothlist, mothgroup, mothtree, cutoff)	
# mothur "Shared" file, create with mothur from these example data files
mothshared = system.file("extdata", "esophagus.fn.shared.gz", package="phyloseq")
constaxonomy = system.file("extdata", "mothur_example.cons.taxonomy.gz", package="phyloseq")

test_that("import_mothur: import of esophagus dataset from mothur files in extdata/ produces a phyloseq object", {
	expect_that(esophman, is_a("phyloseq"))
})

test_that("import_mothur: The two phyloseq objects, example and just-imported, are identical", {
	data("esophagus")
	expect_that(esophagus, is_equivalent_to(esophman))
})

test_that("import_mothur: Test mothur file import on the (esophagus data).", {
	smlc <- show_mothur_cutoffs(mothlist)
	expect_that(smlc, is_equivalent_to(c("unique", "0.00", "0.01", "0.02", "0.03", "0.04", "0.05", "0.06", "0.07", "0.08", "0.09", "0.10")))	
})

test_that("import_mothur: abundances can be manipulated mathematically", {
	x1 <- as(otu_table(esophman), "matrix")
	expect_that(2*x1-x1, is_equivalent_to(x1) )
})

test_that("import_mothur: empty stuff is NULL", {
	expect_that(tax_table(esophman, FALSE), is_a("NULL"))
	expect_that(sample_data(esophman, FALSE), is_a("NULL"))
})

test_that("import_mothur: Expected classes of non-empty components", {
	expect_that(otu_table(esophman), is_a("otu_table"))
	expect_that(phy_tree(esophman), is_a("phylo"))
})

test_that("import_mothur: imported files become S4 object", {
	expect_that(isS4(esophman), is_true())
})

test_that("import_mothur: show method output tests", {
  expect_output(print(esophman), "phyloseq-class experiment-level object")
})

test_that("Test newer supported mothur output files, constaxonomy and shared files", {
  # Shared file
  sharedOTU = import_mothur(mothur_shared_file=mothshared, cutoff=cutoff)
  expect_is(sharedOTU, "otu_table")
  expect_equivalent(as(sharedOTU[1:5, ], "matrix")[, "B"], c(50, 37, 14, 52, 2))
  expect_equivalent(as(sharedOTU[1, ], "matrix")[1, ], c(50, 19, 5))
  # Constaxonomy file
  tax = import_mothur(mothur_constaxonomy_file=constaxonomy)
  expect_is(tax, "taxonomyTable")
})

################################################################################
# import_RDP tests
test_that("the import_RDP_otu function can properly read gzipped-example", {
  # Setup data
	otufile <- system.file("extdata",
	                       "rformat_dist_0.03.txt.gz",
	                       package="phyloseq")
	ex_otu  <- import_RDP_otu(otufile)	
	# test expectations
	expect_output(print(head(t(ex_otu))), "OTU Table:")
	expect_is(ex_otu, "otu_table")
	expect_equal(ntaxa(ex_otu), 5276)
	expect_equal(nsamples(ex_otu), 14)
	expect_is(sample_sums(ex_otu), "numeric")
})


################################################################################
# import_qiime tests
################################################################################
otufile <- system.file("extdata", "GP_otu_table_rand_short.txt.gz", package="phyloseq")
mapfile <- system.file("extdata", "master_map.txt", package="phyloseq")
trefile <- system.file("extdata", "GP_tree_rand_short.newick.gz", package="phyloseq")
rs_file <- system.file("extdata", "qiime500-refseq.fasta", package="phyloseq")

t0 <- import_qiime(otufile, mapfile, trefile, rs_file, verbose=FALSE)
test_that("Class of import result is phyloseq-class", {
93
	expect_is(t0, "phyloseq")
94 95 96
})

test_that("Classes of components are as expected", {
97 98 99 100 101
	expect_is(otu_table(t0), ("otu_table"))
	expect_is(tax_table(t0), ("taxonomyTable"))
	expect_is(sample_data(t0), ("sample_data"))
	expect_is(phy_tree(t0), ("phylo"))		
	expect_is(refseq(t0), ("DNAStringSet"))
102 103 104
})

test_that("Features of the abundance data are consistent, match known values", {
105 106 107 108 109 110 111 112
	expect_equal(sum(taxa_sums(t0)), (1269671L))
	expect_equal(sum(taxa_sums(t0)==0), (5L))
	expect_equal(sum(taxa_sums(t0)>=100), (183L))
	expect_equal(sum(taxa_sums(t0)), (sum(sample_sums(t0))))
	expect_equal(sum(sample_sums(t0) > 10000L), (20L))
	expect_equal(nsamples(t0), (26L))
	expect_equal(ntaxa(t0), (500L))
	expect_equal(length(rank_names(t0)), (7L))
113 114 115
})

test_that("Features of the taxonomy table match expected values", {
116
	expect_equal(length(rank_names(t0)), (7L))
117 118 119
	expect_equal(rank_names(t0), 
               c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"))
	tax53 = as(tax_table(t0), "matrix")[53, ]
120 121 122 123 124
	expect_equivalent(
	  tax53, 
	  c("Bacteria", "Proteobacteria", "Deltaproteobacteria",
	    "Desulfovibrionales", "Desulfomicrobiaceae", 
	    "Desulfomicrobium", "Desulfomicrobiumorale"))
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
})
################################################################################
# parse function tests - note, these are also used by import_biom

test_that("Taxonomy vector parsing functions behave as expected", {
		
	chvec1 = c("Bacteria", "Proteobacteria", "Gammaproteobacteria",
		"Enterobacteriales", "Enterobacteriaceae", "Escherichia")
	
	chvec2 = c("k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria",
		"o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__")
	
	chvec3 = c("Root", "k__Bacteria", "p__Firmicutes", "c__Bacilli",
		"o__Bacillales", "f__Staphylococcaceae")
	
	# Example where only some entries have greengenes prefix.
	chvec4 = c("Root", "k__Bacteria", "Firmicutes", "c__Bacilli",
		"o__Bacillales", "Staphylococcaceae", "z__mistake")

	# Even more terrible example, where leading or trailing space characters included
	# (the exact weirdnes of chvec4, compounded by leading and/or trailing space characters)
	chvec5 = c("  Root \n ", " k__Bacteria", "  Firmicutes", " c__Bacilli   ",
		"o__Bacillales  ", "Staphylococcaceae ", "\t z__mistake \t\n")		

	# This should give a warning because there were no greengenes prefixes
	expect_warning(t1 <- parse_taxonomy_greengenes(chvec1))
	# And output from previous call, t1, should be identical to default
	expect_that(parse_taxonomy_default(chvec1), is_equivalent_to(t1))
	
	# All the greengenes entries get trimmed by parse_taxonomy_greengenes
	expect_that(all(sapply(chvec2, nchar) > sapply(parse_taxonomy_greengenes(chvec2), nchar)), is_true())
	# None of the greengenes entries are trimmed by parse_taxonomy_default
	expect_that(any(sapply(chvec2, nchar) > sapply(parse_taxonomy_default(chvec2), nchar)), is_false())
	
	# Check that the "Root" element is not removed by parse_taxonomy_greengenes and parse_taxonomy_default.
	expect_that("Root" %in% chvec3, is_true())
	expect_that("Root" %in% parse_taxonomy_default(chvec3), is_true())
	expect_that(length(parse_taxonomy_default(chvec3)) == length(chvec3), is_true())
	
	# Check that non-greengenes prefixes, and those w/o prefixes, are given dummy rank(s)
	chvec4ranks = names(parse_taxonomy_greengenes(chvec4))
	expect_that(grep("Rank", chvec4ranks, fixed=TRUE), is_equivalent_to(c(1, 3, 6, 7)))
	# Check that everything given dummy rank in default parse.
	chvec4ranks = names(parse_taxonomy_default(chvec4))
	expect_that(grep("Rank", chvec4ranks, fixed=TRUE), is_equivalent_to(1:7))
	
	# chvec4 and chvec5 result in identical vectors.
	expect_that(parse_taxonomy_default(chvec4), is_equivalent_to(parse_taxonomy_default(chvec5)))
	expect_that(parse_taxonomy_greengenes(chvec4), is_equivalent_to(parse_taxonomy_greengenes(chvec5)))	
	
	# The names of chvec5, greengenes parsed, should be...
	correct5names = c("Rank1", "Kingdom", "Rank3", "Class", "Order", "Rank6", "Rank7")
	expect_that(names(parse_taxonomy_greengenes(chvec5)), is_equivalent_to(correct5names))
})

################################################################################
# import_biom tests

rich_dense_biom  <- system.file("extdata", "rich_dense_otu_table.biom",  package="phyloseq")
rich_sparse_biom <- system.file("extdata", "rich_sparse_otu_table.biom", package="phyloseq")
min_dense_biom   <- system.file("extdata", "min_dense_otu_table.biom",   package="phyloseq")
min_sparse_biom  <- system.file("extdata", "min_sparse_otu_table.biom",  package="phyloseq")
# the tree and refseq file paths that are suitable for all biom format style examples
treefilename = system.file("extdata", "biom-tree.phy",  package="phyloseq")
refseqfilename = system.file("extdata", "biom-refseq.fasta",  package="phyloseq")

test_that("Importing biom files yield phyloseq objects", {
	library("biomformat")
193
	rdbiom = read_biom(rich_dense_biom)
194 195 196 197 198
	rsbiom = read_biom(rich_sparse_biom)

	rich_dense = import_biom(rdbiom)
	rich_sparse = import_biom(rsbiom)

199 200
	expect_is(rich_dense,  ("phyloseq"))
	expect_is(rich_sparse, ("phyloseq"))
201
	
202 203
	expect_equal(ntaxa(rich_dense), (5L))
	expect_equal(ntaxa(rich_sparse), (5L))
204 205 206

	# # Component classes
	# sample_data
207 208
	expect_is(access(rich_dense,  "sam_data"), ("sample_data"))
	expect_is(access(rich_sparse, "sam_data"), ("sample_data"))
209 210

	# taxonomyTable
211 212
	expect_is(access(rich_dense,  "tax_table"), ("taxonomyTable"))
	expect_is(access(rich_sparse, "tax_table"), ("taxonomyTable"))
213 214

	# otu_table
215 216
	expect_is(access(rich_dense,  "otu_table"), ("otu_table"))
	expect_is(access(rich_sparse, "otu_table"), ("otu_table"))
217 218
})

219
test_that("The different types of biom files yield phyloseq objects", {
220 221 222 223 224
	rich_dense = import_biom(rich_dense_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	rich_sparse = import_biom(rich_sparse_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	min_dense = import_biom(min_dense_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	min_sparse = import_biom(min_sparse_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	
225 226 227 228
	expect_is(rich_dense,  ("phyloseq"))
	expect_is(rich_sparse, ("phyloseq"))
	expect_is(min_dense,   ("phyloseq"))
	expect_is(min_sparse,  ("phyloseq"))
229

230 231 232 233
	expect_equal(ntaxa(rich_dense), (5L))
	expect_equal(ntaxa(rich_sparse), (5L))
	expect_equal(ntaxa(min_dense), (5L))
	expect_equal(ntaxa(min_sparse), (5L))			
234 235 236

	# # Component classes
	# sample_data
237 238 239 240
	expect_is(access(rich_dense,  "sam_data"), ("sample_data"))
	expect_is(access(rich_sparse, "sam_data"), ("sample_data"))
	expect_is(access(min_dense,   "sam_data"), ("NULL"))
	expect_is(access(min_sparse,  "sam_data"), ("NULL"))
241 242

	# taxonomyTable
243 244 245 246
	expect_is(access(rich_dense,  "tax_table"), ("taxonomyTable"))
	expect_is(access(rich_sparse, "tax_table"), ("taxonomyTable"))
	expect_is(access(min_dense,   "tax_table"), ("NULL"))
	expect_is(access(min_sparse,  "tax_table"), ("NULL"))		
247 248
	
	# phylo tree
249 250 251 252
	expect_is(access(rich_dense,  "phy_tree"), ("phylo"))
	expect_is(access(rich_sparse, "phy_tree"), ("phylo"))
	expect_is(access(min_dense,   "phy_tree"), ("phylo"))
	expect_is(access(min_sparse,  "phy_tree"), ("phylo"))
253 254

	# reference sequences
255 256 257 258 259 260 261 262
	expect_true(inherits(access(rich_dense,  "refseq"), "XStringSet"))
	expect_true(inherits(access(rich_sparse,  "refseq"), "XStringSet"))
	expect_true(inherits(access(min_dense,  "refseq"), "XStringSet"))
	expect_true(inherits(access(min_sparse,  "refseq"), "XStringSet"))
	expect_is(access(rich_dense,  "refseq"), ("DNAStringSet"))
	expect_is(access(rich_sparse, "refseq"), ("DNAStringSet"))
	expect_is(access(min_dense,   "refseq"), ("DNAStringSet"))
	expect_is(access(min_sparse,  "refseq"), ("DNAStringSet"))	
263 264
		
	# otu_table		
265 266 267 268
	expect_is(access(rich_dense,  "otu_table"), ("otu_table"))
	expect_is(access(rich_sparse, "otu_table"), ("otu_table"))
	expect_is(access(min_dense,   "otu_table"), ("otu_table"))
	expect_is(access(min_sparse,  "otu_table"), ("otu_table"))
269 270 271 272
	
	# Compare values in the otu_table. For some reason the otu_tables are not identical
	# one position is plus-two, another is minus-two
	combrich <- c(access(rich_dense, "otu_table"), access(rich_sparse, "otu_table"))
273 274 275
	expect_equivalent(sum(diff(combrich, length(access(rich_dense, "otu_table")))), (0))
	expect_equivalent(max(diff(combrich, length(access(rich_dense, "otu_table")))), (2))
	expect_equivalent(min(diff(combrich, length(access(rich_dense, "otu_table")))), (-2))
276
	combmin <- c(access(min_dense, "otu_table"), access(min_sparse, "otu_table"))
277 278 279
	expect_equivalent(sum(diff(combmin, length(access(min_dense, "otu_table")))), (0))
	expect_equivalent(max(diff(combmin, length(access(min_dense, "otu_table")))), (2))
	expect_equivalent(min(diff(combmin, length(access(min_dense, "otu_table")))), (-2))
280

281 282
	expect_equivalent(access(min_dense, "otu_table"), (access(rich_dense, "otu_table")))
	expect_equivalent(access(min_sparse, "otu_table"), (access(rich_sparse, "otu_table")))
283 284

	# Compare values in the sample_data
285
	expect_equivalent(access(rich_dense, "sam_data"), (access(rich_sparse, "sam_data")))
286 287
	
	# Compare values in the taxonomyTable
288
	expect_equivalent(access(rich_dense, "tax_table"), (access(rich_sparse, "tax_table")))
289 290 291 292 293 294

})

test_that("the import_biom and import(\"biom\", ) syntax give same result", {
	x1 <- import_biom(rich_dense_biom, parseFunction=parse_taxonomy_greengenes)
	x2 <- import("biom", BIOMfilename=rich_dense_biom, parseFunction=parse_taxonomy_greengenes)	
295
	expect_equivalent(x1, x2)
296 297 298 299 300 301
})
################################################################################
# read_tree tests
test_that("The read_tree function works as expected:", {
	GPNewick <- read_tree(system.file("extdata", "GP_tree_rand_short.newick.gz", package="phyloseq"))
	expect_that(GPNewick, is_a("phylo"))
302 303 304 305
	expect_equal(ntaxa(GPNewick), length(GPNewick$tip.label))
	expect_equal(ntaxa(GPNewick), 500L)
	expect_equal(GPNewick$Nnode, 499L)
	expect_equivalent(taxa_names(GPNewick), GPNewick$tip.label)
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
	# Now read a nexus tree... 
	# Some error-handling expectations
	expect_that(read_tree("alskflsakjsfskfhas.akshfaksj"), gives_warning()) # file not exist
	not_tree <- system.file("extdata", "esophagus.good.groups.gz", package="phyloseq")
	expect_that(read_tree(not_tree), is_a("NULL")) # file not a tree, gives NULL
	expect_that(read_tree(not_tree, TRUE), throws_error()) # file not a tree, check turned on/TRUE
})
# read_tree_greengenes
test_that("The specialized read_tree_greengenes function works:", {
  # The included, gzipped version of the the 13_5 73% similarity greengenes tree.
  # It causes ape::read.tree to fail with an error, but read_tree_greengenes should be fine.
  treefile = system.file("extdata", "gg13-5-73.tree.gz", package="phyloseq")
  x = read_tree_greengenes(treefile)
  expect_is(x, "phylo")
  # Happen to know that all OTU names should be numbers.
  expect_match(x$tip.label, "[[:digit:]]+")
  # All tip/OTU names should be unique
  expect_false(any(duplicated(taxa_names(x))))
324 325 326
  # The more general read_tree function reads, but they are not equal
  y = read_tree(treefile)
  expect_false(all.equal(x, y))
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
})
################################################################################
# microbio_me_qiime tests
# This tests different features and expected behavior for
# the functioning of an interface function to the 
# microbio.me/qiime data repository.
#
zipfile = "study_816_split_library_seqs_and_mapping.zip"
zipfile = system.file("extdata", zipfile, package="phyloseq")
tarfile = "study_816_split_library_seqs_and_mapping.tar.gz"
tarfile = system.file("extdata", tarfile, package="phyloseq")
tarps = suppressWarnings(microbio_me_qiime(tarfile))
zipps = suppressWarnings(microbio_me_qiime(zipfile))
# This function is intended to interface with an external server,
# as described in the documentation.
# However, I don't want successful testing of this package to 
# rely on the presence or form of particular files on an 
# external server, so these tests will be done exclusively on 
# compressed file(s) representing what is exposed by the data server
# It is up to the user to provide valid a URL in practice,
# and the function attempts to provide informative status
# and error messages if things go awry.
test_that("The microbio_me_qiime imports as expected: .tar.gz", {
  expect_is(tarps, "phyloseq")
  expect_is(sample_data(tarps, errorIfNULL=FALSE), "sample_data")
  expect_is(otu_table(tarps, errorIfNULL=FALSE), "otu_table")
  expect_identical(nrow(otu_table(tarps)), 50L)
  expect_identical(nrow(sample_data(tarps)), 15L)
})
test_that("The microbio_me_qiime imports as expected: .zip", {  
  expect_is(zipps, "phyloseq")
  expect_is(sample_data(zipps, errorIfNULL=FALSE), "sample_data")
  expect_is(otu_table(zipps, errorIfNULL=FALSE), "otu_table")
  expect_identical(nrow(otu_table(zipps)), 50L)
  expect_identical(nrow(sample_data(zipps)), 15L)
})
test_that("Results of .tar.gz and .zip should be identical", {  
  expect_identical(tarps, zipps)
  expect_identical(sample_data(tarps), sample_data(zipps))
  expect_identical(otu_table(tarps), otu_table(zipps))
})
################################################################################
# import_usearch_uc
################################################################################
usearchfile = system.file("extdata", "usearch.uc", package="phyloseq")
OTU1 = import_usearch_uc(usearchfile)
test_that("import_usearch_uc: Properly omit entries from failed search", {  
  ucLines = readLines(usearchfile)
  expect_identical( sum(OTU1), (length(ucLines) - length(grep("*", ucLines, fixed=TRUE))) )
  expect_identical( nrow(OTU1), 37L)
  expect_identical( nrow(OTU1), nsamples(OTU1))
  expect_identical( ncol(OTU1), ntaxa(OTU1))
  expect_identical( ncol(OTU1), 33L)
  expect_equivalent(colSums(OTU1)[1:6], c(6, 1, 2, 1, 1, 1))
})
################################################################################