Commit a6536245 authored by Andreas Tille's avatar Andreas Tille

New upstream version 2.2-6

parents
Package: prabclus
Title: Functions for Clustering of Presence-Absence, Abundance and
Multilocus Genetic Data
Version: 2.2-6
Date: 2015-01-14
Author: Christian Hennig <c.hennig@ucl.ac.uk>,
Bernhard Hausdorf <Hausdorf@zoologie.uni-hamburg.de>
Depends: R (>= 2.10), MASS, mclust
Suggests: spdep, maptools, foreign, mvtnorm
Description: Distance-based parametric bootstrap tests for clustering with
spatial neighborhood information. Some distance measures,
Clustering of presence-absence, abundance and multilocus genetical data
for species delimitation, nearest neighbor
based noise detection. Try package?prabclus for on overview.
Maintainer: Christian Hennig <c.hennig@ucl.ac.uk>
License: GPL
URL: http://www.homepages.ucl.ac.uk/~ucakche
Packaged: 2015-01-14 18:16:04 UTC; chrish
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2015-01-14 22:49:24
0a750f1de08c409ca729b05dca16878e *DESCRIPTION
a90e954c24f5e2d12f77bc865afbd330 *NAMESPACE
9f070d14719b47e9d668a5f07ea95925 *R/NNclean.R
07436e115dbdc0ca7d8d96fce5620ea0 *R/abundfunctions.R
115b45f8378c745df5e8b1d06309f1ef *R/alleletest.R
f2a6aea419b2c435341fe67e2f9824f7 *R/autoreg.R
9f6e91d410bdc0a380b06560f3073db4 *R/cluspop.nb.R
19284a1ed4dfea41edf6e4e2303a79e3 *R/comp.test.R
1f266e319e9e57a4ffc62dcd0f9b4a7b *R/concomp.R
a077c04e17e30675a674c75543105aa2 *R/conregmat.R
c22e467136abc048adf0163d96505219 *R/distratio.R
373884ce2bb9bb7815d86d21df673354 *R/geodist.R
59f102ad318a5e9c1ca7a4c67115caaa *R/homogen.test.R
ce325d9a5293464c2f055fc757228cdd *R/incmatrix.R
36e08ea4257575c18dd6280f755e7d64 *R/jaccard.R
eef84c86efc5fa0fbaab5a8d19c88421 *R/kulczynski.R
a0648af343d515b17ea643093a7fce6f *R/lcomponent.R
7debeb4eb91c66b2f7b7281749f3d3f6 *R/nbtest.R
7d6dfeda66e615a3990bb7a8b2fda954 *R/newprabmatrix.R
26243bfd6071896f389235cc76e97d60 *R/nn.R
ca52f7676bebc47aae515c9d434f0c07 *R/pop.sim.R
64eaf3636eab62daa3ca086e07b26eb7 *R/prabclust.R
dba3746883c2bd49790407231b7d90d3 *R/prabinit.R
0d58d5839bc2d3877841762f9ccb6eb9 *R/print.prab.R
76c7823f07cf686c8d4ce10c2bfd37c0 *R/print.prabclust.R
a52b46422e0d6501a16859a990ba70a3 *R/qkulczynski.R
738ce7d3b8c47baa980d95f1fad72e85 *R/randpop.nb.R
f3650a649c680dbe2e0f4de1c9d0219d *README
0c19b3b2d075c6211df97f04505398cf *data/kykladspecreg.rda
85ad90aabb8fdacde3d3cb6114778c60 *data/kykladspecreg.txt.gz
1af235643236f0ca7514ed6478b5d751 *data/nb.rda
fc8ed3d0ead932f0cc212075d8203f2b *data/siskiyou.rda
38eab42bac0c9bc085cce901c0cbc2cb *data/tetragonula.rda
aa645a06ca8c4305aa8557b61408526e *data/veronica.rda
9fffc4771d7f653e0a9533c7ae4b2049 *data/waterdist.rda
5b032d85fb132f2a8234d4ea932eecf0 *data/waterdist.txt.gz
dbde8ff731b3a2457c9d715157b30bc2 *inst/extdata/00Index.old
0f3c2be3ead69bf0c7786e38bfa6073e *inst/extdata/Franck04koord.txt
303087dd59626d5ee5816aaebbe06448 *inst/extdata/Heterotrigona_indoFO.txt
69be07a3894dc5fbea1b998792cd7bf4 *inst/extdata/LeiMik1.txt
6da726fa97d224945b0c2e235f69e172 *inst/extdata/LeiMik1G.txt
e27cc50e27882b3c6fdcad93172205fd *inst/extdata/LeiMik1NB.txt
79ae26a340a7eeafb3266ddf008478be *inst/extdata/MartinezKoord.txt
fc853e1d16f0fccf2c1c9fdba34630ec *inst/extdata/MartinezOrtega04AFLP.txt
ab3277f6d6b0acd80d3b7219f711ef67 *inst/extdata/nb.R
762859f786fbba279e1c6b7daafd2bae *inst/extdata/nb.txt
25c75544b86897f1a9b503dd303fdf13 *inst/extdata/siskiyou.R
8a3f57a89015f448ec4cafe45bf2722c *inst/extdata/tetragonula.R
739a823c0fd3dd05476bc86e38cdbd11 *inst/extdata/veronica.R
ea50e3d982dc0c92ea16b7b917ff7b08 *man/NNclean.Rd
0f66c45ffcefb7a917fe7d22fb9c9df3 *man/abundtest.Rd
4474833c02423fc1d5c0cc8691ce7343 *man/allele2zeroone.Rd
2be794c8ae8a0dba03937dfa34691b3e *man/alleleconvert.Rd
22d6874b0577a9d609174638788a8d8b *man/alleledist.Rd
7ab7ac4424ad428fefde018ae2407dc7 *man/alleleinit.Rd
65a41f3ace73712c4c2fa4b7e80b217b *man/allelepaircomp.Rd
5488b7e83c72df28018f40f104d06c2d *man/autoconst.Rd
e711640852dc3ee7b0f045c3e5908c97 *man/build.charmatrix.Rd
ea79a6c3a6321a1530eb9c07b0254065 *man/build.ext.nblist.Rd
710659fbd387546b585226a32b918596 *man/build.nblist.Rd
12e30b591bc9eaa1f2782dc09ae011c7 *man/cluspop.nb.Rd
d653c5008330ff66a85dda3c253bd9b1 *man/comp.test.Rd
72302589f7eb073e2417ce7b8269b49a *man/concomp.Rd
b1a27be6f415841a8c3f924cc0452268 *man/conregmat.Rd
7055d79b9890ea376f44a6ccbcf1d42d *man/coord2dist.Rd
72c22a09c705a64a2db703f8cc800bd0 *man/crmatrix.Rd
20afdf5e051d736ee45f706aa2625207 *man/dicedist.Rd
cb6d84449a1f24804f797da846740ca9 *man/distratio.Rd
4ca0248d3aba01fd4e7d50f8ba71133e *man/geco.Rd
eaa9d39d0d089b0e706e08a4ebc58537 *man/geo2neighbor.Rd
cd53412faddf6b15e4bef3a607528655 *man/homogen.test.Rd
a3e68ae78eaf806fb3012cd2c7220d79 *man/hprabclust.Rd
8b5d0765a3b180ec9abe6d327b6193ae *man/incmatrix.Rd
a0ad902d8341ae5fca919193d29d2de6 *man/jaccard.Rd
cf8ca5f48fa5639a6158548545690219 *man/kulczynski.Rd
b0426a7dc7994631b8610896175b6245 *man/kykladspecreg.Rd
fa4e74ae8f0d4bdc27a04854431de890 *man/lcomponent.Rd
a4b8c35c9c2b23fcea83895fd42299ec *man/lociplots.Rd
b8e4c407f6b254c2aede8ad7b6b8e7a1 *man/nastats.Rd
6956bcf68789a87ef4d440b7f75fbd0a *man/nb.Rd
21056c50fff6c0747f449354aee066ed *man/nbtest.Rd
e065dfa8ebb5166547281b0661da4556 *man/nn.Rd
95ca35cca509aabc8469fc5b2586eb81 *man/piecewiselin.Rd
e0aa50fcaa4d8e42c36a48055462ae78 *man/pop.sim.Rd
a041f6797d6401dead6042e34b6b2fb2 *man/prab.sarestimate.Rd
22144c1e12180f6e07999297de8139a4 *man/prabclus-package.Rd
94170eb844e5a5bc4058a8834467a5a7 *man/prabclust.Rd
410aace4f8d660f896266a48403766d5 *man/prabinit.Rd
843bb6bd7c751db38c6c3b24e8b48736 *man/prabtest.Rd
92ae38e5561ffbf570f80c60e5cb397b *man/qkulczynski.Rd
6c5929e406e217481f7202ac4bb116a8 *man/randpop.nb.Rd
bcc948f36f1e17571b583f2aa2deac68 *man/regpop.sar.Rd
bae29f63520bd1e713bc977a5697dc25 *man/siskiyou.Rd
ab1d917e340826858cc374ad61314c78 *man/specgroups.Rd
4569d9cc4df41e739d0b51dbf7b54d62 *man/stressvals.Rd
960bc558cad2527dbfe65c86cb410140 *man/tetragonula.Rd
4a668b0423d5421223f6c3a0f84130ed *man/toprab.Rd
8a31c48615c2430a0127f3247b9fd440 *man/unbuild.charmatrix.Rd
eba2e1723307f7fa13789b8fc95e788a *man/veronica.Rd
c366a0dca80ff66e1188226a3c123d92 *man/waterdist.Rd
7a870e89f0e585fd5fca4f20311a2717 *tests/Examples/prabclus-Ex.Rout.save
4a311cd019aaf9e2ba7b6dbdb58dc3c8 *tests/donttestexamples.R
e4ebc8eb38def27e8c7ad3afe3c0f4a3 *tests/prabclustests.R
8c440cf7d044d5e5152d62236104ee17 *tests/prabclustests.Rout.save
# Remove the previous line if you edit this file
# Automatically generated one not changed but nicked
# Export all names
exportPattern(".")
# Import all packages listed as Imports or Depends
import(
MASS,
mclust
)
NNclean <-
function (data, k, distances = NULL, edge.correct = FALSE, wrap = 0.1,
convergence = 0.001, plot = FALSE, quiet=TRUE)
{
# require(mva)
data <- as.matrix(data)
d <- dim(data)[2]
n <- dim(data)[1]
if (n > 800) {
options(object.size = 5e+07)
}
if ((d == 2) && (edge.correct == TRUE)) {
r1 <- diff(range(data[, 1]))
r2 <- diff(range(data[, 2]))
tran2 <- matrix(c(rep(0, n), rep(r2, n)), byrow = FALSE,
nrow = n)
tran1 <- matrix(c(rep(r1, n), rep(0, n)), byrow = FALSE,
nrow = n)
aux.dat <- rbind(data + tran1, data - tran1, data + tran2,
data - tran2, data + tran1 + tran2, data - tran1 +
tran2, data - tran1 - tran2, data + tran1 - tran2)
aux.dat <- aux.dat[aux.dat[, 1] < (max(data[, 1]) + wrap *
r1) & aux.dat[, 1] > (min(data[, 1]) - wrap * r1) &
aux.dat[, 2] < (max(data[, 2]) + wrap * r2) & aux.dat[,
2] > (min(data[, 2]) - wrap * r2), ]
full.data <- rbind(data, aux.dat)
}
else {
full.data <- data
}
dDk <- function(x, lambda, k, d, alpha.d) {
(exp(-lambda * alpha.d * x^d) * 2 * (lambda * alpha.d)^k *
x^(d * k - 1))/gamma(k)
}
if (is.null(distances)) {
distances <- dist(full.data)
}
kthNND <- rep(0, n)
Labels <- 1:(n - 1)
kthNND[1] <- sort(distances[Labels])[k]
Labels[(2):(n - 1)] <- Labels[(2):(n - 1)] + (n - 1 - 1)
for (i in 2:n) {
kthNND[i] <- sort(distances[Labels])[k]
Labels[1:(i - 1)] <- Labels[1:(i - 1)] + 1
Labels[(i + 1):(n - 1)] <- Labels[(i + 1):(n - 1)] +
(n - i - 1)
}
kthNND <- kthNND[1:n]
alpha.d <- (2 * pi^(d/2))/(d * gamma(d/2))
delta <- rep(0, n)
delta[kthNND > (min(kthNND) + diff(range(kthNND))/3)] <- 1
delta[is.na(delta)] <- 0
p <- 0.5
lambda1 <- k/(alpha.d * mean((kthNND[delta == 0])^d))
lambda2 <- k/(alpha.d * mean((kthNND[delta == 1])^d))
loglik.old <- 0
loglik.new <- 1
while (abs((loglik.new - loglik.old)/loglik.new) > convergence) {
delta <- (p * dDk(kthNND, lambda1, k = k, d = d, alpha.d = alpha.d))/(p *
dDk(kthNND, lambda1, k = k, d = d, alpha.d = alpha.d) +
(1 - p) * dDk(kthNND, lambda2, k = k, d = d, alpha.d = alpha.d))
delta[is.na(delta)] <- 0
p <- sum(delta)/n
if (p>0)
lambda1 <- (k * sum(delta))/(alpha.d * sum((kthNND^d) *
delta))
if (p<1)
lambda2 <- (k * sum((1 - delta)))/(alpha.d * sum((kthNND^d) *
(1 - delta)))
if (p<=0)
lambda1 <- lambda2/2
if (p>=1)
lambda2 <- 2*lambda1
loglik.old <- loglik.new
loglik.new <- sum(-p * lambda1 * alpha.d * ((kthNND^d) *
delta) - (1 - p) * lambda2 * alpha.d * ((kthNND^d) *
(1 - delta)) + delta * k * log(lambda1 * alpha.d) +
(1 - delta) * k * log(lambda2 * alpha.d))
if (!quiet){
cat("l.new=",loglik.new," p=",p," l1=",lambda1,
" l2=",lambda2," ad=",alpha.d," knnd=", kthNND," delta=",delta,
"\n")
}
}
if (plot) {
hist(kthNND, nclass = 20, axes = TRUE, ylab = "Estimate of Mixture",
xlim = c(0, max(kthNND)), probability = TRUE, xlab = paste("Distance to",
eval(k), "th nearest neighbour"))
box()
support <- seq(0, max(kthNND), length = 200)
lines(support, (p * dDk(support, lambda1, k = k, d = d,
alpha.d = alpha.d) + (1 - p) * dDk(support, lambda2,
k = k, d = d, alpha.d = alpha.d)))
}
probs <- dDk(kthNND, lambda1, k = k, d = d, alpha.d = alpha.d)/(dDk(kthNND,
lambda1, k = k, d = d, alpha.d = alpha.d) + dDk(kthNND,
lambda2, k = k, d = d, alpha.d = alpha.d))
probs[is.na(probs)] <- 1
out <- list(z = round(probs), probs = probs, k = k, lambda1 = lambda1,
lambda2 = lambda2, p = p, kthNND = kthNND)
class(out) <- "nnclean"
return(out)
}
print.nnclean <- function(x, ...){
cat("Nearest neighbor noise detection\n")
cat("by Byers, S. and Raftery, A. E. (1998) Nearest-Neighbor Clutter\n")
cat("Removal for Estimating Features in Spatial Point Processes,\n")
cat("Journal of the American Statistical Association, 93, 577-584.\n")
cat("Classification: ( 0 means noise)\n", x$z, "\n")
cat("The Poisson process mixture of the distance to kth nearest neighbor\n")
cat("is characterized by k=",x$k,", p=",x$p,"\n")
cat("lambda1=",x$lambda1,", lambda2=",x$lambda2,"\n")
}
This diff is collapsed.
This diff is collapsed.
"autoreg" <-
function(x, probs, ejprob, plot=TRUE, nperp=4, species.fixed=TRUE, pdfnb=FALSE){
tjumps <- matrix(nrow=nperp*length(probs),ncol=3)
for (j in 1:length(probs)){
cat(" Estimating disj. parameter: Simulations for p= ",probs[j],"\n")
for (i in 1:nperp){
# print(x$n.species)
# print(x$specperreg)
# print(sum(x$specperreg))
test <- randpop.nb(x$nb,p.nb=probs[j], n.species=x$n.species,
vector.species=x$regperspec,
species.fixed=species.fixed,
pdf.regions=x$specperreg/sum(x$specperreg),
count=FALSE, pdfnb=pdfnb)
# print("Test generated")
# print(test)
# print(dim(test))
# print(x$regperspec)
nst <- apply(test,2,sum)
# print(nst)
# print(apply(test,1,sum) - x$specperreg)
tcn <- con.regmat(test,x$nb,count=FALSE)
# print("tcn computed")
ind <- (j-1)*nperp+i
tjumps[ind,1] <- probs[j]
tjumps[ind,2] <- sum(tcn-1)
# print(tjumps[ind,2])
tjumps[ind,3] <- tjumps[ind,2]/sum(nst-1)
}
}
ejlm <- lm(tjumps[,3]~tjumps[,1])
if (plot){
plot(tjumps[,1],tjumps[,3],xlab="pdisj",
ylab="qdisj")
# print(tjumps[,1])
# print(tjumps[,3])
abline(ejlm$coef)
abline(c(ejprob,0), lty=2)
}
pd <- (ejprob-ejlm$coef[1])/ejlm$coef[2]
cat(" Estimated disjunction parameter =",pd,"\n")
out <- list(pd=pd, coef=ejlm$coef)
out
}
This diff is collapsed.
comp.test <- function(cl, spg){
cs <- chisq.test(cl,spg,simulate.p.value=TRUE,B=10000)
print(cs)
invisible(cs)
}
"con.comp" <-
function(comat){
nc <- ncol(comat)
# print(nc)
ccn <- rep(0, times=nc) # con.comp number for each point
fhist <- rep(FALSE, times=nc) # indicator if point had been under consideration
stn <- 0 # current cc number
pn <- 1 # point nr. to which similar objects are looked for
while(pn>0){
stn <- stn+1
repeat{
sm <- 0 # smallest new point nr.
ccn[pn] <- stn
fhist[pn] <- TRUE
if(nc>1)
{
for(i in 2:nc)
{
# print(comat[i,])
# cat(i, pn, ccn[i], comat[i,pn],"\n")
if((ccn[i]==0) & (comat[i,pn]))
ccn[i] <- stn
if ((sm==0) & (ccn[i]==stn) & (fhist[i]==FALSE))
sm <- i
} # for i
# cat("stn=", stn, "sm=", sm, "\n")
} # if nc>1
if (sm>0)
pn <- sm
else
break
} # repeat
# print("repeat terminated")
pn <- 0
i <- 2
while(i<=nc){
if(ccn[i]==0){
pn <- i
i <- nc
} # if
i <- i+1
} # while i
} # while pn>0 (stn-loop)
ccn
}
con.regmat <- function (regmat, neighbors, count = FALSE)
{
nart <- ncol(regmat)
nreg <- nrow(regmat)
out <- c()
for (i in 1:nart) {
nspec <- sum(regmat[, i])
if (nspec>0){
if (count)
cat("Species ", i, " size ", nspec, "\n")
regions <- (1:nreg)[as.logical(regmat[, i])]
comat <- matrix(FALSE, ncol = nspec, nrow = nspec)
for (j in 1:nspec)
for (k in neighbors[[regions[j]]])
comat[j, (1:nspec)[regions == k]] <- TRUE
out[i] <- max(con.comp(comat))
}
else
out[i] <- NA
}
out
}
"distratio" <-
function(distmat, prop=0.25){
nc <- ncol(distmat)
net <- as.integer(nc*(nc-1)/2)
if (prop==(-1))
prop <- 0.25
vdist <- distmat[upper.tri(distmat)]
# cat("length=",length(vdist)," net=",net,"\n")
sdist <- sort(vdist)
lo <- floor(prop*net)
hi <- ceiling((1-prop)*net)+1
# cat("lo=", lo, " hi=",hi,"\n")
los <- sum(sdist[1:lo])
his <- sum(sdist[hi:net])
lowmean <- los/lo
himean <- his/(net+1-hi)
dr <- lowmean/himean
out <- list(dr=dr,lowmean=lowmean,himean=himean,prop=prop)
out
}
piecewiselin <- function(distmatrix, maxdist=0.1*max(distmatrix)){
ncd <- ncol(distmatrix)
nmatrix <- distmatrix/maxdist
for (i in 1:(ncd-1))
for (j in (i+1):ncd)
if (nmatrix[i,j]>1)
nmatrix[i,j] <- nmatrix[j,i] <- 1
nmatrix
}
# Geographic Kulczynski distance, rows are regions
geco <- function(regmat,
geodist=as.dist(matrix(as.integer(!diag(nrow(regmat))))),
transform="piece",
tf=0.1,
countmode=ncol(regmat)+1){
# print(tf)
nart <- ncol(regmat)
ncell <- nrow(regmat)
jdist <- rep(0, nart * nart)
dim(jdist) <- c(nart, nart)
if (transform=="none")
geodist <- as.matrix(geodist)
if (transform=="log")
geodist <- log(as.matrix(tf*geodist)+1)
if (transform=="sqrt")
geodist <- sqrt(as.matrix(tf*geodist))
if (transform=="piece"){
mg <- max(geodist)
geodist <- piecewiselin(as.matrix(geodist),tf*mg)
}
fi <- list()
nci <- numeric(0)
for (i in 1:nart){
fi[[i]] <- (1:ncell)[as.logical(regmat[,i])]
nci[[i]] <- sum(regmat[, i])
}
for (i in 1:(nart - 1)) {
if (round(i/countmode)==(i/countmode))
cat("Computing gb-distances for species ",i,"\n")
for (j in (i + 1):nart) {
nsi <- numeric(0)
gfi <- geodist[fi[[i]],fi[[j]],drop=FALSE]
# print(gfi)
nsi[1] <- sum(apply(gfi,1,min))
nsi[2] <- sum(apply(gfi,2,min))
jdist[i,j] <- jdist[j,i] <- (nsi[1]/nci[i]+nsi[2]/nci[j])/2
if (is.na(jdist[i, j]))
cat("Warning! NA at i=", i, ", j=", j, "\n")
}
}
jdist
}
geo2neighbor <- function(geodist,cut=0.1*max(geodist)){
geodist <- as.matrix(geodist)
n <- nrow(geodist)
nblist <- list()
for (i in 1:n) nblist[[i]] <- numeric(0)
for (i in 1:(n-1))
for(j in (i+1):n)
if (geodist[i,j]<=cut){
nblist[[i]] <- c(nblist[[i]],j)
nblist[[j]] <- c(nblist[[j]],i)
}
nblist
}
hprabclust <- function (prabobj, cutdist=0.4, cutout=1,
method="average", nnout=2, mdsplot=TRUE,
mdsmethod="classical")
{
cf <- match.call()
# if (mdsplot & mdsmethod!="classical")
# require(MASS)
# "Data-alphabetical" ordering
oregions <- order(prabobj$specperreg)
prabo1 <- prabobj$prab[oregions,]
ospecies <- do.call("order",as.data.frame(t(prabo1)))
dma <- prabobj$distmat[ospecies,ospecies]
if (mdsplot){
mdsdim=2
if (mdsmethod != "classical") {
mindm <- min(dma[dma > 0])/10
for (i in 1:(prabobj$n.species - 1))
for (j in (i + 1):prabobj$n.species) if (dma[i, j] < mindm)
dma[i, j] <- dma[j, i] <- mindm
}
mdsout <-
switch(mdsmethod, classical = cmdscale(dma, k = mdsdim),
kruskal = isoMDS(dma, k = mdsdim), sammon = sammon(dma,
k = mdsdim))
if (mdsmethod=="classical") mds <- mdsout
else mds <- mdsout$points
}
else mds <- NULL
n <- prabobj$n.species
nnd <- c()
nout <- rep(TRUE,n)
for (i in 1:n){
nnd[i] <- sort(dma[i, ])[nnout + 1]
if (nnd[i]>cutout) nout[i] <- FALSE
}
noisen <- n-sum(nout)
dm <- as.dist(dma[nout,nout])
cl1 <- hclust(dm, method=method)
rclustering <- cl2 <- cutree(cl1, h=cutdist)
nc <- max(cl2)
# nout[ospecies] <- nout
# ncl <- max(cl2)
csum <- function(nx, cv) {
out <- c()
for (i in 1:length(nx)) out[i] <- sum(cv == nx[i])
out
}
cs <- csum(1:nc, cl2)
ocs <- order(-cs)
for (i in 1:nc) cl2[rclustering == ocs[i]] <- i
clustering <- rep(0,n)
clustering[ospecies][nout] <- cl2
nmr <- max((1:nc)[cs[ocs]>nnout])
# print(cs[ocs])
# print(nmr)
rclustering <- rep(0,n)
rclustering[clustering<=nmr] <- clustering[clustering<=nmr]
symbols <- c("N")
if (nmr>0) symbols <- c("N", sapply(1:nmr, toString))
clsym <- symbols[rclustering+1]
if (mdsplot){
mds[ospecies,] <- mds
plot(mds[,1:2],pch=clsym)
}
out <- list(clustering = clustering, rclustering=rclustering,
cutdist=cutdist, method=method,
cutout=cutout,nnout=nnout,noisen=noisen,
symbols = clsym, points=mds, call=cf)
class(out) <- "comprabclust"
out
}
"print.comprabclust" <-
function(x, ...){
cat("* Clustered presence-absence matrix * \n\n")
cat("Clustered by hclust with noise detection, method=: ", x$method, "\n\n")
cat("Distance value to cut tree:", x$cutdist,"\n")
cat("Minimum cluster size (below is noise):", x$nnout+1,"\n")
cat("Call: \n")
print(x$call)
cat("\n Cluster memberships:\n")
print(x$rclustering)
cat("\n")
}
"homogen.test" <-
function(distmat,ne=ncol(distmat),testdist="erdos"){
nc <- ncol(distmat)
vdist <- distmat[upper.tri(distmat)]
sdist <- sort(vdist)
distcut <- sdist[ne]
iv <- nc
for (i in 1:nc)
iv <- iv - (min(distmat[-i,i])<=distcut)
if(testdist=="erdos"){
lambda <- exp(-2*ne/(nc-1) + log(nc))
p <- 1-ppois(iv-1,lambda)
if (iv>=lambda)
psymm <- 2*pnorm((lambda-iv)/sqrt(lambda))
else
psymm <- 2*pnorm((iv-lambda)/sqrt(lambda))
out <- list(p=p, p.twoside=psymm, iv=iv, lambda=lambda, distcut=distcut, ne=ne)
}
if(testdist=="ling"){
print("Computation of Ling-probabilities...")
prob <- rep(0,ne*(nc-iv))
dim(prob) <- c(ne,(nc-iv))
# prob <- rep(0,ne*nc)
# dim(prob) <- c(ne,nc)
prob[1,2] <- 1
n2 <- choose(nc,2)
for (i in 2:ne)
for(j in 3:(nc-iv))
# for(j in 3:nc)
prob[i,j] <- (choose(nc-j+2,2)*prob[i-1,j-2]+
(j-1)*(nc-j+1)*prob[i-1,j-1]+(choose(j,2)-(i-1))*prob[i-1,j])/
(n2-(i-1))
# cat(prob)
p2 <- p <- 0
for(j in 2:(nc-iv))
p <- p+prob[ne,j]
print("finished.")
out <- list(p=p, iv=iv, distcut=distcut, ne=ne)
}
out
}
"incmatrix" <-
function(regmat){
nart <- ncol(regmat)
nreg <- nrow(regmat)
neq <- 0
incmat <- matrix(0,ncol=nart,nrow=nart)
for (i in 1:(nart-1))
for (j in (i+1):nart){
# cat (i," ",j," ",sum(regmat[,i]<regmat[,j]),sum(regmat[,j]<regmat[,i]),"\n")
if (sum(regmat[,i]<regmat[,j])==0)
incmat[i,j] <- 1
if (sum(regmat[,j]<regmat[,i])==0)
incmat[j,i] <- 1
if (identical(regmat[,i],regmat[,j]))
# incmat[i,j] <- 0
neq <- neq+1
}
out <- list(m=incmat, ninc=sum(incmat), neq=neq)
out
}
"jaccard" <-
function(regmat){
nart <- ncol(regmat)
jdist <- rep(0, nart*nart)
dim(jdist) <- c(nart,nart)
reg.col.sum <- apply(regmat,2,sum)
reg.aggrement <- t(regmat) %*% regmat
jdist <- 1- reg.aggrement / (reg.col.sum-t(t(reg.aggrement)-reg.col.sum))
jdist
}
# "jaccard" <-
# function(regmat){
# nart <- ncol(regmat)
# jdist <- rep(0, nart*nart)
# dim(jdist) <- c(nart,nart)
# for (i in 1:(nart-1)){
# # cat("Row ",i,"\n")
# for (j in (i+1):nart){
# jdist[j,i] <- jdist[i,j] <- 1-sum(regmat[,i]+regmat[,j]>1.99)/sum(regmat[,i]+regmat[,j]>0.99)
# if (is.na(jdist[i,j]))
# cat("Warning! NA at i=",i,", j=", j,"\n")
# }
# }
# jdist
# }
"kulczynski" <-
function(regmat){
nart <- ncol(regmat)
jdist <- rep(0, nart*nart)
dim(jdist) <- c(nart,nart)