## routines for very large dataset generalized additive modelling.
## (c) Simon N. Wood 2009-2017


ls.size <- function(x) {
## If `x' is a list, return the size of its elements, in bytes, in a named
## numeric vector; otherwise return the size of the object itself
## (an `object_size' result, as from utils::object.size).
  if (!is.list(x)) return(object.size(x))
  ## vapply is type-safe and, unlike `for (i in 1:n)', handles the
  ## empty-list case correctly (no spurious iteration over 1:0)...
  sz <- vapply(x, function(el) as.numeric(object.size(el)), numeric(1))
  names(sz) <- names(x)
  sz
} ## ls.size
rwMatrix <- function(stop,row,weight,X,trans=FALSE) {
## Routine to recombine the rows of a matrix X according to info in 
## stop, row and weight. Consider the ith row of the output matrix 
## ind <- 1:stop[i] if i==1 and ind <- (stop[i-1]+1):stop[i]
## otherwise. The ith output row is then X[row[ind],]*weight[ind]
## (the combination over ind is performed in the C code, C_rwMatrix).
## X may also be a plain vector, which is treated as a single column.
## NOTE(review): trans is simply forwarded to C - presumably it requests
## the transposed operation; confirm against the C source.
  ## record input shape so the result can be returned in matching form...
  if (is.matrix(X)) { n <- nrow(X);p<-ncol(X);ok <- TRUE} else { n<- length(X);p<-1;ok<-FALSE}
  stop <- stop - 1;row <- row - 1 ## R indices -> C indices
  ## call compiled code; `work' is scratch space of the same size as X...
  oo <-.C(C_rwMatrix,as.integer(stop),as.integer(row),as.double(weight),X=as.double(X),
          as.integer(n),as.integer(p),trans=as.integer(trans),work=as.double(rep(0,n*p)))
  ## matrix in, matrix out; vector in, vector out...
  if (ok) return(matrix(oo$X,n,p)) else
  return(oo$X) 
} ## rwMatrix
chol2qr <- function(XX,Xy,nt=1) {
## takes X'X (in XX) and X'y (in Xy) and returns R and f such that
## R'R = X'X and R'f = X'y, i.e. the quantities that the QR based
## update (X = QR, f = Q'y) would have produced. This is how the
## cheaper use.chol=TRUE accumulation in qr.update gets post-processed.
## nt>0 selects mgcv's parallel pivoted Cholesky, pchol, with nt threads.
  op <- options(warn = -1) ## otherwise warns if +ve semidef
  R <- if (nt) pchol(XX,nt=nt) else chol(XX,pivot=TRUE)
  options(op) ## restore previous warning setting
  p <- length(Xy)
  ## piv is the pivot sequence applied by chol; ipiv is its inverse...
  ipiv <- piv <- attr(R,"pivot");ipiv[piv] <- 1:p
  rank <- attr(R,"rank");ind <- 1:rank
  if (rank<p) R[(rank+1):p,] <- 0 ## chol is buggy (R 3.1.0) with pivot=TRUE
  ## solve R'f = X'y on the leading rank x rank block, zero-pad the
  ## rank deficient tail, and undo the pivoting on both R and f...
  f <- c(forwardsolve(t(R[ind,ind]),Xy[piv[ind]]),rep(0,p-rank))[ipiv]
  R <- R[ipiv,ipiv]
  list(R=R,f=f)
} ## chol2qr
qr.update <- function(Xn,yn,R=NULL,f=rep(0,0),y.norm2=0,use.chol=FALSE,nt=1) {
## Let X = QR and f = Q'y. Updates R and f when the rows Xn are appended
## to X and yn to y. R==NULL signals an initial decomposition of Xn on
## its own. ||y||^2 is accumulated in y.norm2 as we go. With
## use.chol==TRUE a quicker but less stable accumulation of X'X and X'y
## is used instead; results then need post-processing by chol2qr to
## recover R = chol(X'X) and f = R^{-T} X'y. If nt>1 and use.chol=FALSE
## the parallel QR routine pqr2 is used in place of base qr.
  p <- ncol(Xn)
  yy2 <- y.norm2 + sum(yn^2)
  if (use.chol) {
    ## simply accumulate the cross products...
    XtX <- crossprod(Xn)
    Xty <- as.numeric(t(Xn)%*%yn)
    if (!is.null(R)) {
      XtX <- R + XtX
      Xty <- f + Xty
    }
    list(R=XtX,f=Xty,y.norm2=yy2)
  } else { ## QR update
    ## stack the previous R above the new rows (and f above yn), then
    ## re-decompose - standard row-wise QR updating...
    if (!is.null(R)) {
      Xn <- rbind(R,Xn)
      yn <- c(f,yn)
    }
    qrx <- if (nt==1) qr(Xn,tol=0,LAPACK=TRUE) else pqr2(Xn,nt)
    unpiv <- qrx$pivot
    unpiv[unpiv] <- 1:p ## inverse of the column pivot
    list(R=qr.R(qrx)[,unpiv],f=qr.qty(qrx,yn)[1:p],y.norm2=yy2)
  }
} ## qr.update
qr.up <- function(arg) {
## routine for parallel computation of the QR factorization of 
## a large gam model matrix, suitable for calling with parLapply.
## arg is a list packaging everything a cluster node needs: model info
## (G, coef, eta, offset, theta), this node's block boundaries (start,
## stop, n.block), family functions (family, linkinv, mu.eta, variance,
## dev.resids) and control settings (use.chol, gc.level). Returns the
## accumulated QR (or X'X/X'y if use.chol) pieces from qr.update, plus
## the accumulated deviance, working weights wt and linear predictor eta.
  wt <- rep(0,0) ## accumulated working weights over all blocks
  dev <- 0       ## accumulated deviance
  eta <- arg$eta
  efam <- !is.null(arg$family) ## extended family?
  for (b in 1:arg$n.block) { ## work through this node's data blocks
    ind <- arg$start[b]:arg$stop[b]
    ## form this block's rows of the model matrix...
    X <- predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
    rownames(X) <- NULL
    ## update the linear predictor for this block (or use supplied eta)...
    if (is.null(arg$coef)) eta1 <- arg$eta[ind] else eta[ind] <- eta1 <- drop(X%*%arg$coef) + arg$offset[ind]
    mu <- arg$linkinv(eta1) 
    y <- arg$G$y[ind] ## arg$G$model[[arg$response]] 
    weights <- arg$G$w[ind]
    if (efam) { ## extended family case
       dd <- dDeta(y,mu,weights,theta=arg$theta,arg$family,0)
       ## note: no handling of infinities and wz case yet
       w <- dd$EDeta2 * .5 
       #w <- w
       z <- (eta1-arg$offset[ind]) - dd$Deta.EDeta2
       good <- is.finite(z)&is.finite(w)
       w[!good] <- 0 ## drop if !good
       z[!good] <- 0 ## irrelevant
    } else { ## regular exp fam case
      ## standard IRLS pseudodata z and weights w...
      mu.eta.val <- arg$mu.eta(eta1)
      good <- (weights > 0) & (mu.eta.val != 0)
      z <- (eta1 - arg$offset[ind])[good] + (y - mu)[good]/mu.eta.val[good]
      w <- (weights[good] * mu.eta.val[good]^2)/arg$variance(mu)[good]
    }  
    dev <- dev + if (efam) sum(arg$family$dev.resids(y,mu,weights,arg$theta)) else sum(arg$dev.resids(y,mu,weights))
    wt <- c(wt,w)
    w <- sqrt(w)
    ## note assumption that nt=1 in following qr.update - i.e. each cluster node is strictly serial
    if (b == 1) qrx <- qr.update(w*X[good,,drop=FALSE],w*z,use.chol=arg$use.chol) 
    else qrx <- qr.update(w*X[good,,drop=FALSE],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
    rm(X);if(arg$gc.level>1) gc() ## X can be large: remove and reclaim
  }
  qrx$dev <- dev;qrx$wt <- wt;qrx$eta <- eta
  ## NOTE(review): `w' is listed twice below, and `mu.eta.val' only exists
  ## when the family is not extended - rm() then warns but does not fail.
  if (arg$gc.level>1) { rm(arg,ind,mu,y,weights,mu.eta.val,good,z,w,wt,w);gc()}
  qrx
} ## qr.up
compress.df <- function(dat,m=NULL) {
## Takes dataframe in dat and compresses it by rounding and duplicate 
## removal. For metric variables we first find the unique cases.
## If there are <= m of these then these are employed, otherwise 
## rounding is used. Factors are always reduced to the number of 
## levels present in the data. Idea is that this function is called 
## with columns of dataframes corresponding to single smooths or marginals.
## Returns the compressed frame with attribute "index" mapping each
## original row to a row of the compressed frame.
  d <- ncol(dat) ## number of variables to deal with
  n <- nrow(dat) ## number of data/cases
  ## default grid size per variable; for multi-variable terms take the
  ## d-th root of m so the total grid is roughly m points...
  if (is.null(m)) m <- if (d==1) 1000 else if (d==2) 100 else 25 else
  if (d>1) m <- round(m^{1/d}) + 1
  
  mf <- mm <- 1 ## total grid points for factor and metric
  for (i in 1:d) if (is.factor(dat[,i])) {  
    mf <- mf * length(unique(as.vector(dat[,i]))) 
  } else {
    mm <- mm * m 
  } 
  if (is.matrix(dat[[1]])) { ## must replace matrix terms with vec(dat[[i]])
    dat0 <- data.frame(as.vector(dat[[1]]))
    if (d>1) for (i in 2:d) dat0[[i]] <- as.vector(dat[[i]])
    names(dat0) <- names(dat)
    dat <- dat0;rm(dat0)
  }
  xu <- uniquecombs(dat,TRUE)
  if (nrow(xu)>mm*mf) { ## too many unique rows to use only unique
    for (i in 1:d) if (!is.factor(dat[,i])) { ## round the metric variables
      xl <- range(dat[,i])
      ## NOTE(review): xu is re-used here as the rounding grid, shadowing
      ## the uniquecombs result - harmless only because xu is recomputed
      ## immediately after this loop.
      xu <- seq(xl[1],xl[2],length=m)
      dx <- xu[2]-xu[1]
      kx <- round((dat[,i]-xl[1])/dx)+1
      dat[,i] <- xu[kx] ## rounding the metric variables
    }
    xu <- uniquecombs(dat,TRUE)
  }  
  k <- attr(xu,"index")
  ## shuffle rows in order to avoid induced dependencies between discretized
  ## covariates (which can mess up gam.side)...
  ## any setting should be done in routine calling this one!!
  #seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  #if (inherits(seed,"try-error")) {
  #   runif(1)
  #   seed <- get(".Random.seed",envir=.GlobalEnv)
  #}
  #kind <- RNGkind(NULL)
  #RNGkind("default","default")
  ## following line must be different to that used in
  ## tp constructor subsampling!
  #set.seed(8547) ## ensure repeatability
  
  ii <- sample(1:nrow(xu),nrow(xu),replace=FALSE) ## shuffling index
  
  #RNGkind(kind[1],kind[2])
  #assign(".Random.seed",seed,envir=.GlobalEnv) ## RNG behaves as if it had not been used
  
  xu[ii,] <- xu  ## shuffle rows of xu
  k <- ii[k]     ## correct k index accordingly
  ## ... finished shuffle
  ## if arguments were matrices, then return matrix index
  if (length(k)>n) k <- matrix(k,nrow=n) 
  k -> attr(xu,"index")
  xu
} ## compress.df

check.term <- function(term,rec) {
## utility function for discrete.mf. Checks whether the variables in "term"
## have already been discretized, and if so whether that discretization 
## can be re-used for the current "term". Stops if the term is already
## discretized but the discretization can not be re-used. Otherwise returns
## the index of the k index vector for the previous discretization, or 0 
## if the term is not in the existing list.
##   term - character vector of variable names making up the term.
##   rec  - record list: vnames (variables already discretized), ki (index
##          of the index vector each variable relates to), d (dimension of
##          the term each variable belongs to).
  ii <- which(rec$vnames %in% term)
  if (length(ii) > 0) { ## at least one variable already discretized
    if (length(term) == rec$d[min(ii)]) { ## dimensions match previous discretization
      ## any(...) is the idiomatic (and intent-revealing) form of the
      ## original sum(...) truthiness test...
      if (any(!(term %in% rec$vnames[ii]))) stop("bam can not discretize with this nesting structure")
      else return(rec$ki[min(ii)]) ## all names match previous - return index of previous
    } else stop("bam can not discretize with this nesting structure")
  } else return(0) ## no match
} ## check.term

discrete.mf <- function(gp,mf,names.pmf,m=NULL,full=TRUE) {
## discretize the covariates for the terms specified in smooth.spec
## id not allowed. names.pmf gives the names of the parametric part
## of mf, and is used to create a model frame for just the 
## parametric terms --- mini.mf is applied to this.
## if full is FALSE then parametric and response terms are ignored
## and what is returned is a list where columns can be of 
## different lengths.
## On exit... 
## * mf is a model frame containing the unique discretized covariate
##   values, in randomized order, padded to all be same length
## * nr records the number of unique discretized covariate values
##   i.e. the number of rows before the padding starts
## * k.start contains the starting column in index vector k, for
##   each variable.
## * k is the index matrix. The ith record of the 1st column of the 
##   jth variable is in row k[i,k.start[j]] of the corresponding 
##   column of mf.
## ... there is an element of nr and k.start for each variable of 
## each smooth, but variables are only discretized and stored in mf
## once. If there are no matrix variables then k.start = 1:(ncol(k)+1) 
#  if (is.null(attr(mf,"terms"))) mf <- eval(gp$fake.formula[-2],mf) ## assumes model frame below

  ## some sub sampling here... want to set and restore RNG state used for this
  ## to ensure strict repeatability.
  
  seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  if (inherits(seed,"try-error")) {
       ## no seed exists yet - draw once to force its creation, then grab it...
       runif(1)
       seed <- get(".Random.seed",envir=.GlobalEnv)
  }
  kind <- RNGkind(NULL)
  RNGkind("default", "default")
  set.seed(8547) ## keep different to tps constructor!

  mf0 <- list()
  nk <- 0 ## count number of index vectors to avoid too much use of cbind
  ## one index vector per marginal of each tensor term, plus one per by variable...
  for (i in 1:length(gp$smooth.spec)) nk <- nk + as.numeric(gp$smooth.spec[[i]]$by!="NA") +
    if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) length(gp$smooth.spec[[i]]$margin) else 1
  k <- matrix(0,nrow(mf),nk) ## each column is an index vector
  k.start <- 1:(nk+1) ## record last column for each term
  ik <- 0 ## index counter
  nr <- rep(0,nk) ## number of rows for term
  ## structure to record terms already processed...
  rec <- list(vnames = rep("",0), ## variable names
              ki = rep(0,0),      ## index of original index vector var relates to  
              d = rep(0,0))       ## dimension of terms involving this var
  ## loop through the terms discretizing the covariates...
  for (i in 1:length(gp$smooth.spec)) {
    nmarg <- if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) length(gp$smooth.spec[[i]]$margin) else 1
    maxj <- if (gp$smooth.spec[[i]]$by=="NA") nmarg else nmarg + 1 
    mi <- if (is.null(m)||length(m)==1) m else m[i]
    if (!is.null(gp$smooth.spec[[i]]$id)) stop("discretization can not handle smooth ids")
    j <- 0
    for (jj in 1:maxj) { ## loop through marginals
      ## first pass deals with the by variable, if there is one...
      if (jj==1&&maxj!=nmarg) termi <- gp$smooth.spec[[i]]$by else {
        j <- j + 1
        termi <- if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) gp$smooth.spec[[i]]$margin[[j]]$term else 
                 gp$smooth.spec[[i]]$term          
      } 
      ik.prev <- check.term(termi,rec) ## term already discretized?
      ik <- ik + 1 ## increment index counter
      if (ik.prev==0) { ## new discretization required
         mfd <- compress.df(mf[termi],m=mi)
         ki <- attr(mfd,"index")
         if (is.matrix(ki)) {
           ## matrix variable: the index occupies several columns of k...
           ind <- (ik+1):length(k.start) 
           k.start[ind] <- k.start[ind] + ncol(ki)-1    ## adjust start indices
           k <- cbind(k,matrix(0,nrow(k),ncol(ki)-1)) ## extend index matrix
           ind <- k.start[ik]:(k.start[ik+1]-1) 
           k[,ind] <- ki 
         } else {
           k[,k.start[ik]] <- ki
         }
         nr[ik] <- nrow(mfd)
         mf0 <- c(mf0,mfd) 
         ## record variable discretization info...
         d <- length(termi)
         rec$vnames <- c(rec$vnames,termi)
         rec$ki <- c(rec$ki,rep(ik,d))
         rec$d <- c(rec$d,rep(d,d))
       } else { ## re-use an earlier discretization...
         ind.prev <- k.start[ik.prev]:(k.start[ik.prev+1]-1)
         ind <- (ik+1):length(k.start)
         k.start[ind] <- k.start[ind] + length(ind.prev)-1
         ind <- k.start[ik]:(k.start[ik+1]-1)
         k[,ind] <- k[,ind.prev]
         #k[,ik] <- k[,ik.prev]
         nr[ik] <- nr[ik.prev]
       }
    } ## end marginal jj loop
  } ## term loop (i)

  ## obtain parametric terms and..
  ## pad mf0 so that all rows are the same length
  ## padding is necessary if gam.setup is to be used for setup

  if (full) {
    maxr <- max(nr)
    ## If NA's caused rows to be dropped in mf, then they should
    ## also be dropped in pmf, otherwise we can end up with factors
    ## with more levels than unique observations, for example.
    ## The next couple of lines achieve this.
    ## find indices of terms in mf but not pmf...
    di <- sort(which(!names(mf) %in% names.pmf),decreasing=TRUE)
    ## create copy of mf with only pmf variables...
    mfp <- mf; for (i in di) mfp[[i]] <- NULL 
    #pmf0 <- mini.mf(pmf,maxr) ## deal with parametric components
    pmf0 <- mini.mf(mfp,maxr) ## deal with parametric components
    if (nrow(pmf0)>maxr) maxr <- nrow(pmf0)
    mf0 <- c(mf0,pmf0) ## add parametric terms to end of mf0

    ## pad every column of mf0 up to maxr rows by resampling its own values...
    for (i in 1:length(mf0)) {
      me <- length(mf0[[i]]) 
      if (me < maxr) mf0[[i]][(me+1):maxr] <- sample(mf0[[i]],maxr-me,replace=TRUE)
    }
    ## add response so that gam.setup can do its thing... 
  
    mf0[[gp$response]] <- sample(mf[[gp$response]],maxr,replace=TRUE)
    
    ## mf0 is the discretized model frame (actually a list), padded to have equal length rows
    ## k is the index vector for each sub-matrix, only the first nr rows of which are
    ## to be retained... Use of check.names=FALSE ensures, e.g. 'offset(x)' not changed...

    ## now copy back into mf so terms unchanged
    #mf <- mf[1:maxr,]
    mf <- mf[sample(1:nrow(mf),maxr,replace=TRUE),]
    for (na in names(mf0)) mf[[na]] <- mf0[[na]] 
   
  } else mf <- mf0
  ## reset RNG to old state...
  RNGkind(kind[1], kind[2])
  assign(".Random.seed", seed, envir = .GlobalEnv)

  ## finally one more pass through, expanding k, k.start and nr to deal with replication that
  ## will occur with factor by variables...
  ik <- ncol(k)+1 ## starting index col for this term in k.start
  for (i in length(gp$smooth.spec):1) { ## work down through terms so insertion painless
    if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) nd <-  
         length(gp$smooth.spec[[i]]$margin) else nd <- 1 ## number of indices
    ik <- ik - nd ## starting index if no by  
    if (gp$smooth.spec[[i]]$by!="NA") {
      ik <- ik - 1 ## first index
      nd <- nd + 1 ## number of indices
      byvar <- mf[[gp$smooth.spec[[i]]$by]]
      if (is.factor(byvar)) { ## then need to expand nr and index matrix
        nex <- length(levels(byvar))  ## number of copies of term indices
        if (is.ordered(byvar)) nex <- nex - 1 ## first level dropped
        if (nex>0) { ## insert index copies
          ii0 <- if (ik>1) 1:(ik-1) else rep(0,0) ## earlier
          ii1 <- if (ik+nd-1 < length(nr)) (ik+nd):length(nr) else rep(0,0) ## later
          ii <- ik:(ik+nd-1) ## cols for this term    
          ## indices for columns of k... 
          kk0 <- if (ik>1) 1:(k.start[ik]-1) else rep(0,0) ## earlier
          kk1 <- if (ik+nd-1 < length(nr)) k.start[ik+nd]:ncol(k) else rep(0,0) ## later
          kk <- k.start[ik]:(k.start[ik+nd]-1) ## cols for this term
          k <- cbind(k[,kk0,drop=FALSE],k[,rep(kk,nex),drop=FALSE],k[,kk1,drop=FALSE])
          nr <- c(nr[ii0],rep(nr[ii],nex),nr[ii1])
          ## expand k.start...
          nkk <- length(kk) ## number of k columns in term to be repeated
          k.start <- c(k.start[ii0],rep(k.start[ii],nex)+rep(0:(nex-1),each=nkk)*nkk,
                       (nex-1)*nkk+c(k.start[ii1],k.start[length(k.start)]))
        }
      } ## factor by 
    } ## existing by
  } ## smooth.spec loop
  list(mf=mf,k=k,nr=nr,k.start=k.start)
} ## discrete.mf

mini.mf <-function(mf,chunk.size) {
## takes a model frame and produces a representative subset of it, suitable for 
## basis setup. The subset has at most max(chunk.size, minimum-needed) rows,
## is guaranteed to contain the min and max of every numeric variable and at
## least one row for each factor level present. Uses (and then restores) a
## fixed RNG state so results are strictly repeatable.
  ## first count the minimum number of rows required for representativeness
  mn <- 0
  for (j in 1:length(mf)) mn <- mn + if (is.factor(mf[[j]])) length(levels(mf[[j]])) else 2
  if (chunk.size < mn) chunk.size <- mn
  n <- nrow(mf)
  if (n <= chunk.size) return(mf)

  seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  if (inherits(seed,"try-error")) {
     ## no seed exists yet - draw once to force its creation, then grab it...
     runif(1)
     seed <- get(".Random.seed",envir=.GlobalEnv)
  }
  kind <- RNGkind(NULL)
  RNGkind("default", "default")
  set.seed(66)  
  ## randomly sample from original frame...
  ind <- sample(1:n,chunk.size)
  mf0 <- mf[ind,,drop=FALSE]
  ## ... now need to ensure certain sorts of representativeness

  ## work through elements collecting the rows containing 
  ## max and min for each variable, and a random row for each 
  ## factor level....

  ind <- sample(1:n,n,replace=FALSE) ## randomized index for stratified sampling w.r.t. factor levels
  fun <- function(X,fac,ind) ind[fac[ind]==X][1] ## stratified sampler
  k <- 0 ## next row of mf0 to overwrite with a representative row
  for (j in 1:length(mf)) if (is.numeric(mf0[[j]])) {
    if (is.matrix(mf0[[j]])) { ## find row containing minimum
      j.min <- min((1:n)[as.logical(rowSums(mf[[j]]==min(mf[[j]])))])
      j.max <- min((1:n)[as.logical(rowSums(mf[[j]]==max(mf[[j]])))])
    } else { ## vector
      j.min <- min(which(mf[[j]]==min(mf[[j]])))
      j.max <- min(which(mf[[j]]==max(mf[[j]])))
    }
    ## overwrite the next rows of the subset with the extreme rows...
    k <- k + 1; mf0[k,] <- mf[j.min,]
    k <- k + 1; mf0[k,] <- mf[j.max,] 
  } else if (is.factor(mf[[j]])) { ## factor variable...
    ## randomly sample one row from each factor level...
    find <- apply(X=as.matrix(levels(mf[[j]])),MARGIN=1,FUN=fun,fac=mf[[j]],ind=ind)
    find <- find[is.finite(find)] ## in case drop.unused.levels==FALSE, so that there are levels without rows
    nf <- length(find)
    mf0[(k+1):(k+nf),] <- mf[find,]
    k <- k + nf
  }

  ## restore the caller's RNG state so this routine is side-effect free...
  RNGkind(kind[1], kind[2])
  assign(".Random.seed", seed, envir = .GlobalEnv)

  mf0
} ## mini.mf


bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
    mustart = NULL, offset = rep(0, nobs),rho=0, control = gam.control(), intercept = TRUE, 
    gc.level=0,nobs.extra=0,npt=1) {
429 430
## This is a version of bgam.fit designed for use with discretized covariates. 
## Difference to bgam.fit is that XWX, XWy and Xbeta are computed in C
431 432
## code using compressed versions of X. Parallelization of XWX formation
## is performed at the C level using openMP.
433
## Alternative fitting iteration using Cholesky only, including for REML.
434 435 436 437 438 439 440 441 442
## Basic idea is to take only one Newton step for parameters per iteration
## and to control the step length to ensure that at the end of the step we
## are not going uphill w.r.t. the REML criterion...
    
    y <- mf[[gp$response]]
    weights <- G$w 
    conv <- FALSE
    nobs <- nrow(mf)
    offset <- G$offset 
443 444 445 446 447 448 449 450 451 452
   
    if (inherits(G$family,"extended.family")) { ## preinitialize extended family
      efam <- TRUE
      pini <- if (is.null(G$family$preinitialize)) NULL else G$family$preinitialize(y,G$family)
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) y <- pini$y
      scale <- if (is.null(G$family$scale)) 1 else G$family$scale
      if (scale < 0) scale <- var(y) *.1 ## initial guess
    } else efam <- FALSE

453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476

    if (rho!=0) { ## AR1 error model
      
      ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
      sd <- -rho*ld         ## sub diagonal
      N <- nobs    
      ## see rwMatrix() for how following are used...
      ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
      ar.weight <- c(1,rep(c(sd,ld),N-1))     ## row weights
      ar.stop <- c(1,1:(N-1)*2+1)    ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
      if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
        ii <- which(mf$"(AR.start)"==TRUE)
        if (length(ii)>0) {
          if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
          ar.weight[ii*2-2] <- 0 ## zero sub diagonal
          ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
        }
      }
    } else {## AR setup complete
      ar.row <- ar.weight <- ar.stop <- -1 ## signal no re-weighting
    }

    family <- G$family
    additive <- if (family$family=="gaussian"&&family$link=="identity") TRUE else FALSE
477 478 479 480 481 482 483
    linkinv <- family$linkinv;#dev.resids <- family$dev.resids
    if (!efam) {
      variance <- family$variance
      mu.eta <- family$mu.eta
      if (!is.function(variance) || !is.function(linkinv))
          stop("'family' argument seems not to be a valid family object")
    }
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505
    valideta <- family$valideta
    if (is.null(valideta))
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu))
        validmu <- function(mu) TRUE
    if (is.null(mustart)) {
        eval(family$initialize)
    }
    else {
        mukeep <- mustart
        eval(family$initialize)
        mustart <- mukeep
    }
 
    eta <- if (!is.null(etastart))
         etastart
    else family$linkfun(mustart)
    
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta)))
       stop("cannot find valid starting values: please specify some")
506
    dev <- sum(family$dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522

    conv <- FALSE
   
    G$coefficients <- rep(0,ncol(G$X))
    class(G) <- "gam"  
    
    ## need to reset response and weights to post initialization values
    ## in particular to deal with binomial properly...
    G$y <- y
    G$w <- weights

    Sl <- Sl.setup(G) ## setup block diagonal penalty object
    rank <- 0
    for (b in 1:length(Sl)) rank <- rank + Sl[[b]]$rank
    Mp <- ncol(G$X) - rank ## null space dimension
    Nstep <- 0
523
    if (efam) theta <- family$getTheta()
524 525 526 527 528
    for (iter in 1L:control$maxit) { ## main fitting loop 
      devold <- dev
      dev <- 0
      ## accumulate the QR decomposition of the weighted model matrix
      if (iter==1||!additive) {
529 530
        qrx <- list()

531 532
        if (iter>1) {
          ## form eta = X%*%beta
533
          eta <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + offset
534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
	  lsp.full <- G$lsp0
	  if (n.sp>0) lsp.full <- lsp.full + if (is.null(G$L)) lsp[1:n.sp] else G$L %*% lsp[1:n.sp]
	  Sb <- Sl.Sb(Sl,lsp.full,prop$beta) ## store S beta to allow rapid step halving
	  if (iter>2) {
            Sb0 <- Sl.Sb(Sl,lsp.full,b0)
	    bSb0 <- sum(b0*Sb0) ## penalty at start of beta step
	    ## get deviance at step start, with current theta if efam
	    dev0 <- if (efam) sum(family$dev.resids(G$y,mu0,G$w,theta)) else
	                 sum(family$dev.resids(G$y,mu0,G$w))
          }
        }
	kk <- 1
	repeat {
          mu <- linkinv(eta)
	  dev <- if (efam) sum(family$dev.resids(G$y,mu,G$w,theta)) else
	                 sum(family$dev.resids(G$y,mu,G$w))
          if (iter>2) { ## coef step length control
	    bSb <- sum(prop$beta*Sb) ## penalty at end of beta step 
            if (dev0 + bSb0 < dev + bSb && kk < 30) { ## beta step not improving current pen dev
              coef <- (coef0 + coef)/2 ## halve the step
	      Sb <- (Sb0 + Sb)/2
	      eta <- (eta0 + eta)/2
	      prop$beta <- (b0 + prop$beta)/2
	      kk <- kk + 1
            } else break
          } else break
        }		 

        if (iter>1) { ## save components of penalized deviance for step control
          coef0 <- coef ## original para
	  eta0 <- eta
	  mu0 <- mu
	  b0 <- prop$beta ## beta repara
	  dev <- dev + sum(prop$beta*Sb) ## add penalty to deviance
	} else reml <- dev ## for convergence checking
	
	if (efam) { ## extended family
	  if (iter>1) { ## estimate theta
	    scale1 <- if (!is.null(family$scale)) family$scale else scale
            if (family$n.theta>0||scale<0) theta <- estimate.theta(theta,family,y,mu,scale=scale1,wt=G$w,tol=1e-7)
            if (!is.null(family$scale) && family$scale<0) {
	      scale <- exp(theta[family$n.theta+1])
	      theta <- theta[1:family$n.theta]
	    }  
            family$putTheta(theta)
          }
	  
          dd <- dDeta(y,mu,G$w,theta=theta,family,0)
	  ## note: no handling of infinities and wz case yet
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598

          if (rho==0) {
	    w <- dd$Deta2 * .5 
            z <- (eta-offset) - dd$Deta.Deta2
          } else { ## use fisher weights
	    w <- dd$EDeta2 * .5 
            z <- (eta-offset) - dd$Deta.EDeta2
	  }
          #if (rho!=0) { 
          #  ind <- which(w<0)
	  #  if (length(ind)>0) { ## substitute Fisher weights
	  #    w[ind] <- dd$EDeta2[ind] * .5
	  #    z[ind] <- (eta[ind]-offset[ind]) - dd$Deta.EDeta2[ind]
	  #  }  
          #}
          good <- is.finite(z)&is.finite(w)
599 600 601 602 603 604 605 606 607 608
	  w[!good] <- 0 ## drop if !good
	  z[!good] <- 0 ## irrelevant
	  #dev <- sum(family$dev.resids(G$y,mu,G$w,theta))
        } else { ## exponential family
          mu.eta.val <- mu.eta(eta)
          good <- mu.eta.val != 0
          mu.eta.val[!good] <- .1 ## irrelvant as weight is zero
          z <- (eta - offset) + (G$y - mu)/mu.eta.val
          w <- (G$w * mu.eta.val^2)/variance(mu)
          #dev <- sum(family$dev.resids(G$y,mu,G$w))
609 610
        }
      
611
  
612 613 614 615
        qrx$y.norm2 <- if (rho==0) sum(w*z^2) else   ## AR mod needed
          sum(rwMatrix(ar.stop,ar.row,ar.weight,sqrt(w)*z,trans=FALSE)^2) 
       
        ## form X'WX efficiently...
616
        qrx$R <- XWXd(G$Xd,w,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,npt,G$drop,ar.stop,ar.row,ar.weight)
617
        ## form X'Wz efficiently...
618
        qrx$f <- XWyd(G$Xd,w,z,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop,ar.stop,ar.row,ar.weight)
619 620 621 622
        if(gc.level>1) gc()
     
        ## following reparameterizes X'X and f=X'y, according to initial reparameterizarion...
        qrx$XX <- Sl.initial.repara(Sl,qrx$R,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt)
623
        qrx$Xy <- Sl.initial.repara(Sl,qrx$f,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt)  
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
        
        G$n <- nobs
      } else {  ## end of if (iter==1||!additive)
        dev <- qrx$y.norm2 - sum(coef*qrx$f) ## actually penalized deviance
      }
  
      if (control$trace)
         message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))

      if (!is.finite(dev)) stop("Non-finite deviance")

      ## preparation for working model fit is ready, but need to test for convergence first
      if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
          conv <- TRUE
          #coef <- start
          break
      }

      ## use fast REML code
      ## block diagonal penalty object, Sl, set up before loop

      if (iter==1) { ## need to get initial smoothing parameters 
        lambda.0 <- initial.sp(qrx$R,G$S,G$off,XX=TRUE) ## note that this uses the untrasformed X'X in qrx$R
        ## convert intial s.p.s to account for L 
        lsp0 <- log(lambda.0) ## initial s.p.
649 650
        if (!is.null(G$L)) lsp0 <- 
          if (ncol(G$L)>0) as.numeric(coef(lm(lsp0 ~ G$L-1+offset(G$lsp0)))) else rep(0,0)
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
        n.sp <- length(lsp0) 
      }
     
      ## carry forward scale estimate if possible...
      if (scale>0) log.phi <- log(scale) else {
        if (iter==1) {
            if (is.null(coef)||qrx$y.norm2==0) lsp0[n.sp+1] <- log(var(as.numeric(G$y))*.05) else
               lsp0[n.sp+1] <- log(qrx$y.norm2/(nobs+nobs.extra))
        }
      }

      ## get beta, grad and proposed Newton step... 
      repeat { ## Take a Newton step to update log sp and phi
        lsp <- lsp0 + Nstep
        if (scale<=0) log.phi <- lsp[n.sp+1] 
        prop <- Sl.fitChol(Sl,qrx$XX,qrx$Xy,rho=lsp[1:n.sp],yy=qrx$y.norm2,L=G$L,rho0=G$lsp0,log.phi=log.phi,
667
                 phi.fixed=scale>0,nobs=nobs,Mp=Mp,nt=npt,tol=abs(reml)*.Machine$double.eps^.5)
668 669 670
        if (max(Nstep)==0) { 
          Nstep <- prop$step;lsp0 <- lsp;
          break 
671
        } else { ## step length control
672 673 674 675 676 677 678 679 680 681 682 683 684 685
          if (sum(prop$grad*Nstep)>dev*1e-7) Nstep <- Nstep/2 else {
            Nstep <- prop$step;lsp0 <- lsp;break;
          }
        }
      } ## end of sp update

      coef <- Sl.initial.repara(Sl,prop$beta,inverse=TRUE,both.sides=FALSE,cov=FALSE)

      if (any(!is.finite(coef))) {
          conv <- FALSE
          warning(gettextf("non-finite coefficients at iteration %d",
                  iter))
          break
      }
686
      reml <- (dev/exp(log.phi) - prop$ldetS + prop$ldetXXS)/2
687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708
    } ## end fitting iteration

    if (!conv)
       warning("algorithm did not converge")
   
    eps <- 10 * .Machine$double.eps
    if (family$family == "binomial") {
         if (any(mu > 1 - eps) || any(mu < eps))
                warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family == "poisson") {
            if (any(mu < eps))
                warning("fitted rates numerically 0 occurred")
    }
  Mp <- G$nsdf
  if (length(G$smooth)>1) for (i in 1:length(G$smooth)) Mp <- Mp + G$smooth[[i]]$null.space.dim
  scale <- exp(log.phi)
  reml <- (dev/scale - prop$ldetS + prop$ldetXXS + (length(y)-Mp)*log(2*pi*scale))/2
  if (rho!=0) { ## correct REML score for AR1 transform
    df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
    reml <- reml - (nobs-df)*log(ld)
  }
709 710 711 712

  for (i in 1:ncol(prop$db)) prop$db[,i] <- ## d beta / d rho matrix
        Sl.initial.repara(Sl,as.numeric(prop$db[,i]),inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt) 

713 714 715 716 717
  object <- list(db.drho=prop$db,
                 gcv.ubre=reml,mgcv.conv=conv,rank=prop$r,
                 scale.estimated = scale<=0,outer.info=NULL,
                 optimizer=c("perf","chol"))
  object$coefficients <- coef
718
  object$family <- family
719
  ## form linear predictor efficiently...
720
  object$linear.predictors <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + G$offset
721 722 723 724 725 726 727 728 729 730 731 732 733
  object$fitted.values <- family$linkinv(object$linear.predictors)
  if (efam) { ## deal with any post processing
     if (!is.null(family$postproc)) {
      posr <- family$postproc(family=object$family,y=y,prior.weights=G$w,
              fitted=object$fitted.values,linear.predictors=object$linear.predictors,offset=G$offset,
	      intercept=G$intercept)
      if (!is.null(posr$family)) object$family$family <- posr$family
      if (!is.null(posr$deviance)) object$deviance <- posr$deviance
      if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
    }
    if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(y,weighted.mean(y,G$w),G$w,theta))   
  }

734 735 736 737 738 739 740 741 742
  PP <- Sl.initial.repara(Sl,prop$PP,inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt)
  F <- pmmult(PP,qrx$R,FALSE,FALSE,nt=npt)  ##crossprod(PP,qrx$R) - qrx$R contains X'WX in this case
  object$edf <- diag(F)
  object$edf1 <- 2*object$edf - rowSums(t(F)*F) 
  object$sp <- exp(lsp[1:n.sp]) 
  object$sig2 <- object$scale <- scale
  object$Vp <- PP * scale
  object$Ve <- pmmult(F,object$Vp,FALSE,FALSE,nt=npt) ## F%*%object$Vp
  ## sp uncertainty correction... 
743
  if (!is.null(G$L)) prop$db <- prop$db%*%G$L
744
  M <- ncol(prop$db) 
745 746 747 748 749 750 751
  if (M>0) {
    ev <- eigen(prop$hess,symmetric=TRUE)
    ind <- ev$values <= 0
    ev$values[ind] <- 0;ev$values[!ind] <- 1/sqrt(ev$values[!ind])
    rV <- (ev$values*t(ev$vectors))[,1:M]
    Vc <- pcrossprod(rV%*%t(prop$db),nt=npt)
  } else Vc <- 0
752 753 754 755
  Vc <- object$Vp + Vc  ## Bayesian cov matrix with sp uncertainty
  object$edf2 <- rowSums(Vc*qrx$R)/scale
  object$Vc <- Vc
  object$outer.info <- list(grad = prop$grad,hess=prop$hess)  
756
  object$AR1.rho <- rho
757 758 759 760 761 762
  object$R <- pchol(qrx$R,npt)
  piv <- attr(object$R,"pivot") 
  object$R[,piv] <- object$R   
  object$iter <- iter 
  object$wt <- w
  object$y <- G$y
763
  object$prior.weights <- G$w
764 765 766 767 768
  rm(G);if (gc.level>0) gc()
  object
} ## end bgam.fitd


769 770 771 772 773 774 775 776 777 778
regular.Sb <- function(S,off,sp,beta) {
## Form the total penalty vector sum_i sp[i] * S[[i]] %*% beta for a
## 'regular' G list, i.e. the penalized part of the gradient of the
## quadratic penalty at beta.
## S    - list of penalty matrices (possibly empty).
## off  - off[i] is the index of the first coefficient that S[[i]] penalizes.
## sp   - smoothing parameters, one per element of S.
## beta - full coefficient vector.
## Returns a numeric vector of the same length as beta; coefficients not
## touched by any penalty stay zero.
  a <- beta*0
  ## seq_along handles the empty list cleanly, so no length(S)>0 guard needed
  for (i in seq_along(S)) {
    ind <- off[i] - 1 + 1:ncol(S[[i]]) ## block of coefs penalized by S[[i]]
    a[ind] <- a[ind] + sp[i] * S[[i]] %*% beta[ind]
  }
  a
} ## regular.Sb

779

780 781
## bgam.fit: fit a GAM to a large dataset by iteratively accumulating the QR
## decomposition (or X'X Cholesky, if use.chol=TRUE) of the weighted model
## matrix in chunks of at most chunk.size rows, optionally in parallel over a
## 'parallel' cluster cl. Each outer iteration performs one P-IRLS step, then
## re-estimates smoothing parameters by the requested method ("fREML",
## "GCV.Cp", or a gam() based method such as "ML"/"P-REML").
## G       - pre-built gam setup object (from bam's setup phase)
## mf      - model frame holding the (possibly very large) data
## gp      - interpreted formula; gp$response names the response in mf
## scale   - scale parameter; <=0 means estimate it
## coef    - starting coefficients, or NULL on a cold start
## samfrac - fraction of data actually supplied; R, f and ||y||^2 are
##           rescaled by 1/samfrac to approximate the full-data versions
## npt     - number of threads for the linear algebra itself
## Returns a "gam"-classed object (coefficients, edf, Vp, sp, etc.).
bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etastart = NULL,
    mustart = NULL, offset = rep(0, nobs), control = gam.control(), intercept = TRUE, 
    cl = NULL,gc.level=0,use.chol=FALSE,nobs.extra=0,samfrac=1,npt=1)
{   y <- mf[[gp$response]]
    weights <- G$w
    conv <- FALSE
    nobs <- nrow(mf)
    ##nvars <- ncol(G$X)
    offset <- G$offset
    family <- G$family

    if (inherits(G$family,"extended.family")) { ## preinitialize extended family
      efam <- TRUE
      pini <- if (is.null(G$family$preinitialize)) NULL else G$family$preinitialize(y,G$family)
      if (!is.null(pini$Theta)) G$family$putTheta(pini$Theta)
      if (!is.null(pini$y)) y <- pini$y
      scale <- if (is.null(G$family$scale)) 1 else G$family$scale
      if (scale < 0) scale <- var(y) *.1 ## initial guess
    } else efam <- FALSE

 
    G$family <- gaussian() ## needed if REML/ML used
    G$family$drop.intercept <- family$drop.intercept ## needed in predict.gam
    linkinv <- family$linkinv
    if (!efam) {
      variance <- family$variance
      mu.eta <- family$mu.eta
      if (!is.function(variance) || !is.function(linkinv))
        stop("'family' argument seems not to be a valid family object")
    }
    dev.resids <- family$dev.resids
    ## aic <- family$aic
   
    valideta <- family$valideta
    if (is.null(valideta))
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu))
        validmu <- function(mu) TRUE
    ## standard glm-style initialization: family$initialize may overwrite
    ## mustart, so preserve a user supplied one across the eval...
    if (is.null(mustart)) {
        eval(family$initialize)
    }
    else {
        mukeep <- mustart
        eval(family$initialize)
        mustart <- mukeep
    }
 
    ##coefold <- NULL
    eta <- if (!is.null(etastart))
         etastart
    else family$linkfun(mustart)
    
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta)))
       stop("cannot find valid starting values: please specify some")
    dev <- sum(dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1
    conv <- FALSE
   
    G$coefficients <- rep(0,ncol(G$X))
    class(G) <- "gam"  
    
    ## need to reset response and weights to post initialization values
    ## in particular to deal with binomial properly...
    G$y <- y
    G$w <- weights

  
    ## set up cluster for parallel computation...

    if (!is.null(cl)&&inherits(cl,"cluster")) {
      n.threads <- length(cl)
      ## drop nodes until each node's data block has at least as many rows
      ## as model matrix columns (needed for a valid per-node QR)...
      while(nobs/n.threads < ncol(G$X)) n.threads <- n.threads - 1
      if (n.threads < length(cl)) { 
        warning("Too many cluster nodes to use all efficiently")
      }
    } else n.threads <- 1

    if (n.threads>1) { ## set up thread argument lists
      ## number of obs per thread
      nt <- rep(ceiling(nobs/n.threads),n.threads)
      nt[n.threads] <- nobs - sum(nt[-n.threads])
      arg <- list()
      n1 <- 0
      for (i in 1:n.threads) {
        n0 <- n1+1;n1 <- n1+nt[i]
        ind <- n0:n1 ## this thread's data block from mf
        n.block <- nt[i]%/%chunk.size ## number of full sized blocks
        stub <- nt[i]%%chunk.size ## size of end block
        if (n.block>0) {
          start <- (0:(n.block-1))*chunk.size+1
          stop <- (1:n.block)*chunk.size
          if (stub>0) {
            start[n.block+1] <- stop[n.block]+1
            stop[n.block+1] <- nt[i]
            n.block <- n.block+1
          } 
        } else {
          n.block <- 1
          start <- 1
          stop <- nt[i]
        }
        ## per-node argument list consumed by qr.up on the cluster...
        arg[[i]] <- list(nobs= nt[i],start=start,stop=stop,n.block=n.block,
                         linkinv=linkinv,dev.resids=dev.resids,gc.level=gc.level,
                         mf = mf[ind,],
                         eta = eta[ind],offset = offset[ind],G = G,use.chol=use.chol)
        if (efam) {
          arg[[i]]$family <- family
        } else {
          arg[[i]]$mu.eta <- mu.eta
	  arg[[i]]$variance <- variance
        }
        arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
        arg[[i]]$G$y <- G$y[ind]
      }
    } else { ## single thread, requires single indices
      ## construct indices for splitting up model matrix construction... 
      n.block <- nobs%/%chunk.size ## number of full sized blocks
      stub <- nobs%%chunk.size ## size of end block
      if (n.block>0) {
        start <- (0:(n.block-1))*chunk.size+1
        stop <- (1:n.block)*chunk.size
        if (stub>0) {
          start[n.block+1] <- stop[n.block]+1
          stop[n.block+1] <- nobs
          n.block <- n.block+1
        } 
      } else {
        n.block <- 1
        start <- 1
        stop <- nobs
      }
   } ## single thread indices complete
 
    conv <- FALSE

    if (method=="fREML") Sl <- Sl.setup(G) ## setup block diagonal penalty object

    if (efam) theta <- family$getTheta()

    for (iter in 1L:control$maxit) { ## main fitting loop
       ## accumulate the QR decomposition of the weighted model matrix
       devold <- dev
       kk <- 0 ## step halving counter for the divergence check below
       repeat { 
         dev <- 0;wt <- rep(0,0) 
         if (n.threads == 1) { ## use original serial update code
           wt <- G$y ## just allocates a vector of the right length
           for (b in 1:n.block) {
             ind <- start[b]:stop[b]
             X <- predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
             rownames(X) <- NULL
             if (is.null(coef)) eta1 <- eta[ind] else eta[ind] <- eta1 <- drop(X%*%coef) + offset[ind]
             mu <- linkinv(eta1) 
             y <- G$y[ind] ## G$model[[gp$response]] ## - G$offset[ind]
             weights <- G$w[ind]
             if (efam) { ## extended family case
                dd <- dDeta(y,mu,weights,theta=theta,family,0)
	        ## note: no handling of infinities and wz case yet
               
	        w <- dd$EDeta2 * .5 
	        #w <- w
                z <- (eta1-offset[ind]) - dd$Deta.EDeta2
	        good <- is.finite(z)&is.finite(w)
	        w[!good] <- 0 ## drop if !good
	        z[!good] <- 0 ## irrelevant
             } else { ## regular exp fam case
               mu.eta.val <- mu.eta(eta1)
               good <- (weights > 0) & (mu.eta.val != 0)
               z <- (eta1 - offset[ind])[good] + (y - mu)[good]/mu.eta.val[good]
               w <- (weights[good] * mu.eta.val[good]^2)/variance(mu)[good]
             }
             dev <- dev + if (efam) sum(dev.resids(y,mu,weights,theta)) else sum(dev.resids(y,mu,weights))  
             wt[ind] <- w ## wt <- c(wt,w)
             w <- sqrt(w)
             ## note that QR may be parallel using npt>1, even under serial accumulation...
             if (b == 1) qrx <- qr.update(w*X[good,,drop=FALSE],w*z,use.chol=use.chol,nt=npt) 
             else qrx <- qr.update(w*X[good,,drop=FALSE],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
             rm(X);if(gc.level>1) gc() ## X can be large: remove and reclaim
          }
          if (use.chol) { ## post proc to get R and f...
            y.norm2 <- qrx$y.norm2 
            qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
            qrx$y.norm2 <- y.norm2
          }
        } else { ## use parallel accumulation
	  
          for (i in 1:length(arg)) {
	    arg[[i]]$coef <- coef
	    if (efam) arg[[i]]$theta <- theta
	  }
          res <- parallel::parLapply(cl,arg,qr.up) 
          ## single thread debugging version 
          #res <- list()
          #for (i in 1:length(arg)) {
          #  res[[i]] <- qr.up(arg[[i]])
          #}
          ## now consolidate the results from the parallel threads...
          if (use.chol) {
            ## X'X and X'z accumulations simply sum across nodes...
            R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
            wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
	    eta <- res[[1]]$eta
            for (i in 2:n.threads) {
              R <- R + res[[i]]$R; f <- f + res[[i]]$f
              wt <- c(wt,res[[i]]$wt);eta <- c(eta,res[[i]]$eta);
	      dev <- dev + res[[i]]$dev
              y.norm2 <- y.norm2 + res[[i]]$y.norm2
            }         
            qrx <- chol2qr(R,f,nt=npt)
            qrx$y.norm2 <- y.norm2
          } else { ## proper QR
            ## per-node R factors are stacked and re-decomposed...
            R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
            wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2; eta <- res[[1]]$eta
            for (i in 2:n.threads) {
              R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
              wt <- c(wt,res[[i]]$wt);eta <- c(eta,res[[i]]$eta)
	      dev <- dev + res[[i]]$dev
              y.norm2 <- y.norm2 + res[[i]]$y.norm2
            }         
            ## use parallel QR here if npt>1...
            qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE) 
            f <- qr.qty(qrx,f)[1:ncol(R)]
            rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
            qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
          }
        } 

        ## if the routine has been called with only a random sample of the data, then 
        ## R, f and ||y||^2 can be corrected to estimate the full versions...
 
        qrx$R <- qrx$R/sqrt(samfrac)
        qrx$f <- qrx$f/sqrt(samfrac)
        qrx$y.norm2 <- qrx$y.norm2/samfrac

        G$n <- nobs
      
        rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
      
        if (control$trace)
           message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))

        if (!is.finite(dev)) stop("Non-finite deviance")

        ## preparation for working model fit is ready, but need to test for convergence first
        if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
            conv <- TRUE
            ## NOTE(review): 'start' here is the chunk start-index vector set
            ## up above, not a coefficient vector — this assignment looks
            ## suspicious; coef does not appear to be used after convergence
            ## (results come from 'object'), but worth confirming.
            coef <- start
            break
        }
        if (kk > 0) break ## already shrunk the step
        ## At this point it is worth checking that coef update actually improved the penalized
        ## deviance. If not try step halving, and redo the above once a suitable step has been
        ## found...
        if (iter>2) { ## can test divergence
          ## need to compute penalty at start and end of step
	  if (efam) {
	    dev0 <- sum(dev.resids(G$y,linkinv(eta0),G$w,theta0)) ## depends on theta, which will have changed
	    dev1 <- sum(dev.resids(G$y,linkinv(eta),G$w,theta0)) ## depends on theta, which will have changed
          } else { dev1 <- dev }
          if (method=="fREML") {
	    pcoef <- fit$beta
            Sb0 <- Sl.Sb(um$Sl,rho=log(object$full.sp),pcoef0)
	    Sb <- Sl.Sb(um$Sl,rho=log(object$full.sp),pcoef)
          } else {
	    pcoef <- coef
	    full.sp <- if (is.null(object$full.sp)) object$sp else object$full.sp
            Sb0 <- regular.Sb(G$S,G$off,full.sp,pcoef0)
	    Sb <- regular.Sb(G$S,G$off,full.sp,pcoef)
          }
          ## halve the step (at most 6 times) while the penalized deviance
          ## got worse relative to the start of the step...
	  while (dev0 + sum(pcoef0*Sb0) < dev1 + sum(pcoef * Sb) && kk < 6) {
            ## shrink step ...
            coef <- (coef0 + coef)/2
	    pcoef <- (pcoef0 + pcoef)/2
	    eta <- (eta0 + eta)/2
	    Sb <- (Sb0 + Sb)/2
	    ## recompute deviance ...
	    dev <- if (efam) sum(dev.resids(G$y,linkinv(eta),G$w,theta)) else sum(dev.resids(G$y,linkinv(eta),G$w)) 
            dev1 <- if (efam) sum(dev.resids(G$y,linkinv(eta),G$w,theta0)) else dev
            kk <- kk + 1
          }
        }
	if (kk == 0) break ## step was ok
      } ## repeat
      
      if (conv) break

      if (iter>1) { ## store coef and eta for divergence checking
        coef0 <- coef
	if (efam) theta0 <- theta ## theta used for determining step
	pcoef0 <- if (method=="fREML") fit$beta else coef
	eta0 <- eta
	dev0 <- dev
      }

      if (efam && iter>1) { ## estimate theta
	scale1 <- if (!is.null(family$scale)) family$scale else scale
        if (family$n.theta>0||scale<0) theta <- estimate.theta(theta,family,G$y,linkinv(eta),scale=scale1,wt=G$w,tol=1e-7)
        if (!is.null(family$scale) && family$scale<0) {
	   scale <- exp(theta[family$n.theta+1])
	   theta <- theta[1:family$n.theta]
	}  
        family$putTheta(theta)
      }
	  
      ## now fit the working penalized LS problem by the requested method...
      if (method=="GCV.Cp") {
         fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
                      H=G$H,C=matrix(0,0,ncol(qrx$R)),     ##C=G$C,
                      gamma=gamma,scale=scale,gcv=(scale<=0),
                      extra.rss=rss.extra,n.score=nobs+nobs.extra)
 
         post <- magic.post.proc(qrx$R,fit,qrx$f*0+1) 
      } else if (method=="fREML") { ## use fast REML code
        ## block diagonal penalty object, Sl, set up before loop
        um <- Sl.Xprep(Sl,qrx$R,nt=npt)
        lambda.0 <- initial.sp(qrx$R,G$S,G$off)
        lsp0 <- log(lambda.0) ## initial s.p.
        ## carry forward scale estimate if possible...
        if (scale>0) log.phi <- log(scale) else {
          if (iter>1) log.phi <- log(object$scale) else {
            if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(as.numeric(G$y))*.05) else
               log.phi <- log(qrx$y.norm2/(nobs+nobs.extra))
          }
        }
        fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
                             log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
                             nobs =nobs+nobs.extra,Mp=um$Mp,nt=npt)
        ## cov=FALSE: covariance matrices are only computed once, after the
        ## final iteration (see end of fitting loop)
        res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=FALSE,L=G$L,nt=npt)
        object <- list(coefficients=res$beta,db.drho=fit$d1b,
                       gcv.ubre=fit$reml,mgcv.conv=list(iter=fit$iter,
                       message=fit$conv),rank=ncol(um$X),
                       Ve=NULL,scale.estimated = scale<=0,outer.info=fit$outer.info,
                        optimizer=c("perf","newton"))
 
        if (scale<=0) { ## get sp's and scale estimate
          nsp <- length(fit$rho)
          object$sig2 <- object$scale <- exp(fit$rho[nsp])
          object$sp <- exp(fit$rho[-nsp]) 
          nsp <- length(fit$rho.full)
          object$full.sp <- exp(fit$rho.full[-nsp])
        } else { ## get sp's
          object$sig2 <- object$scale <- scale  
          object$sp <- exp(fit$rho)
          object$full.sp <- exp(fit$rho.full)
        }
        class(object)<-c("gam")               
      } else { ## method is one of "ML", "P-REML" etc...
        ## temporarily overwrite G's data with the reduced working problem
        ## (R as model matrix, f as response), fit via gam(), then restore...
        y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
        G$y <- qrx$f
        G$w <- G$y*0+1
        G$X <- qrx$R
        G$n <- length(G$y)
        G$offset <- G$y*0
        G$dev.extra <- rss.extra
        G$pearson.extra <- rss.extra
        G$n.true <- nobs+nobs.extra
        object <- gam(G=G,method=method,gamma=gamma,scale=scale,control=gam.control(nthreads=npt))
        y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
	object$fitted.values <- NULL
      }
     
      if (method=="GCV.Cp") { 
        ## package up the magic() results in gam object form...
        object <- list()
        object$coefficients <- fit$b
        object$edf <- post$edf 
        object$edf1 <- post$edf1
        ##object$F <- post$F
        object$full.sp <- fit$sp.full
        object$gcv.ubre <- fit$score
        object$hat <- post$hat
        object$mgcv.conv <- fit$gcv.info 
        object$optimizer="magic"
        object$rank <- fit$gcv.info$rank
        object$Ve <- post$Ve
        object$Vp <- post$Vb
        object$sig2 <- object$scale <- fit$scale
        object$sp <- fit$sp
        names(object$sp) <- names(G$sp)
        class(object)<-c("gam")
      }

      coef <- object$coefficients
        
      if (any(!is.finite(coef))) {
          conv <- FALSE
          warning(gettextf("non-finite coefficients at iteration %d",
                  iter))
          break
      }
    } ## end fitting iteration

    if (method=="fREML") { ## do expensive cov matrix cal only at end
      res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale,L=G$L,nt=npt)
      object$edf <- res$edf
      object$edf1 <- res$edf1
      object$edf2 <- res$edf2
      ##object$F <- res$F
      object$hat <- res$hat
      object$Vp <- res$Vp
      object$Ve <- res$Ve
      object$Vc <- res$Vc
    }
    
    if (efam) { ## deal with any post processing
       if (!is.null(family$postproc)) {
         object$family <- family
         posr <- family$postproc(family=family,y=y,prior.weights=G$w,
              fitted=linkinv(eta),linear.predictors=eta,offset=G$offset,
	      intercept=G$intercept)
         if (!is.null(posr$family)) object$family$family <- posr$family
         if (!is.null(posr$deviance)) object$deviance <- posr$deviance
         if (!is.null(posr$null.deviance)) object$null.deviance <- posr$null.deviance
       }
      if (is.null(object$null.deviance)) object$null.deviance <- sum(family$dev.resids(G$y,weighted.mean(G$y,G$w),G$w,theta))   
    }

    if (!conv)
       warning("algorithm did not converge")
   
    eps <- 10 * .Machine$double.eps
    if (family$family == "binomial") {
         if (any(mu > 1 - eps) || any(mu < eps))
                warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family == "poisson") {
            if (any(mu < eps))
                warning("fitted rates numerically 0 occurred")
    }
  object$R <- qrx$R    
  object$iter <- iter 
  object$wt <- wt
  object$y <- G$y
  object$prior.weights <- G$w
  rm(G);if (gc.level>0) gc()
  object
} ## end bgam.fit


1217

1218

1219
ar.qr.up <- function(arg) {
## function to perform QR updating with AR residuals, on one execution thread
## arg is a list; fields used here: rho (AR1 parameter), n.block, start, end
## (chunk index vectors), mf (model frame), G (gam setup object), response,
## offset, nobs, first/last (is this the first/last thread's data block?),
## use.chol and gc.level. Returns the accumulated qrx list (R, f, y.norm2)
## plus yX.last, the final untransformed (y, X) row kept for later updating.
  if (arg$rho!=0) { ## AR1 error model
     ld <- 1/sqrt(1 - arg$rho^2) ## leading diagonal of root inverse correlation
     sd <- -arg$rho * ld         ## sub diagonal
  } 
  yX.last <- NULL
  qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
  for (i in 1:arg$n.block) {
    ind <- arg$start[i]:arg$end[i] 
    if (arg$rho!=0) { ## have to find AR1 transform...
       N <- arg$end[i]-arg$start[i]+1
       ## note first row implied by this transform
       ## is always dropped, unless really at beginning of data.
       ## row/weight/stop encode the banded root-inverse-correlation
       ## matrix in the sparse form consumed by rwMatrix...
       row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
       weight <- c(1,rep(c(sd,ld),N-1))
       stop <- c(1,1:(N-1)*2+1)
       if (!is.null(arg$mf$"(AR.start)")) { ## need to correct the start of new AR sections...
           ii <- which(arg$mf$"(AR.start)"[ind]==TRUE)
           if (length(ii)>0) {
             if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
             weight[ii*2-2] <- 0 ## zero sub diagonal
             weight[ii*2-1] <- 1 ## set leading diagonal to 1
           }
       }
     } 
     ## arg$G$model <- arg$mf[ind,]
     w <- sqrt(arg$G$w[ind]) ## square roots of prior weights
     X <- w*predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
     y <- w*(arg$mf[ind,arg$response] - arg$offset[ind]) ## w*(arg$G$model[[arg$response]] - arg$offset[ind])
     if (arg$rho!=0) {
       ## Apply transform...
       if (arg$last&&arg$end[i]==arg$nobs) yX.last <- 
           c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
       ## first row of the transformed block is only valid at the very start
       ## of the data; otherwise drop it (it belongs to the previous block)...
       if (arg$first&&i==1) {
          X <- rwMatrix(stop,row,weight,X)
          y <- rwMatrix(stop,row,weight,y)
       } else {
          X <- rwMatrix(stop,row,weight,X)[-1,]
          y <- rwMatrix(stop,row,weight,y)[-1]
       } 
     } ## dealt with AR1      
     qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
     rm(X);if (arg$gc.level>1) {gc()} ## X can be large: remove and reclaim
  } ## all blocks dealt with
  qrx$yX.last <- yX.last
  if (arg$gc.level>1) {rm(arg,w,y,ind);gc()}
  qrx
} ## ar.qr.up
1268

1269 1270 1271 1272 1273 1274 1275 1276
pabapr <- function(arg) {
## Worker routine for parallel prediction: unpack one argument list and
## forward its elements to predict.gam on a single cluster node.
## QUERY: ... handling?
  do.call(predict.gam,
          list(arg$object, newdata = arg$newdata, type = arg$type,
               se.fit = arg$se.fit, terms = arg$terms,
               block.size = arg$block.size,
               newdata.guaranteed = arg$newdata.guaranteed,
               na.action = arg$na.action))
}

1277
predict.bam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
1278
                        block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,
1279
                        cluster=NULL,discrete=TRUE,n.threads=1,...) {
1280
## function for prediction from a bam object, possibly in parallel
1281 1282
  
  #if (is.function(na.action)) na.action <- deparse(substitute(na.action)) ## otherwise predict.gam can't detect type
1283 1284 1285 1286
  if (discrete && !is.null(object$dinfo)) {
    return(predict.bamd(object,newdata,type,se.fit,terms,exclude,
                        block.size,newdata.guaranteed,na.action,n.threads,...))
  }
1287
  ## remove some un-needed stuff from object
1288 1289 1290 1291
  object$Sl <- object$qrx <- object$R <- object$F <- object$Ve <-
  object$Vc <- object$G <- object$residuals <- object$fitted.values <-
  object$linear.predictors <- NULL
  gc()
Dirk Eddelbuettel's avatar