## routines for very large dataset generalized additive modelling.
## (c) Simon N. Wood 2009-2015

ls.size <- function(x) {
## If `x' is a list, return the size of its elements, in bytes, in a named
## vector; otherwise return the size of the object itself.
## Fix: the original `for (i in 1:n)` loop failed on an empty list, since
## 1:0 iterates over c(1,0) and x[[1]] is then an out-of-bounds subscript.
  if (!is.list(x)) return(object.size(x))
  ## vapply is type-stable and handles length-0 lists cleanly
  sz <- vapply(x, function(el) as.numeric(object.size(el)), numeric(1))
  names(sz) <- names(x)
  sz
} ## ls.size
17

18
rwMatrix <- function(stop,row,weight,X,trans=FALSE) {
## Routine to recombine the rows of a matrix X according to info in 
## stop, row and weight. Consider the ith row of the output matrix 
## ind <- 1:stop[i] if i==1 and ind <- (stop[i-1]+1):stop[i]
## otherwise. The ith output row is then X[row[ind],]*weight[ind]
## i.e. a weighted sum of (possibly repeated) rows of X. Used e.g. to
## apply the AR1 root-inverse-correlation reweighting (see bgam.fitd).
## X may be a matrix or a plain vector (treated as a 1-column matrix);
## the result has the same form as the input.
## All real work is done in compiled code (C_rwMatrix); `trans` is
## passed through to it — presumably selects the transposed operation,
## TODO confirm against the C source.
  if (is.matrix(X)) { n <- nrow(X);p<-ncol(X);ok <- TRUE} else { n<- length(X);p<-1;ok<-FALSE}
  stop <- stop - 1;row <- row - 1 ## R indices -> C indices
  ## .C overwrites X in place (in the copy), work is scratch space of the
  ## same size as X
  oo <-.C(C_rwMatrix,as.integer(stop),as.integer(row),as.double(weight),X=as.double(X),
          as.integer(n),as.integer(p),trans=as.integer(trans),work=as.double(rep(0,n*p)))
  ## reshape back to matrix form if the input was a matrix
  if (ok) return(matrix(oo$X,n,p)) else
  return(oo$X) 
} ## rwMatrix
30

31
chol2qr <- function(XX,Xy,nt=1) {
## takes X'X and X'y and returns R and f
## equivalent to qr update: R is the pivoted Cholesky factor of X'X
## (re-ordered back to the original column order) and f = R^{-T} X'y,
## so that R'R = X'X and R'f = X'y.
## nt: number of threads — non-zero selects the parallel pivoted
## Cholesky (pchol), otherwise base chol(...,pivot=TRUE) is used.
  op <- options(warn = -1) ## otherwise warns if +ve semidef
  on.exit(options(op))     ## fix: restore even if pchol/chol errors,
                           ## so the warn option can never leak
  R <- if (nt) pchol(XX,nt=nt) else chol(XX,pivot=TRUE)
  p <- length(Xy)
  ipiv <- piv <- attr(R,"pivot");ipiv[piv] <- 1:p ## inverse pivot
  rank <- attr(R,"rank");ind <- 1:rank
  if (rank<p) R[(rank+1):p,] <- 0 ## chol is buggy (R 3.1.0) with pivot=TRUE
  ## solve R'f = X'y on the rank-deficient leading block, pad with zeros,
  ## then undo the pivoting...
  f <- c(forwardsolve(t(R[ind,ind]),Xy[piv[ind]]),rep(0,p-rank))[ipiv]
  R <- R[ipiv,ipiv]
  list(R=R,f=f)
} ## chol2qr
45

46
qr.update <- function(Xn,yn,R=NULL,f=rep(0,0),y.norm2=0,use.chol=FALSE,nt=1)
## Let X = QR and f = Q'y. This routine updates f and R
## when Xn is appended to X and yn appended to y. If R is NULL
## then initial QR of Xn is performed. ||y||^2 is accumulated as well.
## if use.chol==TRUE then quicker but less stable accumulation of X'X and
## X'y are used. Results then need post processing, to get R =chol(X'X)
## and f= R^{-1} X'y (see chol2qr).
## if nt>1 and use.chol=FALSE then parallel QR is used 
{ p <- ncol(Xn)  
  y.norm2 <- y.norm2+sum(yn*yn) ## accumulate ||y||^2
  if (use.chol) { 
    if (is.null(R)) { ## first block: initialize X'X and X'y
      R <- crossprod(Xn)
      ## crossprod(Xn,yn) == t(Xn)%*%yn without forming the transpose
      fn <- as.numeric(crossprod(Xn,yn)) 
    } else { ## accumulate this block's contribution
      R <- R + crossprod(Xn)
      fn <- f + as.numeric(crossprod(Xn,yn))
    } 
    return(list(R=R,f=fn,y.norm2=y.norm2))
  } else { ## QR update
    if (!is.null(R)) { ## stack previous R and f above the new block
      Xn <- rbind(R,Xn)
      yn <- c(f,yn)
    }
    qrx <- if (nt==1) qr(Xn,tol=0,LAPACK=TRUE) else pqr2(Xn,nt)
    fn <- qr.qty(qrx,yn)[1:p]
    rp <- qrx$pivot;rp[rp] <- 1:p # reverse pivot
    ## un-pivot columns of R so that crossprod(R) == X'X in original order
    return(list(R = qr.R(qrx)[,rp],f=fn,y.norm2=y.norm2))
  }
} ## qr.update
76

77 78 79 80 81 82 83 84

qr.up <- function(arg) {
## routine for parallel computation of the QR factorization of 
## a large gam model matrix, suitable for calling with parLapply.
## `arg` packs one cluster node's data blocks plus the family functions
## and current fit state; returns the accumulated R, f and ||y||^2 from
## qr.update, with the node's deviance (dev) and IRLS weights (wt) added.
  wt <- rep(0,0) ## accumulated IRLS weights across blocks
  dev <- 0       ## accumulated deviance
  for (b in 1:arg$n.block) {
    ind <- arg$start[b]:arg$stop[b] ## rows of mf forming this block
    X <- predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
    rownames(X) <- NULL
    ## eta from supplied coefs if available, otherwise carried-in eta
    if (is.null(arg$coef)) eta1 <- arg$eta[ind] else eta1 <- drop(X%*%arg$coef) + arg$offset[ind]
    mu <- arg$linkinv(eta1) 
    y <- arg$G$y[ind] ## arg$G$model[[arg$response]] 
    weights <- arg$G$w[ind]
    mu.eta.val <- arg$mu.eta(eta1)
    ## drop observations with zero prior weight or flat link...
    good <- (weights > 0) & (mu.eta.val != 0)
    z <- (eta1 - arg$offset[ind])[good] + (y - mu)[good]/mu.eta.val[good] ## working response
    w <- (weights[good] * mu.eta.val[good]^2)/arg$variance(mu)[good]      ## working weights
    dev <- dev + sum(arg$dev.resids(y,mu,weights))
    wt <- c(wt,w)
    w <- sqrt(w)
    ## note assumption that nt=1 in following qr.update - i.e. each cluster node is strictly serial
    if (b == 1) qrx <- qr.update(w*X[good,],w*z,use.chol=arg$use.chol) 
    else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
    rm(X);if(arg$gc.level>1) gc() ## X can be large: remove and reclaim
  }
  qrx$dev <- dev;qrx$wt <- wt
  ## fix: `w` was listed twice in the original rm() call, which raised a
  ## spurious "object 'w' not found" warning whenever gc.level>1
  if (arg$gc.level>1) { rm(arg,ind,mu,y,weights,mu.eta.val,good,z,w,wt);gc()}
  qrx
} ## qr.up
107

108 109 110 111 112 113 114
compress.df <- function(dat,m=NULL) {
## Takes dataframe in dat and compresses it by rounding and duplicate 
## removal. For metric variables we first find the unique cases.
## If there are <= m of these then these are employed, otherwise 
## rounding is used. Factors are always reduced to the number of 
## levels present in the data. Idea is that this function is called 
## with columns of dataframes corresponding to single smooths or marginals. 
## Returns the compressed frame with an "index" attribute k such that
## row k[i] of the result reconstructs row i of the (rounded) input.
## Note: caller is responsible for RNG state handling — see discrete.mf.
  d <- ncol(dat) ## number of variables to deal with
  n <- nrow(dat) ## number of data/cases
  ## default grid size depends on dimension; for user-supplied m and d>1
  ## take the d-th root so that the total grid is about m points
  if (is.null(m)) m <- if (d==1) 1000 else if (d==2) 100 else 25 else
  if (d>1) m <- round(m^{1/d}) + 1
  
  mf <- mm <- 1 ## total grid points for factor and metric
  for (i in 1:d) if (is.factor(dat[,i])) {  
    mf <- mf * length(unique(as.vector(dat[,i]))) 
  } else {
    mm <- mm * m 
  } 
  if (is.matrix(dat[[1]])) { ## must replace matrix terms with vec(dat[[i]])
    dat0 <- data.frame(as.vector(dat[[1]]))
    if (d>1) for (i in 2:d) dat0[[i]] <- as.vector(dat[[i]])
    names(dat0) <- names(dat)
    dat <- dat0;rm(dat0)
  }
  xu <- uniquecombs(dat) ## unique rows, with "index" attr mapping back
  if (nrow(xu)>mm*mf) { ## too many unique rows to use only unique
    for (i in 1:d) if (!is.factor(dat[,i])) { ## round the metric variables
      xl <- range(dat[,i])
      xu <- seq(xl[1],xl[2],length=m) ## m-point grid over the range
      dx <- xu[2]-xu[1]
      kx <- round((dat[,i]-xl[1])/dx)+1 ## nearest grid point index
      dat[,i] <- xu[kx] ## rounding the metric variables
    }
    xu <- uniquecombs(dat) ## re-find unique rows after rounding
  }  
  k <- attr(xu,"index")
  ## shuffle rows in order to avoid induced dependencies between discretized
  ## covariates (which can mess up gam.side)...
  ## any setting should be done in routine calling this one!!
  #seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  #if (inherits(seed,"try-error")) {
  #   runif(1)
  #   seed <- get(".Random.seed",envir=.GlobalEnv)
  #}
  #kind <- RNGkind(NULL)
  #RNGkind("default","default")
  ## following line must be different to that used in
  ## tp constructor subsampling!
  #set.seed(8547) ## ensure repeatability
  
  ii <- sample(1:nrow(xu),nrow(xu),replace=FALSE) ## shuffling index
  
  #RNGkind(kind[1],kind[2])
  #assign(".Random.seed",seed,envir=.GlobalEnv) ## RNG behaves as if it had not been used
  
  xu[ii,] <- xu  ## shuffle rows of xu
  k <- ii[k]     ## correct k index accordingly
  ## ... finished shuffle
  ## if arguments were matrices, then return matrix index
  ## (k was computed on the vec'd data, so has n*ncol entries)
  if (length(k)>n) k <- matrix(k,nrow=n) 
  k -> attr(xu,"index")
  xu
} ## compress.df

172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
check.term <- function(term,rec) {
## utility function for discrete.mf. Checks whether variables in "term"
## have already been discretized, and if so whether this discretization 
## can be re-used for the current "term". Stops if term already discretized
## but we can't re-use discretization. Otherwise returns index of k index
## or 0 if the term is not in the existing list.
  ii <- which(rec$vnames%in%term)
  if (length(ii)) { ## at least one variable already discretized
    if (length(term)==rec$d[min(ii)]) { ## dimensions match previous discretization
      ## fix: the stop() call was missing here, so the error message was
      ## silently *returned* as if it were a valid k index...
      if (sum(!(term%in%rec$vnames[ii]))) stop("bam can not discretize with this nesting structure")
      else return(rec$ki[min(ii)]) ## all names match previous - return index of previous
    } else stop("bam can not discretize with this nesting structure")
  } else return(0) ## no match
} ## check.term

187
discrete.mf <- function(gp,mf,pmf,m=NULL,full=TRUE) {
## discretize the covariates for the terms specified in smooth.spec
## id not allowed. pmf is a model frame for just the 
## parametric terms --- mini.mf is applied to this.
## if full is FALSE then parametric and response terms are ignored
## and what is returned is a list where columns can be of 
## different lengths.
## On exit... 
## * mf is a model frame containing the unique discretized covariate
##   values, in randomized order, padded to all be same length
## * nr records the number of unique discretized covariate values
##   i.e. the number of rows before the padding starts
## * k.start contains the starting column in index vector k, for
##   each variable.
## * k is the index matrix. The ith record of the 1st column of the 
##   jth variable is in row k[i,k.start[j]] of the corresponding 
##   column of mf.
## ... there is an element of nr and k.start for each variable of 
## each smooth, but variables are only discretized and stored in mf
## once. If there are no matrix variables then k.start = 1:(ncol(k)+1) 
#  if (is.null(attr(mf,"terms"))) mf <- eval(gp$fake.formula[-2],mf) ## assumes model frame below

  ## some sub sampling here... want to set and restore RNG state used for this
  ## to ensure strict repeatability.
  
  seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  if (inherits(seed,"try-error")) {
       ## no seed yet in this session: generate one so there is state to restore
       runif(1)
       seed <- get(".Random.seed",envir=.GlobalEnv)
  }
  kind <- RNGkind(NULL)
  RNGkind("default", "default")
  set.seed(8547) ## keep different to tps constructor!

  mf0 <- list()
  ## count total number of index vectors needed: one per marginal of each
  ## tensor term (or one per singleton smooth), plus one per "by" variable
  nk <- 0 ## count number of index vectors to avoid too much use of cbind
  for (i in 1:length(gp$smooth.spec)) nk <- nk + as.numeric(gp$smooth.spec[[i]]$by!="NA") +
    if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) length(gp$smooth.spec[[i]]$margin) else 1
  k <- matrix(0,nrow(mf),nk) ## each column is an index vector
  k.start <- 1:(nk+1) ## record last column for each term
  ik <- 0 ## index counter
  nr <- rep(0,nk) ## number of rows for term
  ## structure to record terms already processed...
  rec <- list(vnames = rep("",0), ## variable names
              ki = rep(0,0),      ## index of original index vector var relates to  
              d = rep(0,0))       ## dimension of terms involving this var
  ## loop through the terms discretizing the covariates...
  for (i in 1:length(gp$smooth.spec)) {
    nmarg <- if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) length(gp$smooth.spec[[i]]$margin) else 1
    maxj <- if (gp$smooth.spec[[i]]$by=="NA") nmarg else nmarg + 1 
    ## per-term discretization size: scalar m applies to all terms
    mi <- if (is.null(m)||length(m)==1) m else m[i]
    if (!is.null(gp$smooth.spec[[i]]$id)) stop("discretization can not handle smooth ids")
    j <- 0
    for (jj in 1:maxj) { ## loop through marginals
      ## first "marginal" is the by variable, when there is one...
      if (jj==1&&maxj!=nmarg) termi <- gp$smooth.spec[[i]]$by else {
        j <- j + 1
        termi <- if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) gp$smooth.spec[[i]]$margin[[j]]$term else 
                 gp$smooth.spec[[i]]$term          
      } 
      ik.prev <- check.term(termi,rec) ## term already discretized?
      ik <- ik + 1 ## increment index counter
      if (ik.prev==0) { ## new discretization required
         mfd <- compress.df(mf[termi],m=mi)
         ki <- attr(mfd,"index")
         if (is.matrix(ki)) { ## matrix covariate: needs several k columns
           ind <- (ik+1):length(k.start) 
           k.start[ind] <- k.start[ind] + ncol(ki)-1    ## adjust start indices
           k <- cbind(k,matrix(0,nrow(k),ncol(ki)-1)) ## extend index matrix
           ind <- k.start[ik]:(k.start[ik+1]-1) 
           k[,ind] <- ki 
         } else {
           k[,k.start[ik]] <- ki
         }
         nr[ik] <- nrow(mfd)
         mf0 <- c(mf0,mfd) 
         ## record variable discretization info...
         d <- length(termi)
         rec$vnames <- c(rec$vnames,termi)
         rec$ki <- c(rec$ki,rep(ik,d))
         rec$d <- c(rec$d,rep(d,d))
       } else { ## re-use an earlier discretization...
         ind.prev <- k.start[ik.prev]:(k.start[ik.prev+1]-1)
         ind <- (ik+1):length(k.start)
         k.start[ind] <- k.start[ind] + length(ind.prev)-1
         ind <- k.start[ik]:(k.start[ik+1]-1)
         k[,ind] <- k[,ind.prev]
         #k[,ik] <- k[,ik.prev]
         nr[ik] <- nr[ik.prev]
       }
    } ## end marginal jj loop
  } ## term loop (i)

  ## obtain parametric terms and..
  ## pad mf0 so that all rows are the same length
  ## padding is necessary if gam.setup is to be used for setup

  if (full) {
    maxr <- max(nr) 
    pmf0 <- mini.mf(pmf,maxr) ## deal with parametric components
    if (nrow(pmf0)>maxr) maxr <- nrow(pmf0)
    mf0 <- c(mf0,pmf0) ## add parametric terms to end of mf0

    ## pad short columns with resampled values of themselves...
    for (i in 1:length(mf0)) {
      me <- length(mf0[[i]]) 
      if (me < maxr) mf0[[i]][(me+1):maxr] <- sample(mf0[[i]],maxr-me,replace=TRUE)
    }
    ## add response so that gam.setup can do its thing... 
  
    mf0[[gp$response]] <- sample(mf[[gp$response]],maxr,replace=TRUE)
    
    ## mf0 is the discretized model frame (actually a list), padded to have equal length rows
    ## k is the index vector for each sub-matrix, only the first nr rows of which are
    ## to be retained... Use of check.names=FALSE ensures, e.g. 'offset(x)' not changed...

    ## now copy back into mf so terms unchanged
    #mf <- mf[1:maxr,]
    mf <- mf[sample(1:nrow(mf),maxr,replace=TRUE),]
    for (na in names(mf0)) mf[[na]] <- mf0[[na]] 
   
  } else mf <- mf0
  ## reset RNG to old state...
  RNGkind(kind[1], kind[2])
  assign(".Random.seed", seed, envir = .GlobalEnv)

  ## finally one more pass through, expanding k, k.start and nr to deal with replication that
  ## will occur with factor by variables...
  ik <- ncol(k)+1 ## starting index col for this term in k.start
  for (i in length(gp$smooth.spec):1) { ## work down through terms so insertion painless
    if (inherits(gp$smooth.spec[[i]],"tensor.smooth.spec")) nd <-  
         length(gp$smooth.spec[[i]]$margin) else nd <- 1 ## number of indices
    ik <- ik - nd ## starting index if no by  
    if (gp$smooth.spec[[i]]$by!="NA") {
      ik <- ik - 1 ## first index
      nd <- nd + 1 ## number of indices
      byvar <- mf[[gp$smooth.spec[[i]]$by]]
      if (is.factor(byvar)) { ## then need to expand nr and index matrix
        nex <- length(levels(byvar))  ## number of copies of term indices
        if (is.ordered(byvar)) nex <- nex - 1 ## first level dropped
        if (nex>0) { ## insert index copies
          ii0 <- if (ik>1) 1:(ik-1) else rep(0,0) ## earlier
          ii1 <- if (ik+nd-1 < length(nr)) (ik+nd):length(nr) else rep(0,0) ## later
          ii <- ik:(ik+nd-1) ## cols for this term    
          ## indices for columns of k... 
          kk0 <- if (ik>1) 1:(k.start[ik]-1) else rep(0,0) ## earlier
          kk1 <- if (ik+nd-1 < length(nr)) k.start[ik+nd]:ncol(k) else rep(0,0) ## later
          kk <- k.start[ik]:(k.start[ik+nd]-1) ## cols for this term
          k <- cbind(k[,kk0,drop=FALSE],k[,rep(kk,nex),drop=FALSE],k[,kk1,drop=FALSE])
          nr <- c(nr[ii0],rep(nr[ii],nex),nr[ii1])
          ## expand k.start...
          nkk <- length(kk) ## number of k columns in term to be repeated
          k.start <- c(k.start[ii0],rep(k.start[ii],nex)+rep(0:(nex-1),each=nkk)*nkk,
                       (nex-1)*nkk+c(k.start[ii1],k.start[length(k.start)]))
        }
      } ## factor by 
    } ## existing by
  } ## smooth.spec loop
  list(mf=mf,k=k,nr=nr,k.start=k.start)
} ## discrete.mf

347 348 349
mini.mf <-function(mf,chunk.size) {
## takes a model frame and produces a representative subset of it, suitable for 
## basis setup. The subset has at most chunk.size rows (more if chunk.size is
## too small to be representative), contains the min and max of every numeric
## variable and at least one row for every factor level present.
## RNG state is saved and restored so callers see an untouched RNG.
  ## first count the minimum number of rows required for representiveness:
  ## 2 per numeric (min+max), one per level of each factor
  mn <- 0
  for (j in 1:length(mf)) mn <- mn + if (is.factor(mf[[j]])) length(levels(mf[[j]])) else 2
  if (chunk.size < mn) chunk.size <- mn
  n <- nrow(mf)
  if (n <= chunk.size) return(mf) ## nothing to do — already small enough

  seed <- try(get(".Random.seed",envir=.GlobalEnv),silent=TRUE) ## store RNG seed
  if (inherits(seed,"try-error")) {
     ## no seed yet in this session: create one so there is state to restore
     runif(1)
     seed <- get(".Random.seed",envir=.GlobalEnv)
  }
  kind <- RNGkind(NULL)
  RNGkind("default", "default")
  set.seed(66)  
  ## randomly sample from original frame...
  ind <- sample(1:n,chunk.size)
  mf0 <- mf[ind,,drop=FALSE]
  ## ... now need to ensure certain sorts of representativeness

  ## work through elements collecting the rows containing 
  ## max and min for each variable, and a random row for each 
  ## factor level....

  ind <- sample(1:n,n,replace=FALSE) ## randomized index for stratified sampling w.r.t. factor levels
  fun <- function(X,fac,ind) ind[fac[ind]==X][1] ## stratified sampler
  k <- 0 ## next row of mf0 to overwrite with a "guaranteed" row
  for (j in 1:length(mf)) if (is.numeric(mf0[[j]])) {
    if (is.matrix(mf0[[j]])) { ## find row containing minimum
      ## for matrix columns, any row containing the overall min/max value
      j.min <- min((1:n)[as.logical(rowSums(mf[[j]]==min(mf[[j]])))])
      j.max <- min((1:n)[as.logical(rowSums(mf[[j]]==max(mf[[j]])))])
    } else { ## vector
      j.min <- min(which(mf[[j]]==min(mf[[j]])))
      j.max <- min(which(mf[[j]]==max(mf[[j]])))
    }
    ## overwrite the next rows of the sample with the extreme rows
    k <- k + 1; mf0[k,] <- mf[j.min,]
    k <- k + 1; mf0[k,] <- mf[j.max,] 
  } else if (is.factor(mf[[j]])) { ## factor variable...
    ## randomly sample one row from each factor level...
    find <- apply(X=as.matrix(levels(mf[[j]])),MARGIN=1,FUN=fun,fac=mf[[j]],ind=ind)
    find <- find[is.finite(find)] ## in case drop.unused.levels==FALSE, so that there are levels without rows
    nf <- length(find)
    mf0[(k+1):(k+nf),] <- mf[find,]
    k <- k + nf
  }

  RNGkind(kind[1], kind[2])
  assign(".Random.seed", seed, envir = .GlobalEnv)

  mf0
} ## mini.mf


bgam.fitd <- function (G, mf, gp ,scale , coef=NULL,etastart = NULL,
    mustart = NULL, offset = rep(0, nobs),rho=0, control = gam.control(), intercept = TRUE, 
    gc.level=0,nobs.extra=0,npt=1) {
## This is a version of bgam.fit1 designed for use with discretized covariates. 
## Difference to bgam.fit1 is that XWX, XWy and Xbeta are computed in C
## code using compressed versions of X. Parallelization of XWX formation
## is performed at the C level using openMP.
## Alternative fitting iteration using Choleski only, including for REML.
## Basic idea is to take only one Newton step for parameters per iteration
## and to control the step length to ensure that at the end of the step we
## are not going uphill w.r.t. the REML criterion...
## Arguments (as used below): G is the pre-pared gam setup object (with
## discretized model matrix G$Xd and index info G$kd etc.); mf the model
## frame; gp the parsed formula; scale >0 fixes the scale parameter;
## rho is the AR1 correlation; npt the number of threads for the C level.
## Returns a fitted-model list (coefficients, Vp, Vc, edf, sp, ...).
    
    y <- mf[[gp$response]]
    weights <- G$w 
    conv <- FALSE
    nobs <- nrow(mf)
    offset <- G$offset 

    if (rho!=0) { ## AR1 error model
      
      ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
      sd <- -rho*ld         ## sub diagonal
      N <- nobs    
      ## see rwMatrix() for how following are used...
      ar.row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)]) ## index of rows to reweight
      ar.weight <- c(1,rep(c(sd,ld),N-1))     ## row weights
      ar.stop <- c(1,1:(N-1)*2+1)    ## (stop[i-1]+1):stop[i] are the rows to reweight to get ith row
      if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
        ii <- which(mf$"(AR.start)"==TRUE)
        if (length(ii)>0) {
          if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
          ar.weight[ii*2-2] <- 0 ## zero sub diagonal
          ar.weight[ii*2-1] <- 1 ## set leading diagonal to 1
        }
      }
    } else {## AR setup complete
      ar.row <- ar.weight <- ar.stop <- -1 ## signal no re-weighting
    }

    family <- G$family
    ## purely additive Gaussian-identity models need the X'WX etc. formed
    ## only once (weights never change)...
    additive <- if (family$family=="gaussian"&&family$link=="identity") TRUE else FALSE
    variance <- family$variance
    dev.resids <- family$dev.resids
    linkinv <- family$linkinv
    mu.eta <- family$mu.eta
    if (!is.function(variance) || !is.function(linkinv))
        stop("'family' argument seems not to be a valid family object")
    valideta <- family$valideta
    if (is.null(valideta))
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu))
        validmu <- function(mu) TRUE
    ## standard glm-style initialization; family$initialize may modify
    ## y/weights/mustart in this frame
    if (is.null(mustart)) {
        eval(family$initialize)
    }
    else {
        mukeep <- mustart
        eval(family$initialize)
        mustart <- mukeep
    }
 
    eta <- if (!is.null(etastart))
         etastart
    else family$linkfun(mustart)
    
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta)))
       stop("cannot find valid starting values: please specify some")
    dev <- sum(dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1

    conv <- FALSE
   
    G$coefficients <- rep(0,ncol(G$X))
    class(G) <- "gam"  
    
    ## need to reset response and weights to post initialization values
    ## in particular to deal with binomial properly...
    G$y <- y
    G$w <- weights

    Sl <- Sl.setup(G) ## setup block diagonal penalty object
    rank <- 0
    for (b in 1:length(Sl)) rank <- rank + Sl[[b]]$rank
    Mp <- ncol(G$X) - rank ## null space dimension
    Nstep <- 0 ## proposed Newton step for log smoothing params (and log scale)
    for (iter in 1L:control$maxit) { ## main fitting loop 
      devold <- dev
      dev <- 0
      ## accumulate the QR decomposition of the weighted model matrix
      ## (for additive Gaussian models this is only needed at iter 1)
      if (iter==1||!additive) {
        qrx <- list() 
        if (iter>1) {
          ## form eta = X%*%beta
          eta <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop)
        }
        mu <- linkinv(eta)
        mu.eta.val <- mu.eta(eta)
        good <- mu.eta.val != 0
        mu.eta.val[!good] <- .1 ## irrelevant as weight is zero
        z <- (eta - offset) + (G$y - mu)/mu.eta.val ## working response
        w <- (G$w * mu.eta.val^2)/variance(mu)      ## working weights
        dev <- sum(dev.resids(G$y,mu,G$w))
      
        qrx$y.norm2 <- if (rho==0) sum(w*z^2) else   ## AR mod needed
          sum(rwMatrix(ar.stop,ar.row,ar.weight,sqrt(w)*z,trans=FALSE)^2) 
       
        ## form X'WX efficiently...
        qrx$R <- XWXd(G$Xd,w,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,npt,G$drop,ar.stop,ar.row,ar.weight)
        ## form X'Wz efficiently...
        qrx$f <- XWyd(G$Xd,w,z,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop,ar.stop,ar.row,ar.weight)
        if(gc.level>1) gc()
     
        ## following reparameterizes X'X and f=X'y, according to initial reparameterizarion...
        qrx$XX <- Sl.initial.repara(Sl,qrx$R,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt)
        qrx$Xy <- Sl.initial.repara(Sl,qrx$f,inverse=FALSE,both.sides=TRUE,cov=FALSE,nt=npt)  
        
        G$n <- nobs
      } else {  ## end of if (iter==1||!additive)
        ## additive case after iter 1: deviance from stored quantities
        dev <- qrx$y.norm2 - sum(coef*qrx$f) ## actually penalized deviance
      }
  
      if (control$trace)
         message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))

      if (!is.finite(dev)) stop("Non-finite deviance")

      ## preparation for working model fit is ready, but need to test for convergence first
      if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
          conv <- TRUE
          #coef <- start
          break
      }

      ## use fast REML code
      ## block diagonal penalty object, Sl, set up before loop

      if (iter==1) { ## need to get initial smoothing parameters 
        lambda.0 <- initial.sp(qrx$R,G$S,G$off,XX=TRUE) ## note that this uses the untrasformed X'X in qrx$R
        ## convert intial s.p.s to account for L 
        lsp0 <- log(lambda.0) ## initial s.p.
        ## if a linear map L from working to full log s.p.s exists, project
        ## the initial values through it by least squares...
        if (!is.null(G$L)) lsp0 <- 
          if (ncol(G$L)>0) as.numeric(coef(lm(lsp0 ~ G$L-1+offset(G$lsp0)))) else rep(0,0)
        n.sp <- length(lsp0) 
      }
     
      ## carry forward scale estimate if possible...
      if (scale>0) log.phi <- log(scale) else {
        if (iter==1) {
            ## crude initial scale estimate appended as extra "s.p."
            if (is.null(coef)||qrx$y.norm2==0) lsp0[n.sp+1] <- log(var(as.numeric(G$y))*.05) else
               lsp0[n.sp+1] <- log(qrx$y.norm2/(nobs+nobs.extra))
        }
      }

      ## get beta, grad and proposed Newton step... 
      repeat { ## Take a Newton step to update log sp and phi
        lsp <- lsp0 + Nstep
        if (scale<=0) log.phi <- lsp[n.sp+1] 
        prop <- Sl.fitChol(Sl,qrx$XX,qrx$Xy,rho=lsp[1:n.sp],yy=qrx$y.norm2,L=G$L,rho0=G$lsp0,log.phi=log.phi,
                 phi.fixed=scale>0,nobs=nobs,Mp=Mp,nt=npt,tol=dev*.Machine$double.eps^.7)
        if (max(Nstep)==0) { 
          Nstep <- prop$step;lsp0 <- lsp;
          break 
        } else {
          ## step-halve until not going uphill on the REML criterion...
          if (sum(prop$grad*Nstep)>dev*1e-7) Nstep <- Nstep/2 else {
            Nstep <- prop$step;lsp0 <- lsp;break;
          }
        }
      } ## end of sp update

      ## map coefs back from the Sl reparameterization to original basis
      coef <- Sl.initial.repara(Sl,prop$beta,inverse=TRUE,both.sides=FALSE,cov=FALSE)

      if (any(!is.finite(coef))) {
          conv <- FALSE
          warning(gettextf("non-finite coefficients at iteration %d",
                  iter))
          break
      }
    } ## end fitting iteration

    if (!conv)
       warning("algorithm did not converge")
   
    eps <- 10 * .Machine$double.eps
    if (family$family == "binomial") {
         if (any(mu > 1 - eps) || any(mu < eps))
                warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family == "poisson") {
            if (any(mu < eps))
                warning("fitted rates numerically 0 occurred")
    }
  ## total null space dimension for the REML score...
  Mp <- G$nsdf
  if (length(G$smooth)>1) for (i in 1:length(G$smooth)) Mp <- Mp + G$smooth[[i]]$null.space.dim
  scale <- exp(log.phi)
  reml <- (dev/scale - prop$ldetS + prop$ldetXXS + (length(y)-Mp)*log(2*pi*scale))/2
  if (rho!=0) { ## correct REML score for AR1 transform
    df <- if (is.null(mf$"(AR.start)")) 1 else sum(mf$"(AR.start)")
    reml <- reml - (nobs-df)*log(ld)
  }

  for (i in 1:ncol(prop$db)) prop$db[,i] <- ## d beta / d rho matrix
        Sl.initial.repara(Sl,as.numeric(prop$db[,i]),inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt) 

  object <- list(db.drho=prop$db,
                 gcv.ubre=reml,mgcv.conv=conv,rank=prop$r,
                 scale.estimated = scale<=0,outer.info=NULL,
                 optimizer=c("perf","chol"))
  object$coefficients <- coef
  ## form linear predictor efficiently...
  object$linear.predictors <- Xbd(G$Xd,coef,G$kd,G$ks,G$ts,G$dt,G$v,G$qc,G$drop) + G$offset
  PP <- Sl.initial.repara(Sl,prop$PP,inverse=TRUE,both.sides=TRUE,cov=TRUE,nt=npt)
  F <- pmmult(PP,qrx$R,FALSE,FALSE,nt=npt)  ##crossprod(PP,qrx$R) - qrx$R contains X'WX in this case
  object$edf <- diag(F)
  object$edf1 <- 2*object$edf - rowSums(t(F)*F) 
  object$sp <- exp(lsp[1:n.sp]) 
  object$sig2 <- object$scale <- scale
  object$Vp <- PP * scale
  object$Ve <- pmmult(F,object$Vp,FALSE,FALSE,nt=npt) ## F%*%object$Vp
  ## sp uncertainty correction... 
  if (!is.null(G$L)) prop$db <- prop$db%*%G$L
  M <- ncol(prop$db) 
  if (M>0) {
    ## inverse square root of the (PSD-truncated) hessian of the REML score
    ev <- eigen(prop$hess,symmetric=TRUE)
    ind <- ev$values <= 0
    ev$values[ind] <- 0;ev$values[!ind] <- 1/sqrt(ev$values[!ind])
    rV <- (ev$values*t(ev$vectors))[,1:M]
    Vc <- pcrossprod(rV%*%t(prop$db),nt=npt)
  } else Vc <- 0
  Vc <- object$Vp + Vc  ## Bayesian cov matrix with sp uncertainty
  object$edf2 <- rowSums(Vc*qrx$R)/scale
  object$Vc <- Vc
  object$outer.info <- list(grad = prop$grad,hess=prop$hess)  
  object$AR1.rho <- rho
  ## pivoted Cholesky of X'WX, unpivoted so columns match coefficients
  object$R <- pchol(qrx$R,npt)
  piv <- attr(object$R,"pivot") 
  object$R[,piv] <- object$R   
  object$iter <- iter 
  object$wt <- w
  object$y <- G$y
  object$prior.weights <- G$w
  rm(G);if (gc.level>0) gc()
  object
} ## end bgam.fitd


bgam.fit <- function (G, mf, chunk.size, gp ,scale ,gamma,method, coef=NULL,etastart = NULL,
    mustart = NULL, offset = rep(0, nobs), control = gam.control(), intercept = TRUE, 
652
    cl = NULL,gc.level=0,use.chol=FALSE,nobs.extra=0,samfrac=1,npt=1)
653 654 655 656
{   y <- mf[[gp$response]]
    weights <- G$w
    conv <- FALSE
    nobs <- nrow(mf)
657
    ##nvars <- ncol(G$X)
658 659 660 661 662
    offset <- G$offset
    family <- G$family
    G$family <- gaussian() ## needed if REML/ML used
    variance <- family$variance
    dev.resids <- family$dev.resids
663
    ## aic <- family$aic
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
    linkinv <- family$linkinv
    mu.eta <- family$mu.eta
    if (!is.function(variance) || !is.function(linkinv))
        stop("'family' argument seems not to be a valid family object")
    valideta <- family$valideta
    if (is.null(valideta))
        valideta <- function(eta) TRUE
    validmu <- family$validmu
    if (is.null(validmu))
        validmu <- function(mu) TRUE
    if (is.null(mustart)) {
        eval(family$initialize)
    }
    else {
        mukeep <- mustart
        eval(family$initialize)
        mustart <- mukeep
    }
 
683
    ##coefold <- NULL
684 685 686 687 688 689 690 691
    eta <- if (!is.null(etastart))
         etastart
    else family$linkfun(mustart)
    
    mu <- linkinv(eta)
    if (!(validmu(mu) && valideta(eta)))
       stop("cannot find valid starting values: please specify some")
    dev <- sum(dev.resids(y, mu, weights))*2 ## just to avoid converging at iter 1
692 693
    ##boundary <- 
    conv <- FALSE
694
   
695
    G$coefficients <- rep(0,ncol(G$X))
696
    class(G) <- "gam"  
697 698 699 700 701 702
    
    ## need to reset response and weights to post initialization values
    ## in particular to deal with binomial properly...
    G$y <- y
    G$w <- weights

703
    ## set up cluster for parallel computation...
704 705 706

    if (!is.null(cl)&&inherits(cl,"cluster")) {
      n.threads <- length(cl)
707 708 709 710
      while(nobs/n.threads < ncol(G$X)) n.threads <- n.threads - 1
      if (n.threads < length(cl)) { 
        warning("Too many cluster nodes to use all efficiently")
      }
711
    } else n.threads <- 1
712

713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739
    if (n.threads>1) { ## set up thread argument lists
      ## number of obs per thread
      nt <- rep(ceiling(nobs/n.threads),n.threads)
      nt[n.threads] <- nobs - sum(nt[-n.threads])
      arg <- list()
      n1 <- 0
      for (i in 1:n.threads) {
        n0 <- n1+1;n1 <- n1+nt[i]
        ind <- n0:n1 ## this threads data block from mf
        n.block <- nt[i]%/%chunk.size ## number of full sized blocks
        stub <- nt[i]%%chunk.size ## size of end block
        if (n.block>0) {
          start <- (0:(n.block-1))*chunk.size+1
          stop <- (1:n.block)*chunk.size
          if (stub>0) {
            start[n.block+1] <- stop[n.block]+1
            stop[n.block+1] <- nt[i]
            n.block <- n.block+1
          } 
        } else {
          n.block <- 1
          start <- 1
          stop <- nt[i]
        }
        arg[[i]] <- list(nobs= nt[i],start=start,stop=stop,n.block=n.block,
                         linkinv=linkinv,dev.resids=dev.resids,gc.level=gc.level,
                         mu.eta=mu.eta,variance=variance,mf = mf[ind,],
740
                         eta = eta[ind],offset = offset[ind],G = G,use.chol=use.chol)
741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
        arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
        arg[[i]]$G$y <- G$y[ind]
      }
    } else { ## single thread, requires single indices
      ## construct indices for splitting up model matrix construction... 
      n.block <- nobs%/%chunk.size ## number of full sized blocks
      stub <- nobs%%chunk.size ## size of end block
      if (n.block>0) {
        start <- (0:(n.block-1))*chunk.size+1
        stop <- (1:n.block)*chunk.size
        if (stub>0) {
          start[n.block+1] <- stop[n.block]+1
          stop[n.block+1] <- nobs
          n.block <- n.block+1
        } 
      } else {
        n.block <- 1
        start <- 1
        stop <- nobs
      }
   } ## single thread indices complete
 
763
    conv <- FALSE
764 765 766

    if (method=="fREML") Sl <- Sl.setup(G) ## setup block diagonal penalty object
   
767 768 769 770
    for (iter in 1L:control$maxit) { ## main fitting loop
       ## accumulate the QR decomposition of the weighted model matrix
       wt <- rep(0,0) 
       devold <- dev
771 772 773 774
       dev <- 0
       if (n.threads == 1) { ## use original serial update code     
         for (b in 1:n.block) {
           ind <- start[b]:stop[b]
775
           X <- predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
776
           rownames(X) <- NULL
777
           if (is.null(coef)) eta1 <- eta[ind] else eta1 <- drop(X%*%coef) + offset[ind]
778 779 780 781 782 783 784 785 786 787
           mu <- linkinv(eta1) 
           y <- G$y[ind] ## G$model[[gp$response]] ## - G$offset[ind]
           weights <- G$w[ind]
           mu.eta.val <- mu.eta(eta1)
           good <- (weights > 0) & (mu.eta.val != 0)
           z <- (eta1 - offset[ind])[good] + (y - mu)[good]/mu.eta.val[good]
           w <- (weights[good] * mu.eta.val[good]^2)/variance(mu)[good]
           dev <- dev + sum(dev.resids(y,mu,weights))
           wt <- c(wt,w)
           w <- sqrt(w)
788 789 790
           ## note that QR may be parallel using npt>1, even under serial accumulation...
           if (b == 1) qrx <- qr.update(w*X[good,],w*z,use.chol=use.chol,nt=npt) 
           else qrx <- qr.update(w*X[good,],w*z,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
791 792
           rm(X);if(gc.level>1) gc() ## X can be large: remove and reclaim
        }
793 794
        if (use.chol) { ## post proc to get R and f...
          y.norm2 <- qrx$y.norm2 
795
          qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
796 797
          qrx$y.norm2 <- y.norm2
        }
798
      } else { ## use parallel accumulation 
799
         for (i in 1:length(arg)) arg[[i]]$coef <- coef
800
         res <- parallel::parLapply(cl,arg,qr.up) 
801 802 803 804 805 806
         ## single thread debugging version 
         #res <- list()
         #for (i in 1:length(arg)) {
         #  res[[i]] <- qr.up(arg[[i]])
         #}
        ## now consolidate the results from the parallel threads...
807 808 809 810 811 812 813 814
        if (use.chol) {
          R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
          wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
          for (i in 2:n.threads) {
            R <- R + res[[i]]$R; f <- f + res[[i]]$f
            wt <- c(wt,res[[i]]$wt); dev <- dev + res[[i]]$dev
            y.norm2 <- y.norm2 + res[[i]]$y.norm2
          }         
815
          qrx <- chol2qr(R,f,nt=npt)
816 817 818 819 820 821 822 823 824
          qrx$y.norm2 <- y.norm2
        } else { ## proper QR
          R <- res[[1]]$R;f <- res[[1]]$f;dev <- res[[1]]$dev
          wt <- res[[1]]$wt;y.norm2 <- res[[1]]$y.norm2
          for (i in 2:n.threads) {
            R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
            wt <- c(wt,res[[i]]$wt); dev <- dev + res[[i]]$dev
            y.norm2 <- y.norm2 + res[[i]]$y.norm2
          }         
825 826
          ## use parallel QR here if npt>1...
          qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE) 
827 828 829 830
          f <- qr.qty(qrx,f)[1:ncol(R)]
          rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
          qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
        }
831 832
      } 

833 834 835 836 837 838 839
      ## if the routine has been called with only a random sample of the data, then 
      ## R, f and ||y||^2 can be corrected to estimate the full versions...
 
      qrx$R <- qrx$R/sqrt(samfrac)
      qrx$f <- qrx$f/sqrt(samfrac)
      qrx$y.norm2 <- qrx$y.norm2/samfrac

840
      G$n <- nobs
841
      
842 843 844
      rss.extra <- qrx$y.norm2 - sum(qrx$f^2)
      
      if (control$trace)
845
         message(gettextf("Deviance = %s Iterations - %d", dev, iter, domain = "R-mgcv"))
846 847 848 849 850 851 852 853 854 855 856 857

      if (!is.finite(dev)) stop("Non-finite deviance")

      ## preparation for working model fit is ready, but need to test for convergence first
      if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) {
          conv <- TRUE
          coef <- start
          break
      }

      if (method=="GCV.Cp") {
         fit <- magic(qrx$f,qrx$R,G$sp,G$S,G$off,L=G$L,lsp0=G$lsp0,rank=G$rank,
858 859
                      H=G$H,C=matrix(0,0,ncol(qrx$R)),     ##C=G$C,
                      gamma=gamma,scale=scale,gcv=(scale<=0),
860
                      extra.rss=rss.extra,n.score=nobs+nobs.extra)
861 862
 
         post <- magic.post.proc(qrx$R,fit,qrx$f*0+1) 
863 864
      } else if (method=="fREML") { ## use fast REML code
        ## block diagonal penalty object, Sl, set up before loop
865
        um <- Sl.Xprep(Sl,qrx$R,nt=npt)
866 867 868 869 870
        lambda.0 <- initial.sp(qrx$R,G$S,G$off)
        lsp0 <- log(lambda.0) ## initial s.p.
        ## carry forward scale estimate if possible...
        if (scale>0) log.phi <- log(scale) else {
          if (iter>1) log.phi <- log(object$scale) else {
871
            if (is.null(coef)||qrx$y.norm2==0) log.phi <- log(var(as.numeric(G$y))*.05) else
872 873 874 875 876
               log.phi <- log(qrx$y.norm2/(nobs+nobs.extra))
          }
        }
        fit <- fast.REML.fit(um$Sl,um$X,qrx$f,rho=lsp0,L=G$L,rho.0=G$lsp0,
                             log.phi=log.phi,phi.fixed=scale>0,rss.extra=rss.extra,
877
                             nobs =nobs+nobs.extra,Mp=um$Mp,nt=npt)
878
        res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=FALSE,L=G$L,nt=npt)
879
        object <- list(coefficients=res$beta,db.drho=fit$d1b,
880 881 882 883
                       gcv.ubre=fit$reml,mgcv.conv=list(iter=fit$iter,
                       message=fit$conv),rank=ncol(um$X),
                       Ve=NULL,scale.estimated = scale<=0,outer.info=fit$outer.info,
                        optimizer=c("perf","newton"))
884
 
885 886 887 888
        if (scale<=0) { ## get sp's and scale estimate
          nsp <- length(fit$rho)
          object$sig2 <- object$scale <- exp(fit$rho[nsp])
          object$sp <- exp(fit$rho[-nsp]) 
889 890
          nsp <- length(fit$rho.full)
          object$full.sp <- exp(fit$rho.full[-nsp])
891 892 893
        } else { ## get sp's
          object$sig2 <- object$scale <- scale  
          object$sp <- exp(fit$rho)
894
          object$full.sp <- exp(fit$rho.full)
895 896 897
        }
        class(object)<-c("gam")               
      } else { ## method is one of "ML", "P-REML" etc...
898 899 900 901 902 903 904 905
        y <- G$y; w <- G$w; n <- G$n;offset <- G$offset
        G$y <- qrx$f
        G$w <- G$y*0+1
        G$X <- qrx$R
        G$n <- length(G$y)
        G$offset <- G$y*0
        G$dev.extra <- rss.extra
        G$pearson.extra <- rss.extra
906
        G$n.true <- nobs+nobs.extra
907
        object <- gam(G=G,method=method,gamma=gamma,scale=scale,control=gam.control(nthreads=npt))
908 909
        y -> G$y; w -> G$w; n -> G$n;offset -> G$offset
      }
910
     
911 912 913
      if (method=="GCV.Cp") { 
        object <- list()
        object$coefficients <- fit$b
914 915
        object$edf <- post$edf 
        object$edf1 <- post$edf1
916
        ##object$F <- post$F
917 918 919 920 921 922 923 924 925 926
        object$full.sp <- fit$sp.full
        object$gcv.ubre <- fit$score
        object$hat <- post$hat
        object$mgcv.conv <- fit$gcv.info 
        object$optimizer="magic"
        object$rank <- fit$gcv.info$rank
        object$Ve <- post$Ve
        object$Vp <- post$Vb
        object$sig2 <- object$scale <- fit$scale
        object$sp <- fit$sp
927
        names(object$sp) <- names(G$sp)
928 929 930 931 932 933 934
        class(object)<-c("gam")
      }

      coef <- object$coefficients
        
      if (any(!is.finite(coef))) {
          conv <- FALSE
935 936
          warning(gettextf("non-finite coefficients at iteration %d",
                  iter))
937 938
          break
      }
939 940 941
    } ## end fitting iteration

    if (method=="fREML") { ## do expensive cov matrix cal only at end
942
      res <- Sl.postproc(Sl,fit,um$undrop,qrx$R,cov=TRUE,scale=scale,L=G$L,nt=npt)
943
      object$edf <- res$edf
944
      object$edf1 <- res$edf1
945 946
      object$edf2 <- res$edf2
      ##object$F <- res$F
947
      object$hat <- res$hat
948 949
      object$Vp <- res$Vp
      object$Ve <- res$Ve
950
      object$Vc <- res$Vc
951
    }
952 953 954 955 956 957 958 959 960 961 962 963 964

    if (!conv)
       warning("algorithm did not converge")
   
    eps <- 10 * .Machine$double.eps
    if (family$family == "binomial") {
         if (any(mu > 1 - eps) || any(mu < eps))
                warning("fitted probabilities numerically 0 or 1 occurred")
    }
    if (family$family == "poisson") {
            if (any(mu < eps))
                warning("fitted rates numerically 0 occurred")
    }
965
  object$R <- qrx$R    
966
  object$iter <- iter 
967
  object$wt <- wt
968
  object$y <- G$y
969
  object$prior.weights <- G$w
970
  rm(G);if (gc.level>0) gc()
971 972 973 974
  object
} ## end bgam.fit


975

976

977
ar.qr.up <- function(arg) {
978
## function to perform QR updating with AR residuals, on one execution thread
## arg packages one thread's share of the work:
##   arg$mf            - this thread's block of the model frame
##   arg$G             - prefit 'gam' object; used here for 'lpmatrix'
##                       prediction and for the prior weights arg$G$w
##   arg$start/end     - sub-block boundaries; arg$n.block sub-blocks in all
##   arg$rho           - AR1 correlation parameter (0 => no AR model)
##   arg$first/last    - whether this thread holds the first/last rows of
##                       the full data set; arg$nobs is this thread's row count
##   arg$response      - name/index of response column in arg$mf
##   arg$use.chol      - accumulate X'X (chol route) rather than QR
## Returns qr.update's accumulated list (R, f, y.norm2) with yX.last added:
## the final untransformed row of (y,X), stored to allow later updating.
979 980 981 982 983
  if (arg$rho!=0) { ## AR1 error model
     ld <- 1/sqrt(1 - arg$rho^2) ## leading diagonal of root inverse correlation
     sd <- -arg$rho * ld         ## sub diagonal
  } 
  yX.last <- NULL
984
  qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
985 986 987 988
  for (i in 1:arg$n.block) {
    ind <- arg$start[i]:arg$end[i] 
    if (arg$rho!=0) { ## have to find AR1 transform...
       N <- arg$end[i]-arg$start[i]+1
989
       ## note first row implied by this transform
990 991 992 993
       ## is always dropped, unless really at beginning of data.
       ## row/weight/stop encode the banded root-inverse correlation factor
       ## in the compressed form consumed by rwMatrix: each output row i is
       ## sum(X[row[ind],]*weight[ind]) over its segment ending at stop[i].
       row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
       weight <- c(1,rep(c(sd,ld),N-1))
       stop <- c(1,1:(N-1)*2+1)
994 995 996 997 998 999 1000 1001
       if (!is.null(arg$mf$"(AR.start)")) { ## need to correct the start of new AR sections...
           ii <- which(arg$mf$"(AR.start)"[ind]==TRUE)
           if (length(ii)>0) {
             if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
             weight[ii*2-2] <- 0 ## zero sub diagonal
             weight[ii*2-1] <- 1 ## set leading diagonal to 1
           }
       }
1002
     } 
1003
     ## arg$G$model <- arg$mf[ind,]
1004
     w <- sqrt(arg$G$w[ind]) ## square root prior weights for this sub-block
1005 1006
     X <- w*predict(arg$G,newdata=arg$mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
     y <- w*(arg$mf[ind,arg$response] - arg$offset[ind]) ## w*(arg$G$model[[arg$response]] - arg$offset[ind])
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
     if (arg$rho!=0) {
       ## Apply transform...
       if (arg$last&&arg$end[i]==arg$nobs) yX.last <- 
           c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
       if (arg$first&&i==1) {
          X <- rwMatrix(stop,row,weight,X)
          y <- rwMatrix(stop,row,weight,y)
       } else {
          ## not genuinely at the start of the data, so drop the first row
          ## implied by the transform (it overlaps the previous block)...
          X <- rwMatrix(stop,row,weight,X)[-1,]
          y <- rwMatrix(stop,row,weight,y)[-1]
       } 
     } ## dealt with AR1      
1019
     qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=arg$use.chol)
1020 1021 1022 1023 1024
     rm(X);if (arg$gc.level>1) {gc()} ## X can be large: remove and reclaim
  } ## all blocks dealt with
  qrx$yX.last <- yX.last
  if (arg$gc.level>1) {rm(arg,w,y,ind);gc()}
  qrx
1025
} ## ar.qr.up
1026

1027 1028 1029 1030 1031 1032 1033 1034
pabapr <- function(arg) {
## function for parallel calling of predict.gam on one thread's argument
## list, as set up by predict.bam. Fix: forward arg$exclude, which
## predict.bam packs into arg but was previously dropped here (so 'exclude'
## silently had no effect under parallel prediction). When arg$exclude is
## absent, arg$exclude is NULL, matching predict.gam's default - so callers
## not supplying it are unaffected.
## QUERY: ... handling?
  predict.gam(arg$object,newdata=arg$newdata,type=arg$type,se.fit=arg$se.fit,terms=arg$terms,
                        exclude=arg$exclude,
                        block.size=arg$block.size,newdata.guaranteed=arg$newdata.guaranteed,
                        na.action=arg$na.action)
}

1035
predict.bam <- function(object,newdata,type="link",se.fit=FALSE,terms=NULL,exclude=NULL,
                        block.size=50000,newdata.guaranteed=FALSE,na.action=na.pass,
                        cluster=NULL,discrete=TRUE,n.threads=1,...) {
## function for prediction from a bam object, possibly in parallel.
## Arguments mirror predict.gam; in addition 'cluster' may be a 'parallel'
## cluster object over which prediction is split row-wise, 'discrete'
## selects the discrete-method route when the fit stored a 'dinfo' object,
## and 'n.threads' is passed on to the discrete-method code.
## Returns whatever predict.gam returns for the requested 'type'
## (vector, matrix, or list with fit/se.fit components).
## Fix relative to previous version: per-thread results are spliced with
## do.call rather than grown incrementally with rbind/c in a loop, which
## was accidentally O(n^2) in copying; the assembled results are identical.
  
  #if (is.function(na.action)) na.action <- deparse(substitute(na.action)) ## otherwise predict.gam can't detect type
  if (discrete && !is.null(object$dinfo)) {
    ## fit used discrete covariate methods - matching prediction code...
    return(predict.bamd(object,newdata,type,se.fit,terms,exclude,
                        block.size,newdata.guaranteed,na.action,n.threads,...))
  }
  ## remove some un-needed stuff from object (also shrinks the copies
  ## shipped to cluster nodes)...
  object$Sl <- object$qrx <- object$R <- object$F <- object$Ve <-
  object$Vc <- object$G <- object$residuals <- object$fitted.values <-
  object$linear.predictors <- NULL
  gc()
  if (!is.null(cluster)&&inherits(cluster,"cluster")) { 
     ## require(parallel)
     n.threads <- length(cluster)
  } else n.threads <- 1
  ## number of rows to predict for...
  if (missing(newdata)) n <- nrow(object$model) else {
    n <- if (is.matrix(newdata[[1]])) nrow(newdata[[1]]) else length(newdata[[1]]) 
  }
  if (n < 100*n.threads) n.threads <- 1 ## not worth the overheads
  if (n.threads==1) { ## single threaded call
    if (missing(newdata)) return(
      predict.gam(object,newdata=object$model,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
                        block.size=block.size,newdata.guaranteed=newdata.guaranteed,
                        na.action=na.action,...)
    ) else return(
      predict.gam(object,newdata=newdata,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
                        block.size=block.size,newdata.guaranteed=newdata.guaranteed,
                        na.action=na.action,...))
  } else { ## parallel call...
    ## split the n rows as evenly as possible; first thread absorbs the
    ## remainder so every row is covered exactly once...
    nt <- rep(floor(n/n.threads),n.threads)
    nt[1] <- n - sum(nt[-1])
    arg <- list()
    n1 <- 0
    for (i in 1:n.threads) { 
      n0 <- n1+1;n1 <- n1+nt[i]
      ind <- n0:n1 ## this thread's data block from mf
      arg[[i]] <- list(object=object,type=type,se.fit=se.fit,terms=terms,exclude=exclude,
                        block.size=block.size,newdata.guaranteed=newdata.guaranteed,
                        na.action=na.action)
      arg[[i]]$object$model <- object$model[1:2,] ## save space
      if (missing(newdata)) {
        arg[[i]]$newdata <- object$model[ind,]
      } else {
        arg[[i]]$newdata <- newdata[ind,]
      }
    } ## finished setting up arguments
    ## newdata and object no longer needed - all info in thread lists...
    if (!missing(newdata)) rm(newdata)
    rm(object)
    gc()
    res <- parallel::parLapply(cluster,arg,pabapr) ## perform parallel prediction
    gc()
    ## and splice results back together in thread (= row) order...
    if (type=="lpmatrix") {
      return(do.call(rbind,res)) ## stack the per-thread model matrices
    } 
    ## "terms" results are matrices to be stacked; other types are vectors
    ## to be concatenated...
    join <- if (type=="terms") rbind else c
    if (se.fit==TRUE) {
      return(list(fit = do.call(join,lapply(res,function(r) r$fit)),
                  se.fit = do.call(join,lapply(res,function(r) r$se.fit))))
    } else { ## no se's returned
      return(do.call(join,res))
    } 
  }
} ## end predict.bam 

1122

1123
bam.fit <- function(G,mf,chunk.size,gp,scale,gamma,method,rho=0,
1124
                    cl=NULL,gc.level=0,use.chol=FALSE,npt=1) 
1125
## function that does big additive model fit in strictly additive case
1126 1127 1128 1129 1130 1131 1132
{  ## first perform the QR decomposition, blockwise....
   n <- nrow(mf)
   if (rho!=0) { ## AR1 error model
     ld <- 1/sqrt(1-rho^2) ## leading diagonal of root inverse correlation
     sd <- -rho*ld         ## sub diagonal
   }

1133 1134 1135
   if (n>chunk.size) { ## then use QR accumulation approach
     if (!is.null(cl)&&inherits(cl,"cluster")) { 
       n.threads <- length(cl)
1136 1137 1138 1139
       while(n/n.threads < ncol(G$X)) n.threads <- n.threads - 1
       if (n.threads < length(cl)) { 
         warning("Too many cluster nodes to use all efficiently")
       }
1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179
     } else n.threads <- 1

     G$coefficients <- rep(0,ncol(G$X))
     class(G) <- "gam"

     if (n.threads>1) { ## set up thread argument lists
       ## number of obs per thread
       nt <- rep(ceiling(n/n.threads),n.threads)
       nt[n.threads] <- n - sum(nt[-n.threads])
       arg <- list()
       n1 <- 0
       for (i in 1:n.threads) { 
         n0 <- n1+1;n1 <- n1+nt[i]
         if (i>1&&rho!=0) { ## need to start from end of last block if rho!=0
           n0 <- n0-1;nt[i] <- nt[i]+1 
         }   
         ind <- n0:n1 ## this thread's data block from mf
         n.block <- nt[i]%/%chunk.size ## number of full sized blocks
         stub <- nt[i]%%chunk.size ## size of end block
         if (n.block>0) { 
           ## each block is of size 
           start <- (0:(n.block-1))*chunk.size+1
           end <- start + chunk.size - 1
           if (stub>0) {
             start[n.block+1] <- end[n.block]+1
             end[n.block+1] <- nt[i]
             n.block <- n.block+1
           } 
           if (rho!=0) { ## then blocks must overlap
             ns <- length(start)
             if (ns>1) start[2:ns] <- start[2:ns]-1 
           }
         } else {
           n.block <- 1
           start <- 1
           end <- nt[i]
         }
         arg[[i]] <- list(nobs= nt[i],start=start,end=end,n.block=n.block,
                         rho=rho,mf = mf[ind,],gc.level=gc.level,
                         offset = G$offset[ind],G = G,response=gp$response,
1180
                         first=FALSE,last=FALSE,use.chol=use.chol)
1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196
         if (i==1) arg[[1]]$first <- TRUE
         if (i==n.threads) arg[[i]]$last <- TRUE 
         arg[[i]]$G$w <- G$w[ind];arg[[i]]$G$model <- NULL
       }
     } else { ## single thread, requires single indices 
       n.block <- n%/%chunk.size ## number of full sized blocks
       stub <- n%%chunk.size ## size of end block
       if (stub>0) n.block <- n.block + 1
       start <- 0:(n.block-1)*chunk.size    ## block starts
       end <- start + chunk.size;           ## block ends
       end[n.block] <- n
       if (rho==0) start <- start + 1  ## otherwise most blocks go to 1 before block start
       start[1] <- 1  
     } 
    
     if (n.threads==1) { ## use original single thread method...
1197
       qrx <- list(R=NULL,f=array(0,0),y.norm2=0) ## initial empty qr object
1198 1199 1200 1201 1202 1203 1204
       for (i in 1:n.block) {
         ind <- start[i]:end[i] 
         if (rho!=0) {
           N <- end[i]-start[i]+1

           row <- c(1,rep(1:N,rep(2,N))[-c(1,2*N)])
           weight <- c(1,rep(c(sd,ld),N-1))
1205 1206 1207 1208 1209 1210 1211 1212 1213
           stop <- c(1,1:(N-1)*2+1) 
           if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
             ii <- which(mf$"(AR.start)"[ind]==TRUE)
             if (length(ii)>0) {
               if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
               weight[ii*2-2] <- 0 ## zero sub diagonal
               weight[ii*2-1] <- 1 ## set leading diagonal to 1
             }
           }
1214
         } 
1215
         #G$model <- mf[ind,]
1216
         w <- sqrt(G$w[ind])
1217 1218
         X <- w*predict(G,newdata=mf[ind,],type="lpmatrix",newdata.guaranteed=TRUE,block.size=length(ind))
         y <- w*(mf[ind,gp$response]-G$offset[ind])  ## w*(G$model[[gp$response]] - G$offset[ind])
1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230
         if (rho!=0) {
           ## Apply transform...
           if (end[i]==n) yX.last <- c(y[nrow(X)],X[nrow(X),]) ## store final row, in case of update
           if (i==1) {
             X <- rwMatrix(stop,row,weight,X)
             y <- rwMatrix(stop,row,weight,y)
           } else {
             X <- rwMatrix(stop,row,weight,X)[-1,]
             y <- rwMatrix(stop,row,weight,y)[-1]
           } 
         }      

1231
         qrx <- qr.update(X,y,qrx$R,qrx$f,qrx$y.norm2,use.chol=use.chol,nt=npt)
1232 1233 1234
         rm(X)
         if (gc.level>1) {gc()} ## X can be large: remove and reclaim
       } ## end of single thread block loop
1235 1236
       if (use.chol) { ## post proc to get R and f...
          y.norm2 <- qrx$y.norm2 
1237
          qrx <- chol2qr(qrx$R,qrx$f,nt=npt)
1238 1239
          qrx$y.norm2 <- y.norm2
        }
1240 1241
     } else { ## use parallel accumulation
     
1242
       res <- parallel::parLapply(cl,arg,ar.qr.up)
1243 1244 1245 1246 1247 1248 1249
       ## Single thread de-bugging...
       # res <- list()
       # for (i in 1:length(arg)) {
       #   res[[i]] <- ar.qr.up(arg[[i]])
       # }

       ## now consolidate the results from the parallel threads...
1250
       R <- res[[1]]$R;f <- res[[1]]$f; ## dev <- res[[1]]$dev
1251 1252
       y.norm2 <- res[[1]]$y.norm2
       for (i in 2:n.threads) {
1253 1254 1255 1256 1257
         if (use.chol) {
           R <- R + res[[i]]$R; f <- f + res[[i]]$f
         } else {
           R <- rbind(R,res[[i]]$R); f <- c(f,res[[i]]$f)
         }
1258
         y.norm2 <- y.norm2 + res[[i]]$y.norm2
1259 1260
       } 
       if (use.chol) {
1261
         qrx <- chol2qr(R,f,nt=npt)
1262
         qrx$y.norm2 <- y.norm2
1263 1264 1265
       } else { ## proper QR         
         ## use parallel QR if npt>1...
         qrx <- if (npt>1) pqr2(R,npt) else qr(R,tol=0,LAPACK=TRUE) 
1266 1267 1268 1269
         f <- qr.qty(qrx,f)[1:ncol(R)]
         rp <- qrx$pivot;rp[rp] <- 1:ncol(R) # reverse pivot
         qrx <- list(R=qr.R(qrx)[,rp],f=f,y.norm2=y.norm2)
       }
1270 1271 1272 1273
       yX.last <- res[[n.threads]]$yX.last
     } 
     G$n <- n
     G$y <- mf[[gp$response]]
1274
   
1275
   } else { ## n <= chunk.size
1276
     if (rho==0) qrx <- qr.update(sqrt(G$w)*G$X,sqrt(G$w)*(G$y-G$offset),use.chol=use.chol,nt=npt) else {
1277 1278 1279
       row <- c(1,rep(1:n,rep(2,n))[-c(1,2*n)])
       weight <- c(1,rep(c(sd,ld),n-1))
       stop <- c(1,1:(n-1)*2+1)
1280 1281 1282 1283 1284 1285 1286 1287
       if (!is.null(mf$"(AR.start)")) { ## need to correct the start of new AR sections...
         ii <- which(mf$"(AR.start)"==TRUE)
         if (length(ii)>0) {
           if (ii[1]==1) ii <- ii[-1] ## first observation does not need any correction
           weight[ii*2-2] <- 0 ## zero sub diagonal
           weight[ii*2-1] <- 1 ## set leading diagonal to 1
         }
       }
1288 1289 1290
       yX.last <- c(G$y[n],G$X[n,])  ## store final row, in case of update
       X <- rwMatrix(stop,row,weight,sqrt(G$w)*G$X)
       y <- rwMatrix(stop,row,weight,sqrt(G$w)*G$y)
1291
       qrx <- qr.update(X,