sort.R 10.3 KB
Newer Older
1
#  File src/library/base/R/sort.R
2
#  Part of the R package, https://www.R-project.org
3
#
4
#  Copyright (C) 1995-2018 The R Core Team
5
#
6 7 8 9 10 11 12 13 14 15 16
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
17
#  https://www.R-project.org/Licenses/
18

19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
.doSortWrap <- local({
    ## this matches the enum in Rinternals.h
    INCR_NA_1ST <-  2
    INCR        <-  1
    DECR        <- -1
    DECR_NA_1ST <- -2
    UNSORTED    <-  0
    UNKNOWN     <-  NA_integer_

    .makeSortEnum <- function(decr, na.last) {
        if(decr) {
            if (is.na(na.last) || na.last) DECR
            else DECR_NA_1ST
        } else {
            if (is.na(na.last) || na.last) INCR
            else INCR_NA_1ST
        }
    }

    function(vec, decr, nalast, noNA = NA) {
        if (length(vec) > 0 && is.numeric(vec)) {
            sorted <- .makeSortEnum(decr, nalast)
            if (is.na(noNA)) {
                if(is.na(nalast)) ## NAs were removed
                    noNA <- TRUE
                else if(nalast) ## NAs are last
                    noNA <- !is.na(vec[length(vec)])
                else ## NAs are first
                    noNA <- !is.na(vec[1L])
            }
            .Internal(wrap_meta(vec, sorted, noNA))
        }
        else vec
    }
})
## temporary, for sort.int and sort.list captured as S4 default methods
.doWrap <- .doSortWrap

57 58
sort <- function(x, decreasing = FALSE, ...)
{
59
    if(!is.logical(decreasing) || length(decreasing) != 1L)
60 61 62 63 64 65 66
        stop("'decreasing' must be a length-1 logical vector.\nDid you intend to set 'partial'?")
    UseMethod("sort")
}

sort.default <- function(x, decreasing = FALSE, na.last = NA, ...)
{
    ## The first case includes factors.
67 68 69 70 71 72 73 74 75

    ## no wrapping/altrep fastpass here because sortedness may not correspond
    ## to what other code assumes. ie for factors the vector itself is
    ## not guaranteed to be sorted in numeric order, since it goes by level
    ## values
    if(is.object(x))
        x[order(x, na.last = na.last, decreasing = decreasing)]
    else
        sort.int(x, na.last = na.last, decreasing = decreasing, ...)
76 77 78 79
}

sort.int <-
    function(x, partial = NULL, na.last = NA, decreasing = FALSE,
80 81
             method = c("auto", "shell", "quick", "radix"),
             index.return = FALSE)
82
{
83 84 85 86 87 88 89
    ## fastpass
    decreasing <- as.logical(decreasing)
    if (is.null(partial) && !index.return && is.numeric(x)) {
        if (.Internal(sorted_fpass(x, decreasing, na.last))) {
            ## strip attributes other than 'names'
            attr <- attributes(x)
            if (! is.null(attr) && ! identical(names(attr), "names"))
90
                attributes(x) <- list(names = names(x))
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
            return(x)
        }
    }
    method <- match.arg(method)
    if (method == "auto" && is.null(partial) &&
        (is.numeric(x) || is.factor(x) || is.logical(x)) &&
        is.integer(length(x)))
        method <- "radix"
    if (method == "radix") {
        if (!is.null(partial)) {
            stop("'partial' sorting not supported by radix method")
        }
        if (index.return && is.na(na.last)) {
            x <- x[!is.na(x)]
            na.last <- TRUE
        }
        o <- order(x, na.last = na.last, decreasing = decreasing,
                   method = "radix")
        y <- x[o]

        y <- .doSortWrap(y, decreasing, na.last)
        return(if (index.return) list(x = y, ix = o) else y)
    }
    else if (method == "auto" || !is.numeric(x))
          method <- "shell" # explicitly prevent 'quick' for non-numeric data

117
    if(isfact <- is.factor(x)) {
118
        if(index.return) stop("'index.return' only for non-factors")
119 120
	lev <- levels(x)
	nlev <- nlevels(x)
121
 	isord <- is.ordered(x)
122
        x <- c(x) # drop attributes
123
    } else if(!is.atomic(x))
124
        stop("'x' must be atomic")
125

126 127 128
    if(has.na <- any(ina <- is.na(x))) {
        nas <- x[ina]
        x <-  x[!ina]
129
    }
130
    if(index.return && !is.na(na.last))
131
        stop("'index.return' only for 'na.last = NA'")
132
    if(!is.null(partial)) {
133
        if(index.return || decreasing || isfact || method != "shell")
134 135
	    stop("unsupported options for partial sorting")
        if(!all(is.finite(partial))) stop("non-finite 'partial'")
136
        y <- if(length(partial) <= 10L) {
137 138
            partial <- .Internal(qsort(partial, FALSE))
            .Internal(psort(x, partial))
139 140
        } else if (is.double(x)) .Internal(qsort(x, FALSE))
        else .Internal(sort(x, FALSE))
141
    } else {
142
        nms <- names(x)
143
	switch(method,
144 145
               "quick" = {
                   if(!is.null(nms)) {
146
                       if(decreasing) x <- -x
147
                       y <- .Internal(qsort(x, TRUE))
148
                       if(decreasing) y$x <- -y$x
149 150
                       names(y$x) <- nms[y$ix]
                       if (!index.return) y <- y$x
151 152
                   } else {
                       if(decreasing) x <- -x
153
                       y <- .Internal(qsort(x, index.return))
154 155 156
                       if(decreasing)
                           if(index.return) y$x <- -y$x else y <- -y
                   }
157 158 159 160 161 162 163 164 165
               },
               "shell" = {
                   if(index.return || !is.null(nms)) {
                       o <- sort.list(x, decreasing = decreasing)
                       y <- if (index.return) list(x = x[o], ix = o) else x[o]
                   }
                   else
                       y <- .Internal(sort(x, decreasing))
               })
166
    }
167 168 169
    if (!is.na(na.last) && has.na)
	y <- if (!na.last) c(nas, y) else c(y, nas)
    if (isfact)
170
        y <- (if (isord) ordered else factor)(y, levels = seq_len(nlev),
171 172 173 174
            labels = lev)
    if (is.null(partial)) {
        y <- .doSortWrap(y, decreasing, na.last)
    }
175 176 177
    y
}

178 179
order <- function(..., na.last = TRUE, decreasing = FALSE,
                  method = c("auto", "shell", "radix"))
180
{
181
    z <- list(...)
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213

    ## fastpass, take advantage of ALTREP metadata
    decreasing <- as.logical(decreasing)
    if (length(z) == 1L && is.numeric(z[[1L]]) && !is.object(z[[1]]) &&
       length(z[[1L]]) > 0) {
        x <- z[[1L]]
        if (.Internal(sorted_fpass(x, decreasing, na.last)))
            return(seq(along = x))
    }

    method <- match.arg(method)
    if(any(vapply(z, is.object, logical(1L)))) {
        z <- lapply(z, function(x) if(is.object(x)) as.vector(xtfrm(x)) else x)
        return(do.call("order", c(z, na.last = na.last, decreasing = decreasing,
                                  method = method)))
    }

    if (method == "auto") {
        useRadix <- all(vapply(z, function(x) {
            (is.numeric(x) || is.factor(x) || is.logical(x)) &&
                is.integer(length(x))
        }, logical(1L)))
        method <- if (useRadix) "radix" else "shell"
    }

    if(method != "radix" && !is.na(na.last)) {
        return(.Internal(order(na.last, decreasing, ...)))
    }

    if (method == "radix") {
        decreasing <- rep_len(as.logical(decreasing), length(z))
        return(.Internal(radixsort(na.last, decreasing, FALSE, TRUE, ...)))
214 215 216
    }

    ## na.last = NA case: remove nas
217
    if(any(diff((l.z <- lengths(z)) != 0L)))
218
        stop("argument lengths differ")
219 220
    na <- vapply(z, is.na, rep.int(NA, l.z[1L]))
    ok <- if(is.matrix(na)) rowSums(na) == 0L else !any(na)
221
    if(all(!ok)) return(integer())
222
    z[[1L]][!ok] <- NA
223
    ans <- do.call("order", c(z, decreasing = decreasing))
224
    ans[ok[ans]]
225 226
}

227
sort.list <- function(x, partial = NULL, na.last = TRUE, decreasing = FALSE,
228
                      method = c("auto", "shell", "quick", "radix"))
229
{
230 231 232 233 234 235 236 237
    ## fastpass, take advantage of ALTREP metadata
    decreasing <- as.logical(decreasing)
    if(is.null(partial) && is.numeric(x) && !is.object(x) &&
       length(x) > 0){
        if (.Internal(sorted_fpass(x, decreasing, na.last)))
            return(seq(along = x))
    }

238
    method <- match.arg(method)
239 240 241
    if (method == "auto" && (is.numeric(x) || is.factor(x) || is.logical(x)) &&
        is.integer(length(x)))
        method <- "radix"
242
    if(!is.atomic(x))
243
        stop("'x' must be atomic for 'sort.list'\nHave you called 'sort' on a list?")
244 245
    if(!is.null(partial))
        .NotYetUsed("partial != NULL")
246 247 248 249 250
    if(method == "quick") {
        if(is.factor(x)) x <- as.integer(x) # sort the internal codes
        if(is.numeric(x))
            return(sort(x, na.last = na.last, decreasing = decreasing,
                        method = "quick", index.return = TRUE)$ix)
251
        else stop("method = \"quick\" is only for numeric 'x'")
252
    }
253 254 255 256
    if (is.na(na.last)) {
        x <- x[!is.na(x)]
        na.last <- TRUE
    }
257
    if(method == "radix") {
258
        return(order(x, na.last=na.last, decreasing=decreasing, method="radix"))
259 260
    }
    ## method == "shell"
261
    .Internal(order(na.last, decreasing, x))
262
}
263

264 265 266

## xtfrm is now primitive
## xtfrm <- function(x) UseMethod("xtfrm")
267
xtfrm.default <- function(x)
268 269
    if(is.numeric(x)) unclass(x) else as.vector(rank(x, ties.method = "min",
                                                     na.last = "keep"))
270 271
xtfrm.factor <- function(x) as.integer(x) # primitive, so needs a wrapper
xtfrm.Surv <- function(x)
272
    order(if(ncol(x) == 2L) order(x[,1L], x[,2L]) else order(x[,1L], x[,2L], x[,3L])) # needed by 'party'
273 274
xtfrm.AsIs <- function(x)
{
275
    if(length(cl <- class(x)) > 1) oldClass(x) <- cl[-1L]
276 277
    NextMethod("xtfrm")
}
278

279
## callback from C code for rank/order
280 281 282 283 284 285
.gt <- function(x, i, j)
{
    xi <- x[i]; xj <- x[j]
    if (xi == xj) 0L else if(xi > xj) 1L else -1L;
}

286
## callback for C code for is.unsorted, hence negation.
287 288 289
.gtn <- function(x, strictly)
{
    n <- length(x)
290
    if(strictly) !all(x[-1L] > x[-n]) else !all(x[-1L] >= x[-n])
291
}
292 293 294 295 296 297 298 299 300 301 302 303 304

grouping <- function(...) {
    z <- list(...)
    if(any(vapply(z, is.object, logical(1L)))) {
        z <- lapply(z, function(x) if(is.object(x)) as.vector(xtfrm(x)) else x)
        return(do.call("grouping", z))
    }
    nalast <- TRUE
    decreasing <- rep_len(FALSE, length(z))
    group <- TRUE
    sortStr <- FALSE
    return(.Internal(radixsort(nalast, decreasing, group, sortStr, ...)))
}