Commit 29e933ea authored by Andreas Tille's avatar Andreas Tille

New upstream version 0.2.3

parent 3b983164
......@@ -3,7 +3,7 @@ Type: Package
Title: Diffs for R Objects
Description: Generate a colorized diff of two R objects for an intuitive
visualization of their differences.
Version: 0.2.2
Version: 0.2.3
Authors@R: c(
person(
"Brodie", "Gaslam", email="brodie.gaslam@yahoo.com",
......@@ -17,7 +17,7 @@ License: GPL (>= 2)
LazyData: true
URL: https://github.com/brodieG/diffobj
BugReports: https://github.com/brodieG/diffobj/issues
RoxygenNote: 6.1.0.9000
RoxygenNote: 6.1.1
VignetteBuilder: knitr
Encoding: UTF-8
Suggests: knitr, rmarkdown, testthat
......@@ -28,10 +28,10 @@ Collate: 'capt.R' 'options.R' 'pager.R' 'check.R' 'finalizer.R'
'tochar.R' 'trim.R' 'word.R'
Imports: crayon (>= 1.3.2), tools, methods, utils, stats
NeedsCompilation: yes
Packaged: 2019-03-15 01:48:48 UTC; bg
Packaged: 2019-05-13 01:50:00 UTC; bg
Author: Brodie Gaslam [aut, cre],
Michael B. Allen [ctb, cph] (Original C implementation of Myers Diff
Algorithm)
Maintainer: Brodie Gaslam <brodie.gaslam@yahoo.com>
Repository: CRAN
Date/Publication: 2019-03-15 19:03:48 UTC
Date/Publication: 2019-05-13 18:50:03 UTC
This diff is collapsed.
# Generated by roxygen2: do not edit by hand
S3method(as.character,diffobj_ogewlhgiadfl2)
S3method(print,diffobj_ogewlhgiadfl)
export(AlignThreshold)
export(Pager)
......
# diffobj
## v0.2.3
This is a bugfix release.
* [#136](https://github.com/brodieG/diffobj/issues/136): Documentation for
`ignore.white.space` (h/t @flying-sheep) and `max.diffs` parameters listed
incorrect defaults.
* [#135](https://github.com/brodieG/diffobj/issues/135): Incorrect handling of
potential meta data strings when unwrapping atomics would cause a "wrong sign
in by argument" error (h/t @flying-sheep). We also fixed other bugs related
to the handling of meta data in atomic vectors that were uncovered while
debugging this issue.
* [#134](https://github.com/brodieG/diffobj/issues/134): Forwarding `...` to
`diff*` functions no longer breaks substitution of arguments for diff banners
(h/t @noamross)..
* [#133](https://github.com/brodieG/diffobj/issues/133): `diffFile` considers
files with equal content but different locations to be `all.equal` now (h/t
@noamross).
* [#132](https://github.com/brodieG/diffobj/issues/132): Duplicate pager slot
for baseline `Pager` removed (h/t Bill Dunlap).
There are also several other small internal changes that in theory should not
affect user facing behavior.
## v0.2.2
* Set `RNGversion()` due to changes to sampling mechanism.
......
......@@ -368,7 +368,7 @@ capt_file <- function(target, current, etc, err, extra) {
if(isTRUE(etc@trim)) etc@trim <- trimFile
diff.out <- line_diff(
target, current, html_ent_sub(tar.capt, etc@style),
tar.capt, cur.capt, html_ent_sub(tar.capt, etc@style),
html_ent_sub(cur.capt, etc@style), etc=etc
)
diff.out@capt.mode <- "file"
......
......@@ -439,7 +439,8 @@ line_diff <- function(
# - raw: the original captured text line by line, with strip_hz applied
# - trim: as above, but with row meta data removed
# - trim.ind: the indices used to re-insert `trim` into `raw`
# - comp: the strings that will have the line diffs run on
# - comp: the strings that will have the line diffs run on, these can be
# modified to force a particular outcome, e.g. by word_to_line_map
# - eq: the portion of `trim` that is equal post word-diff
# - fin: the final character string for display to user
# - word.ind: for use by `regmatches<-` to re-insert colored words
......@@ -462,59 +463,53 @@ line_diff <- function(
tok.rat=rep(1, length(cur.capt.p))
)
# Word diffs in wrapped form is atomic; note this will potentially change
# the length of the vectors
# the length of the vectors.
tar.wrap.diff <- integer(0L)
cur.wrap.diff <- integer(0L)
tar.dat.w <- tar.dat
cur.dat.w <- cur.dat
if(
is.atomic(target) && is.atomic(current) &&
is.null(dim(target)) && is.null(dim(current)) &&
length(tar.rh <- which_atomic_cont(tar.capt.p, target)) &&
length(cur.rh <- which_atomic_cont(cur.capt.p, current)) &&
is.null(names(target)) && is.null(names(current)) &&
etc@unwrap.atomic && etc@word.diff
) {
# For historical compatibility we allow `diffChr` to get into this step if
# the text format is right, even though it is arguable whether it should be
# allowed or not.
if(!all(diff(tar.rh) == 1L) || !all(diff(cur.rh)) == 1L){
# nocov start
stop("Logic Error, row headers must be sequential; contact maintainer.")
# nocov end
}
# Only do this for the portion of the data that actually matches up with
# the atomic row headers (NOTE: need to check what happens with named
# vectors without row headers)
tar.dat.sub <- lapply(tar.dat, "[", tar.rh)
cur.dat.sub <- lapply(cur.dat, "[", cur.rh)
# the atomic row headers.
diff.word <- diff_word2(
tar.dat.sub, cur.dat.sub, tar.ind=tar.rh, cur.ind=cur.rh,
tar.dat, cur.dat, tar.ind=tar.rh, cur.ind=cur.rh,
diff.mode="wrap", warn=warn, etc=etc
)
warn <- !diff.word$hit.diffs.max
dat.up <- function(orig, new, ind) {
if(!length(ind)) {
orig
} else {
start <- orig[seq_along(orig) < min(ind)]
end <- orig[seq_along(orig) > max(ind)]
c(start, new, end)
}
}
tar.dat <-
Map(dat.up, tar.dat, diff.word$tar.dat, MoreArgs=list(ind=tar.rh))
cur.dat <-
Map(dat.up, cur.dat, diff.word$cur.dat, MoreArgs=list(ind=cur.rh))
tar.dat.w <- diff.word$tar.dat
cur.dat.w <- diff.word$cur.dat
# Mark the lines that were wrapped diffed; necessary b/c tar/cur.rh are
# defined even if other conditions to get in this loop are not, and also
# because the addition of the fill lines moves everything around
# (effectively tar/cur.wrap.diff are the fill-offset versions of tar/cur.rh)
tar.wrap.diff <- seq_along(tar.dat$fill)[!tar.dat$fill][tar.rh]
cur.wrap.diff <- seq_along(cur.dat$fill)[!cur.dat$fill][cur.rh]
tar.wrap.diff <- seq_along(tar.dat.w$fill)[!tar.dat.w$fill][tar.rh]
cur.wrap.diff <- seq_along(cur.dat.w$fill)[!cur.dat.w$fill][cur.rh]
}
# Actual line diff
diffs <- char_diff(
tar.dat$comp, cur.dat$comp, etc=etc, diff.mode="line", warn=warn
tar.dat.w$comp, cur.dat.w$comp, etc=etc, diff.mode="line", warn=warn
)
warn <- !diffs$hit.diffs.max
......@@ -524,9 +519,12 @@ line_diff <- function(
# word.diffs list; bad part here is that we keep overwriting the overall
# diff data for each hunk, which might be slow
tar.dat.ww <- tar.dat.w
cur.dat.ww <- cur.dat.w
if(etc@word.diff) {
# Word diffs on hunks, excluding all values that have already been wrap
# diffed as in tar.rh and cur.rh
# diffed as in tar.rh and cur.rh / tar.wrap.diff and cur.wrap.diff
for(h.a in hunks.flat) {
if(h.a$context) next
......@@ -534,12 +532,12 @@ line_diff <- function(
h.a.tar.ind <- setdiff(h.a.ind[h.a.ind > 0], tar.wrap.diff)
h.a.cur.ind <- setdiff(abs(h.a.ind[h.a.ind < 0]), cur.wrap.diff)
h.a.w.d <- diff_word2(
tar.dat, cur.dat, h.a.tar.ind, h.a.cur.ind, diff.mode="hunk", warn=warn,
etc=etc
tar.dat.ww, cur.dat.ww, h.a.tar.ind, h.a.cur.ind, diff.mode="hunk",
warn=warn, etc=etc
)
tar.dat <- h.a.w.d$tar.dat
cur.dat <- h.a.w.d$cur.dat
warn <- !h.a.w.d$hit.diffs.max
tar.dat.ww <- h.a.w.d[['tar.dat']]
cur.dat.ww <- h.a.w.d[['cur.dat']]
warn <- warn || !h.a.w.d[['hit.diffs.max']]
}
# Compute the token ratios
......@@ -550,23 +548,23 @@ line_diff <- function(
else max(0, (wc - length(y)) / wc),
numeric(1L)
)
tar.dat$tok.rat <- tok_ratio_compute(tar.dat$word.ind)
cur.dat$tok.rat <- tok_ratio_compute(cur.dat$word.ind)
tar.dat.ww$tok.rat <- tok_ratio_compute(tar.dat.ww$word.ind)
cur.dat.ww$tok.rat <- tok_ratio_compute(cur.dat.ww$word.ind)
# Deal with mixed UTF/plain strings
tar.dat$word.ind <- fix_word_ind(tar.dat$word.ind)
cur.dat$word.ind <- fix_word_ind(cur.dat$word.ind)
tar.dat.ww$word.ind <- fix_word_ind(tar.dat.ww$word.ind)
cur.dat.ww$word.ind <- fix_word_ind(cur.dat.ww$word.ind)
# Remove different words to make equal strings
tar.dat$eq <- with(tar.dat, `regmatches<-`(trim, word.ind, value=""))
cur.dat$eq <- with(cur.dat, `regmatches<-`(trim, word.ind, value=""))
tar.dat.ww$eq <- with(tar.dat.ww, `regmatches<-`(trim, word.ind, value=""))
cur.dat.ww$eq <- with(cur.dat.ww, `regmatches<-`(trim, word.ind, value=""))
}
# Instantiate result
hunk.grps.raw <- group_hunks(
hunks.flat, etc=etc, tar.capt=tar.dat$raw, cur.capt=cur.dat$raw
hunks.flat, etc=etc, tar.capt=tar.dat.ww$raw, cur.capt=cur.dat.ww$raw
)
gutter.dat <- etc@gutter
max.w <- etc@text.width
......@@ -580,7 +578,8 @@ line_diff <- function(
}
# Trim hunks to the extent needed to make sure we fit in lines
hunk.grps <- trim_hunks(hunk.grps.raw, etc.group, tar.dat$raw, cur.dat$raw)
hunk.grps <-
trim_hunks(hunk.grps.raw, etc.group, tar.dat.ww$raw, cur.dat.ww$raw)
hunks.flat <- unlist(hunk.grps, recursive=FALSE)
# Compact to width of widest element, so retrieve all char values; also
......@@ -595,7 +594,7 @@ line_diff <- function(
# be faster to use tar.dat and cur.dat directly
chr.ind <- unlist(lapply(hunks.flat, "[", c("A", "B")))
chr.dat <- get_dat_raw(chr.ind, tar.dat$raw, cur.dat$raw)
chr.dat <- get_dat_raw(chr.ind, tar.dat.ww$raw, cur.dat.ww$raw)
chr.size <- integer(length(chr.dat))
ranges <- vapply(
......@@ -616,8 +615,8 @@ line_diff <- function(
hunks.flat, function(h.a) {
with(
h.a, c(
rng_non_fill(tar.rng.sub, which(tar.dat$fill)),
rng_non_fill(cur.rng.sub, which(cur.dat$fill))
rng_non_fill(tar.rng.sub, which(tar.dat.ww$fill)),
rng_non_fill(cur.rng.sub, which(cur.dat.ww$fill))
) )
},
integer(4L)
......@@ -627,7 +626,7 @@ line_diff <- function(
# that adjusted ranges are not necessarily contiguous
hunk.heads <-
lapply(hunk.grps, make_hh, etc@mode, tar.dat, cur.dat, ranges.orig)
lapply(hunk.grps, make_hh, etc@mode, tar.dat.ww, cur.dat.ww, ranges.orig)
h.h.chars <- nchar2(
chr_trim(
unlist(hunk.heads), etc@line.width, sgr.supported=etc@sgr.supported
......@@ -647,7 +646,7 @@ line_diff <- function(
new(
"Diff", diffs=hunk.grps, target=target, current=current,
hit.diffs.max=!warn, tar.dat=tar.dat, cur.dat=cur.dat, etc=etc,
hit.diffs.max=!warn, tar.dat=tar.dat.ww, cur.dat=cur.dat.ww, etc=etc,
hunk.heads=hunk.heads, trim.dat=attr(hunk.grps, 'meta')
)
}
......@@ -68,8 +68,10 @@ make_diff_fun <- function(capt_fun) {
extra=list()
) {
# nocov end
frame # force frame so that `par_frame` called in this context
frame # force frame so that `par_frame` called in this context
call.dat <- extract_call(sys.calls(), frame)
target # force target/current so if one missing we get an error here
current # and not later
# Check args and evaluate all the auto-selection arguments
......@@ -138,15 +140,14 @@ make_diff_fun <- function(capt_fun) {
#' Diff \code{print}ed Objects
#'
#' Runs the diff between the \code{print} or \code{show} output produced by
#' \code{target} and \code{current}.
#' \code{target} and \code{current}. Given the extensive parameter list, this
#' documentation page is intended as a reference for all the \code{diff*}
#' methods. For a high level introduction see \code{vignette("diffobj")}.
#'
#' @description This documentation page is intended as a reference document
#' for all the \code{diff*} methods. For a high level introduction see
#' \code{vignette("diffobj")} and the examples. Almost all aspects of how the
#' diffs are computed and displayed are controllable through the \code{diff*}
#' methods parameters. This results in a lengthy parameter list, but in
#' practice you should rarely need to adjust anything past the
#' \code{color.mode} parameter. Default values are specified
#' Almost all aspects of how the diffs are computed and displayed are
#' controllable through the \code{diff*} methods parameters. This results in a
#' lengthy parameter list, but in practice you should rarely need to adjust
#' anything past the \code{color.mode} parameter. Default values are specified
#' as options so that users may configure diffs in a persistent manner.
#' \code{\link{gdo}} is a shorthand function to access \code{diffobj} options.
#'
......@@ -162,6 +163,19 @@ make_diff_fun <- function(capt_fun) {
#' Note that while the generics include \code{...} as an argument, none of the
#' methods do.
#'
#' @section Matrices and Data Frames:
#'
#' While \code{diffPrint} attempts to handle the default R behavior that wraps
#' wide tables, the results are often sub-optimal. A better approach is to set
#' the \code{disp.width} parameter to a large enough value such that wrapping is
#' not necessary, and a browser-based \code{pager}. In the future we will add
#' the capability to specify different capture widths and wrap widths so that
#' this is an option for terminal output (see
#' \href{https://github.com/brodieG/diffobj/issues/109}{issue 109}).
#'
#' One thing to keep in mind is that \code{diffPrint} is not designed to work
#' with very large data frames.
#'
#' @export
#' @seealso \code{\link{diffObj}}, \code{\link{diffStr}},
#' \code{\link{diffChr}} to compare character vectors directly,
......@@ -279,7 +293,7 @@ make_diff_fun <- function(capt_fun) {
#' in a different color to indicate they are not part of the hunk. If a
#' function, the function should accept as the first argument the object
#' being diffed, and the second the character representation of the object.
#' The function should return the indices of the elements of the second
#' The function should return the indices of the elements of the
#' character representation that should be treated as guides. See
#' \code{\link{guides}} for more details.
#' @param trim TRUE (default), FALSE, or a function that accepts at least two
......@@ -294,49 +308,58 @@ make_diff_fun <- function(capt_fun) {
#' and carries out the diff on the object instead of the original argument.
#' Currently there is no mechanism for specifying additional arguments to
#' \code{readRDS}
#' @param unwrap.atomic TRUE (default) or FALSE. Only relevant for
#' @param unwrap.atomic TRUE (default) or FALSE. Relevant primarily for
#' \code{diffPrint}, if TRUE, and \code{word.diff} is also TRUE, and both
#' \code{target} and \code{current} are \emph{unnamed} and atomic, the vectors
#' are unwrapped and diffed element by element, and then re-wrapped. Since
#' \code{diffPrint} is fundamentally a line diff, the re-wrapped lines are
#' lined up in a manner that is as consistent as possible with the unwrapped
#' diff. Lines that contain the location of the word differences will be
#' paired up. Since the vectors may well be wrapped with different
#' periodicities this will result in lines that are paired up that look like
#' they should not be paired up, though the locations of the differences
#' should be. If is entirely possible that setting this parameter to FALSE
#' will result in a slower diff. This happens if two vectors are actually
#' fairly similar, but their line representations are not. For example, in
#' comparing \code{1:100} to \code{c(100, 1:99)}, there is really only one
#' difference at the \dQuote{word} level, but every screen line is different.
#' \code{target} and \code{current} are \emph{unnamed} one-dimension atomics ,
#' the vectors are unwrapped and diffed element by element, and then
#' re-wrapped. Since \code{diffPrint} is fundamentally a line diff, the
#' re-wrapped lines are lined up in a manner that is as consistent as possible
#' with the unwrapped diff. Lines that contain the location of the word
#' differences will be paired up. Since the vectors may well be wrapped with
#' different periodicities this will result in lines that are paired up that
#' look like they should not be paired up, though the locations of the
#' differences should be. If is entirely possible that setting this parameter
#' to FALSE will result in a slower diff. This happens if two vectors are
#' actually fairly similar, but their line representations are not. For
#' example, in comparing \code{1:100} to \code{c(100, 1:99)}, there is really
#' only one difference at the \dQuote{word} level, but every screen line is
#' different. \code{diffChr} will also do the unwrapping if it is given a
#' character vector that contains output that looks like the atomic vectors
#' described above. This is a bug, but as the functionality could be useful
#' when diffing e.g. \code{capture.output} data, we now declare it a feature.
#' @param line.limit integer(2L) or integer(1L), if length 1 how many lines of
#' output to show, where \code{-1} means no limit. If length 2, the first
#' value indicates the threshold of screen lines to begin truncating output,
#' and the second the number of lines to truncate to, which should be fewer
#' than the threshold. Note that this parameter is implemented on a
#' best-efforts basis and should not be relied on to produce the exact
#' number of lines requested. If you want a specific number of lines use
#' \code{[} or \code{head} / \code{tail}. One advantage of \code{line.limit}
#' over these other options is that you can combine it with
#' \code{context="auto"} and auto \code{max.level} selection (the latter for
#' \code{diffStr}), which allows the diff to dynamically adjust to make best
#' use of the available display lines. \code{[}, \code{head}, and \code{tail}
#' just subset the text of the output.
#' number of lines requested. In particular do not expect it to work well for
#' for values small enough that the banner portion of the diff would have to
#' be trimmed. If you want a specific number of lines use \code{[} or
#' \code{head} / \code{tail}. One advantage of \code{line.limit} over these
#' other options is that you can combine it with \code{context="auto"} and
#' auto \code{max.level} selection (the latter for \code{diffStr}), which
#' allows the diff to dynamically adjust to make best use of the available
#' display lines. \code{[}, \code{head}, and \code{tail} just subset the text
#' of the output.
#' @param hunk.limit integer(2L) or integer (1L), how many diff hunks to show.
#' Behaves similarly to \code{line.limit}. How many hunks are in a
#' particular diff is a function of how many differences, and also how much
#' \code{context} is used since context can cause two hunks to bleed into
#' each other and become one.
#' @param max.diffs integer(1L), number of \emph{differences} after which we
#' abandon the \code{O(n^2)} diff algorithm in favor of a linear one. Set to
#' \code{-1L} to always stick to the original algorithm (defaults to 10000L).
#' abandon the \code{O(n^2)} diff algorithm in favor of a naive element by
#' element comparison. Set to \code{-1L} to always stick to the original
#' algorithm (defaults to 50000L).
#' @param disp.width integer(1L) number of display columns to take up; note that
#' in \dQuote{sidebyside} \code{mode} the effective display width is half this
#' number (set to 0L to use default widths which are \code{getOption("width")}
#' for normal styles and \code{80L} for HTML styles.
#' for normal styles and \code{80L} for HTML styles. Future versions of
#' \code{diffobj} may change this to larger values for two dimensional objects
#' for better diffs (see details).
#' @param ignore.white.space TRUE or FALSE, whether to consider differences in
#' horizontal whitespace (i.e. spaces and tabs) as differences (defaults to
#' FALSE).
#' TRUE).
#' @param convert.hz.white.space TRUE or FALSE, whether modify input strings
#' that contain tabs and carriage returns in such a way that they display as
#' they would \bold{with} those characters, but without using those
......
......@@ -42,7 +42,7 @@ rle_sub <- function(rle, ind) {
c.factor <- function(..., recursive=FALSE) {
dots <- list(...)
dots.n.n <- dots[!vapply(dots, is.null, logical(1L))]
if(!length(dots)) factor(0L) else {
if(!length(dots)) factor(character()) else {
if(
!all(vapply(dots.n.n, is, logical(1L), "factor")) ||
length(unique(lapply(dots.n.n, levels))) != 1L
......@@ -84,6 +84,9 @@ which_top <- function(s.c) {
f.rle <- rle(funs)
val.calls <- f.rle$lengths == 2
# default if failed to find a value is last call on stack
res <- length(s.c)
if(any(val.calls) && fun.ref.loc) {
# return first index of last pairs of identical calls in the call stack
# that is followed by a correct .internal call, and also that are not
......@@ -94,15 +97,11 @@ which_top <- function(s.c) {
rle.followed <- which(
rle.elig.max < max(fun.ref.loc) & !grepl("eval\\(", funs[rle.elig.max])
)
if(!length(rle.followed)) { # can't find correct one
length(s.c)
} else {
rle.elig[[max(rle.followed)]][1L]
if(length(rle.followed)) { # can't find correct one
res <- rle.elig[[max(rle.followed)]][1L]
}
} else {
# failed to find a value, so just return last call on stack
length(s.c)
}
res
}
get_fun <- function(name, env) {
get.fun <- if(is.name(name) || (is.character(name) && length(name) == 1L)) {
......@@ -130,13 +129,32 @@ extract_call <- function(s.c, par.env) {
get.fun <- get_fun(found.call[[1L]], env=par.env)
res <- no.match
if(is.function(get.fun)) {
found.call.m <- try(match.call(definition=get.fun, call=found.call))
found.call.m <- try(
# this creates an environment where `...` is available so we don't
# get a "... used in a situation it does not exist error" (issue 134)
(function(...) {
match.call(definition=get.fun, call=found.call, envir=environment())
})()
)
if(!inherits(found.call.m, "try-error")) {
if(length(found.call.m) < 3L) length(found.call.m) <- 3L
if(length(found.call.m) < 3L) {
found.call.ml <- as.list(found.call.m)
length(found.call.ml) <- 3L
# found.call.ml[[3L]] <- quote(list(x=))[[2L]]
found.call.m <- as.call(found.call.ml)
}
res <-
list(call=found.call.m, tar=found.call.m[[2L]], cur=found.call.m[[3L]])
}
}
} else {
# nocov start
# not sure if it's possible to get here, seems like not, maybe we can
# get rid of try, but don't want to risk breaking stuff that used to work
warning(
"Failed trying to recover tar/cur expressions for display, see ",
"previous errors."
)
# nocov end
} }
res
}
#' Get Parent Frame of S4 Call Stack
......@@ -164,10 +182,10 @@ extract_call <- function(s.c, par.env) {
par_frame <- function() {
s.c <- head(sys.calls(), -1L)
top <- which_top(s.c)
par <- sys.parents()[top]
par <- head(sys.parents(), -1L)[top]
if(par) {
sys.frames()[[par]]
} else .GlobalEnv
head(sys.frames(), -1L)[[par]]
} else .GlobalEnv # can't figure out how to cause this branch
}
# check whether running in knitr
......@@ -358,8 +376,12 @@ nchar2 <- function(x, ..., sgr.supported) {
if(sgr.supported) crayon::col_nchar(x, ...)
else nchar(x, ...)
}
# This is an internal method for testing
# These are internal methods for testing
#' @export
print.diffobj_ogewlhgiadfl <- function(x, ...) stop('failure')
#' @export
as.character.diffobj_ogewlhgiadfl2 <- function(x, ...) stop('failure2')
......@@ -29,7 +29,6 @@ myers_simple_int <- function(A, B) {
N <- length(A)
M <- length(B)
MAX <- M + N + 1L
if(!MAX) return(matrix(integer(0L), ncol=2))
OFF <- MAX + 1L # offset to adjust to R indexing
Vl <- vector("list", MAX)
for(D in seq_len(MAX) - 1L) {
......@@ -153,7 +152,7 @@ diff_path_to_diff <- function(path, target, current) {
for(i in seq_along(chunks)) {
x <- chunks[[i]]
if((neg <- any(x < 0L, na.rm=TRUE)) && !all(x < 0L, na.rm=TRUE))
stop("Logic Error: match group error; contact maintainer")
stop("Internal Error: match group error; contact maintainer") # nocov
if(neg) {
# Matches, so equal length and set to zero
res.tar[[i]] <- res.cur[[i]] <- integer(nrow(x))
......
......@@ -236,7 +236,6 @@ setClass(
),
prototype=list(
pager=function(x) writeLines(readLines(x)), file.ext="", threshold=0L,
pager=function(x) stop("Pager object does not specify a paging function."),
ansi=FALSE, file.path=NA_character_, make.blocking=FALSE
),
validity=function(object) {
......
......@@ -113,7 +113,10 @@ Rdiff_run <- function(from, to, nullPointers, silent, minimal) {
"without a `diff` utility accessible to R"
)
)
if(!is.character(res)) stop("Unexpected tools::Rdiff output")
if(!is.character(res))
# nocov start
stop("Internal Error: Unexpected tools::Rdiff output, contact maintainer")
# nocov end
res <- if(minimal) res[!grepl("^[<>-]", res)] else res
if(silent) res else {
......
......@@ -216,8 +216,6 @@ setClass("Settings",
setMethod("initialize", "Settings", function(.Object, ...) {
if(is.numeric(.Object@disp.width))
.Object@disp.width <- as.integer(.Object@disp.width)
if(is.null(.Object@disp.width))
.Object@disp.width <- 80L
return(callNextMethod(.Object, ...))
} )
......@@ -236,67 +234,70 @@ setMethod("sideBySide", "Settings",
)
# Validate the *.dat slots of the Diff objects
#
# We stopped using this one because it was too expensive computationally.
# Saving the code just in case.
valid_dat <- function(x) {
char.cols <- c("orig", "raw", "trim", "eq", "comp", "fin")
list.cols <- c("word.ind")
zerotoone.cols <- "tok.rat"
integer.cols <- c("trim.ind.start", "trim.ind.end")
if(!is.list(x)) {
"should be a list"
} else if(!identical(names(x), .diff.dat.cols)) {
paste0("should have names ", dep(.diff.dat.cols))
} else if(
length(
unique(
vapply(
x[c(char.cols, list.cols, zerotoone.cols, integer.cols)],
length, integer(1L)
)
) ) != 1L
) {
"should have equal length components"
} else {
if(
length(
not.char <- which(!vapply(x[char.cols], is.character, logical(1L)))
)
){
sprintf("element `%s` should be character", char.cols[not.char][[1L]])
} else if (
length(
not.int <- which(!vapply(x[integer.cols], is.integer, logical(1L)))
)
) {
sprintf("element `%s` should be integer", integer.cols[not.int][[1L]])
} else if (
length(
not.list <- which(!vapply(x[list.cols], is.list, logical(1L)))
)
) {
sprintf("element `%s` should be list", list.cols[not.list][[1L]])
} else if (
!all(
vapply(
x$word.ind,
function(y)
is.integer(y) && is.integer(attr(y, "match.length")) &&
length(y) == length(attr(y, "match.length")),
logical(1L)
) )
) {
"element `word.ind` is not in expected format"
} else if (
!is.numeric(x$tok.rat) || anyNA(x$tok.rat) || !all(x$tok.rat %bw% c(0, 1))
) {
"element `tok.rat` should be numeric with all values between 0 and 1"
} else if (!is.logical(x$fill) || anyNA(x$fill)) {
"element `fill` should be logical and not contain NAs"
}
else TRUE
}
}
# valid_dat <- function(x) {
# char.cols <- c("orig", "raw", "trim", "eq", "comp", "fin")
# list.cols <- c("word.ind")
# zerotoone.cols <- "tok.rat"
# integer.cols <- c("trim.ind.start", "trim.ind.end")
#
# if(!is.list(x)) {
# "should be a list"
# } else if(!identical(names(x), .diff.dat.cols)) {
# paste0("should have names ", dep(.diff.dat.cols))
# } else if(
# length(
# unique(
# vapply(
# x[c(char.cols, list.cols, zerotoone.cols, integer.cols)],
# length, integer(1L)
# )
# ) ) != 1L
# ) {
# "should have equal length components"
# } else {
# if(
# length(
# not.char <- which(!vapply(x[char.cols], is.character, logical(1L)))
# )
# ){
# sprintf("element `%s` should be character", char.cols[not.char][[1L]])
# } else if (
# length(
# not.int <- which(!vapply(x[integer.cols], is.integer, logical(1L)))
# )
# ) {
# sprintf("element `%s` should be integer", integer.cols[not.int][[1L]])
# } else if (
# length(
# not.list <- which(!vapply(x[list.cols], is.list, logical(1L)))
# )
# ) {
# sprintf("element `%s` should be list", list.cols[not.list][[1L]])
# } else if (
# !all(
# vapply(
# x$word.ind,
# function(y)
# is.integer(y) && is.integer(attr(y, "match.length")) &&
# length(y) == length(attr(y, "match.length")),
# logical(1L)
# ) )
# ) {
# "element `word.ind` is not in expected format"
# } else if (
# !is.numeric(x$tok.rat) || anyNA(x$tok.rat) || !all(x$tok.rat %bw% c(0, 1))
# ) {
# "element `tok.rat` should be numeric with all values between 0 and 1"
# } else if (!is.logical(x$fill) || anyNA(x$fill)) {
# "element `fill` should be logical and not contain NAs"
# }
# else TRUE
# }
# }
#' Diff Result Object
#'
#' Return value for the \code{\link[=diffPrint]{diff*}} methods. Has
......
......@@ -61,6 +61,15 @@ align_split <- function(v, m) {
}
# Align lists based on equalities on other vectors
#
# This is used for hunks that are word diffed. Once the word differences are
# accounted for, the remaining strings (A.eq/B.eq) are compared to try to align
# them with a naive algorithm on a line basis. This works best when lines as a
# whole are equal except for a few differences. There can be funny situations
# where matched words are on one line in e.g. A, but spread over multiple lines
# in B. This isn't really handled well currently.
#
# See issue #37.
#
# The A/B vecs will be split up into matchd elements, and non-matched elements.
# Each matching element will be surrounding by (possibly empty) non-matching
# elements.
......
......@@ -354,17 +354,18 @@ setMethod("as.character", "Diff",
banner.A <- s@funs@word.delete(ban.A.trim)
banner.B <- s@funs@word.insert(ban.B.trim)