Import Upstream version 1.9

parent 4f04f47a
jdupes 1.9
- stderr on Windows is no longer polluted or empty when redirected
- Added -1/--one-file-system to restrict recursion to the same filesystem
- Added a universal exclusion stack which is currently only used for -X
- Added -X/--exclude to use exclusion stack; supersedes -x/--xsize
- More robust BTRFS enablement behavior in Makefile
- Fixed Unicode display for hard linking on Windows
- Efficiency improvements to internal memory allocator (string_malloc)
- Documentation improvements and updates
- Provide "fdupes_oneline.sh" which emulates old "fdupes -1" feature
- Single file names passed as arguments are now accepted and processed
jdupes 1.8
- All files are now licensed under The MIT License exclusively
......
......@@ -19,31 +19,28 @@ make CFLAGS_EXTRA='-DYOUR_OPTION_ONE -DYOUR_OPTION_TWO'
This is a list of options that can be "turned on" this way:
ENABLE_BTRFS Enable '-B/--dedupe' for btrfs deduplication
DEBUG * Turn on algorithm statistic reporting with '-D'
OMIT_GETOPT_LONG Do not use getopt_long() C library call
ON_WINDOWS Modify code to compile with MinGW on Windows
USE_TREE_REBALANCE * Use experimental tree rebalancing code
CONSIDER_IMBALANCE * Change tree rebalance to analyze weights first
LOW_MEMORY Build for lower memory usage instead of speed
* These options may slow down the program somewhat and are off by
default. Do not enable them unless you are experimenting.
Certain options need to be turned on by setting a variable passed to
make instead of using CFLAGS_EXTRA, e.g. 'make DEBUG=1':
DEBUG Turn on algorithm statistic reporting with '-D'
LOUD '-@' for low-level debugging; enables DEBUG
ENABLE_BTRFS Enable '-B/--dedupe' for btrfs deduplication
LOW_MEMORY Build for lower memory usage instead of speed
The LOW_MEMORY option tweaks various knobs in the program to lower
total memory usage. It also disables some features to reduce the size
of certain data structures. The improvements in memory usage are not
very large, but if you're running in a very RAM-limited environment
or have a CPU with very small caches it may be the best choice.
You can turn on the -@ option for "loud" debugging with:
make LOUD=1
Non-loud debugging can be enabled with
make DEBUG=1
A test directory is included so that you may familiarize yourself with
the way jdupes operates. You may test the program before installing
it by issuing a command such as "./jdupes testdir" or
......
......@@ -51,7 +51,7 @@ MKDIR = mkdir -p
# Make Configuration
CC ?= gcc
COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -pedantic -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef
COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef
COMPILER_OPTIONS += -Wshadow -Wfloat-equal -Wstrict-overflow=5 -Waggregate-return -Wcast-qual -Wswitch-default -Wswitch-enum -Wconversion -Wunreachable-code -Wformat=2 -Winit-self
COMPILER_OPTIONS += -std=gnu99 -O2 -g -D_FILE_OFFSET_BITS=64 -fstrict-aliasing -pipe
......@@ -71,6 +71,13 @@ ifdef HARDEN
COMPILER_OPTIONS += -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -fpie -Wl,-z,relro -Wl,-z,now
endif
# If -DENABLE_BTRFS was supplied through either CFLAGS or CFLAGS_EXTRA,
# switch on the ENABLE_BTRFS make option as well
ifneq (,$(findstring DENABLE_BTRFS,$(CFLAGS) $(CFLAGS_EXTRA)))
ENABLE_BTRFS=1
endif
# MinGW needs this for printf() conversions to work
ifeq ($(OS), Windows_NT)
......@@ -79,7 +86,7 @@ ifndef NO_UNICODE
COMPILER_OPTIONS += -municode
endif
COMPILER_OPTIONS += -D__USE_MINGW_ANSI_STDIO=1
OBJECT_FILES += win_stat.o
OBJS += win_stat.o
override undefine ENABLE_BTRFS
override undefine HAVE_BTRFS_IOCTL_H
endif
......@@ -91,7 +98,9 @@ endif
# New BTRFS support option
ifdef ENABLE_BTRFS
COMPILER_OPTIONS += -DENABLE_BTRFS
OBJECT_FILES += act_dedupefiles.o
OBJS += act_dedupefiles.o
else
OBJS_CLEAN += act_dedupefiles.o
endif
# Low memory mode
ifdef LOW_MEMORY
......@@ -100,33 +109,33 @@ endif
CFLAGS += $(COMPILER_OPTIONS) $(CFLAGS_EXTRA)
INSTALL_PROGRAM = $(INSTALL) -c -m 0755
INSTALL_DATA = $(INSTALL) -c -m 0644
INSTALL_PROGRAM = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -m 0644
# ADDITIONAL_OBJECTS - some platforms will need additional object files
# to support features not supplied by their vendor. Eg: GNU getopt()
#ADDITIONAL_OBJECTS += getopt.o
OBJECT_FILES += jdupes.o jody_hash.o jody_paths.o jody_sort.o jody_win_unicode.o string_malloc.o
OBJECT_FILES += jody_cacheinfo.o
OBJECT_FILES += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o
OBJECT_FILES += $(ADDITIONAL_OBJECTS)
OBJS += jdupes.o jody_hash.o jody_paths.o jody_sort.o jody_win_unicode.o string_malloc.o
OBJS += jody_cacheinfo.o
OBJS += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o
OBJS += $(ADDITIONAL_OBJECTS)
all: jdupes
jdupes: $(OBJECT_FILES)
$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJECT_FILES)
jdupes: $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJS)
installdirs:
test -d $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
test -d $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)
test -e $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
test -e $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)
install: jdupes installdirs
$(INSTALL_PROGRAM) $(PROGRAM_NAME) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
$(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)
clean:
$(RM) $(OBJECT_FILES) $(PROGRAM_NAME) jdupes.exe *~ *.gcno *.gcda *.gcov
$(RM) $(OBJS) $(OBJS_CLEAN) $(PROGRAM_NAME) $(PROGRAM_NAME).exe *~ *.gcno *.gcda *.gcov
distclean: clean
$(RM) *.pkg.tar.xz
......
......@@ -69,46 +69,53 @@ Usage
--------------------------------------------------------------------------
Usage: jdupes [options] DIRECTORY...
-A --nohidden exclude hidden files from consideration
-B --dedupe Send matches to btrfs for block-level deduplication
-d --delete prompt user for files to preserve and delete all
others; important: under particular circumstances,
data may be lost when using this option together
with -s or --symlinks, or when specifying a
particular directory more than once; refer to the
documentation for additional information
-f --omitfirst omit the first file in each set of matches
-h --help display this help message
-H --hardlinks treat hard-linked files as duplicate files. Normally
hard links are treated as non-duplicates for safety
-i --reverse reverse (invert) the match sort order
-I --isolate files in the same specified directory won't match
-@ --loud output annoying low-level debug info while running
-1 --one-file-system do not match files on different filesystems/devices
-A --nohidden exclude hidden files from consideration
-B --dedupe Send matches to btrfs for block-level deduplication
-d --delete prompt user for files to preserve and delete all
others; important: under particular circumstances,
data may be lost when using this option together
with -s or --symlinks, or when specifying a
particular directory more than once; refer to the
documentation for additional information
-D --debug output debug statistics after completion
-f --omitfirst omit the first file in each set of matches
-h --help display this help message
-H --hardlinks treat any linked files as duplicate files. Normally
linked files are treated as non-duplicates for safety
-i --reverse reverse (invert) the match sort order
-I --isolate files in the same specified directory won't match
-l --linksoft make relative symlinks for duplicates w/o prompting
-L --linkhard hard link all duplicate files without prompting
Windows allows a maximum of 1023 hard links per file
-m --summarize summarize dupe information
-N --noprompt together with --delete, preserve the first file in
each set of duplicates and delete the rest without
prompting the user
-o --order=BY select sort order for output, linking and deleting; by
mtime (BY=time) or filename (BY=name, the default)
-O --paramorder Parameter order is more important than selected -o sort
-p --permissions don't consider files with different owner/group or
permission bits as duplicates
-r --recurse for every directory given follow subdirectories
encountered within
-R --recurse: for each directory given after this option follow
subdirectories encountered within (note the ':' at
the end of the option, manpage for more details)
-s --symlinks follow symlinks
-S --size show size of duplicate files
-q --quiet hide progress indicator
-v --version display jdupes version and license information
-x --xsize=SIZE exclude files of size < SIZE bytes from consideration
--xsize=+SIZE '+' specified before SIZE, exclude size > SIZE
K/M/G size suffixes can be used (case-insensitive)
-L --linkhard hard link all duplicate files without prompting
-m --summarize summarize dupe information
-N --noprompt together with --delete, preserve the first file in
each set of duplicates and delete the rest without
prompting the user
-o --order=BY select sort order for output, linking and deleting; by
mtime (BY=time) or filename (BY=name, the default)
-O --paramorder Parameter order is more important than selected -o sort
-p --permissions don't consider files with different owner/group or
permission bits as duplicates
-Q --quick skip byte-for-byte confirmation for quick matching
WARNING: -Q can result in data loss! Be very careful!
-r --recurse for every directory, process its subdirectories too
-R --recurse: for each directory given after this option follow
subdirectories encountered within (note the ':' at
the end of the option, manpage for more details)
-s --symlinks follow symlinks
-S --size show size of duplicate files
-q --quiet hide progress indicator
-v --version display jdupes version and license information
-x --xsize=SIZE exclude files of size < SIZE bytes from consideration
--xsize=+SIZE '+' specified before SIZE, exclude size > SIZE
-X --exclude=spec:info exclude files based on specified criteria
specs: dir size+-=
Exclusions are cumulative: -X dir:abc -X dir:efg
-z --zeromatch consider zero-length files to be duplicates
-Z --softabort If the user aborts (i.e. CTRL-C) act on matches so far
-Z --softabort If the user aborts (i.e. CTRL-C) act on matches so far
For sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)
The -n/--noempty option was removed for safety. Matching zero-length files as
duplicates now requires explicit use of the -z/--zeromatch option instead.
......@@ -118,9 +125,7 @@ Separate line. The groups are then separated from each other by blank lines.
The -s/--symlinks option will treat symlinked files as regular files, but
direct symlinks will be treated as if they are hard linked files and the
-H/--hardlinks option will apply to them in the same manner. This option
used to follow symlinked directories but in the current implementation this
behavior has been disabled due to it being too dangerous.
-H/--hardlinks option will apply to them in the same manner.
When using -d or --delete, care should be taken to insure against accidental
data loss. While no information will be immediately lost, using this option
......@@ -152,7 +157,7 @@ each link candidate. These arrows are as follows:
-//-> File linking failed due to an error during the linking process
If your data set has linked files and you do not use -L to always consider
If your data set has linked files and you do not use -H to always consider
them as duplicates, you may still see linked files appear together in match
sets. This is caused by a separate file that matches with linked files
independently and is the correct behavior. See notes below on the "triangle
......@@ -241,23 +246,24 @@ This is also an exercise for the user.
jdupes tracks each directory traversed by dev:inode pair to avoid adding
the contents of the same directory twice. This prevents the user from
being able to register all of their files twice by duplicating an entry
on the command line. Symlinked directories are not followed. Files are
renamed to a temporary name before any linking is done and if the link
operation fails they are renamed back to the original name.
on the command line. Symlinked directories are only followed if they
weren't already followed earlier. Files are renamed to a temporary name
before any linking is done and if the link operation fails they are renamed
back to the original name.
"Collision Robustness"
jdupes uses jodyhash for file data hashing. This hash is extremely fast
with a low collision rate, but it still encounters collisions as any hash
function will ("secure" or otherwise) due to the "birthday problem." This
function will ("secure" or otherwise) due to the pigeonhole principle. This
is why jdupes performs a full-file verification before declaring a match.
It is slower than matching on hashes alone, but the birthday problem puts
It's slower than matching by hash only, but the pigeonhole principle puts
all data sets larger than the hash at risk of collision, meaning a false
duplicate detection and data loss. The slower completion time is not as
important as data integrity. Checking for a match based on hashes alone
is irresponsible, and using secure hashes like MD5 or the SHA families
is orders of magnitude slower than jodyhash while still suffering from
the risk brought about by the birthday problem. In short, the birthday
problem means that if you have 365 days in a year and 366 people, the
the risk brought about by pigeonholing. An example of this problem is
as follows: if you have 365 days in a year and 366 people, then at least
two of them are guaranteed to share a birthday; likewise,
even though SHA512 is a 512-bit (64-byte) wide hash, there are guaranteed
to be at least 256 pairs of data streams that cause a collision once any
......
......@@ -11,9 +11,3 @@
- Add a way to store stat() info + initial and full hashes for
explicit loading in future runs to speed up repeated calls to
the program.
- The --xsize option can be improved. Instead of simply specifying an
exclusion size min/max, the option should offer multiple ways to
specify allowed file sizes. Examples:
- '--xsize=512-1024' only examine files between 512 and 1024 bytes
- '--xsize=1M-3M,+20M' examine files 1-3 MB and >20 MB in size
......@@ -6,6 +6,8 @@
#ifdef ENABLE_BTRFS
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
......@@ -142,7 +144,7 @@ extern void dedupefiles(file_t * restrict files)
files->d_name, dupe_filenames[cur_info], dedupeerrstr(status),
status, readonly_msg[readonly]);
} else {
fprintf(stderr, "warning: dedupe only did %jd bytes: %s => %s: %s [%d]%s\n",
fprintf(stderr, "warning: dedupe only did %" PRIdMAX " bytes: %s => %s: %s [%d]%s\n",
(intmax_t)same->info[cur_info].bytes_deduped, files->d_name,
dupe_filenames[cur_info], dedupeerrstr(status), status, readonly_msg[readonly]);
}
......
......@@ -3,6 +3,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include "jdupes.h"
#include "jody_win_unicode.h"
......@@ -21,6 +23,8 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty)
unsigned int number, sum, max, x;
size_t i;
if (!files) return;
groups = get_max_dupes(files, &max, NULL);
max++;
......@@ -61,7 +65,7 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty)
/* prompt for files to preserve */
printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one)",
curgroup, groups, counter);
if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%ju byte%c each)", (uintmax_t)files->size,
if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%" PRIuMAX " byte%c each)", (uintmax_t)files->size,
(files->size != 1) ? 's' : ' ');
printf(": ");
fflush(stdout);
......
......@@ -3,14 +3,16 @@
#include "jdupes.h"
#if !defined NO_HARDLINKS || !defined NO_SYMLINKS
/* Compile out the code if no linking support is built in */
#if !(defined NO_HARDLINKS && defined NO_SYMLINKS)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "act_linkfiles.h"
#include "jody_win_unicode.h"
#if defined _WIN32 || defined __CYGWIN__
#ifdef ON_WINDOWS
#include "win_stat.h"
#endif
......@@ -212,6 +214,7 @@ extern void linkfiles(file_t *files, const int hard)
/* Create the desired hard link with the original file's name */
errno = 0;
success = 0;
#ifdef ON_WINDOWS
#ifdef UNICODE
if (!M2W(srcfile->d_name, wname2)) {
......@@ -223,7 +226,6 @@ extern void linkfiles(file_t *files, const int hard)
if (CreateHardLink(dupelist[x]->d_name, srcfile->d_name, NULL) == TRUE) success = 1;
#endif
#else
success = 0;
if (hard) {
if (link(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1;
#ifdef NO_SYMLINKS
......@@ -241,11 +243,14 @@ extern void linkfiles(file_t *files, const int hard)
#endif /* NO_SYMLINKS */
#endif /* ON_WINDOWS */
if (success) {
if (!ISFLAG(flags, F_HIDEPROGRESS)) printf("%s %s\n", (hard ? "---->" : "-@@->"), dupelist[x]->d_name);
if (!ISFLAG(flags, F_HIDEPROGRESS)) {
printf("%s ", hard ? "---->" : "-@@->");
fwprint(stdout, dupelist[x]->d_name, 1);
}
} else {
/* The link failed. Warn the user and put the link target back */
if (!ISFLAG(flags, F_HIDEPROGRESS)) {
printf("-//-> "); fwprint(stderr, dupelist[x]->d_name, 1);
printf("-//-> "); fwprint(stdout, dupelist[x]->d_name, 1);
}
fprintf(stderr, "warning: unable to link '"); fwprint(stderr, dupelist[x]->d_name, 0);
fprintf(stderr, "' -> '"); fwprint(stderr, srcfile->d_name, 0);
......
......@@ -2,6 +2,8 @@
* This file is part of jdupes; see jdupes.c for license information */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "jdupes.h"
#include "jody_win_unicode.h"
#include "act_printmatches.h"
......@@ -11,11 +13,13 @@ extern void printmatches(file_t * restrict files)
file_t * restrict tmpfile;
int printed = 0;
LOUD(fprintf(stderr, "act_printmatches: %p\n", files));
while (files != NULL) {
if (ISFLAG(files->flags, F_HAS_DUPES)) {
printed = 1;
if (!ISFLAG(flags, F_OMITFIRST)) {
if (ISFLAG(flags, F_SHOWSIZE)) printf("%jd byte%c each:\n", (intmax_t)files->size,
if (ISFLAG(flags, F_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size,
(files->size != 1) ? 's' : ' ');
fwprint(stdout, files->d_name, 1);
}
......@@ -24,14 +28,14 @@ extern void printmatches(file_t * restrict files)
fwprint(stdout, tmpfile->d_name, 1);
tmpfile = tmpfile->duplicates;
}
if (files->next != NULL) printf("\n");
if (files->next != NULL) fwprint(stdout, "", 1);
}
files = files->next;
}
if (printed == 0) fprintf(stderr, "No duplicates found.\n");
if (printed == 0) fwprint(stderr, "No duplicates found.", 1);
return;
}
......@@ -2,6 +2,8 @@
* This file is part of jdupes; see jdupes.c for license information */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "jdupes.h"
#include "act_summarize.h"
......@@ -31,9 +33,9 @@ extern void summarizematches(const file_t * restrict files)
else
{
printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets);
if (numbytes < 1000) printf("%jd byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' ');
else if (numbytes <= 1000000) printf("%jd KB\n", (intmax_t)(numbytes / 1000));
else printf("%jd MB\n", (intmax_t)(numbytes / 1000000));
if (numbytes < 1000) printf("%" PRIdMAX " byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' ');
else if (numbytes <= 1000000) printf("%" PRIdMAX " KB\n", (intmax_t)(numbytes / 1000));
else printf("%" PRIdMAX " MB\n", (intmax_t)(numbytes / 1000000));
}
return;
}
#!/bin/sh
# Emulates fdupes -1 output: every set of matches is printed on one line,
# with spaces inside file names escaped as "\ " and a single space after
# each name. A blank input line (the separator between sets) ends the line.
# Usage: jdupes command line | ./fdupes_oneline.sh

# IFS= and -r keep each file name intact: without them, leading/trailing
# blanks are stripped and backslashes are consumed by read. The extra
# [ -n "$LINE" ] test processes a final line that lacks a trailing newline.
while IFS= read -r LINE || [ -n "$LINE" ]
do if [ -z "$LINE" ]
then echo
# printf is used because 'echo -n' is not portable across /bin/sh variants
else printf '%s' "$LINE" | sed 's/ /\\ /g'; printf ' '
fi
done
......@@ -18,6 +18,12 @@ byte-by-byte comparison.
.SH OPTIONS
.TP
.B -@ --loud
output annoying low-level debug info while running
.TP
.B -1 --one-file-system
do not match files that are on different filesystems or devices
.TP
.B -A --nohidden
exclude hidden files from consideration
.TP
......@@ -109,19 +115,23 @@ follow symlinked directories
.B -v --version
display jdupes version and compilation feature flags
.TP
.B -x --xsize=[+]SIZE
.B -x --xsize=[+]SIZE (NOTE: deprecated in favor of \-X)
exclude files of size less than SIZE from consideration, or if SIZE is
prefixed with a '+' i.e.
jdupes -x +226 [files]
then exclude files larger than SIZE. The following suffixes can be used
and are not case-sensitive:
then exclude files larger than SIZE. Suffixes K/M/G can be used.
.TP
.B -X --exclude=spec:info
exclude files based on specified criteria; supported specs are:
.RS
.IP `K'
for kilobytes (units of 1024 bytes)
.IP `M'
for megabytes (units of 1024 x 1024 bytes)
.IP `G'
for gigabytes (units of 1024 x 1024 x 1024 bytes)
.IP `size[+-=]:number[suffix]'
Match only if size is greater than (+), less than (-), or equal to (=) the
specified number, with an optional multiplier suffix. The +/- and =
specifiers can be combined; e.g. "size+=4K" will match if size is greater
than or equal to four kilobytes (4096 bytes). Suffixes supported are
K/M/G/T/P/E with a B or iB extension (all case-insensitive); no extension
or an iB extension specifies binary multipliers while a B extension
specifies decimal multipliers (e.g. 4K or 4KiB = 4096, 4KB = 4000).
.RE
.TP
.B -z --zeromatch
......@@ -168,6 +178,15 @@ will follow subdirectories under both a and b.
will always place 'dir1' results first in any match set (where relevant)
.SH CAVEATS
Using
.B \-1
or
.BR \-\-one\-file\-system
prevents matches that cross filesystems, but a more relaxed form of this
option may be added that allows cross-matching for all filesystems that
each parameter is present on.
When using
.B \-d
or
......
......@@ -31,6 +31,7 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#ifndef OMIT_GETOPT_LONG
#include <getopt.h>
#endif
......@@ -53,6 +54,7 @@
#include "act_printmatches.h"
#include "act_summarize.h"
/* Detect Windows and modify as needed */
#if defined _WIN32 || defined __CYGWIN__
const char dir_sep = '\\';
......@@ -77,6 +79,7 @@ wchar_t wname[PATH_MAX];
wchar_t wname2[PATH_MAX];
wchar_t wstr[PATH_MAX];
int out_mode = _O_TEXT;
int err_mode = _O_TEXT;
#define M2W(a,b) MultiByteToWideChar(CP_UTF8, 0, a, -1, (LPWSTR)b, PATH_MAX)
#define W2M(a,b) WideCharToMultiByte(CP_UTF8, 0, a, -1, (LPSTR)b, PATH_MAX, NULL, NULL)
#endif /* UNICODE */
......@@ -97,12 +100,6 @@ struct winstat ws;
struct stat s;
#endif
static uintmax_t excludesize = 0;
static enum {
SMALLERTHAN,
LARGERTHAN
} excludetype = SMALLERTHAN;
/* Larger chunk size makes large files process faster but uses more RAM */
#ifndef CHUNK_SIZE
#define CHUNK_SIZE 32768
......@@ -134,6 +131,32 @@ static size_t auto_chunk_size;
/* For interactive deletion input */
#define INPUT_SIZE 512
/* Exported table mapping size suffix strings to byte multipliers.
 * The final entry has a NULL suffix and marks the end of the table. */
const struct size_suffix size_suffix[] = {
  /* Plain bytes (someone may actually try to use this) */
  { "b",   1 },
  /* Binary multipliers: powers of 1024, bare letter or with "ib" */
  { "k",   (uint64_t)1 << 10 },
  { "kib", (uint64_t)1 << 10 },
  { "m",   (uint64_t)1 << 20 },
  { "mib", (uint64_t)1 << 20 },
  { "g",   (uint64_t)1 << 30 },
  { "gib", (uint64_t)1 << 30 },
  { "t",   (uint64_t)1 << 40 },
  { "tib", (uint64_t)1 << 40 },
  { "p",   (uint64_t)1 << 50 },
  { "pib", (uint64_t)1 << 50 },
  { "e",   (uint64_t)1 << 60 },
  { "eib", (uint64_t)1 << 60 },
  /* Decimal multipliers: powers of 1000 */
  { "kb",  (uint64_t)1000 },
  { "mb",  (uint64_t)1000 * 1000 },
  { "gb",  (uint64_t)1000 * 1000 * 1000 },
  { "tb",  (uint64_t)1000 * 1000 * 1000 * 1000 },
  { "pb",  (uint64_t)1000 * 1000 * 1000 * 1000 * 1000 },
  { "eb",  (uint64_t)1000 * 1000 * 1000 * 1000 * 1000 * 1000 },
  /* End-of-table marker */
  { NULL,  0 },
};
/* Assemble extension string from compile-time options */
static const char *extensions[] = {
#ifdef ON_WINDOWS
......@@ -196,9 +219,21 @@ struct travdone {
};
static struct travdone *travdone_head = NULL;
/* Head of the exclusion list (NULL until an exclusion has been added) and
 * the static table of recognized exclusion tag strings with their flags.
 * The table is terminated by an entry whose tag is NULL. */
struct exclude *exclude_head = NULL;
const struct exclude_tags exclude_tags[] = {
{ "dir", X_DIR },
{ "size+", X_SIZE_GT },
{ "size+=", X_SIZE_GTEQ },
{ "size-=", X_SIZE_LTEQ },
{ "size-", X_SIZE_LT },
{ "size=", X_SIZE_EQ },
{ NULL, 0 },
};
/* Required for progress indicator code */
static uintmax_t filecount = 0;
static uintmax_t progress = 0, dir_progress = 0, dupecount = 0;
static uintmax_t progress = 0, item_progress = 0, dupecount = 0;
/* Number of read loops before checking progress indicator */
#define CHECK_MINIMUM 256
......@@ -223,8 +258,8 @@ static unsigned int max_depth = 0;
/* File tree head */
static filetree_t *checktree = NULL;
/* Directory parameter position counter */
static unsigned int user_dir_count = 1;
/* Directory/file parameter position counter */
static unsigned int user_item_count = 1;
/* registerfile() direction options */
enum tree_direction { NONE, LEFT, RIGHT };
......@@ -341,8 +376,8 @@ static void update_progress(const char * const restrict msg, const int file_perc
gettimeofday(&time2, NULL);
if (progress == 0 || time2.tv_sec > time1.tv_sec) {
fprintf(stderr, "\rProgress [%ju/%ju, %ju pairs matched] %ju%%", progress, filecount,
dupecount, (progress * 100) / filecount);
fprintf(stderr, "\rProgress [%" PRIuMAX "/%" PRIuMAX ", %" PRIuMAX " pairs matched] %" PRIuMAX "%%",
progress, filecount, dupecount, (progress * 100) / filecount);
if (file_percent > -1 && msg != NULL) {
fprintf(stderr, " (%s: %d%%) ", msg, file_percent);
did_fpct = 1;
......@@ -412,7 +447,7 @@ extern inline int getfilestats(file_t * const restrict file)
file->mode = ws.mode;
#ifndef NO_HARDLINKS
file->nlink = ws.nlink;
#endif /* NO_HARDLINKS */
#endif
#else
if (stat(file->d_name, &s) != 0) return -1;
file->inode = s.st_ino;
......@@ -420,6 +455,9 @@ extern inline int getfilestats(file_t * const restrict file)
file->device = s.st_dev;
file->mtime = s.st_mtime;
file->mode = s.st_mode;
#ifndef NO_HARDLINKS
file->nlink = s.st_nlink;
#endif
#ifndef NO_PERMS
file->uid = s.st_uid;
file->gid = s.st_gid;
......@@ -433,6 +471,90 @@ extern inline int getfilestats(file_t * const restrict file)
}
static void add_exclude(const char *option)
{
char *opt, *p;
struct exclude *excl = exclude_head;
const struct exclude_tags *tags = exclude_tags;
const struct size_suffix *ss = size_suffix;
if (option == NULL) nullptr("add_exclude()");
LOUD(fprintf(stderr, "add_exclude '%s'\n", option);)
opt = string_malloc(strlen(option) + 1);
if (opt == NULL) oom("add_exclude option");
strcpy(opt, option);
p = opt;
while (*p != ':' && *p != '\0') p++;
/* Split tag string into *opt (tag) and *p (value) */
if (*p == ':') {
*p = '\0';
p++;
}
while (tags->tag != NULL && strcmp(tags->tag, opt) != 0) tags++;
if (tags->tag == NULL) goto bad_tag;
/* Check for a tag that requires a value */
if (tags->flags & XX_EXCL_DATA && *p == '\0') goto spec_missing;
/* *p is now at the value, NOT the tag string! */
if (exclude_head != NULL) {
/* Add to end of exclusion stack if head is present */
while (excl->next != NULL) excl = excl->next;
excl->next = string_malloc(sizeof(struct exclude) + strlen(p));
if (excl->next == NULL) oom("add_exclude alloc");
excl = excl->next;
} else {
/* Allocate exclude_head if no exclusions exist yet */
exclude_head = string_malloc(sizeof(struct exclude) + strlen(p));
if (exclude_head == NULL) oom("add_exclude alloc");
excl = exclude_head;
}
/* Set tag value from predefined tag array */
excl->flags = tags->flags;
/* Initialize the new exclude element */
excl->next = NULL;
if (excl->flags & XX_EXCL_OFFSET) {
/* Exclude uses a number; handle it with possible suffixes */
*(excl->param) = '\0';
/* Get base size */
if (*p < '0' || *p > '9') goto bad_size_suffix;
excl->size = strtoll(p, &p, 10);
/* Handle suffix, if any */
if (*p != '\0') {
while (ss->suffix != NULL && strcasecmp(ss->suffix, p) != 0) ss++;
if (ss->suffix == NULL) goto bad_size_suffix;
excl->size *= ss->multiplier;
}
} else {
/* Exclude uses string data; just copy it */
excl->size = 0;
strcpy(excl->param, p);
}
LOUD(fprintf(stderr, "Added exclude: tag '%s', data '%s', size %lld, flags %d\n", opt, excl->param, (long long)excl->size, excl->flags);)
string_free(opt);
return;
spec_missing:
fprintf(stderr, "Exclude spec missing or invalid: -X spec:data\n");
exit(EXIT_FAILURE);
bad_tag:
fprintf(stderr, "Invalid exclusion tag was specified\n");
exit(EXIT_FAILURE);
bad_size_suffix:
fprintf(stderr, "Invalid -X size suffix specified; use B or KMGTPE[i][B]\n");
exit(EXIT_FAILURE);
}
extern int getdirstats(const char * const restrict name,
jdupes_ino_t * const restrict inode, dev_t * const restrict dev)
{
......@@ -443,10 +565,12 @@ extern int getdirstats(const char * const restrict name,
if (win_stat(name, &ws) != 0) return -1;
*inode = ws.inode;
*dev = ws.device;
if (!S_ISDIR(ws.mode)) return 1;
#else
if (stat(name, &s) != 0) return -1;
*inode = s.st_ino;
*dev = s.st_dev;
if (!S_ISDIR(s.st_mode)) return 1;
#endif /* ON_WINDOWS */
return 0;
}
......@@ -470,7 +594,13 @@ extern int check_conditions(const file_t * const restrict file1, const file_t *
return -1;