Import Upstream version 1.9

parent 4f04f47a
jdupes 1.9
- stderr on Windows is no longer polluted or empty when redirected
- Added -1/--one-file-system to restrict recursion to the same filesystem
- Added a universal exclusion stack which is currently only used for -X
- Added -X/--exclude to use exclusion stack; supersedes -x/--xsize
- More robust BTRFS enablement behavior in Makefile
- Fixed Unicode display for hard linking on Windows
- Efficiency improvements to internal memory allocator (string_malloc)
- Documentation improvements and updates
- Provide "fdupes_oneline.sh" which emulates old "fdupes -1" feature
- Single file names passed as arguments are now accepted and processed
jdupes 1.8 jdupes 1.8
- All files are now licensed under The MIT License exclusively - All files are now licensed under The MIT License exclusively
......
...@@ -19,31 +19,28 @@ make CFLAGS_EXTRA='-DYOUR_OPTION_ONE -DYOUR_OPTION_TWO' ...@@ -19,31 +19,28 @@ make CFLAGS_EXTRA='-DYOUR_OPTION_ONE -DYOUR_OPTION_TWO'
This is a list of options that can be "turned on" this way: This is a list of options that can be "turned on" this way:
ENABLE_BTRFS Enable '-B/--dedupe' for btrfs deduplication
DEBUG * Turn on algorithm statistic reporting with '-D'
OMIT_GETOPT_LONG Do not use getopt_long() C library call OMIT_GETOPT_LONG Do not use getopt_long() C library call
ON_WINDOWS Modify code to compile with MinGW on Windows ON_WINDOWS Modify code to compile with MinGW on Windows
USE_TREE_REBALANCE * Use experimental tree rebalancing code USE_TREE_REBALANCE * Use experimental tree rebalancing code
CONSIDER_IMBALANCE * Change tree rebalance to analyze weights first CONSIDER_IMBALANCE * Change tree rebalance to analyze weights first
LOW_MEMORY Build for lower memory usage instead of speed
* These options may slow down the program somewhat and are off by * These options may slow down the program somewhat and are off by
default. Do not enable them unless you are experimenting. default. Do not enable them unless you are experimenting.
Certain options need to be turned on by setting a variable passed to
make instead of using CFLAGS_EXTRA, i.e. 'make DEBUG=1':
DEBUG Turn on algorithm statistic reporting with '-D'
LOUD '-@' for low-level debugging; enables DEBUG
ENABLE_BTRFS Enable '-B/--dedupe' for btrfs deduplication
LOW_MEMORY Build for lower memory usage instead of speed
The LOW_MEMORY option tweaks various knobs in the program to lower The LOW_MEMORY option tweaks various knobs in the program to lower
total memory usage. It also disables some features to reduce the size total memory usage. It also disables some features to reduce the size
of certain data structures. The improvements in memory usage are not of certain data structures. The improvements in memory usage are not
very large, but if you're running in a very RAM-limited environment very large, but if you're running in a very RAM-limited environment
or have a CPU with very small caches it may be the best choice. or have a CPU with very small caches it may be the best choice.
You can turn on the -@ option for "loud" debugging with:
make LOUD=1
Non-loud debugging can be enabled with
make DEBUG=1
A test directory is included so that you may familiarize yourself with A test directory is included so that you may familiarize yourself with
the way jdupes operates. You may test the program before installing the way jdupes operates. You may test the program before installing
it by issuing a command such as "./jdupes testdir" or it by issuing a command such as "./jdupes testdir" or
......
...@@ -51,7 +51,7 @@ MKDIR = mkdir -p ...@@ -51,7 +51,7 @@ MKDIR = mkdir -p
# Make Configuration # Make Configuration
CC ?= gcc CC ?= gcc
COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -pedantic -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef
COMPILER_OPTIONS += -Wshadow -Wfloat-equal -Wstrict-overflow=5 -Waggregate-return -Wcast-qual -Wswitch-default -Wswitch-enum -Wconversion -Wunreachable-code -Wformat=2 -Winit-self COMPILER_OPTIONS += -Wshadow -Wfloat-equal -Wstrict-overflow=5 -Waggregate-return -Wcast-qual -Wswitch-default -Wswitch-enum -Wconversion -Wunreachable-code -Wformat=2 -Winit-self
COMPILER_OPTIONS += -std=gnu99 -O2 -g -D_FILE_OFFSET_BITS=64 -fstrict-aliasing -pipe COMPILER_OPTIONS += -std=gnu99 -O2 -g -D_FILE_OFFSET_BITS=64 -fstrict-aliasing -pipe
...@@ -71,6 +71,13 @@ ifdef HARDEN ...@@ -71,6 +71,13 @@ ifdef HARDEN
COMPILER_OPTIONS += -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -fpie -Wl,-z,relro -Wl,-z,now COMPILER_OPTIONS += -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -fpie -Wl,-z,relro -Wl,-z,now
endif endif
# Catch someone trying to enable BTRFS in flags and turn on ENABLE_BTRFS
ifneq (,$(findstring DENABLE_BTRFS,$(CFLAGS)))
ENABLE_BTRFS=1
endif
ifneq (,$(findstring DENABLE_BTRFS,$(CFLAGS_EXTRA)))
ENABLE_BTRFS=1
endif
# MinGW needs this for printf() conversions to work # MinGW needs this for printf() conversions to work
ifeq ($(OS), Windows_NT) ifeq ($(OS), Windows_NT)
...@@ -79,7 +86,7 @@ ifndef NO_UNICODE ...@@ -79,7 +86,7 @@ ifndef NO_UNICODE
COMPILER_OPTIONS += -municode COMPILER_OPTIONS += -municode
endif endif
COMPILER_OPTIONS += -D__USE_MINGW_ANSI_STDIO=1 COMPILER_OPTIONS += -D__USE_MINGW_ANSI_STDIO=1
OBJECT_FILES += win_stat.o OBJS += win_stat.o
override undefine ENABLE_BTRFS override undefine ENABLE_BTRFS
override undefine HAVE_BTRFS_IOCTL_H override undefine HAVE_BTRFS_IOCTL_H
endif endif
...@@ -91,7 +98,9 @@ endif ...@@ -91,7 +98,9 @@ endif
# New BTRFS support option # New BTRFS support option
ifdef ENABLE_BTRFS ifdef ENABLE_BTRFS
COMPILER_OPTIONS += -DENABLE_BTRFS COMPILER_OPTIONS += -DENABLE_BTRFS
OBJECT_FILES += act_dedupefiles.o OBJS += act_dedupefiles.o
else
OBJS_CLEAN += act_dedupefiles.o
endif endif
# Low memory mode # Low memory mode
ifdef LOW_MEMORY ifdef LOW_MEMORY
...@@ -100,33 +109,33 @@ endif ...@@ -100,33 +109,33 @@ endif
CFLAGS += $(COMPILER_OPTIONS) $(CFLAGS_EXTRA) CFLAGS += $(COMPILER_OPTIONS) $(CFLAGS_EXTRA)
INSTALL_PROGRAM = $(INSTALL) -c -m 0755 INSTALL_PROGRAM = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -c -m 0644 INSTALL_DATA = $(INSTALL) -m 0644
# ADDITIONAL_OBJECTS - some platforms will need additional object files # ADDITIONAL_OBJECTS - some platforms will need additional object files
# to support features not supplied by their vendor. Eg: GNU getopt() # to support features not supplied by their vendor. Eg: GNU getopt()
#ADDITIONAL_OBJECTS += getopt.o #ADDITIONAL_OBJECTS += getopt.o
OBJECT_FILES += jdupes.o jody_hash.o jody_paths.o jody_sort.o jody_win_unicode.o string_malloc.o OBJS += jdupes.o jody_hash.o jody_paths.o jody_sort.o jody_win_unicode.o string_malloc.o
OBJECT_FILES += jody_cacheinfo.o OBJS += jody_cacheinfo.o
OBJECT_FILES += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o OBJS += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o
OBJECT_FILES += $(ADDITIONAL_OBJECTS) OBJS += $(ADDITIONAL_OBJECTS)
all: jdupes all: jdupes
jdupes: $(OBJECT_FILES) jdupes: $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJECT_FILES) $(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJS)
installdirs: installdirs:
test -d $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR) test -e $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
test -d $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR) test -e $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)
install: jdupes installdirs install: jdupes installdirs
$(INSTALL_PROGRAM) $(PROGRAM_NAME) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME) $(INSTALL_PROGRAM) $(PROGRAM_NAME) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
$(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT) $(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)
clean: clean:
$(RM) $(OBJECT_FILES) $(PROGRAM_NAME) jdupes.exe *~ *.gcno *.gcda *.gcov $(RM) $(OBJS) $(OBJS_CLEAN) $(PROGRAM_NAME) $(PROGRAM_NAME).exe *~ *.gcno *.gcda *.gcov
distclean: clean distclean: clean
$(RM) *.pkg.tar.xz $(RM) *.pkg.tar.xz
......
...@@ -69,46 +69,53 @@ Usage ...@@ -69,46 +69,53 @@ Usage
-------------------------------------------------------------------------- --------------------------------------------------------------------------
Usage: jdupes [options] DIRECTORY... Usage: jdupes [options] DIRECTORY...
-A --nohidden exclude hidden files from consideration -@ --loud output annoying low-level debug info while running
-B --dedupe Send matches to btrfs for block-level deduplication -1 --one-file-system do not match files on different filesystems/devices
-d --delete prompt user for files to preserve and delete all -A --nohidden exclude hidden files from consideration
others; important: under particular circumstances, -B --dedupe Send matches to btrfs for block-level deduplication
data may be lost when using this option together -d --delete prompt user for files to preserve and delete all
with -s or --symlinks, or when specifying a others; important: under particular circumstances,
particular directory more than once; refer to the data may be lost when using this option together
documentation for additional information with -s or --symlinks, or when specifying a
-f --omitfirst omit the first file in each set of matches particular directory more than once; refer to the
-h --help display this help message documentation for additional information
-H --hardlinks treat hard-linked files as duplicate files. Normally -D --debug output debug statistics after completion
hard links are treated as non-duplicates for safety -f --omitfirst omit the first file in each set of matches
-i --reverse reverse (invert) the match sort order -h --help display this help message
-I --isolate files in the same specified directory won't match -H --hardlinks treat any linked files as duplicate files. Normally
linked files are treated as non-duplicates for safety
-i --reverse reverse (invert) the match sort order
-I --isolate files in the same specified directory won't match
-l --linksoft make relative symlinks for duplicates w/o prompting -l --linksoft make relative symlinks for duplicates w/o prompting
-L --linkhard hard link all duplicate files without prompting -L --linkhard hard link all duplicate files without prompting
Windows allows a maximum of 1023 hard links per file -m --summarize summarize dupe information
-m --summarize summarize dupe information -N --noprompt together with --delete, preserve the first file in
-N --noprompt together with --delete, preserve the first file in each set of duplicates and delete the rest without
each set of duplicates and delete the rest without prompting the user
prompting the user -o --order=BY select sort order for output, linking and deleting; by
-o --order=BY select sort order for output, linking and deleting; by -O --paramorder Parameter order is more important than selected -o sort
-O --paramorder Parameter order is more important than selected -o sort mtime (BY=time) or filename (BY=name, the default)
mtime (BY=time) or filename (BY=name, the default) -p --permissions don't consider files with different owner/group or
-p --permissions don't consider files with different owner/group or permission bits as duplicates
permission bits as duplicates -Q --quick skip byte-for-byte confirmation for quick matching
-r --recurse for every directory given follow subdirectories WARNING: -Q can result in data loss! Be very careful!
encountered within -r --recurse for every directory, process its subdirectories too
-R --recurse: for each directory given after this option follow -R --recurse: for each directory given after this option follow
subdirectories encountered within (note the ':' at subdirectories encountered within (note the ':' at
the end of the option, manpage for more details) the end of the option, manpage for more details)
-s --symlinks follow symlinks -s --symlinks follow symlinks
-S --size show size of duplicate files -S --size show size of duplicate files
-q --quiet hide progress indicator -q --quiet hide progress indicator
-v --version display jdupes version and license information -v --version display jdupes version and license information
-x --xsize=SIZE exclude files of size < SIZE bytes from consideration -x --xsize=SIZE exclude files of size < SIZE bytes from consideration
--xsize=+SIZE '+' specified before SIZE, exclude size > SIZE --xsize=+SIZE '+' specified before SIZE, exclude size > SIZE
K/M/G size suffixes can be used (case-insensitive) -X --exclude=spec:info exclude files based on specified criteria
specs: dir size+-=
Exclusions are cumulative: -X dir:abc -X dir:efg
-z --zeromatch consider zero-length files to be duplicates -z --zeromatch consider zero-length files to be duplicates
-Z --softabort If the user aborts (i.e. CTRL-C) act on matches so far -Z --softabort If the user aborts (i.e. CTRL-C) act on matches so far
For sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)
The -n/--noempty option was removed for safety. Matching zero-length files as The -n/--noempty option was removed for safety. Matching zero-length files as
duplicates now requires explicit use of the -z/--zeromatch option instead. duplicates now requires explicit use of the -z/--zeromatch option instead.
...@@ -118,9 +125,7 @@ Separate line. The groups are then separated from each other by blank lines. ...@@ -118,9 +125,7 @@ Separate line. The groups are then separated from each other by blank lines.
The -s/--symlinks option will treat symlinked files as regular files, but The -s/--symlinks option will treat symlinked files as regular files, but
direct symlinks will be treated as if they are hard linked files and the direct symlinks will be treated as if they are hard linked files and the
-H/--hardlinks option will apply to them in the same manner. This option -H/--hardlinks option will apply to them in the same manner.
used to follow symlinked directories but in the current implementation this
behavior has been disabled due to it being too dangerous.
When using -d or --delete, care should be taken to insure against accidental When using -d or --delete, care should be taken to insure against accidental
data loss. While no information will be immediately lost, using this option data loss. While no information will be immediately lost, using this option
...@@ -152,7 +157,7 @@ each link candidate. These arrows are as follows: ...@@ -152,7 +157,7 @@ each link candidate. These arrows are as follows:
-//-> File linking failed due to an error during the linking process -//-> File linking failed due to an error during the linking process
If your data set has linked files and you do not use -L to always consider If your data set has linked files and you do not use -H to always consider
them as duplicates, you may still see linked files appear together in match them as duplicates, you may still see linked files appear together in match
sets. This is caused by a separate file that matches with linked files sets. This is caused by a separate file that matches with linked files
independently and is the correct behavior. See notes below on the "triangle independently and is the correct behavior. See notes below on the "triangle
...@@ -241,23 +246,24 @@ This is also an exercise for the user. ...@@ -241,23 +246,24 @@ This is also an exercise for the user.
jdupes tracks each directory traversed by dev:inode pair to avoid adding jdupes tracks each directory traversed by dev:inode pair to avoid adding
the contents of the same directory twice. This prevents the user from the contents of the same directory twice. This prevents the user from
being able to register all of their files twice by duplicating an entry being able to register all of their files twice by duplicating an entry
on the command line. Symlinked directories are not followed. Files are on the command line. Symlinked directories are only followed if they
renamed to a temporary name before any linking is done and if the link weren't already followed earlier. Files are renamed to a temporary name
operation fails they are renamed back to the original name. before any linking is done and if the link operation fails they are renamed
back to the original name.
"Collision Robustness" "Collision Robustness"
jdupes uses jodyhash for file data hashing. This hash is extremely fast jdupes uses jodyhash for file data hashing. This hash is extremely fast
with a low collision rate, but it still encounters collisions as any hash with a low collision rate, but it still encounters collisions as any hash
function will ("secure" or otherwise) due to the "birthday problem." This function will ("secure" or otherwise) due to the pigeonhole principle. This
is why jdupes performs a full-file verification before declaring a match. is why jdupes performs a full-file verification before declaring a match.
It is slower than matching on hashes alone, but the birthday problem puts It's slower than matching by hash only, but the pigeonhole principle puts
all data sets larger than the hash at risk of collision, meaning a false all data sets larger than the hash at risk of collision, meaning a false
duplicate detection and data loss. The slower completion time is not as duplicate detection and data loss. The slower completion time is not as
important as data integrity. Checking for a match based on hashes alone important as data integrity. Checking for a match based on hashes alone
is irresponsible, and using secure hashes like MD5 or the SHA families is irresponsible, and using secure hashes like MD5 or the SHA families
is orders of magnitude slower than jodyhash while still suffering from is orders of magnitude slower than jodyhash while still suffering from
the risk brought about by the birthday problem. In short, the birthday the risk brought about by the pigeonholing. An example of this problem is
problem means that if you have 365 days in a year and 366 people, the as follows: if you have 365 days in a year and 366 people, the chance of
having at least two birthdays on the same day is guaranteed; likewise, having at least two birthdays on the same day is guaranteed; likewise,
even though SHA512 is a 512-bit (64-byte) wide hash, there are guaranteed even though SHA512 is a 512-bit (64-byte) wide hash, there are guaranteed
to be at least 256 pairs of data streams that cause a collision once any to be at least 256 pairs of data streams that cause a collision once any
......
...@@ -11,9 +11,3 @@ ...@@ -11,9 +11,3 @@
- Add a way to store stat() info + initial and full hashes for - Add a way to store stat() info + initial and full hashes for
explicit loading in future runs to speed up repeated calls to explicit loading in future runs to speed up repeated calls to
the program. the program.
- The --xsize option can be improved. Instead of simply specifying an
exclusion size min/max, the option should offer multiple ways to
specify allowed file sizes. Examples:
- '--xsize=512-1024' only examine files between 512 and 1024 bytes
- '--xsize=1M-3M,+20M' examine files 1-3 MB and >20 MB in size
...@@ -6,6 +6,8 @@ ...@@ -6,6 +6,8 @@
#ifdef ENABLE_BTRFS #ifdef ENABLE_BTRFS
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
...@@ -142,7 +144,7 @@ extern void dedupefiles(file_t * restrict files) ...@@ -142,7 +144,7 @@ extern void dedupefiles(file_t * restrict files)
files->d_name, dupe_filenames[cur_info], dedupeerrstr(status), files->d_name, dupe_filenames[cur_info], dedupeerrstr(status),
status, readonly_msg[readonly]); status, readonly_msg[readonly]);
} else { } else {
fprintf(stderr, "warning: dedupe only did %jd bytes: %s => %s: %s [%d]%s\n", fprintf(stderr, "warning: dedupe only did %" PRIdMAX " bytes: %s => %s: %s [%d]%s\n",
(intmax_t)same->info[cur_info].bytes_deduped, files->d_name, (intmax_t)same->info[cur_info].bytes_deduped, files->d_name,
dupe_filenames[cur_info], dedupeerrstr(status), status, readonly_msg[readonly]); dupe_filenames[cur_info], dedupeerrstr(status), status, readonly_msg[readonly]);
} }
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h> #include <string.h>
#include "jdupes.h" #include "jdupes.h"
#include "jody_win_unicode.h" #include "jody_win_unicode.h"
...@@ -21,6 +23,8 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty) ...@@ -21,6 +23,8 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty)
unsigned int number, sum, max, x; unsigned int number, sum, max, x;
size_t i; size_t i;
if (!files) return;
groups = get_max_dupes(files, &max, NULL); groups = get_max_dupes(files, &max, NULL);
max++; max++;
...@@ -61,7 +65,7 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty) ...@@ -61,7 +65,7 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty)
/* prompt for files to preserve */ /* prompt for files to preserve */
printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one)", printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one)",
curgroup, groups, counter); curgroup, groups, counter);
if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%ju byte%c each)", (uintmax_t)files->size, if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%" PRIuMAX " byte%c each)", (uintmax_t)files->size,
(files->size != 1) ? 's' : ' '); (files->size != 1) ? 's' : ' ');
printf(": "); printf(": ");
fflush(stdout); fflush(stdout);
......
...@@ -3,14 +3,16 @@ ...@@ -3,14 +3,16 @@
#include "jdupes.h" #include "jdupes.h"
#if !defined NO_HARDLINKS || !defined NO_SYMLINKS /* Compile out the code if no linking support is built in */
#if !(defined NO_HARDLINKS && defined NO_SYMLINKS)
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include "act_linkfiles.h" #include "act_linkfiles.h"
#include "jody_win_unicode.h" #include "jody_win_unicode.h"
#if defined _WIN32 || defined __CYGWIN__ #ifdef ON_WINDOWS
#include "win_stat.h" #include "win_stat.h"
#endif #endif
...@@ -212,6 +214,7 @@ extern void linkfiles(file_t *files, const int hard) ...@@ -212,6 +214,7 @@ extern void linkfiles(file_t *files, const int hard)
/* Create the desired hard link with the original file's name */ /* Create the desired hard link with the original file's name */
errno = 0; errno = 0;
success = 0;
#ifdef ON_WINDOWS #ifdef ON_WINDOWS
#ifdef UNICODE #ifdef UNICODE
if (!M2W(srcfile->d_name, wname2)) { if (!M2W(srcfile->d_name, wname2)) {
...@@ -223,7 +226,6 @@ extern void linkfiles(file_t *files, const int hard) ...@@ -223,7 +226,6 @@ extern void linkfiles(file_t *files, const int hard)
if (CreateHardLink(dupelist[x]->d_name, srcfile->d_name, NULL) == TRUE) success = 1; if (CreateHardLink(dupelist[x]->d_name, srcfile->d_name, NULL) == TRUE) success = 1;
#endif #endif
#else #else
success = 0;
if (hard) { if (hard) {
if (link(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1; if (link(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1;
#ifdef NO_SYMLINKS #ifdef NO_SYMLINKS
...@@ -241,11 +243,14 @@ extern void linkfiles(file_t *files, const int hard) ...@@ -241,11 +243,14 @@ extern void linkfiles(file_t *files, const int hard)
#endif /* NO_SYMLINKS */ #endif /* NO_SYMLINKS */
#endif /* ON_WINDOWS */ #endif /* ON_WINDOWS */
if (success) { if (success) {
if (!ISFLAG(flags, F_HIDEPROGRESS)) printf("%s %s\n", (hard ? "---->" : "-@@->"), dupelist[x]->d_name); if (!ISFLAG(flags, F_HIDEPROGRESS)) {
printf("%s ", hard ? "---->" : "-@@->");
fwprint(stdout, dupelist[x]->d_name, 1);
}
} else { } else {
/* The link failed. Warn the user and put the link target back */ /* The link failed. Warn the user and put the link target back */
if (!ISFLAG(flags, F_HIDEPROGRESS)) { if (!ISFLAG(flags, F_HIDEPROGRESS)) {
printf("-//-> "); fwprint(stderr, dupelist[x]->d_name, 1); printf("-//-> "); fwprint(stdout, dupelist[x]->d_name, 1);
} }
fprintf(stderr, "warning: unable to link '"); fwprint(stderr, dupelist[x]->d_name, 0); fprintf(stderr, "warning: unable to link '"); fwprint(stderr, dupelist[x]->d_name, 0);
fprintf(stderr, "' -> '"); fwprint(stderr, srcfile->d_name, 0); fprintf(stderr, "' -> '"); fwprint(stderr, srcfile->d_name, 0);
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
* This file is part of jdupes; see jdupes.c for license information */ * This file is part of jdupes; see jdupes.c for license information */
#include <stdio.h> #include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "jdupes.h" #include "jdupes.h"
#include "jody_win_unicode.h" #include "jody_win_unicode.h"
#include "act_printmatches.h" #include "act_printmatches.h"
...@@ -11,11 +13,13 @@ extern void printmatches(file_t * restrict files) ...@@ -11,11 +13,13 @@ extern void printmatches(file_t * restrict files)
file_t * restrict tmpfile; file_t * restrict tmpfile;
int printed = 0; int printed = 0;
LOUD(fprintf(stderr, "act_printmatches: %p\n", files));
while (files != NULL) { while (files != NULL) {
if (ISFLAG(files->flags, F_HAS_DUPES)) { if (ISFLAG(files->flags, F_HAS_DUPES)) {
printed = 1; printed = 1;
if (!ISFLAG(flags, F_OMITFIRST)) { if (!ISFLAG(flags, F_OMITFIRST)) {
if (ISFLAG(flags, F_SHOWSIZE)) printf("%jd byte%c each:\n", (intmax_t)files->size, if (ISFLAG(flags, F_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size,
(files->size != 1) ? 's' : ' '); (files->size != 1) ? 's' : ' ');
fwprint(stdout, files->d_name, 1); fwprint(stdout, files->d_name, 1);
} }
...@@ -24,14 +28,14 @@ extern void printmatches(file_t * restrict files) ...@@ -24,14 +28,14 @@ extern void printmatches(file_t * restrict files)
fwprint(stdout, tmpfile->d_name, 1); fwprint(stdout, tmpfile->d_name, 1);
tmpfile = tmpfile->duplicates; tmpfile = tmpfile->duplicates;
} }
if (files->next != NULL) printf("\n"); if (files->next != NULL) fwprint(stdout, "", 1);
} }
files = files->next; files = files->next;
} }
if (printed == 0) fprintf(stderr, "No duplicates found.\n"); if (printed == 0) fwprint(stderr, "No duplicates found.", 1);
return; return;
} }
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
* This file is part of jdupes; see jdupes.c for license information */ * This file is part of jdupes; see jdupes.c for license information */
#include <stdio.h> #include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "jdupes.h" #include "jdupes.h"
#include "act_summarize.h" #include "act_summarize.h"
...@@ -31,9 +33,9 @@ extern void summarizematches(const file_t * restrict files) ...@@ -31,9 +33,9 @@ extern void summarizematches(const file_t * restrict files)
else else
{ {
printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets); printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets);
if (numbytes < 1000) printf("%jd byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' '); if (numbytes < 1000) printf("%" PRIdMAX " byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' ');
else if (numbytes <= 1000000) printf("%jd KB\n", (intmax_t)(numbytes / 1000)); else if (numbytes <= 1000000) printf("%" PRIdMAX " KB\n", (intmax_t)(numbytes / 1000));
else printf("%jd MB\n", (intmax_t)(numbytes / 1000000)); else printf("%" PRIdMAX " MB\n", (intmax_t)(numbytes / 1000000));
} }
return; return;
} }
#!/bin/sh
# Emulates fdupes -1 output: every match set is printed on one line, with
# spaces inside file names escaped as "\ " and each name followed by a
# single space. The blank line jdupes prints between sets becomes the
# newline that ends the current output line.
# Usage: jdupes command line | ./fdupes_oneline.sh

# IFS= and -r keep leading/trailing whitespace and backslashes in file
# names intact; the extra -n test still processes a final line that has
# no terminating newline.
while IFS= read -r LINE || [ -n "$LINE" ]
do
  if [ -z "$LINE" ]
  then
    echo
  else
    # printf replaces "echo -n", which some /bin/sh implementations print
    # literally. sed receives a complete newline-terminated line, and tr
    # turns that newline into the separating space.
    printf '%s\n' "$LINE" | sed 's/ /\\ /g' | tr '\n' ' '
  fi
done
...@@ -18,6 +18,12 @@ byte-by-byte comparison. ...@@ -18,6 +18,12 @@ byte-by-byte comparison.
.SH OPTIONS .SH OPTIONS
.TP .TP
.B -@ --loud
output annoying low-level debug info while running
.TP
.B -1 --one-file-system
do not match files that are on different filesystems or devices
.TP
.B -A --nohidden .B -A --nohidden
exclude hidden files from consideration exclude hidden files from consideration
.TP .TP
...@@ -109,19 +115,23 @@ follow symlinked directories ...@@ -109,19 +115,23 @@ follow symlinked directories
.B -v --version .B -v --version
display jdupes version and compilation feature flags display jdupes version and compilation feature flags
.TP .TP
.B -x --xsize=[+]SIZE .B -x --xsize=[+]SIZE (NOTE: deprecated in favor of \-X)
exclude files of size less than SIZE from consideration, or if SIZE is exclude files of size less than SIZE from consideration, or if SIZE is
prefixed with a '+' i.e. prefixed with a '+' i.e.
jdupes -x +226 [files] jdupes -x +226 [files]
then exclude files larger than SIZE. The following suffixes can be used then exclude files larger than SIZE. Suffixes K/M/G can be used.
and are not case-sensitive: .TP
.B -X --exclude=spec:info
exclude files based on specified criteria; supported specs are:
.RS .RS
.IP `K' .IP `size[+-=]:number[suffix]'
for kilobytes (units of 1024 bytes) Match only if size is greater (+), less than (-), or equal to (=) the
.IP `M' specified number, with an optional multiplier suffix. The +/- and =
for megabytes (units of 1024 x 1024 bytes) specifiers can be combined; ex: "size+=4K" will match if size is greater
.IP `G' than or equal to four kilobytes (4096 bytes). Suffixes supported are
for gigabytes (units of 1024 x 1024 x 1024 bytes) K/M/G/T/P/E with a B or iB extension (all case-insensitive); no extension
or an iB extension specifies binary multipliers while a B extension
specifies decimal multipliers (ex: 4K or 4KiB = 4096, 4KB = 4000.)
.RE .RE
.TP .TP
.B -z --zeromatch .B -z --zeromatch
...@@ -168,6 +178,15 @@ will follow subdirectories under both a and b. ...@@ -168,6 +178,15 @@ will follow subdirectories under both a and b.
will always place 'dir1' results first in any match set (where relevant) will always place 'dir1' results first in any match set (where relevant)
.SH CAVEATS .SH CAVEATS
Using
.B \-1
or
.BR \-\-one\-file\-system
prevents matches that cross filesystems, but a more relaxed form of this
option may be added that allows cross-matching for all filesystems that
each parameter is present on.
When using When using
.B \-d .B \-d
or or
......
This diff is collapsed.
...@@ -8,6 +8,22 @@ ...@@ -8,6 +8,22 @@
extern "C" { extern "C" {
#endif #endif
/* Detect Windows and modify as needed */
#if defined _WIN32 || defined __CYGWIN__
#define ON_WINDOWS 1
#define NO_SYMLINKS 1
#define NO_PERMS 1
#define NO_SIGACTION 1
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <io.h>
#include "win_stat.h"
#define S_ISREG WS_ISREG
#define S_ISDIR WS_ISDIR
#endif /* Win32 */
#include <limits.h> #include <limits.h>
#include <stdint.h> #include <stdint.h>
#include <sys/types.h> #include <sys/types.h>
...@@ -23,20 +39,8 @@ extern "C" { ...@@ -23,20 +39,8 @@ extern "C" {
#include <linux/btrfs.h> #include <linux/btrfs.h>
#endif #endif
/* Detect Windows and modify as needed */ /* Some types are different on Windows */
#if defined _WIN32 || defined __CYGWIN__ #ifdef ON_WINDOWS
#define ON_WINDOWS 1
#define NO_SYMLINKS 1
#define NO_PERMS 1
#define NO_SIGACTION 1
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <io.h>
#include "win_stat.h"
#define S_ISREG WS_ISREG
#define S_ISDIR WS_ISDIR
typedef uint64_t jdupes_ino_t; typedef uint64_t jdupes_ino_t;
typedef uint32_t jdupes_mode_t; typedef uint32_t jdupes_mode_t;
extern const char dir_sep; extern const char dir_sep;
...@@ -64,6 +68,7 @@ extern wchar_t wname[PATH_MAX]; ...@@ -64,6 +68,7 @@ extern wchar_t wname[PATH_MAX];
extern wchar_t wname2[PATH_MAX]; extern wchar_t wname2[PATH_MAX];
extern wchar_t wstr[PATH_MAX]; extern wchar_t wstr[PATH_MAX];
extern int out_mode; extern int out_mode;
extern int err_mode;
#define M2W(a,b) MultiByteToWideChar(CP_UTF8, 0, a, -1, (LPWSTR)b, PATH_MAX) #define M2W(a,b) MultiByteToWideChar(CP_UTF8, 0, a, -1, (LPWSTR)b, PATH_MAX)
#define W2M(a,b) WideCharToMultiByte(CP_UTF8, 0, a, -1, (LPSTR)b, PATH_MAX, NULL, NULL) #define W2M(a,b) WideCharToMultiByte(CP_UTF8, 0, a, -1, (LPSTR)b, PATH_MAX, NULL, NULL)
#endif /* UNICODE */ #endif /* UNICODE */
...@@ -133,6 +138,7 @@ extern uint_fast32_t flags; ...@@ -133,6 +138,7 @@ extern uint_fast32_t flags;
#define F_ISOLATE 0x00100000U #define F_ISOLATE 0x00100000U
#define F_MAKESYMLINKS 0x00200000U #define F_MAKESYMLINKS 0x00200000U
#define F_PRINTMATCHES 0x00400000U #define F_PRINTMATCHES 0x00400000U
#define F_ONEFS 0x00800000U