...
 
Commits (6)
......@@ -20,6 +20,7 @@
/build_date.h
/jdupes
/jdupes*.exe
/jdupes-standalone
/*.pkg.tar.xz
test_temp
output.log
......
jdupes 1.12
- Small reductions in memory usage
- Add "standalone" jdupes C file which has no external requirements
- Add ability to toggle -Z with a USR1 signal (not available on Windows)
- Add -t/--no-tocttou option to disable file change safety checks
jdupes 1.11.1
- Disable build date embedding by default to make reproducible builds easier
......
......@@ -64,3 +64,20 @@ all. To use it, type:
./compare_jdupes.sh [options]
A stand-alone version of jdupes that consolidates most of the program's
functionality into a single C file is included with this source code. Major
differences include reduction or elimination of some text strings, using an
embedded 32-bit jody_hash implementation instead of relying on xxHash64,
removal of all DEBUG/LOUD and Windows support code, replacement of fancy
numeric sorting with the faster but naive strcmp() sort method, and other
minor adjustments and consolidations appropriate for single-file compilation.
This version of the program is suitable for inclusion in "Swiss army knife"
projects such as BusyBox and Toybox.
The standalone version is not meant to work on Windows; it has all of the
quirks for Windows support stripped out and there's no real advantage to
using it on Windows anyway. However, if you need added stress in your life
and you understand that this is NOT SUPPORTED and YOU'RE 100% ON YOUR OWN,
you can compile it with this make command and it'll even partially work:
make standalone NO_UNICODE=1 CFLAGS_EXTRA='-DNO_PERMS -DNO_SYMLINKS -DNO_HARDLINKS'
......@@ -45,6 +45,7 @@ MAN_EXT = 1
INSTALL = install # install : UCB/GNU Install compatible
#INSTALL = ginstall
RM = rm -f
RMDIR = rmdir -p
MKDIR = mkdir -p
#MKDIR = mkdirhier
#MKDIR = mkinstalldirs
......@@ -125,6 +126,8 @@ OBJS += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o
OBJS += xxhash.o
OBJS += $(ADDITIONAL_OBJECTS)
OBJS_CLEAN += jdupes-standalone
all: $(PROGRAM_NAME)
$(PROGRAM_NAME): $(OBJS)
......@@ -133,6 +136,8 @@ $(PROGRAM_NAME): $(OBJS)
winres.o : winres.rc winres.manifest.xml
windres winres.rc winres.o
standalone: jdupes-standalone
installdirs:
test -e $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
test -e $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)
......@@ -141,6 +146,14 @@ install: $(PROGRAM_NAME) installdirs
$(INSTALL_PROGRAM) $(PROGRAM_NAME) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
$(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)
uninstalldirs:
-test -e $(DESTDIR)$(BIN_DIR) && $(RMDIR) $(DESTDIR)$(BIN_DIR)
-test -e $(DESTDIR)$(MAN_DIR) && $(RMDIR) $(DESTDIR)$(MAN_DIR)
uninstall: uninstalldirs
$(RM) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
$(RM) $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)
test:
./test.sh
......
......@@ -16,7 +16,7 @@ Why use jdupes instead of the original fdupes or other duplicate finders?
The biggest reason is raw speed. In testing on various data sets, jdupes is
over 7 times faster than fdupes-1.51 on average.
jdupes provides a native Wndows port. Most duplicate scanners built on
jdupes provides a native Windows port. Most duplicate scanners built on
Linux and other UNIX-like systems do not compile for Windows out-of-the-box
and even if they do, they don't support Unicode and other Windows-specific
quirks and features.
......@@ -118,6 +118,7 @@ option is specified (delete, summarize, link, dedupe, etc.)
the end of the option, manpage for more details)
-s --symlinks follow symlinks
-S --size show size of duplicate files
-t --no-tocttou disable security check for file changes (aka TOCTTOU)
-T --partial-only match based on partial hashes only. WARNING:
EXTREMELY DANGEROUS paired with destructive actions!
-T must be specified twice to work. Read the manual!
......@@ -129,14 +130,32 @@ option is specified (delete, summarize, link, dedupe, etc.)
Exclusions are cumulative: -X dir:abc -X dir:efg
-z --zeromatch consider zero-length files to be duplicates
-Z --softabort If the user aborts (i.e. CTRL-C) act on matches so far
You can send SIGUSR1 to the program to toggle this
For sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)
The -t/--no-tocttou option disables checks for file changes during and after
scanning. This opens a security vulnerability that is called a TOCTTOU (time
of check to time of use) vulnerability. The program normally runs checks
immediately before scanning or taking action upon a file to see if the file
has changed in some way since it was last checked. With this option enabled,
the program will not run any of these checks, making the algorithm slightly
faster, but also increasing the risk that the program scans a file, the file
is changed after the scan, and the program still acts like the file was in
its previous state. This is particularly dangerous when considering actions
such as linking and deleting. In the most extreme case, a file could be
deleted during scanning but match other files prior to that deletion; if the
file is the first in the list of duplicates and auto-delete is used, all of
the remaining matched files will be deleted as well. This option was added
due to user reports of some filesystems (particularly network filesystems)
changing the reported file information inappropriately, rendering the entire
program unusable on such filesystems.
The -n/--noempty option was removed for safety. Matching zero-length files as
duplicates now requires explicit use of the -z/--zeromatch option instead.
Duplicate files are listed together in groups with each file displayed on a
Separate line. The groups are then separated from each other by blank lines.
separate line. The groups are then separated from each other by blank lines.
The -s/--symlinks option will treat symlinked files as regular files, but
direct symlinks will be treated as if they are hard linked files and the
......@@ -162,6 +181,17 @@ most users would expect. The decision to invert rather than reassign to a
different option was made because this feature was still fairly new at the
time of the change.
On non-Windows platforms that support SIGUSR1, you can toggle the state of
the -Z option by sending a SIGUSR1 to the program. This is handy if you want
to abort jdupes, didn't specify -Z, and changed your mind and don't want to
lose all the work that was done so far. Just do 'killall -USR1 jdupes' and
you will be able to abort with -Z. This works in reverse: if you want to
prevent a -Z from happening, a SIGUSR1 will toggle it back off. That's a lot
less useful because you can just stop and kill the program to get the same
effect, but it's there if you want it for some reason. Sending the signal
twice while the program is stopped will behave as if it was only sent once,
as per normal POSIX signal behavior.
The -O or --paramorder option allows the user greater control over what
appears in the first position of a match set, specifically for keeping the -N
option from deleting all but one file in a set in a seemingly random way. All
......
......@@ -24,17 +24,15 @@ static const char *readonly_msg[] = {
};
static char *dedupeerrstr(int err) {
static char buf[256];
buf[sizeof(buf)-1] = '\0';
tempname[sizeof(tempname)-1] = '\0';
if (err == BTRFS_SAME_DATA_DIFFERS) {
snprintf(buf, sizeof(buf), "BTRFS_SAME_DATA_DIFFERS (data modified in the meantime?)");
return buf;
snprintf(tempname, sizeof(tempname), "BTRFS_SAME_DATA_DIFFERS (data modified in the meantime?)");
return tempname;
} else if (err < 0) {
return strerror(-err);
} else {
snprintf(buf, sizeof(buf), "Unknown error %d", err);
return buf;
snprintf(tempname, sizeof(tempname), "Unknown error %d", err);
return tempname;
}
}
......
......@@ -10,6 +10,9 @@
#include "jody_win_unicode.h"
#include "act_deletefiles.h"
/* For interactive deletion input */
#define INPUT_SIZE 512
extern void deletefiles(file_t *files, int prompt, FILE *tty)
{
unsigned int counter, groups;
......
......@@ -31,7 +31,6 @@ extern void linkfiles(file_t *files, const int hard)
static unsigned int symsrc;
static char rel_path[PATHBUF_SIZE];
#endif
static char temp_path[PATHBUF_SIZE];
LOUD(fprintf(stderr, "Running linkfiles(%d)\n", hard);)
curfile = files;
......@@ -188,17 +187,17 @@ extern void linkfiles(file_t *files, const int hard)
name_len = strlen(dupelist[x]->d_name) + 14;
if (name_len > PATHBUF_SIZE) continue;
/* Assemble a temporary file name */
strcpy(temp_path, dupelist[x]->d_name);
strcat(temp_path, ".__jdupes__.tmp");
strcpy(tempname, dupelist[x]->d_name);
strcat(tempname, ".__jdupes__.tmp");
/* Rename the source file to the temporary name */
#ifdef UNICODE
if (!M2W(temp_path, wname2)) {
if (!M2W(tempname, wname2)) {
fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, srcfile->d_name, 1);
continue;
}
i = MoveFileW(wname, wname2) ? 0 : 1;
#else
i = rename(dupelist[x]->d_name, temp_path);
i = rename(dupelist[x]->d_name, tempname);
#endif
if (i != 0) {
fprintf(stderr, "warning: cannot move link target to a temporary name, not linking:\n-//-> ");
......@@ -207,7 +206,7 @@ extern void linkfiles(file_t *files, const int hard)
#ifdef UNICODE
MoveFileW(wname2, wname);
#else
rename(temp_path, dupelist[x]->d_name);
rename(tempname, dupelist[x]->d_name);
#endif
continue;
}
......@@ -256,37 +255,37 @@ extern void linkfiles(file_t *files, const int hard)
fprintf(stderr, "' -> '"); fwprint(stderr, srcfile->d_name, 0);
fprintf(stderr, "': %s\n", strerror(errno));
#ifdef UNICODE
if (!M2W(temp_path, wname2)) {
fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, temp_path, 1);
if (!M2W(tempname, wname2)) {
fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, tempname, 1);
continue;
}
i = MoveFileW(wname2, wname) ? 0 : 1;
#else
i = rename(temp_path, dupelist[x]->d_name);
i = rename(tempname, dupelist[x]->d_name);
#endif
if (i != 0) {
fprintf(stderr, "error: cannot rename temp file back to original\n");
fprintf(stderr, "original: "); fwprint(stderr, dupelist[x]->d_name, 1);
fprintf(stderr, "current: "); fwprint(stderr, temp_path, 1);
fprintf(stderr, "current: "); fwprint(stderr, tempname, 1);
}
continue;
}
/* Remove temporary file to clean up; if we can't, reverse the linking */
#ifdef UNICODE
if (!M2W(temp_path, wname2)) {
fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, temp_path, 1);
if (!M2W(tempname, wname2)) {
fprintf(stderr, "error: MultiByteToWideChar failed: "); fwprint(stderr, tempname, 1);
continue;
}
i = DeleteFileW(wname2) ? 0 : 1;
#else
i = remove(temp_path);
i = remove(tempname);
#endif
if (i != 0) {
/* If the temp file can't be deleted, there may be a permissions problem
* so reverse the process and warn the user */
fprintf(stderr, "\nwarning: can't delete temp file, reverting: ");
fwprint(stderr, temp_path, 1);
fwprint(stderr, tempname, 1);
#ifdef UNICODE
i = DeleteFileW(wname) ? 0 : 1;
#else
......@@ -298,12 +297,12 @@ extern void linkfiles(file_t *files, const int hard)
#ifdef UNICODE
i = MoveFileW(wname2, wname) ? 0 : 1;
#else
i = rename(temp_path, dupelist[x]->d_name);
i = rename(tempname, dupelist[x]->d_name);
#endif
if (i != 0) {
fprintf(stderr, "\nwarning: couldn't revert the file to its original name\n");
fprintf(stderr, "original: "); fwprint(stderr, dupelist[x]->d_name, 1);
fprintf(stderr, "current: "); fwprint(stderr, temp_path, 1);
fprintf(stderr, "current: "); fwprint(stderr, tempname, 1);
}
}
}
......
jdupes (1.12-1) unstable; urgency=medium
* New upstream version 1.12.
* Using new DH level format. Consequently:
- debian/compat: removed.
- debian/control: changed from 'debhelper' to 'debhelper-compat' in
Build-Depends field and bumped level to 12.
* debian/control: bumped Standards-Version to 4.3.0.
* debian/copyright: updated upstream and packaging copyright years.
-- Joao Eriberto Mota Filho <eriberto@debian.org> Tue, 26 Feb 2019 11:42:47 -0300
jdupes (1.11.1-2) unstable; urgency=medium
* debian/tests/control: added a new test.
......
......@@ -2,8 +2,8 @@ Source: jdupes
Section: utils
Priority: optional
Maintainer: Joao Eriberto Mota Filho <eriberto@debian.org>
Build-Depends: debhelper (>= 11)
Standards-Version: 4.2.1
Build-Depends: debhelper-compat (= 12)
Standards-Version: 4.3.0
Homepage: https://github.com/jbruchon/jdupes
Vcs-Browser: https://salsa.debian.org/debian/jdupes
Vcs-Git: https://salsa.debian.org/debian/jdupes.git
......
......@@ -5,7 +5,7 @@ Source: https://github.com/jbruchon/jdupes
Files: *
Copyright: 1999-2018 Adrian Lopez <adrian2@caribe.net>
2014-2018 Jody Lee Bruchon <jody@jodybruchon.com>
2014-2019 Jody Lee Bruchon <jody@jodybruchon.com>
License: MIT
Comment: jdupes is based on fdupes. Adrian is the fdupes upstream.
......@@ -15,7 +15,7 @@ Copyright: 2012-2016 Yann Collet <cyan@fb.com>
License: BSD-2-Clause
Files: debian/*
Copyright: 2016-2018 Joao Eriberto Mota Filho <eriberto@debian.org>
Copyright: 2016-2019 Joao Eriberto Mota Filho <eriberto@debian.org>
License: MIT
License: MIT
......
This diff is collapsed.
......@@ -153,7 +153,7 @@ exclude files based on specified criteria; supported specs are:
.IP `size[+-=]:number[suffix]'
Match only if size is greater (+), less than (-), or equal to (=) the
specified number, with an optional multiplier suffix. The +/- and =
specifiers can be combined; ex :"size+=4K" will match if size is greater
specifiers can be combined; ex :"size+=:4K" will match if size is greater
than or equal to four kilobytes (4096 bytes). Suffixes supported are
K/M/G/T/P/E with a B or iB extension (all case-insensitive); no extension
or an IB extension specify binary multipliers while a B extension
......
......@@ -121,13 +121,6 @@ static size_t auto_chunk_size = CHUNK_SIZE;
#error "PATHBUF_SIZE can't be less than PATH_MAX"
#endif
#ifndef INITIAL_DEPTH_THRESHOLD
#define INITIAL_DEPTH_THRESHOLD 8
#endif
/* For interactive deletion input */
#define INPUT_SIZE 512
/* Size suffixes - this gets exported */
const struct size_suffix size_suffix[] = {
/* Byte (someone may actually try to use this) */
......@@ -261,6 +254,8 @@ static int interrupt = 0;
/* Progress indicator time */
struct timeval time1, time2;
/* For path name mangling */
char tempname[PATHBUF_SIZE * 2];
/***** End definitions, begin code *****/
......@@ -278,6 +273,16 @@ void sighandler(const int signum)
return;
}
#ifndef ON_WINDOWS
/* SIGUSR1 handler: flips the soft-abort flag (F_SOFTABORT) on every delivery,
 * turning it on if it was off and off if it was on. */
void sigusr1(const int signum)
{
  /* The signal number is part of the handler signature but is not used */
  (void)signum;

  if (ISFLAG(flags, F_SOFTABORT)) {
    CLEARFLAG(flags, F_SOFTABORT);
  } else {
    SETFLAG(flags, F_SOFTABORT);
  }
}
#endif
/* Out of memory */
extern void oom(const char * const restrict msg)
......@@ -383,6 +388,9 @@ static void update_progress(const char * const restrict msg, const int file_perc
* Returns 1 if changed, 0 if not changed, negative if error */
extern int file_has_changed(file_t * const restrict file)
{
/* If -t/--no-tocttou specified then completely bypass this code */
if (ISFLAG(flags, F_NO_TOCTTOU)) return 0;
if (file == NULL || file->d_name == NULL) nullptr("file_has_changed()");
LOUD(fprintf(stderr, "file_has_changed('%s')\n", file->d_name);)
......@@ -639,7 +647,6 @@ extern int check_conditions(const file_t * const restrict file1, const file_t *
/* Check for exclusion conditions for a single file (1 = fail) */
static int check_singlefile(file_t * const restrict newfile)
{
static char tempname[PATHBUF_SIZE * 2];
char * restrict tp = tempname;
int excluded;
......@@ -783,7 +790,6 @@ static void grokdir(const char * const restrict dir,
file_t * restrict newfile;
struct dirent *dirinfo;
static int grokdir_level = 0;
static char tempname[PATHBUF_SIZE * 2];
size_t dirlen;
struct travdone *traverse;
int i, single = 0;
......@@ -916,6 +922,8 @@ static void grokdir(const char * const restrict dir,
tp = tempname;
memcpy(newfile->d_name, tp, dirlen + d_name_len);
/*** WARNING: tempname global gets reused by check_singlefile here! ***/
/* Single-file [l]stat() and exclusion condition check */
if (check_singlefile(newfile) != 0) {
LOUD(fprintf(stderr, "grokdir: check_singlefile rejected file\n"));
......@@ -1304,7 +1312,7 @@ static file_t **checkmatch(filetree_t * restrict tree, file_t * const restrict f
/* Do a byte-by-byte comparison in case two different files produce the
same signature. Unlikely, but better safe than sorry. */
static inline int confirmmatch(FILE * const restrict file1, FILE * const restrict file2, off_t size)
static inline int confirmmatch(FILE * const restrict file1, FILE * const restrict file2, const off_t size)
{
static char *c1 = NULL, *c2 = NULL;
size_t r1, r2;
......@@ -1520,6 +1528,7 @@ static inline void help_text(void)
printf(" \tpermission bits as duplicates\n");
#endif
printf(" -P --print=type \tprint extra info (partial, early, fullhash)\n");
printf(" -q --quiet \thide progress indicator\n");
printf(" -Q --quick \tskip byte-for-byte confirmation for quick matching\n");
printf(" \tWARNING: -Q can result in data loss! Be very careful!\n");
printf(" -r --recurse \tfor every directory, process its subdirectories too\n");
......@@ -1530,9 +1539,7 @@ static inline void help_text(void)
printf(" -s --symlinks \tfollow symlinks\n");
#endif
printf(" -S --size \tshow size of duplicate files\n");
printf(" -q --quiet \thide progress indicator\n");
printf(" -Q --quick \tskip byte-by-byte duplicate verification. WARNING:\n");
printf(" \tthis may delete non-duplicates! Read the manual first!\n");
printf(" -t --no-tocttou \tdisable security check for file changes (aka TOCTTOU)\n");
printf(" -T --partial-only \tmatch based on partial hashes only. WARNING:\n");
printf(" \tEXTREMELY DANGEROUS paired with destructive actions!\n");
printf(" \t-T must be specified twice to work. Read the manual!\n");
......@@ -1544,6 +1551,9 @@ static inline void help_text(void)
printf(" \tExclusions are cumulative: -X dir:abc -X dir:efg\n");
printf(" -z --zeromatch \tconsider zero-length files to be duplicates\n");
printf(" -Z --softabort \tIf the user aborts (i.e. CTRL-C) act on matches so far\n");
#ifndef ON_WINDOWS
printf(" \tYou can send SIGUSR1 to the program to toggle this\n");
#endif
printf("\nFor sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)\n");
#ifdef OMIT_GETOPT_LONG
printf("Note: Long options are not supported in this build.\n\n");
......@@ -1605,6 +1615,7 @@ int main(int argc, char **argv)
{ "recursive:", 0, 0, 'R' },
{ "symlinks", 0, 0, 's' },
{ "size", 0, 0, 'S' },
{ "no-tocttou", 0, 0, 't' },
{ "partial-only", 0, 0, 'T' },
{ "version", 0, 0, 'v' },
{ "xsize", 1, 0, 'x' },
......@@ -1663,7 +1674,7 @@ int main(int argc, char **argv)
oldargv = cloneargs(argc, argv);
while ((opt = GETOPT(argc, argv,
"@01ABC:dDfhHiIlLmMnNOpP:qQrRsSTvVzZo:x:X:"
"@01ABC:dDfhHiIlLmMnNOpP:qQrRsStTvVzZo:x:X:"
#ifndef OMIT_GETOPT_LONG
, long_options, NULL
#endif
......@@ -1763,6 +1774,9 @@ int main(int argc, char **argv)
case 'R':
SETFLAG(flags, F_RECURSEAFTER);
break;
case 't':
SETFLAG(flags, F_NO_TOCTTOU);
break;
case 'T':
if (partialonly_spec == 0)
partialonly_spec = 1;
......@@ -1983,6 +1997,10 @@ int main(int argc, char **argv)
/* Catch CTRL-C */
signal(SIGINT, sighandler);
#ifndef ON_WINDOWS
/* Catch SIGUSR1 and use it to toggle -Z (soft abort) */
signal(SIGUSR1, sigusr1);
#endif
while (curfile) {
static file_t **match = NULL;
......
......@@ -116,9 +116,6 @@ extern "C" {
#endif
/* How many operations to wait before updating progress counters */
#define DELAY_COUNT 256
/* Behavior modification flags */
extern uint_fast32_t flags;
#define F_RECURSE 0x00000001U
......@@ -147,6 +144,7 @@ extern uint_fast32_t flags;
#define F_ONEFS 0x00800000U
#define F_PRINTNULL 0x01000000U
#define F_PARTIALONLY 0x02000000U
#define F_NO_TOCTTOU 0x04000000U
#define F_LOUD 0x40000000U
#define F_DEBUG 0x80000000U
......@@ -182,9 +180,6 @@ typedef enum {
#define PATHBUF_SIZE 4096
#endif
/* For interactive deletion input */
#define INPUT_SIZE 512
/* Per-file information */
typedef struct _file {
struct _file *duplicates;
......@@ -269,7 +264,7 @@ struct size_suffix {
};
extern const struct size_suffix size_suffix[];
extern char tempname[PATHBUF_SIZE * 2];
extern void oom(const char * const restrict msg);
extern void nullptr(const char * restrict func);
......
......@@ -4,7 +4,7 @@
#ifndef JDUPES_VERSION_H
#define JDUPES_VERSION_H
#define VER "1.11.1"
#define VERDATE "2018-11-09"
#define VER "1.12"
#define VERDATE "2019-02-18"
#endif /* JDUPES_VERSION_H */