Import Upstream version 1.9

parent 4f04f47a
jdupes 1.9
- stderr on Windows is no longer polluted or empty when redirected
- Added -1/--one-file-system to restrict recursion to the same filesystem
- Added a universal exclusion stack which is currently only used for -X
- Added -X/--exclude to use exclusion stack; supersedes -x/--xsize
- More robust BTRFS enablement behavior in Makefile
- Fixed Unicode display for hard linking on Windows
- Efficiency improvements to internal memory allocator (string_malloc)
- Documentation improvements and updates
- Provide "fdupes_oneline.sh" which emulates old "fdupes -1" feature
- Single file names passed as arguments are now accepted and processed
jdupes 1.8
- All files are now licensed under The MIT License exclusively
......
......@@ -19,31 +19,28 @@ make CFLAGS_EXTRA='-DYOUR_OPTION_ONE -DYOUR_OPTION_TWO'
This is a list of options that can be "turned on" this way:
ENABLE_BTRFS Enable '-B/--dedupe' for btrfs deduplication
DEBUG * Turn on algorithm statistic reporting with '-D'
OMIT_GETOPT_LONG Do not use getopt_long() C library call
ON_WINDOWS Modify code to compile with MinGW on Windows
USE_TREE_REBALANCE * Use experimental tree rebalancing code
CONSIDER_IMBALANCE * Change tree rebalance to analyze weights first
LOW_MEMORY Build for lower memory usage instead of speed
* These options may slow down the program somewhat and are off by
default. Do not enable them unless you are experimenting.
Certain options need to be turned on by setting a variable passed to
make instead of using CFLAGS_EXTRA, i.e. 'make DEBUG=1':
DEBUG Turn on algorithm statistic reporting with '-D'
LOUD '-@' for low-level debugging; enables DEBUG
ENABLE_BTRFS Enable '-B/--dedupe' for btrfs deduplication
LOW_MEMORY Build for lower memory usage instead of speed
The LOW_MEMORY option tweaks various knobs in the program to lower
total memory usage. It also disables some features to reduce the size
of certain data structures. The improvements in memory usage are not
very large, but if you're running in a very RAM-limited environment
or have a CPU with very small caches it may be the best choice.
You can turn on the -@ option for "loud" debugging with:
make LOUD=1
Non-loud debugging can be enabled with
make DEBUG=1
A test directory is included so that you may familiarize yourself with
the way jdupes operates. You may test the program before installing
it by issuing a command such as "./jdupes testdir" or
......
......@@ -51,7 +51,7 @@ MKDIR = mkdir -p
# Make Configuration
CC ?= gcc
COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -pedantic -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef
COMPILER_OPTIONS = -Wall -Wextra -Wwrite-strings -Wcast-align -Wstrict-aliasing -Wstrict-overflow -Wstrict-prototypes -Wpointer-arith -Wundef
COMPILER_OPTIONS += -Wshadow -Wfloat-equal -Wstrict-overflow=5 -Waggregate-return -Wcast-qual -Wswitch-default -Wswitch-enum -Wconversion -Wunreachable-code -Wformat=2 -Winit-self
COMPILER_OPTIONS += -std=gnu99 -O2 -g -D_FILE_OFFSET_BITS=64 -fstrict-aliasing -pipe
......@@ -71,6 +71,13 @@ ifdef HARDEN
COMPILER_OPTIONS += -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -fpie -Wl,-z,relro -Wl,-z,now
endif
# Catch someone trying to enable BTRFS in flags and turn on ENABLE_BTRFS
ifneq (,$(findstring DENABLE_BTRFS,$(CFLAGS)))
ENABLE_BTRFS=1
endif
ifneq (,$(findstring DENABLE_BTRFS,$(CFLAGS_EXTRA)))
ENABLE_BTRFS=1
endif
# MinGW needs this for printf() conversions to work
ifeq ($(OS), Windows_NT)
......@@ -79,7 +86,7 @@ ifndef NO_UNICODE
COMPILER_OPTIONS += -municode
endif
COMPILER_OPTIONS += -D__USE_MINGW_ANSI_STDIO=1
OBJECT_FILES += win_stat.o
OBJS += win_stat.o
override undefine ENABLE_BTRFS
override undefine HAVE_BTRFS_IOCTL_H
endif
......@@ -91,7 +98,9 @@ endif
# New BTRFS support option
ifdef ENABLE_BTRFS
COMPILER_OPTIONS += -DENABLE_BTRFS
OBJECT_FILES += act_dedupefiles.o
OBJS += act_dedupefiles.o
else
OBJS_CLEAN += act_dedupefiles.o
endif
# Low memory mode
ifdef LOW_MEMORY
......@@ -100,33 +109,33 @@ endif
CFLAGS += $(COMPILER_OPTIONS) $(CFLAGS_EXTRA)
INSTALL_PROGRAM = $(INSTALL) -c -m 0755
INSTALL_DATA = $(INSTALL) -c -m 0644
INSTALL_PROGRAM = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -m 0644
# ADDITIONAL_OBJECTS - some platforms will need additional object files
# to support features not supplied by their vendor. Eg: GNU getopt()
#ADDITIONAL_OBJECTS += getopt.o
OBJECT_FILES += jdupes.o jody_hash.o jody_paths.o jody_sort.o jody_win_unicode.o string_malloc.o
OBJECT_FILES += jody_cacheinfo.o
OBJECT_FILES += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o
OBJECT_FILES += $(ADDITIONAL_OBJECTS)
OBJS += jdupes.o jody_hash.o jody_paths.o jody_sort.o jody_win_unicode.o string_malloc.o
OBJS += jody_cacheinfo.o
OBJS += act_deletefiles.o act_linkfiles.o act_printmatches.o act_summarize.o
OBJS += $(ADDITIONAL_OBJECTS)
all: jdupes
jdupes: $(OBJECT_FILES)
$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJECT_FILES)
jdupes: $(OBJS)
$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROGRAM_NAME) $(OBJS)
installdirs:
test -d $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
test -d $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)
test -e $(DESTDIR)$(BIN_DIR) || $(MKDIR) $(DESTDIR)$(BIN_DIR)
test -e $(DESTDIR)$(MAN_DIR) || $(MKDIR) $(DESTDIR)$(MAN_DIR)
install: jdupes installdirs
$(INSTALL_PROGRAM) $(PROGRAM_NAME) $(DESTDIR)$(BIN_DIR)/$(PROGRAM_NAME)
$(INSTALL_DATA) $(PROGRAM_NAME).1 $(DESTDIR)$(MAN_DIR)/$(PROGRAM_NAME).$(MAN_EXT)
clean:
$(RM) $(OBJECT_FILES) $(PROGRAM_NAME) jdupes.exe *~ *.gcno *.gcda *.gcov
$(RM) $(OBJS) $(OBJS_CLEAN) $(PROGRAM_NAME) $(PROGRAM_NAME).exe *~ *.gcno *.gcda *.gcov
distclean: clean
$(RM) *.pkg.tar.xz
......
......@@ -69,6 +69,8 @@ Usage
--------------------------------------------------------------------------
Usage: jdupes [options] DIRECTORY...
-@ --loud output annoying low-level debug info while running
-1 --one-file-system do not match files on different filesystems/devices
-A --nohidden exclude hidden files from consideration
-B --dedupe Send matches to btrfs for block-level deduplication
-d --delete prompt user for files to preserve and delete all
......@@ -77,15 +79,15 @@ Usage: jdupes [options] DIRECTORY...
with -s or --symlinks, or when specifying a
particular directory more than once; refer to the
documentation for additional information
-D --debug output debug statistics after completion
-f --omitfirst omit the first file in each set of matches
-h --help display this help message
-H --hardlinks treat hard-linked files as duplicate files. Normally
hard links are treated as non-duplicates for safety
-H --hardlinks treat any linked files as duplicate files. Normally
linked files are treated as non-duplicates for safety
-i --reverse reverse (invert) the match sort order
-I --isolate files in the same specified directory won't match
-l --linksoft make relative symlinks for duplicates w/o prompting
-L --linkhard hard link all duplicate files without prompting
Windows allows a maximum of 1023 hard links per file
-m --summarize summarize dupe information
-N --noprompt together with --delete, preserve the first file in
each set of duplicates and delete the rest without
......@@ -95,8 +97,9 @@ Usage: jdupes [options] DIRECTORY...
mtime (BY=time) or filename (BY=name, the default)
-p --permissions don't consider files with different owner/group or
permission bits as duplicates
-r --recurse for every directory given follow subdirectories
encountered within
-Q --quick skip byte-for-byte confirmation for quick matching
WARNING: -Q can result in data loss! Be very careful!
-r --recurse for every directory, process its subdirectories too
-R --recurse: for each directory given after this option follow
subdirectories encountered within (note the ':' at
the end of the option, manpage for more details)
......@@ -106,10 +109,14 @@ Usage: jdupes [options] DIRECTORY...
-v --version display jdupes version and license information
-x --xsize=SIZE exclude files of size < SIZE bytes from consideration
--xsize=+SIZE '+' specified before SIZE, exclude size > SIZE
K/M/G size suffixes can be used (case-insensitive)
-X --exclude=spec:info exclude files based on specified criteria
specs: dir size+-=
Exclusions are cumulative: -X dir:abc -X dir:efg
-z --zeromatch consider zero-length files to be duplicates
-Z --softabort If the user aborts (i.e. CTRL-C) act on matches so far
For sizes, K/M/G/T/P/E[B|iB] suffixes can be used (case-insensitive)
The -n/--noempty option was removed for safety. Matching zero-length files as
duplicates now requires explicit use of the -z/--zeromatch option instead.
......@@ -118,9 +125,7 @@ Separate line. The groups are then separated from each other by blank lines.
The -s/--symlinks option will treat symlinked files as regular files, but
direct symlinks will be treated as if they are hard linked files and the
-H/--hardlinks option will apply to them in the same manner. This option
used to follow symlinked directories but in the current implementation this
behavior has been disabled due to it being too dangerous.
-H/--hardlinks option will apply to them in the same manner.
When using -d or --delete, care should be taken to insure against accidental
data loss. While no information will be immediately lost, using this option
......@@ -152,7 +157,7 @@ each link candidate. These arrows are as follows:
-//-> File linking failed due to an error during the linking process
If your data set has linked files and you do not use -L to always consider
If your data set has linked files and you do not use -H to always consider
them as duplicates, you may still see linked files appear together in match
sets. This is caused by a separate file that matches with linked files
independently and is the correct behavior. See notes below on the "triangle
......@@ -241,23 +246,24 @@ This is also an exercise for the user.
jdupes tracks each directory traversed by dev:inode pair to avoid adding
the contents of the same directory twice. This prevents the user from
being able to register all of their files twice by duplicating an entry
on the command line. Symlinked directories are not followed. Files are
renamed to a temporary name before any linking is done and if the link
operation fails they are renamed back to the original name.
on the command line. Symlinked directories are only followed if they
weren't already followed earlier. Files are renamed to a temporary name
before any linking is done and if the link operation fails they are renamed
back to the original name.
"Collision Robustness"
jdupes uses jodyhash for file data hashing. This hash is extremely fast
with a low collision rate, but it still encounters collisions as any hash
function will ("secure" or otherwise) due to the "birthday problem." This
function will ("secure" or otherwise) due to the pigeonhole principle. This
is why jdupes performs a full-file verification before declaring a match.
It is slower than matching on hashes alone, but the birthday problem puts
It's slower than matching by hash only, but the pigeonhole principle puts
all data sets larger than the hash at risk of collision, meaning a false
duplicate detection and data loss. The slower completion time is not as
important as data integrity. Checking for a match based on hashes alone
is irresponsible, and using secure hashes like MD5 or the SHA families
is orders of magnitude slower than jodyhash while still suffering from
the risk brought about by the birthday problem. In short, the birthday
problem means that if you have 365 days in a year and 366 people, the
the risk brought about by the pigeonhole principle. An example of this problem is
as follows: if you have 365 days in a year and 366 people, the chance of
having at least two birthdays on the same day is guaranteed; likewise,
even though SHA512 is a 512-bit (64-byte) wide hash, there are guaranteed
to be at least 256 pairs of data streams that cause a collision once any
......
......@@ -11,9 +11,3 @@
- Add a way to store stat() info + initial and full hashes for
explicit loading in future runs to speed up repeated calls to
the program.
- The --xsize option can be improved. Instead of simply specifying an
exclusion size min/max, the option should offer multiple ways to
specify allowed file sizes. Examples:
- '--xsize=512-1024' only examine files between 512 and 1024 bytes
- '--xsize=1M-3M,+20M' examine files 1-3 MB and >20 MB in size
......@@ -6,6 +6,8 @@
#ifdef ENABLE_BTRFS
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
......@@ -142,7 +144,7 @@ extern void dedupefiles(file_t * restrict files)
files->d_name, dupe_filenames[cur_info], dedupeerrstr(status),
status, readonly_msg[readonly]);
} else {
fprintf(stderr, "warning: dedupe only did %jd bytes: %s => %s: %s [%d]%s\n",
fprintf(stderr, "warning: dedupe only did %" PRIdMAX " bytes: %s => %s: %s [%d]%s\n",
(intmax_t)same->info[cur_info].bytes_deduped, files->d_name,
dupe_filenames[cur_info], dedupeerrstr(status), status, readonly_msg[readonly]);
}
......
......@@ -3,6 +3,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include "jdupes.h"
#include "jody_win_unicode.h"
......@@ -21,6 +23,8 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty)
unsigned int number, sum, max, x;
size_t i;
if (!files) return;
groups = get_max_dupes(files, &max, NULL);
max++;
......@@ -61,7 +65,7 @@ extern void deletefiles(file_t *files, int prompt, FILE *tty)
/* prompt for files to preserve */
printf("Set %u of %u: keep which files? (1 - %u, [a]ll, [n]one)",
curgroup, groups, counter);
if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%ju byte%c each)", (uintmax_t)files->size,
if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%" PRIuMAX " byte%c each)", (uintmax_t)files->size,
(files->size != 1) ? 's' : ' ');
printf(": ");
fflush(stdout);
......
......@@ -3,14 +3,16 @@
#include "jdupes.h"
#if !defined NO_HARDLINKS || !defined NO_SYMLINKS
/* Compile out the code if no linking support is built in */
#if !(defined NO_HARDLINKS && defined NO_SYMLINKS)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "act_linkfiles.h"
#include "jody_win_unicode.h"
#if defined _WIN32 || defined __CYGWIN__
#ifdef ON_WINDOWS
#include "win_stat.h"
#endif
......@@ -212,6 +214,7 @@ extern void linkfiles(file_t *files, const int hard)
/* Create the desired hard link with the original file's name */
errno = 0;
success = 0;
#ifdef ON_WINDOWS
#ifdef UNICODE
if (!M2W(srcfile->d_name, wname2)) {
......@@ -223,7 +226,6 @@ extern void linkfiles(file_t *files, const int hard)
if (CreateHardLink(dupelist[x]->d_name, srcfile->d_name, NULL) == TRUE) success = 1;
#endif
#else
success = 0;
if (hard) {
if (link(srcfile->d_name, dupelist[x]->d_name) == 0) success = 1;
#ifdef NO_SYMLINKS
......@@ -241,11 +243,14 @@ extern void linkfiles(file_t *files, const int hard)
#endif /* NO_SYMLINKS */
#endif /* ON_WINDOWS */
if (success) {
if (!ISFLAG(flags, F_HIDEPROGRESS)) printf("%s %s\n", (hard ? "---->" : "-@@->"), dupelist[x]->d_name);
if (!ISFLAG(flags, F_HIDEPROGRESS)) {
printf("%s ", hard ? "---->" : "-@@->");
fwprint(stdout, dupelist[x]->d_name, 1);
}
} else {
/* The link failed. Warn the user and put the link target back */
if (!ISFLAG(flags, F_HIDEPROGRESS)) {
printf("-//-> "); fwprint(stderr, dupelist[x]->d_name, 1);
printf("-//-> "); fwprint(stdout, dupelist[x]->d_name, 1);
}
fprintf(stderr, "warning: unable to link '"); fwprint(stderr, dupelist[x]->d_name, 0);
fprintf(stderr, "' -> '"); fwprint(stderr, srcfile->d_name, 0);
......
......@@ -2,6 +2,8 @@
* This file is part of jdupes; see jdupes.c for license information */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "jdupes.h"
#include "jody_win_unicode.h"
#include "act_printmatches.h"
......@@ -11,11 +13,13 @@ extern void printmatches(file_t * restrict files)
file_t * restrict tmpfile;
int printed = 0;
LOUD(fprintf(stderr, "act_printmatches: %p\n", files));
while (files != NULL) {
if (ISFLAG(files->flags, F_HAS_DUPES)) {
printed = 1;
if (!ISFLAG(flags, F_OMITFIRST)) {
if (ISFLAG(flags, F_SHOWSIZE)) printf("%jd byte%c each:\n", (intmax_t)files->size,
if (ISFLAG(flags, F_SHOWSIZE)) printf("%" PRIdMAX " byte%c each:\n", (intmax_t)files->size,
(files->size != 1) ? 's' : ' ');
fwprint(stdout, files->d_name, 1);
}
......@@ -24,14 +28,14 @@ extern void printmatches(file_t * restrict files)
fwprint(stdout, tmpfile->d_name, 1);
tmpfile = tmpfile->duplicates;
}
if (files->next != NULL) printf("\n");
if (files->next != NULL) fwprint(stdout, "", 1);
}
files = files->next;
}
if (printed == 0) fprintf(stderr, "No duplicates found.\n");
if (printed == 0) fwprint(stderr, "No duplicates found.", 1);
return;
}
......@@ -2,6 +2,8 @@
* This file is part of jdupes; see jdupes.c for license information */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include "jdupes.h"
#include "act_summarize.h"
......@@ -31,9 +33,9 @@ extern void summarizematches(const file_t * restrict files)
else
{
printf("%d duplicate files (in %d sets), occupying ", numfiles, numsets);
if (numbytes < 1000) printf("%jd byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' ');
else if (numbytes <= 1000000) printf("%jd KB\n", (intmax_t)(numbytes / 1000));
else printf("%jd MB\n", (intmax_t)(numbytes / 1000000));
if (numbytes < 1000) printf("%" PRIdMAX " byte%c\n", (intmax_t)numbytes, (numbytes != 1) ? 's' : ' ');
else if (numbytes <= 1000000) printf("%" PRIdMAX " KB\n", (intmax_t)(numbytes / 1000));
else printf("%" PRIdMAX " MB\n", (intmax_t)(numbytes / 1000000));
}
return;
}
#!/bin/sh

# Emulates fdupes -1 output: every match set is printed on a single line,
# file names separated by one space, with spaces inside a name written
# as "\ ".
# Usage: jdupes command line | ./fdupes_oneline.sh

# IFS= and -r keep each name intact: without them `read` strips leading and
# trailing blanks and consumes backslashes that are part of a file name.
while IFS= read -r LINE
do
	if [ -z "$LINE" ]
	then
		# A blank input line separates match sets: end the current output line
		printf '\n'
	else
		# printf instead of `echo -n`: echo's treatment of -n, of names that
		# look like options, and of backslashes differs between shells
		printf '%s' "$LINE" | sed 's/ /\\ /g'
		printf ' '
	fi
done
......@@ -18,6 +18,12 @@ byte-by-byte comparison.
.SH OPTIONS
.TP
.B -@ --loud
output annoying low-level debug info while running
.TP
.B -1 --one-file-system
do not match files that are on different filesystems or devices
.TP
.B -A --nohidden
exclude hidden files from consideration
.TP
......@@ -109,19 +115,23 @@ follow symlinked directories
.B -v --version
display jdupes version and compilation feature flags
.TP
.B -x --xsize=[+]SIZE
.B -x --xsize=[+]SIZE (NOTE: deprecated in favor of \-X)
exclude files of size less than SIZE from consideration, or if SIZE is
prefixed with a '+' i.e.
jdupes -x +226 [files]
then exclude files larger than SIZE. The following suffixes can be used
and are not case-sensitive:
then exclude files larger than SIZE. Suffixes K/M/G can be used.
.TP
.B -X --exclude=spec:info
exclude files based on specified criteria; supported specs are:
.RS
.IP `K'
for kilobytes (units of 1024 bytes)
.IP `M'
for megabytes (units of 1024 x 1024 bytes)
.IP `G'
for gigabytes (units of 1024 x 1024 x 1024 bytes)
.IP `size[+-=]:number[suffix]'
Match only if size is greater than (+), less than (-), or equal to (=) the
specified number, with an optional multiplier suffix. The +/- and =
specifiers can be combined; ex: "size+=4K" will match if size is greater
than or equal to four kilobytes (4096 bytes). Suffixes supported are
K/M/G/T/P/E with an optional B or iB extension (all case-insensitive); no
extension or an iB extension specifies binary multipliers while a B
extension specifies decimal multipliers (ex: 4K or 4KiB = 4096, 4KB = 4000).
.RE
.TP
.B -z --zeromatch
......@@ -168,6 +178,15 @@ will follow subdirectories under both a and b.
will always place 'dir1' results first in any match set (where relevant)
.SH CAVEATS
Using
.B \-1
or
.BR \-\-one\-file\-system
prevents matches that cross filesystems, but a more relaxed form of this
option may be added that allows cross-matching for all filesystems that
each parameter is present on.
When using
.B \-d
or
......
This diff is collapsed.
......@@ -8,6 +8,22 @@
extern "C" {
#endif
/* Detect Windows and modify as needed */
#if defined _WIN32 || defined __CYGWIN__
#define ON_WINDOWS 1
#define NO_SYMLINKS 1
#define NO_PERMS 1
#define NO_SIGACTION 1
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <io.h>
#include "win_stat.h"
#define S_ISREG WS_ISREG
#define S_ISDIR WS_ISDIR
#endif /* Win32 */
#include <limits.h>
#include <stdint.h>
#include <sys/types.h>
......@@ -23,20 +39,8 @@ extern "C" {
#include <linux/btrfs.h>
#endif
/* Detect Windows and modify as needed */
#if defined _WIN32 || defined __CYGWIN__
#define ON_WINDOWS 1
#define NO_SYMLINKS 1
#define NO_PERMS 1
#define NO_SIGACTION 1
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <io.h>
#include "win_stat.h"
#define S_ISREG WS_ISREG
#define S_ISDIR WS_ISDIR
/* Some types are different on Windows */
#ifdef ON_WINDOWS
typedef uint64_t jdupes_ino_t;
typedef uint32_t jdupes_mode_t;
extern const char dir_sep;
......@@ -64,6 +68,7 @@ extern wchar_t wname[PATH_MAX];
extern wchar_t wname2[PATH_MAX];
extern wchar_t wstr[PATH_MAX];
extern int out_mode;
extern int err_mode;
#define M2W(a,b) MultiByteToWideChar(CP_UTF8, 0, a, -1, (LPWSTR)b, PATH_MAX)
#define W2M(a,b) WideCharToMultiByte(CP_UTF8, 0, a, -1, (LPSTR)b, PATH_MAX, NULL, NULL)
#endif /* UNICODE */
......@@ -133,6 +138,7 @@ extern uint_fast32_t flags;
#define F_ISOLATE 0x00100000U
#define F_MAKESYMLINKS 0x00200000U
#define F_PRINTMATCHES 0x00400000U
#define F_ONEFS 0x00800000U
#define F_LOUD 0x40000000U
#define F_DEBUG 0x80000000U
......@@ -184,9 +190,11 @@ typedef struct _file {
uid_t uid;
gid_t gid;
#endif
#ifdef ON_WINDOWS
#ifndef NO_HARDLINKS
DWORD nlink;
#ifndef NO_HARDLINKS
#ifndef ON_WINDOWS
nlink_t nlink;
#else
uint32_t nlink; /* link count on Windows is always a DWORD */
#endif
#endif
} file_t;
......@@ -209,6 +217,50 @@ extern struct winstat ws;
extern struct stat s;
#endif
/* -X exclusion parameter stack
 * Entries are chained together through the 'next' pointer. */
struct exclude {
struct exclude *next; /* next entry in the chain */
unsigned int flags; /* presumably a combination of the X_* exclude flags - TODO confirm */
int64_t size; /* presumably the numeric value for size-based exclusions - TODO confirm */
char param[]; /* flexible array member; presumably the string parameter for non-numeric exclusions - TODO confirm */
};
/* Exclude parameter flags */
#define X_DIR 0x00000001U
#define X_SIZE_EQ 0x00000002U
#define X_SIZE_GT 0x00000004U
#define X_SIZE_LT 0x00000008U
/* The X-than-or-equal are combination flags */
#define X_SIZE_GTEQ 0x00000006U
#define X_SIZE_LTEQ 0x0000000aU
/* Size specifier flags */
#define XX_EXCL_SIZE 0x0000000eU
/* Flags that use numeric offset instead of a string */
#define XX_EXCL_OFFSET 0x0000000eU
/* Flags that require a data parameter */
#define XX_EXCL_DATA 0x0000000fU
/* Exclude definition array
 * Each element pairs a textual tag with a flag value. */
struct exclude_tags {
const char * const tag; /* tag text; presumably the spec name given to -X - TODO confirm */
const uint32_t flags; /* flag bits associated with this tag; presumably X_* values - TODO confirm */
};
extern const struct exclude_tags exclude_tags[];
extern struct exclude *exclude_head;
/* Suffix definitions (treat as case-insensitive)
 * Each element pairs a suffix string with its numeric multiplier. */
struct size_suffix {
const char * const suffix; /* suffix text, to be compared without regard to case */
const int64_t multiplier; /* multiplier paired with this suffix; presumably applied to the parsed number - TODO confirm */
};
extern const struct size_suffix size_suffix[];
extern void oom(const char * const restrict msg);
extern void nullptr(const char * restrict func);
extern int file_has_changed(file_t * const restrict file);
......
......@@ -17,7 +17,7 @@
* This shift was decided upon after lots of testing and
* changing it will likely cause lots of hash collisions. */
#ifndef JODY_HASH_SHIFT
#define JODY_HASH_SHIFT 11
#define JODY_HASH_SHIFT 14
#endif
/* The salt value's purpose is to cause each byte in the
......@@ -88,7 +88,7 @@ extern hash_t jody_block_hash(const hash_t * restrict data,
element = *data;
hash += element;
hash += JODY_HASH_CONSTANT;
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(hash_t) * 8 - JODY_HASH_SHIFT);
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(hash_t) * 8 - JODY_HASH_SHIFT); /* bit rotate left */
hash ^= element;
hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(hash_t) * 8 - JODY_HASH_SHIFT);
hash ^= JODY_HASH_CONSTANT;
......
......@@ -12,7 +12,7 @@ extern "C" {
#include <stdint.h>
/* Width of a jody_hash. Changing this will also require
* changing the width of tail masks and endian conversion */
* changing the width of tail masks to match. */
#ifndef JODY_HASH_WIDTH
#define JODY_HASH_WIDTH 64
#endif
......@@ -28,7 +28,7 @@ typedef uint16_t hash_t;
#endif
/* Version increments when algorithm changes incompatibly */
#define JODY_HASH_VERSION 4
#define JODY_HASH_VERSION 5
extern hash_t jody_block_hash(const hash_t * restrict data,
const hash_t start_hash, const size_t count);
......
......@@ -50,14 +50,17 @@ error_wc2mb:
extern int fwprint(FILE * const restrict stream, const char * const restrict str, const int cr)
{
int retval;
int stream_mode = out_mode;
if (out_mode != _O_TEXT) {
if (stream == stderr) stream_mode = err_mode;
if (stream_mode == _O_U16TEXT) {
/* Convert to wide string and send to wide console output */
if (!MultiByteToWideChar(CP_UTF8, 0, str, -1, (LPWSTR)wstr, PATH_MAX)) return -1;
fflush(stdout); fflush(stderr);
_setmode(_fileno(stream), out_mode);
fflush(stream);
_setmode(_fileno(stream), stream_mode);
retval = fwprintf(stream, L"%S%S", wstr, cr ? L"\n" : L"");
fflush(stdout); fflush(stderr);
fflush(stream);
_setmode(_fileno(stream), _O_TEXT);
return retval;
} else {
......
......@@ -24,15 +24,12 @@
#define SMA_MAX_FREE 32
#endif
/* Minimum free object size to consider adding to free list */
#ifndef SMA_MIN_SLACK
#define SMA_MIN_SLACK 48
#endif
#ifdef DEBUG
uintmax_t sma_allocs = 0;
uintmax_t sma_free_ignored = 0;
uintmax_t sma_free_good = 0;
uintmax_t sma_free_merged = 0;
uintmax_t sma_free_replaced = 0;
uintmax_t sma_free_reclaimed = 0;
uintmax_t sma_free_scanned = 0;
uintmax_t sma_free_tails = 0;
......@@ -44,27 +41,21 @@ uintmax_t sma_free_tails = 0;
/* This is used to bypass string_malloc for debugging */
#ifdef SMA_PASSTHROUGH
void *string_malloc(size_t len)
{
return malloc(len);
}
void string_free(void *ptr)
{
free(ptr);
return;
}
void string_malloc_destroy(void) {
return;
}
/* SMA_PASSTHROUGH build: hand every request straight to the C library
 * allocator so the string allocator itself is bypassed for debugging. */
void *string_malloc(size_t len)
{
	void *mem = malloc(len);

	return mem;
}

/* Release a block obtained from string_malloc() */
void string_free(void *ptr)
{
	free(ptr);
}

/* Nothing to do in passthrough mode */
void string_malloc_destroy(void)
{
}
#else /* Not SMA_PASSTHROUGH mode */
/* One slot of the freed-object list */
struct freelist {
void *addr; /* address of the freed object; NULL marks an empty slot */
size_t size; /* size recorded for the object; compared against the requested size when scanning */
};
static void *sma_head = NULL;
static uintptr_t *sma_curpage = NULL;
static unsigned int sma_pages = 0;
static void *sma_freelist[SMA_MAX_FREE];
static struct freelist sma_freelist[SMA_MAX_FREE];
static int sma_freelist_cnt = 0;
static size_t sma_nextfree = sizeof(uintptr_t);
......@@ -84,11 +75,11 @@ static inline void *scan_freelist(const size_t size)
if (used == sma_freelist_cnt) return NULL;
DBG(sma_free_scanned++;)
object = sma_freelist[i];
object = sma_freelist[i].addr;
/* Skip empty entries */
if (object == NULL) continue;
sz = *object;
sz = sma_freelist[i].size;
used++;
/* Skip smaller objects */
......@@ -106,8 +97,8 @@ static inline void *scan_freelist(const size_t size)
/* Return smallest object found and delete from free list */
if (min_i != -1) {
min_p = sma_freelist[min_i];
sma_freelist[min_i] = NULL;
min_p = sma_freelist[min_i].addr;
sma_freelist[min_i].addr = NULL;
sma_freelist_cnt--;
min_p++;
return (void *)min_p;
......@@ -167,7 +158,7 @@ void *string_malloc(size_t len)
/* Initialize on first use */
if (sma_pages == 0) {
/* Initialize the freed object list */
for (int i = 0; i < SMA_MAX_FREE; i++) sma_freelist[i] = NULL;
for (int i = 0; i < SMA_MAX_FREE; i++) sma_freelist[i].addr = NULL;
/* Allocate first page and set up for first allocation */
sma_head = string_malloc_page();
if (sma_head == NULL) return NULL;
......@@ -186,11 +177,11 @@ void *string_malloc(size_t len)
if ((sma_nextfree + len + sizeof(size_t)) > SMA_PAGE_SIZE) {
size_t sz;
size_t *tailaddr;
/* See if remaining space is usable */
if (sma_freelist_cnt < SMA_MAX_FREE && (sma_nextfree + sizeof(size_t)) < SMA_PAGE_SIZE) {