Script: find duplicates
#!/usr/bin/env bash
#
# ==========================================
# Duplicate Finder (by filename, filesize, or both)
# ==========================================
# Version: 1.3
# Author: Manuel Wendel
# ==========================================
# Print the usage summary and the option reference to stdout.
# Arguments: none (the program name is derived from $0).
show_help() {
  local prog
  prog=$(basename "$0")
  printf '%s\n' \
    "Usage: ${prog} [OPTIONS] <directory>" \
    "Find duplicate files based on filename and/or file size (not file content)." \
    "Options:" \
    "--mode <both|name|size> Comparison mode:" \
    "both = same name AND same size (default)" \
    "name = same name" \
    "size = same size" \
    "--export <file> Export results as CSV to given file path" \
    "--min-size <bytes> Only consider files >= this size in bytes (default 0)" \
    "--help Show this help"
}
# --- Defaults ---
MODE="both"     # comparison key: both | name | size
EXPORT_FILE=""  # CSV destination; empty means print the report to stdout
MIN_SIZE=0      # smallest file size in bytes to consider
DIR=""          # directory to scan (positional argument; the last one wins)

# --- Argument Parsing ---
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode | --export | --min-size)
      # "shift 2" fails WITHOUT shifting when the value is missing, which
      # would leave "$1" in place and spin this loop forever.
      if [[ $# -lt 2 ]]; then
        echo "Error: option '$1' requires a value" >&2
        exit 1
      fi
      case "$1" in
        --mode) MODE="$2" ;;
        --export) EXPORT_FILE="$2" ;;
        --min-size) MIN_SIZE="$2" ;;
      esac
      shift 2
      ;;
    --help)
      show_help
      exit 0
      ;;
    -?*)
      # Reject typos such as "--minsize" instead of silently treating them
      # as the directory argument.
      echo "Error: unknown option '$1'" >&2
      exit 1
      ;;
    *)
      DIR="$1"
      shift
      ;;
  esac
done

# --- Validation ---
# Any unrecognised mode would otherwise silently behave like "both".
case "$MODE" in
  both | name | size) ;;
  *)
    echo "Error: invalid mode '$MODE' (expected both, name or size)" >&2
    exit 1
    ;;
esac

if ! [[ "$MIN_SIZE" =~ ^[0-9]+$ ]]; then
  echo "Error: --min-size must be a non-negative integer (got '$MIN_SIZE')" >&2
  exit 1
fi

if [[ -z "$DIR" || ! -d "$DIR" ]]; then
  echo "Error: Please provide a valid directory" >&2
  exit 1
fi
# --- Scan the tree and collect duplicate candidates ---

# List every regular file below a directory, one record per line.
# Arguments: $1 - directory to scan
#            $2 - minimum size in bytes; files of at least this size are listed
#                 (must be a non-negative integer, default 0)
# Outputs:   "path|name|size|ctime|mtime" records on stdout; the two times are
#            find's %Cs / %Ts (status-change and modification time, formatted
#            with strftime's %s).
# Note:      relies on GNU find's -printf. A path containing "|" or a newline
#            yields a malformed record.
scan_files() {
  local dir=$1 min_size=${2:-0}
  local -a size_filter=()
  # find's "-size +Nc" matches files strictly LARGER than N bytes, so
  # "at least min_size" is "+(min_size - 1)c". With min_size 0 no filter is
  # needed at all, which also keeps empty files in the listing.
  # The 10# prefix stops values with leading zeros being read as octal.
  if ((10#$min_size > 0)); then
    size_filter=(-size "+$((10#$min_size - 1))c")
  fi
  find "$dir" -type f "${size_filter[@]}" -printf '%p|%f|%s|%Cs|%Ts\n'
}

# Print the records of every group that has more than one member.
# Arguments: $1 - comparison mode: "name", "size"; anything else compares
#                 name AND size
#            $2 - file holding scan_files records
# Outputs:   the matching records on stdout, group by group, groups in the
#            order in which their first member was seen.
group_duplicates() {
  local mode=$1 records=$2
  awk -F'|' -v mode="$mode" '
    {
      if (mode == "name") key = $2
      else if (mode == "size") key = $3
      else key = $2 "|" $3

      if (key in data) {
        data[key] = data[key] RS $0
      } else {
        data[key] = $0
        order[++groups] = key   # remember first-seen order for stable output
      }
      count[key]++
    }
    END {
      for (i = 1; i <= groups; i++) {
        k = order[i]
        if (count[k] > 1) print data[k]
      }
    }' "$records"
}

if ! [[ "${MIN_SIZE:-0}" =~ ^[0-9]+$ ]]; then
  echo "Error: --min-size must be a non-negative integer" >&2
  exit 1
fi

echo "Scanning '$DIR' recursively ... (Mode: $MODE, Min size: $MIN_SIZE bytes)"

TMPFILE=$(mktemp) || {
  echo "Error: could not create a temporary file" >&2
  exit 1
}
# Remove the temporary file on every exit path, including errors and Ctrl-C.
trap 'rm -f -- "$TMPFILE"' EXIT

# --- Find files with min-size ---
# find's exit status is deliberately not checked: unreadable sub-directories
# make it non-zero even though the remaining results are still usable.
scan_files "$DIR" "${MIN_SIZE:-0}" > "$TMPFILE"

# --- Find duplicates ---
DUPES=$(group_duplicates "$MODE" "$TMPFILE")
# Nothing to report: remove the scan's temporary file and finish successfully.
if [[ -z "$DUPES" ]]; then
  echo "✅ No duplicates found."
  rm -f -- "$TMPFILE"
  exit 0
fi

# --- Output ---
# Every line of $DUPES is one "path|name|size|ctime|mtime" record.
if [[ -n "$EXPORT_FILE" ]]; then
  # CSV export. The two timestamps are handed to date(1) as epoch seconds.
  # NOTE(review): the "created" column is filled from find's %C value, which
  # is the status-change time and not necessarily the creation time.
  dq='"'
  {
    echo "path,filename,filesize,created,last_modified"
    while IFS='|' read -r path file size ctime mtime; do
      created=$(date -d "@$ctime" +"%Y-%m-%d %H:%M:%S")
      modified=$(date -d "@$mtime" +"%Y-%m-%d %H:%M:%S")
      # A double quote inside a quoted CSV field must be written as two
      # double quotes, otherwise the field ends early for the reader.
      printf '"%s","%s",%s,"%s","%s"\n' \
        "${path//$dq/$dq$dq}" "${file//$dq/$dq$dq}" \
        "$size" "$created" "$modified"
    done <<< "$DUPES"
  } > "$EXPORT_FILE" || {
    # Do not claim success when the export file could not be written.
    echo "Error: could not write CSV to '$EXPORT_FILE'" >&2
    rm -f -- "$TMPFILE"
    exit 1
  }
  echo "CSV exported to: $EXPORT_FILE"
else
  echo ""
  echo "Duplicate files:"
  echo "----------------"
  # Start a new section whenever the grouping key changes. The key follows
  # the comparison mode, so "name" and "size" groups stay together instead
  # of getting a fresh header for every differing size or name.
  awk -F'|' -v mode="$MODE" '
    {
      if (mode == "name") {
        key = $2
        header = sprintf("File: %s", $2)
        entry = sprintf(" → %s (%s bytes)", $1, $3)   # sizes may differ here
      } else if (mode == "size") {
        key = $3
        header = sprintf("Size: %s bytes", $3)
        entry = sprintf(" → %s", $1)
      } else {
        key = $2 "|" $3
        header = sprintf("File: %s (Size: %s bytes)", $2, $3)
        entry = sprintf(" → %s", $1)
      }
      if (NR == 1 || key != last) {
        if (NR > 1) print "----------------"
        print header
      }
      print entry
      last = key
    }' <<< "$DUPES"
fi
rm -f -- "$TMPFILE"
Examples:
# Default mode (name & size, no minimum size)
bash duplicate-finder.sh "/volume1/files/"
# Only check files ≥ 100 MiB (104857600 bytes)
bash duplicate-finder.sh --min-size 104857600 "/volume1/files/"
# Export the results as CSV
bash duplicate-finder.sh --export /tmp/duplicates.csv "/volume1/files/"
# Compare by name only
bash duplicate-finder.sh --mode name "/volume1/files/"