#!/usr/bin/env bash
#
# ==========================================
# Duplicate Finder (by filename, filesize, or both)
# ==========================================
# Version: 1.3
# Author: Manuel Wendel
# ==========================================

# Print the usage text for this script to stdout.
# The script name shown in the first line is derived from $0.
show_help() {
    local script_name
    script_name=$(basename "$0")

    # One argument per output line; printf appends the newline to each.
    printf '%s\n' \
        "Usage: ${script_name} [OPTIONS] <directory>" \
        '' \
        'Find duplicate files based on filename and/or file size (not file content).' \
        '' \
        'Options:' \
        '  --mode <both|name|size>  Comparison mode:' \
        '                           both = same name AND same size (default)' \
        '                           name = same name' \
        '                           size = same size' \
        '  --export <file>          Export results as CSV to given file path' \
        '  --min-size <bytes>       Only consider files >= this size in bytes (default 0)' \
        '  --help                   Show this help'
}

# --- Defaults ---
MODE="both"
EXPORT_FILE=""
MIN_SIZE=0
DIR=""

# Print an error message to stderr and terminate with exit status 1.
die() {
    printf 'Error: %s\n' "$*" >&2
    exit 1
}

# --- Argument Parsing ---
# Options may appear in any order; the last positional argument is taken as
# the directory to scan. "--" ends option parsing so that a directory whose
# name starts with "-" can still be passed.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --mode | --export | --min-size)
            # Without this guard "shift 2" fails when the option is the last
            # argument, nothing is shifted and the loop never terminates.
            [[ $# -ge 2 ]] || die "Option '$1' requires a value"
            case "$1" in
                --mode) MODE="$2" ;;
                --export) EXPORT_FILE="$2" ;;
                --min-size) MIN_SIZE="$2" ;;
            esac
            shift 2
            ;;
        --help)
            show_help
            exit 0
            ;;
        --)
            shift
            while [[ $# -gt 0 ]]; do
                DIR="$1"
                shift
            done
            ;;
        -?*)
            # Reject typos such as --minsize instead of silently treating
            # them as the directory argument.
            die "Unknown option '$1' (see --help)"
            ;;
        *)
            DIR="$1"
            shift
            ;;
    esac
done

# --- Validation ---
case "$MODE" in
    both | name | size) ;;
    *) die "Invalid mode '$MODE' (expected both, name or size)" ;;
esac

if [[ ! "$MIN_SIZE" =~ ^[0-9]+$ ]]; then
    die "Invalid --min-size '$MIN_SIZE' (expected a non-negative integer number of bytes)"
fi

if [[ -z "$DIR" || ! -d "$DIR" ]]; then
    die "Please provide a valid directory"
fi

# Fall back to the documented default of 0 when no minimum size is set, and
# refuse anything that is not a plain non-negative integer before it reaches find.
MIN_SIZE=${MIN_SIZE:-0}
if [[ ! "$MIN_SIZE" =~ ^[0-9]+$ ]]; then
    echo "Error: --min-size must be a non-negative integer number of bytes (got '$MIN_SIZE')" >&2
    exit 1
fi

echo "Scanning '$DIR' recursively ... (Mode: $MODE, Min size: $MIN_SIZE bytes)"

# List every regular file below a directory whose size is >= a minimum.
# Arguments: $1 - directory to scan recursively
#            $2 - minimum file size in bytes (non-negative integer)
# Outputs:   one record per file on stdout:
#            path|filename|size|status-change-epoch|modification-epoch
# Note: the record format is '|'-delimited and newline-terminated, so a path
# containing '|' or a newline will be mis-split by consumers of these records.
scan_files() {
    local dir=$1 min_size=$2
    local -a size_filter=()

    # find's "-size +Nc" matches files strictly larger than N bytes, so
    # ">= min_size" is expressed as "> min_size - 1". For a minimum of 0 no
    # filter is applied at all, which keeps empty files in the result.
    # The 10# prefix forces base 10 so leading zeros are not read as octal.
    if ((10#$min_size > 0)); then
        size_filter=(-size "+$((10#$min_size - 1))c")
    fi

    find "$dir" -type f "${size_filter[@]}" -printf '%p|%f|%s|%Cs|%Ts\n'
}

TMPFILE=$(mktemp) || {
    echo "Error: Could not create temporary file" >&2
    exit 1
}
# Remove the temp file on every exit path, including early termination.
trap 'rm -f -- "$TMPFILE"' EXIT

# --- Find files with min-size ---
# find's exit status is deliberately not treated as fatal: unreadable
# subdirectories make it non-zero while the remaining results are still usable.
scan_files "$DIR" "$MIN_SIZE" > "$TMPFILE"

# --- Find duplicates ---
# Group the scan records in $TMPFILE (path|filename|size|ctime|mtime) by a key
# chosen from the mode: filename for "name", size for "size", otherwise
# filename plus size. Only groups with more than one record are kept.
# Groups are emitted in the order their key first appears in the input, so the
# result is reproducible instead of depending on awk's array iteration order.
DUPES=$(awk -F'|' -v mode="$MODE" '
{
  if (mode == "name") key = $2
  else if (mode == "size") key = $3
  else key = $2 "|" $3

  if (key in count) {
    data[key] = data[key] RS $0
  } else {
    # First record for this key: remember where the group belongs in the output.
    order[++groups] = key
    data[key] = $0
  }
  count[key]++
}
END {
  for (i = 1; i <= groups; i++) {
    k = order[i]
    if (count[k] > 1)
      print data[k]
  }
}' "$TMPFILE")

if [[ -z "$DUPES" ]]; then
    echo "✅ No duplicates found."
    rm -f -- "$TMPFILE"
    exit 0
fi

# --- Output ---

# Convert duplicate records to CSV.
# Input:   stdin, one record per line: path|filename|size|ctime|mtime
#          (ctime/mtime are epoch seconds)
# Outputs: CSV with a header row on stdout; timestamps are rendered in the
#          local time zone as "YYYY-MM-DD HH:MM:SS".
# Note: the "created" column is filled from the record's ctime field, which the
# scan produces with find's %Cs, i.e. the status-change time of the file.
write_duplicates_csv() {
    local path file size ctime mtime created modified
    local dq='"'

    echo "path,filename,filesize,created,last_modified"
    while IFS='|' read -r path file size ctime mtime; do
        created=$(date -d @"$ctime" +"%Y-%m-%d %H:%M:%S")
        modified=$(date -d @"$mtime" +"%Y-%m-%d %H:%M:%S")
        # Embedded double quotes are doubled so quoted fields stay valid CSV.
        printf '"%s","%s",%s,"%s","%s"\n' \
            "${path//$dq/$dq$dq}" "${file//$dq/$dq$dq}" "$size" "$created" "$modified"
    done
}

# Print duplicate records as human-readable groups.
# Arguments: $1 - comparison mode (name, size; anything else means name+size)
# Input:     stdin, records as produced by the duplicate detection, with the
#            members of each group on consecutive lines
# Outputs:   one header per group followed by its member paths on stdout.
# The group key follows the comparison mode, so in "name" mode files of
# different sizes and in "size" mode files with different names stay in a
# single group instead of being split into one-entry groups.
print_duplicate_groups() {
    awk -F'|' -v mode="$1" '
    {
      # Appending "" forces string comparison, so names like 1 and 1.0 differ.
      if (mode == "name") key = $2 ""
      else if (mode == "size") key = $3 ""
      else key = $2 "|" $3

      if (NR == 1 || key != lastkey) {
        if (NR > 1) print "----------------"
        if (mode == "name") printf "File: %s\n", $2
        else if (mode == "size") printf "Size: %s bytes\n", $3
        else printf "File: %s (Size: %s bytes)\n", $2, $3
      }
      # In name mode the size can differ per member, so show it per path.
      if (mode == "name") printf " → %s (%s bytes)\n", $1, $3
      else printf " → %s\n", $1
      lastkey = key
    }'
}

if [[ -n "$EXPORT_FILE" ]]; then
    # The file is opened once for the whole export; success is only reported
    # when it could actually be written.
    if printf '%s\n' "$DUPES" | write_duplicates_csv > "$EXPORT_FILE"; then
        echo "CSV exported to: $EXPORT_FILE"
    else
        echo "Error: Could not write CSV to '$EXPORT_FILE'" >&2
        rm -f -- "$TMPFILE"
        exit 1
    fi
else
    echo ""
    echo "Duplicate files:"
    echo "----------------"
    printf '%s\n' "$DUPES" | print_duplicate_groups "$MODE"
fi

rm -f -- "$TMPFILE"

# Examples:
#
# Default mode (name & size, no minimum size)
#   bash duplicate-finder.sh "/volume1/files/"
#
# Only check files >= 100 MB
#   bash duplicate-finder.sh --min-size 104857600 "/volume1/files/"
#
# Export as CSV
#   bash duplicate-finder.sh --export /tmp/duplicates.csv "/volume1/files/"
#
# Compare by name only
#   bash duplicate-finder.sh --mode name "/volume1/files/"