#!/bin/bash

# findnl - find problematic filenames
# Copyright © 2000-2009 by Pádraig Brady.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details,
# which is available at www.gnu.org

#Notes:

#note if files have names with ascii char 1 in them will have this converted
#to \n, ls will say file not found in this case and hence the real filename
#can be inferred.
#
#Note expressions are evaluated left to right and as soon as
#one is matched the rest aren't evaluated (makes sense as doing
#or operation not and). Anyway this implies that should check things
#like filename length before actual valid character combinations tests
#
#Don't need to handle ^~ | ^- etc as always get fully qualified path from find
#
#Even if -n1 specified for xargs there will be no ls processes run
#to display output until the end as the pipe is line buffered and
#there is only 1 line passed to xargs. This is required so paths
#with linefeeds are supported.
#
#How do you add expression to only include filenames with at most
#1 consecutive character (for e.g. :) I was trying: ":{1,1}"
#but the second 1 was ignored? I know grep -Ev ":{2,}" works but
#I need the expression as part of the large expression for obvious
#reasons. For now I'm allowing any filenames with :, but this could
#cause interoperability problems with NTFS for e.g. which uses this char
#to indicate a separate stream in the file.
#
#man ascii was useful when writing this.
#
#Hmm just noticed the GNU pathchk utility, which
#should probably be integrated in some way?
#
#Note theoretically the only char UNIX doesn't allow in file/dir names
#is /.
#This can make things very awkward, especially for future extensions
#like streams etc.

script_dir=$(dirname "$0")              #directory of this script
script_dir=$(readlink -f "$script_dir") #Make sure absolute path

#Version and shell_quote (used below) are not defined in this file;
#presumably this sourced file provides them - TODO confirm.
. "$script_dir"/supprt/fslver

#Print the help text on stdout, then terminate the script.
#exit is given no status, so the script exits with the status of echo.
Usage() {
    ProgName=$(basename "$0")
    echo "find Name (directory or file) Lint.
Usage: $ProgName [-1] [-2] [-3] [-p] [[-r] [-f] paths(s) ...]

These options are mutually exclusive (i.e. only the last one takes effect).
-1 is least checking, -3 is most. The default is 2.
-p is most stringent and applies POSIX.1 filename portability testing.
I.E. characters are limited to [A-Za-z0-9_.-] and max name length = 14
and max path length = 255.

If no path(s) specified then the currrent directory is assumed."
    exit
}

#default settings
#NOTE(review): MaxPathLen is assigned for every level but nothing visible
#in this script reads it - confirm whether path length is checked elsewhere.
level=2
MaxNameLen=129
MaxPathLen=2049

#Start from empty so that a variable of the same name inherited from the
#environment is not prepended to the arguments collected below.
argsToPassOn=""

#Process the command line.
# Arguments: the arguments given to this script.
# Globals written: level, MaxNameLen, MaxPathLen, argsToPassOn.
#Each level option assigns all three level variables so that the last one
#given wins, as the usage text states. (-2 used to be a no-op, which meant
#that "-3 -2" left level 3 in effect.)
#-h and -v call Usage and Version; every other argument is passed through
#shell_quote and appended to argsToPassOn.
ParseArgs() {
    local arg
    for arg do
        case "$arg" in
        -1)
            level="1"
            MaxNameLen=256
            MaxPathLen=4097
            ;;
        -2)
            level="2"
            MaxNameLen=129
            MaxPathLen=2049
            ;;
        -3)
            level="3"
            MaxNameLen=65
            MaxPathLen=1025
            ;;
        -p)
            level="p"
            MaxNameLen=15
            MaxPathLen=256
            ;;
        -h|--help|-help)
            Usage
            ;;
        -v|--version)
            Version
            ;;
        *)
            if [ -e "$arg" ]; then
                case "$arg" in
                */*) ;;             #already contains a /
                *) arg="./$arg" ;;  #make sure / before name (see below)
                esac
            fi
            argsToPassOn="$argsToPassOn $(shell_quote "$arg")"
            ;;
        esac
    done
}

ParseArgs "$@"

if [ "$level" != "p" ]; then
    # Note LC_CTYPE=C in fslver causes grep below to report
    # valid UTF8 [:alnum:] chars as problematic, so unset here
    unset LC_CTYPE
    # Note this slows grep down 320% on FC4 (grep-2.5.1) at least.
    # Hence the minimum fslint-gui run time will be about 6 times larger
    # due to the polling mechanism it uses.
#else
    #I'm trusting grep to process [:alnum:] appropriately
    #for the users locale.
fi

#-p = POSIX.1 checking (names <= 14 chars) etc.
#Build the extended regular expressions and the sets of acceptable
#characters for every checking level, then pick the pair for $level.
# Globals read: level, MaxNameLen.
# Globals written: expressionsp, expressions1, expressions2, expressions3,
#                  charactersp, characters1, characters2, characters3,
#                  expressions, characters.
#Each expressionsN extends the one before it with further alternatives,
#while each charactersN extends the next stricter set, so a higher level
#tests more patterns and accepts fewer characters.
#NOTE(review): only the name length is tested here; no expression uses
#MaxPathLen - confirm whether path length is checked elsewhere.
BuildPatterns() {
    local selected

    #-p = POSIX.1 checking
    expressionsp="(.*/[^/]{$MaxNameLen,}$)" #name length >= MaxNameLen

    #-1 = min checking(most still only require shell quoting, but very bad practice)
    expressions1="$expressionsp|( +$)" #spaces @ end of name
    expressions1="$expressions1|(.*/ [^/]*$)" #spaces @ start of name
    expressions1="$expressions1|(.*/[^/]*[ ]{2,}[^/]*$)" #2 or more adjacent spaces
    expressions1="$expressions1|(.*/-[^/]*$)" #- @ start of name
    expressions1="$expressions1|(.*/[^/]* -[^/]*$)" #- after space in name

    #-2 = default checking (characters requiring shell quoting etc)
    expressions2="$expressions1|(.*/[^/]*\{[^/]*,[^/]*\}[^/]*$)" #name with {,} pat
    expressions2="$expressions2|(.*/[^/]*\[[^/]+\][^/]*$)" #name with [.+] pattern
    expressions2="$expressions2|(.*/~[^/]*$)" #~ @ start of name

    #-3 = max checking
    expressions3="$expressions2|(.*/[^/]*[.]{2,}[^/]*$)" #2 or more adjacent .'s
    expressions3="$expressions3|(.+[.]$)" #trailing .(s)

    charactersp="[:alnum:]_./" #/ included as can't be in name and simpler exprs
    characters3="$charactersp,~+"
    characters2="$characters3@#!=[{}:;'<>%& "
    characters1="$characters2()\"$|\\"
    #any other characters are never OK and that includes \t*? etc.
    #Note characters ]- are included in expresssions below.

    #Indirect expansion picks the variable whose name ends in $level
    #without passing any text through eval.
    selected="expressions$level"
    expressions=${!selected}
    selected="characters$level"
    characters=${!selected}
}

BuildPatterns

. "$script_dir"/supprt/getfpf -f "$argsToPassOn"
#forcing -f as expressions assume / before name
#Note could run through basename after if required?

#List the names matching one of the expressions, or containing a character
#outside the accepted set. Records are NUL separated, so \n and \0 are
#swapped with \1 and \n around grep, which works on lines. When stdout is
#not a pipe the names are shown with ls, otherwise they are passed on NUL
#separated by cat.
find "$@" -printf "$FPF\0" |
sort -zu | #merge files (indirectly) specified multiple times
tr '\n\0' '\1\n' | #convert \n to \1 so grep works
grep -E "$expressions|(.*/[^/]*[^]$characters-]+[^/]*$)" | #note ] 1st & - last
tr '\n\1' '\0\n' | #convert back from \1 to \n
if [ ! -p /proc/self/fd/1 ]; then
    xargs -r0 ls -b1Ud --color=auto --
else
    cat
fi