/* * qfind - a quick! "find . -type f" like program optimized for Windows. * * Description: * 1. By default, only prints non-directory file names. * 2. Prints all a directory's files, before processing its subdirectories. * 3. Does not prefix filenames with redundant "./". * * Background: * It is not currently possible to write efficient directory traversal code in * Perl on Windows. This is because you need to stat() each file returned by * readdir() to test if it is a directory when this information has already * been obtained but not used. The extra unnecessary stat() calls severely * degrade performance. This program was written to be run from a Perl script * to provide it with a faster method of finding all the files beneath a * directory than (for example) File::Find::find(). See "peg.pl" for details. * * To compile: * % gcc -O2 qfind.c * * Copyright (c) 2007-2014 Alex Davies. All rights reserved. This program * is free software; you can redistribute it and/or modify it under the * same terms as Perl itself. */ /******************************************************************************/ #define QFIND_VERSION "1.27" #define QFIND_COPYRIGHT "Copyright (c) 2007-2014 Alex Davies." /* Are we on a MS Windows platform? */ #if defined(_WIN32) && !defined(WIN32) #define WIN32 #endif #if defined(_WIN64) && !defined(WIN64) #define WIN64 #endif #if (defined(WIN32) || defined(WIN64)) && !defined(WINDOWS) #define WINDOWS #endif #if defined(WINDOWS) && defined(UNICODE) #error A UNICODE build is not supported #endif /* Qfind can be built on either Windows or Unix-like Oses. The Windows version * uses Find(First|Next)File to obtain filenames while the Unix version uses * opendir/readdir/stat. The Unix version can also be built on Windows; however, * it is only intended for (basic) testing purposes. */ #if defined(WINDOWS) && !defined(QFINDX) #define USE_FINDFILE 1 #else #define USE_FINDFILE 0 #endif /******************************************************************************/ #include #include #include #include #include #if defined(WINDOWS) #include #include #endif #if USE_FINDFILE #include #else #include #include #include #include #include #include #endif /******************************************************************************/ #if USE_FINDFILE #define QCODE(a, b) a #else #define QCODE(a, b) b #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L #define STDC99 #endif #if !(defined(STDC99) || defined(__GNUC__) || defined(__cplusplus)) #define inline #endif #if defined(__GNUC__) #define GCC_ATTR(x) __attribute__ ((x)) #else #define GCC_ATTR(x) #endif #if defined(__cplusplus) #define CPP_CAST(type) (type) #else #define CPP_CAST(type) #endif #define UNUSED_ARG(x) ((void) x) /* To avoid a compiler warning. */ #ifdef _DIRENT_HAVE_D_TYPE #define HAS_D_TYPE #endif #if defined(HAS_D_TYPE) && defined(WINDOWS) && !USE_FINDFILE /* Allow test compiling the HAS_D_TYPE build on Windows. */ #define d_type d_ino #define DT_UNKNOWN 0 /* NB. d_ino is always 0 on Windows. */ #define DT_FIFO 1 #define DT_CHR 2 #define DT_DIR 4 #define DT_BLK 6 #define DT_REG 8 #define DT_LNK 10 #define DT_SOCK 12 #define DT_WHT 14 #endif /******************************************************************************/ /* Encapsulate a simple stack of strings. */ struct strstack { struct { /* A single buffer containing all the strings. */ char *buf; size_t size; } str; struct { /* Store each string's offset into the buffer. */ size_t *list; size_t max; size_t next; } offset; size_t total; /* # of strings stored. */ }; /* Initial stack sizes. */ #define DIR_STACK_ELEMENTS (1 * 1024) #define DIR_STACK_BUFSIZE (8 * DIR_STACK_ELEMENTS) #define FILE_STACK_ELEMENTS (4 * 1024) #define FILE_STACK_BUFSIZE (8 * FILE_STACK_ELEMENTS) /* Use a stack of directories to allow us to print all the files * in a directory before processing its subdirectories. */ static struct strstack dir_stack; /* A stack of filenames which is sorted prior to printing. */ static struct strstack file_stack; struct stack_state { size_t start_total; size_t end_total; size_t next_offset; }; /* A function for comparing strings. */ typedef int (*strcmp_fn_t)(const char *s1, const char *s2); /* The comparison function used to sort filenames. * We don't sort the filenames before printing by default when using * FindNextFile() because it already returns sorted results on NTFS. */ static strcmp_fn_t filename_cmp = QCODE(NULL, strcmp); #if USE_FINDFILE /* => WINDOWS */ static char slash = '/'; static char slosh = '\\'; /* The other slash. */ #else static const char slash = '/'; #endif /* Store various boolean options in a single variable. */ static unsigned int flags = 0; #if USE_FINDFILE /* Print classic 8.3 filenames eg. "filename.txt". */ #define TRUNCATED_FILENAMES (1u << 0) /* -t */ /* Skip SYSTEM|HIDDEN files & directories. */ #define SKIP_HIDDEN_FILES (1u << 1) /* -y */ #else /* By default the opendir/readdir build only prints regular files. * ie. skip sockets, character/block special files & named pipes. */ #define ANY_FILE (1u << 2) /* -a */ /* Skip non-readable files. */ #define SKIP_NON_READABLE_FILES_ALWAYS (1u << 3) /* -r */ #define SKIP_NON_READABLE_FILES_IF_FREE (1u << 4) /* -R */ #define SKIP_NON_READABLE_FILES \ (SKIP_NON_READABLE_FILES_ALWAYS | SKIP_NON_READABLE_FILES_IF_FREE) #endif /* Skip dot files & directories. */ #define SKIP_DOT_FILES (1u << 5) /* -o */ /* Silence messages to STDERR. */ #define SILENT (1u << 6) /* -s */ /* Print files matching given size restraints. */ #define MAX_SIZE_TEST (1u << 7) /* -K */ #define MIN_SIZE_TEST_ALWAYS (1u << 8) /* -z or -J */ #define MIN_SIZE_TEST_IF_FREE (1u << 9) /* -Z */ #define MIN_SIZE_TEST \ (MIN_SIZE_TEST_ALWAYS | MIN_SIZE_TEST_IF_FREE) #if USE_FINDFILE struct high_low { DWORD high, low; }; static struct high_low min_size_hl, max_size_hl; #else typedef unsigned long filesize_t; static filesize_t min_size, max_size; #endif /* Modification time tests. */ #define SKIP_NEW_FILES (1u << 10) /* -O */ #define SKIP_OLD_FILES (1u << 11) /* -N */ static time_t opt_N_time; static time_t opt_O_time; #define MTIME_TEST (SKIP_OLD_FILES | SKIP_NEW_FILES) #define CONSIDER_CTIME (1u << 12) /* -c */ /* Do we need to call is_exclude_extension() on each filename. */ #define EXTENSION_TEST (1u << 13) /* -e or -p */ /* Match/ignore files belonging to a given user/group id. */ #if !USE_FINDFILE #define UID_TEST_MATCH (1u << 14) /* -u */ #define UID_TEST_SKIP (1u << 15) /* -U */ #define GID_TEST_MATCH (1u << 16) /* -g */ #define GID_TEST_SKIP (1u << 17) /* -G */ /* One or more of -u/-U/-g/-G. */ #define ID_TEST (0 \ | UID_TEST_MATCH \ | UID_TEST_SKIP \ | GID_TEST_MATCH \ | GID_TEST_SKIP) typedef unsigned int id_t; static id_t opt_uid; static id_t opt_gid; #endif #if !USE_FINDFILE /* By default qfind ignores symolic links to directories. The -l/-L options * make qfind follow them; the difference between them is that -l will skip * directories it has already been in, while -L follows them all except where * that leads to recursion. */ #define FOLLOW_LINKS_NO_REPS (1u << 18) /* -l */ #define FOLLOW_LINKS_ALL (1u << 19) /* -L */ #define FOLLOW_LINKS (FOLLOW_LINKS_NO_REPS | FOLLOW_LINKS_ALL) /* The idea of not chdir()'ing came from GNU grep. Unfortunately It will fail * with ENAMETOOLONG if the pathname exceeds PATH_MAX. To handle arbitarily * deep directory trees means we have to chdir(). * It may still be useful on mounted filesystems. */ #define NO_CHDIR (1u << 20) /* -X */ /* Equivalent to GNU find's -xdev/-mount option. */ #define XDEV (1u << 21) /* -m */ #endif /* Qfind is really all about printing (regular) files and not directories. * However, since it is occasionally useful to have a list of directories, and * since it fairly simple to tack this capability onto qfind, here it is... * NB. The mnemonic for -+ is the "+"s seen in file/directory tree views. */ #define PRINT_DIRS_ALSO (1u << 22) /* -+ */ #define PRINT_DIRS_ONLY (1u << 23) /* -: */ #define PRINT_DIRS (PRINT_DIRS_ALSO | PRINT_DIRS_ONLY) #define DEPTH_TEST (1u << 24) /* -E */ static int depth; static int max_depth; /* Provide a simple "grep -rFl STR DIR" literal string matching capability. */ #define MATCH_STR_TEST (1u << 25) /* -M */ #define NOCASE_MATCH_STR_TEST (1u << 26) /* -Y */ /* stbm = "Self-Tuning Boyer-Moore". */ #define STBM_READ_SIZE (32 * 1024) #define STBM_MATCH_STR_BUFSIZE 128 /* We're not GNU grep! */ #define STBM_BUFFER_SIZE ((2 * STBM_MATCH_STR_BUFSIZE) + STBM_READ_SIZE) struct stbm_data { unsigned int skip_table[UCHAR_MAX + 1]; unsigned int md2; unsigned int match_str_len; char match_str[STBM_MATCH_STR_BUFSIZE]; char buffer[STBM_BUFFER_SIZE]; }; static struct stbm_data *stbm = NULL; /* If using -: or -+ provide a way to show directories with a trailing slash. */ #define PRINT_DIR_TRAILING_SLASH (1u << 27) /* -/ */ /* When running "qfind DIR", do not print the leading DIR part. */ #define PRINT_TAIL (1u << 28) /* -T */ static size_t argv_dir_len; /* Print the directory name after its contents. This mimics GNU find's -depth. * NB. This implies -+ unless -: has already been specified. */ #define PRINT_DIR_LAST (1u << 29) /* -B */ /* Print each filename followed by a null separator. As the name suggests, this * is equivalent to GNU find's "-print0" option. */ #define PRINT0 (1u << 30) /* -0 */ #ifdef HAS_D_TYPE /* It is not always necessary to stat() the file if its d_type is DT_REG. */ #define NEED_TO_CALL_STAT (0 \ | MAX_SIZE_TEST \ | MIN_SIZE_TEST_ALWAYS \ | SKIP_NON_READABLE_FILES_ALWAYS \ | MTIME_TEST \ | MATCH_STR_TEST \ | ID_TEST) #endif /* We need to determine if a given string is among a list of other strings. * To speed lookup, after initialization the strings are sorted and then we * build a table keyed by the first character of member strings which gives the * first index in the strstack of strings with that first character. */ struct lookup_list { struct strstack ss; size_t firstchar[UCHAR_MAX + 1]; /* A primitive hash table. */ }; /* Value of firstchar[UCHAR(c)] indicating no string in the lookup_list with * a first character of 'c'. */ #define NONE ((size_t) -1) #define EMPTY_STACK ((size_t) -1) static int match_extension; /* True if -p option is used. */ /* A list of file extensions. If "match_extension" is true we only match files * in this list, else we only match files whose extension is not in the list. */ static struct lookup_list xp; /* A list of directories to ignore eg. ".git". */ static struct lookup_list xd; /* A list of directory paths to ignore. */ static struct lookup_list xD; /* A list of filenames to ignore. */ static struct lookup_list xv; /* Data needed by cmp_ss(), the qsort comparison function. */ static struct { struct strstack *ssp; strcmp_fn_t cmp; } sort_data; /* The name of the directory currently being processed. */ static char *dirname = NULL; static size_t dirname_bufsize; static size_t dirname_len; #define DIRNAME_BUFSIZE 280 #if !USE_FINDFILE /* The name of the starting directory. This is only needed if multiple DIR * command line arguments are specified. */ static char *start_dir = NULL; /* If symbolic links to directories are followed, it is possible to enter an * infinite loop if a link points back to a directory containing that link. * To avoid this we store ids of the directories processed. */ struct dirid { ino_t ino; dev_t dev; }; struct dirids { struct dirid *list; size_t size; size_t total; }; /* To reduce lookup time, use a fixed size array of lists keyed by the ino. */ #define DIRID_LIST_SIZE 31 #define DIRID_LIST_IDX(ino, dev) (((unsigned int) ino) % DIRID_LIST_SIZE) static struct dirids dirid_list[DIRID_LIST_SIZE]; /* Data stored after directory names in dir_stack. */ struct dir_info { struct dirid di; int is_a_link; }; /* A function type representing stat() and lstat(). */ typedef int (*stat_fn_t)(const char *f, struct stat *statp); /* The device number for the current top directory. */ static dev_t start_dev; /* GNU find uses O_LARGEFILE when open'ing a file. We copy it. */ #ifdef O_LARGEFILE #define OPEN_READONLY_FLAGS (O_RDONLY | O_LARGEFILE) #else #define OPEN_READONLY_FLAGS O_RDONLY #endif #endif #if !defined(WINDOWS) /* A buffer to hold the filenames written to stdout. */ #if defined(BUFSIZ) && BUFSIZ >= 512 #define QF_BUFSIZ BUFSIZ #else #define QF_BUFSIZ 4096 #endif static char stdout_buffer[QF_BUFSIZ]; #endif /******************************************************************************/ typedef QCODE(DWORD, int) err_t; /* GetLastError() or errno. */ typedef QCODE(WIN32_FIND_DATA, struct stat) filedata_t; typedef QCODE(HANDLE, int) fh_t; /* A file handle/descriptor. */ static void setup(void); static int process_argv(int argc, char *argv[]); static int process_option(char *arg); static void prepare(void); static void print_help(void); static void scan_dir(void); #if !USE_FINDFILE static void scan_dir2(const char *d, int *cwd_fdp, size_t old_dirname_len); static void close_cwd_fd(int fd); #endif static inline void save_stack_state(struct stack_state *p, const struct strstack *ssp); static inline void restore_stack_state(const struct stack_state *p, struct strstack *ssp); static void print_files(void) GCC_ATTR(noinline); static void print_file_stack(void); static void flush_output(void); static void found_file(const char *f); static void write_dirname(void); static void print_directory(void); static void dirwarn(const char *msg, err_t err); static char * remove_trailing_slash(void); static char * get_error_msg(err_t err); #ifdef QFIND_CLEAN_UP static void clean_up(void); static void free_ss(struct strstack *ssp); static void free_ll(struct lookup_list *llp); #if !USE_FINDFILE static void free_dirid_list(void); #endif #endif static void terminate(int status) GCC_ATTR(noreturn); static void die_msg(const char *msg) GCC_ATTR(noreturn); static void die_bad_arg(const char *name, const char *arg) GCC_ATTR(noreturn); static size_t get_alloc_size(size_t needed); static void * Malloc(size_t size); static void * Realloc(void *ptr, size_t size); static void out_of_memory(void) GCC_ATTR(noreturn); static void init_ss(struct strstack *ssp); static inline void alloc_ss(struct strstack *ssp, size_t elements, size_t bufsize); static void init_ll(struct lookup_list *xptr); static void init_ll_firstchar(struct lookup_list *xptr); static void push_ss(struct strstack *ssp, const char *str); #if !USE_FINDFILE static void push_ss_x(struct strstack *ssp, const char *str, const void *data_ptr, size_t data_size); #endif static void grow_ss(struct strstack *ssp, size_t elements, size_t size); static inline char * get_str(const struct strstack *ssp, size_t i); static void clear_ss(struct strstack *ssp); static void sort_ss(struct strstack *ssp, size_t from, size_t n, strcmp_fn_t cmp); static void sort_filenames(struct strstack *ssp, size_t from, size_t n); static void remove_reps(struct lookup_list *xptr); static int stri2cmp(const char *s1, const char *s2); static int cmp_ss(const void *elem1, const void *elem2); static int mystricmp(const char *s1, const char *s2); static int strnumcmp(const char *s1, const char *s2); static void parse_name_list(const char *arg, struct lookup_list *xptr); static void parse_ext_arg(char *arg, char opt); static void parse_time_arg(const char *arg, char opt); static void parse_size_arg(const char *arg, char opt); static void parse_depth_arg(const char *arg); static void parse_match_arg(const char *arg); static void stbm_init(void); static int file_size_ok(const filedata_t *fdp); static fh_t open_file(const char *f); static int fill_read_buffer(fh_t fh, char *read_buf); static int invalid_fh(fh_t fh); static inline err_t get_err(void); static int close_fh_ok(fh_t fh); static int file_contains_match_str(const char *f, const filedata_t *fdp) GCC_ATTR(noinline); static int buffer_contains_match_str(const char *buf, size_t buf_len); static void stbm_setup_sentinels(int n); static int stbm_find(const char *haystack, size_t h_len, const char *needle, size_t n_len); static int stbm_find_nocase(const char *haystack, size_t h_len, const char *needle, size_t n_len); static void fix_xD(void); #if !USE_FINDFILE static char * get_cwd(void); static void chdir_to_start_dir(void); static void parse_id_arg(const char *arg, char opt); static void stat_failed(const char *f); static void lstat_failed(const char *f); static void do_stat_failed(const char *f, const char *func); static time_t get_mtime_from_stat_data(const struct stat *statp); static void clear_dir_info(struct dir_info *dinfop); static int open_cwd(int *cwd_fdp, size_t old_dirname_len); static int chdir_into(const char *d); static void chdir_up(int fd); static void dirdie(const char *msg, err_t err) GCC_ATTR(noreturn); static DIR * do_opendir(void); static int do_stat(const char *f, struct stat *statp); static int do_lstat(const char *f, struct stat *statp); static int do_stat2(const char *f, struct stat *statp, stat_fn_t stat_func); static void found_dir(const char *str, const struct stat *statp, int is_a_link); static void push_dir_ss(const char *str, const struct stat *statp, int is_a_link); static void init_dirid_list(void); static void clear_dirid_list(void); static void push_dirid_list(ino_t ino, dev_t dev); static void pop_dirid_list(ino_t ino, dev_t dev); static int in_dirid_list(ino_t ino, dev_t dev); static void get_dir_info(const char *d, struct dir_info *dinfop); #endif static void skip_zero_size_files(const char opt); static int lookup(const struct lookup_list *xptr, const char *name); static int lookup_i(const struct lookup_list *xptr, const char *name); static int is_exclude_dir(const char *f); static int is_exclude_extension(const char *f); static int is_dirname_excluded(void); static int is_filename_excluded(const char *f); static void alloc_dirname(size_t bufsize); static void append_dirname(const char *d, size_t old_dirname_len); static int set_dirname(const char *dir); static void restore_dirname(size_t len); static void chomp_dirname_trailing_slash(void); static void lc_str(char *str); #if USE_FINDFILE static HANDLE get_first_file(WIN32_FIND_DATA *fDatap); static void warn_on_reparse_point(const char *f); static time_t filetime_to_timet(const FILETIME *filetimep); static BOOL filetime_from_time(PFILETIME pFileTime, time_t Time); static time_t get_mtime_from_find_data(const WIN32_FIND_DATA *fDatap); static time_t to_utc_time(time_t t); static void consistent_slashes(char *path); static inline int high_low_cmp(DWORD ah, DWORD al, DWORD bh, DWORD bl); #define high_low_gt(ah, al, bh, bl) (high_low_cmp(ah, al, bh, bl) > 0) #define high_low_lt(ah, al, bh, bl) (high_low_cmp(ah, al, bh, bl) < 0) #endif static inline int is_digit(int c); static inline int to_lower(int c); static int to_upper(int c); /* Not used in critical loops. */ /******************************************************************************/ /* Provide wrappers around the standard strto* functions that simplify testing * for an error. */ #define define_strto_function(name, strto_func, result_type) \ static int \ name(const char *arg, result_type *resultp) \ { \ char *end = NULL; \ int ok; \ \ errno = 0; \ *resultp = strto_func(arg, &end, 10); \ if (!*arg || errno || !end || *end) \ ok = 0; \ else \ ok = 1; \ return ok; \ } define_strto_function(parse_l, strtol, long) define_strto_function(parse_ul, strtoul, unsigned long) /******************************************************************************/ #define strEQ(s1, s2) (strcmp(s1, s2) == 0) #define strNE(s1, s2) (strcmp(s1, s2) != 0) /* Get the number of elements in an array. */ #define NELEMS(ary) (sizeof(ary) / sizeof(ary[0])) /******************************************************************************/ #define Swap(T, a, b) \ do { \ T temp = a; \ a = b; \ b = temp; \ } while (0) /******************************************************************************/ #if defined(WINDOWS) #define lstat stat /* There is no lstat() on Windows. */ #endif /******************************************************************************/ #if UCHAR_MAX != 255 #undef QFIND_ASCII #endif #ifdef QFIND_ASCII /* Provide fast equivalents for 's tolower() and isdigit() assuming * the ASCII character set. */ /* Map ASCII characters to their lower case versions. */ static const unsigned char lc[256] = { 0, 1, 2, 3, 4, 5, 6, 7, /* 0-7 */ 8, 9, 10, 11, 12, 13, 14, 15, /* 8-15 */ 16, 17, 18, 19, 20, 21, 22, 23, /* 16-23 */ 24, 25, 26, 27, 28, 29, 30, 31, /* 24-31 */ 32, 33, 34, 35, 36, 37, 38, 39, /* 32-39 */ 40, 41, 42, 43, 44, 45, 46, 47, /* 40-47 */ 48, 49, 50, 51, 52, 53, 54, 55, /* 48-55 */ 56, 57, 58, 59, 60, 61, 62, 63, /* 56-63 */ 64, 97, 98, 99, 100, 101, 102, 103, /* 64-71 */ 104, 105, 106, 107, 108, 109, 110, 111, /* 72-79 */ 112, 113, 114, 115, 116, 117, 118, 119, /* 80-87 */ 120, 121, 122, 91, 92, 93, 94, 95, /* 88-95 */ 96, 97, 98, 99, 100, 101, 102, 103, /* 96-103 */ 104, 105, 106, 107, 108, 109, 110, 111, /* 104-111 */ 112, 113, 114, 115, 116, 117, 118, 119, /* 112-119 */ 120, 121, 122, 123, 124, 125, 126, 127, /* 120-127 */ 128, 129, 130, 131, 132, 133, 134, 135, /* 128-135 */ 136, 137, 138, 139, 140, 141, 142, 143, /* 136-143 */ 144, 145, 146, 147, 148, 149, 150, 151, /* 144-151 */ 152, 153, 154, 155, 156, 157, 158, 159, /* 152-159 */ 160, 161, 162, 163, 164, 165, 166, 167, /* 160-167 */ 168, 169, 170, 171, 172, 173, 174, 175, /* 168-175 */ 176, 177, 178, 179, 180, 181, 182, 183, /* 176-183 */ 184, 185, 186, 187, 188, 189, 190, 191, /* 184-191 */ 192, 193, 194, 195, 196, 197, 198, 199, /* 192-199 */ 200, 201, 202, 203, 204, 205, 206, 207, /* 200-207 */ 208, 209, 210, 211, 212, 213, 214, 215, /* 208-215 */ 216, 217, 218, 219, 220, 221, 222, 223, /* 216-223 */ 224, 225, 226, 227, 228, 229, 230, 231, /* 224-231 */ 232, 233, 234, 235, 236, 237, 238, 239, /* 232-239 */ 240, 241, 242, 243, 244, 245, 246, 247, /* 240-247 */ 248, 249, 250, 251, 252, 253, 254, 255, /* 248-255 */ }; /*! Optimized for is_digit(). */ #define U 0 /*! 0x01 */ /* upper */ #define L 0 /*! 0x02 */ /* lower */ #define D 1 /*! 0x04 */ /* digit */ #define S 0 /*! 0x08 */ /* space */ #define P 0 /*! 0x10 */ /* punct */ #define C 0 /*! 0x20 */ /* cntrl */ #define B 0 /*! 0x40 */ /* blank */ #define H 0 /*! 0x80 */ /* hex */ /* ASCII character info. */ static const unsigned char ctype[256] = { C, C, C, C, C, C, C, C, /* 0-7 */ C, S|C|B, S|C, S|C, S|C, S|C, C, C, /* 8-15 */ C, C, C, C, C, C, C, C, /* 16-23 */ C, C, C, C, C, C, C, C, /* 24-31 */ S|B, P, P, P, P, P, P, P, /* 32-39 */ P, P, P, P, P, P, P, P, /* 40-47 */ D|H, D|H, D|H, D|H, D|H, D|H, D|H, D|H, /* 48-55 */ D|H, D|H, P, P, P, P, P, P, /* 56-63 */ P, U|H, U|H, U|H, U|H, U|H, U|H, U, /* 64-71 */ U, U, U, U, U, U, U, U, /* 72-79 */ U, U, U, U, U, U, U, U, /* 80-87 */ U, U, U, P, P, P, P, P, /* 88-95 */ P, L|H, L|H, L|H, L|H, L|H, L|H, L, /* 96-103 */ L, L, L, L, L, L, L, L, /* 104-111 */ L, L, L, L, L, L, L, L, /* 112-119 */ L, L, L, P, P, P, P, C, /* 120-127 */ /* The rest are 0. */ }; #else #include #include #endif /* XXX The QFIND_ASCII versions of is_digit() and to_lower() do not handle EOF. * However, this is not a problem in this program. */ static inline int is_digit(int c) { #ifdef QFIND_ASCII return ctype[c] /*! & D */; #else return isdigit(c); #endif } static inline int to_lower(int c) { #ifdef QFIND_ASCII return lc[c]; #else return tolower(c); #endif } static int to_upper(int c) { #ifdef QFIND_ASCII if (c >= 97 && c <= 122) /* 'a' .. 'z' */ c -= 32; return c; #else return toupper(c); #endif } #undef U #undef L #undef D #undef S #undef P #undef C #undef B #undef H #define UCHAR(c) ((unsigned char) (c)) #define izdigit(c) (is_digit(UCHAR(c))) #define lowercase(c) (to_lower(UCHAR(c))) #define uppercase(c) (to_upper(UCHAR(c))) /******************************************************************************/ int main(int argc, char *argv[]) { int i, first_dir_arg_idx, num_dir_args; setup(); num_dir_args = process_argv(argc, argv); prepare(); first_dir_arg_idx = argc - num_dir_args; if (!num_dir_args) argv[argc++] = (char *) ""; /* Fake an extra command line argument. */ #if !USE_FINDFILE if (num_dir_args > 1 && !(flags & NO_CHDIR)) start_dir = get_cwd(); #endif for (i = first_dir_arg_idx; i < argc; ++i) { const char *dir = argv[i]; if (!set_dirname(dir)) continue; scan_dir(); } terminate(EXIT_SUCCESS); return 0; } /******************************************************************************/ /* Initialize the stacks etc. */ static void setup(void) { init_ll(&xd); init_ll(&xD); init_ll(&xp); init_ll(&xv); init_ss(&dir_stack); init_ss(&file_stack); #if !USE_FINDFILE init_dirid_list(); #endif #ifndef QFIND_ASCII /* XXX If the filenames are in UTF-8, then this is wrong. */ /* Use the current locale's tolower(). */ setlocale(LC_CTYPE, ""); #endif /* Set stdout to provide full buffering. */ #if defined(WINDOWS) /* On Win32 _IOLBF == _IOFBF ie. nothing to be done. */ #else errno = 0; if (setvbuf(stdout, stdout_buffer, _IOFBF, sizeof(stdout_buffer))) { fprintf(stderr, "qfind: setvbuf failed: %s\n", strerror(errno)); terminate(EXIT_FAILURE); } #endif #if defined(WINDOWS) /* Write LF newlines, not CRLFs. */ if (setmode(fileno(stdout), O_BINARY) == -1) { fprintf(stderr, "qfind: failed to binmode output: %s\n", strerror(errno)); terminate(EXIT_FAILURE); } #endif } /******************************************************************************/ /* Process the command line arguments. */ static int process_argv(int argc, char *argv[]) { if (argc <= 0) return 0; ++argv; --argc; /* Skip program name (assume it is "qfind"). */ while (argc > 0) { if (argv[0][0] == '-') { char *arg = argv[0]; ++argv; --argc; if (process_option(arg)) continue; } break; } return argc; /* # of remaining non option (ie. DIR) arguments. */ } /******************************************************************************/ /* Process an option argument. */ static int process_option(char *arg) { char opt; if (*arg++ != '-') return 0; /* Handle "--" as a special case. */ if (arg[0] == '-' && arg[1] == '\0') return 0; while ((opt = *arg++) != '\0') { switch (opt) { #if USE_FINDFILE case 'b': slash = '\\'; slosh = '/'; break; case 't': flags |= TRUNCATED_FILENAMES; break; case 'y': flags |= SKIP_HIDDEN_FILES; break; #else case 'a': flags |= ANY_FILE; break; case 'g': case 'G': case 'u': case 'U': parse_id_arg(arg, opt); goto done; case 'l': flags |= FOLLOW_LINKS_NO_REPS; flags &= ~FOLLOW_LINKS_ALL; /* cf. -Ll */ break; case 'L': flags |= FOLLOW_LINKS_ALL; flags &= ~FOLLOW_LINKS_NO_REPS; /* cf. -lL */ break; case 'm': flags |= XDEV; break; case 'r': flags |= SKIP_NON_READABLE_FILES_ALWAYS; break; case 'R': flags |= SKIP_NON_READABLE_FILES_IF_FREE; break; #endif case 'c': flags |= CONSIDER_CTIME; break; case 'd': parse_name_list(arg, &xd); goto done; case 'e': case 'p': parse_ext_arg(arg, opt); goto done; case 'h': print_help(); terminate(EXIT_SUCCESS); break; case 'i': filename_cmp = mystricmp; break; case 'n': filename_cmp = strnumcmp; break; case 'o': flags |= SKIP_DOT_FILES; break; case 's': flags |= SILENT; break; case 'v': parse_name_list(arg, &xv); goto done; case 'x': filename_cmp = NULL; break; case 'z': case 'Z': skip_zero_size_files(opt); break; case 'B': flags |= PRINT_DIR_LAST; if (!(flags & PRINT_DIRS_ONLY)) flags |= PRINT_DIRS_ALSO; break; case 'D': parse_name_list(arg, &xD); goto done; case 'E': parse_depth_arg(arg); goto done; case 'J': case 'K': parse_size_arg(arg, opt); goto done; case 'M': parse_match_arg(arg); goto done; case 'N': case 'O': parse_time_arg(arg, opt); goto done; case 'S': filename_cmp = strcmp; break; case 'T': flags |= PRINT_TAIL; break; case 'V': puts("qfind v" QFIND_VERSION " " QFIND_COPYRIGHT); terminate(EXIT_SUCCESS); break; case 'X': #if !USE_FINDFILE flags |= NO_CHDIR; #endif break; case 'Y': flags |= NOCASE_MATCH_STR_TEST; break; case '0': flags |= PRINT0; break; case ':': flags |= PRINT_DIRS_ONLY; flags &= ~PRINT_DIRS_ALSO; /* cf. -+: */ break; case '+': flags |= PRINT_DIRS_ALSO; flags &= ~PRINT_DIRS_ONLY; /* cf. -:+ */ break; case '/': flags |= PRINT_DIR_TRAILING_SLASH; break; default: fprintf(stderr, "qfind: unknown option '%c'\n", opt); terminate(EXIT_FAILURE); break; } } done: return 1; } /******************************************************************************/ /* Do any post command line argument processing setup here. */ static void prepare(void) { if (flags & MATCH_STR_TEST) stbm_init(); /* If both -N & -O, or -J & -K, are specified then assume the ranges are * intended to be between the two given values. */ if ((flags & MTIME_TEST) == MTIME_TEST) if (opt_N_time > opt_O_time) Swap(time_t, opt_N_time, opt_O_time); if ((flags & MIN_SIZE_TEST) && (flags & MAX_SIZE_TEST)) { #if USE_FINDFILE if (high_low_gt(min_size_hl.high, min_size_hl.low, max_size_hl.high, max_size_hl.low)) Swap(struct high_low, max_size_hl, min_size_hl); #else if (min_size > max_size) Swap(filesize_t, max_size, min_size); #endif } alloc_dirname(DIRNAME_BUFSIZE); alloc_ss(&dir_stack, DIR_STACK_ELEMENTS, DIR_STACK_BUFSIZE); if (filename_cmp) alloc_ss(&file_stack, FILE_STACK_ELEMENTS, FILE_STACK_BUFSIZE); fix_xD(); init_ll_firstchar(&xD); init_ll_firstchar(&xp); init_ll_firstchar(&xd); init_ll_firstchar(&xv); } /******************************************************************************/ /* Print the (regular) files in a directory, then recurse on its subdirectories. * NB. dirname is either "" or terminated with a slash. */ static void scan_dir(void) { size_t i, num_dirs, this_dirname_len; struct stack_state dss; #if !USE_FINDFILE int cwd_fd = -1; /* Only used if FOLLOW_LINKS. */ #endif save_stack_state(&dss, &dir_stack); if (!(flags & PRINT_DIR_LAST)) print_directory(); print_files(); dss.end_total = dir_stack.total; num_dirs = dss.end_total - dss.start_total; sort_filenames(&dir_stack, dss.start_total, num_dirs); this_dirname_len = dirname_len; ++depth; for (i = dss.start_total; i < dss.end_total; ++i) { const char *d = get_str(&dir_stack, i); append_dirname(d, this_dirname_len); if (is_dirname_excluded()) continue; #if USE_FINDFILE scan_dir(); #else scan_dir2(d, &cwd_fd, this_dirname_len); #endif } --depth; #if !USE_FINDFILE close_cwd_fd(cwd_fd); #endif restore_dirname(this_dirname_len); if (flags & PRINT_DIR_LAST) print_directory(); restore_stack_state(&dss, &dir_stack); } /******************************************************************************/ static inline void save_stack_state(struct stack_state *p, const struct strstack *ssp) { p->next_offset = ssp->offset.next; p->start_total = ssp->total; } /******************************************************************************/ static inline void restore_stack_state(const struct stack_state *p, struct strstack *ssp) { ssp->offset.next = p->next_offset; ssp->total = p->start_total; } /******************************************************************************/ /* Print the files in a directory according to the command line options. * NB. dirname is either "" or terminated with a slash. */ static void print_files(void) { #if USE_FINDFILE const char *f, *s; WIN32_FIND_DATA fData; HANDLE find_han; if ((find_han = get_first_file(&fData)) == INVALID_HANDLE_VALUE) { dirwarn("", GetLastError()); return; } while (1) { if (!*fData.cAlternateFileName) f = fData.cFileName; else if (flags & TRUNCATED_FILENAMES) f = fData.cAlternateFileName; else { /* Any "?"s in cFileName indicate characters not in current code * page; in which case use cAlternateFileName instead. */ f = fData.cFileName; for (s = f; *s; ++s) { if (*s == '?') { f = fData.cAlternateFileName; break; } } } if ((flags & SKIP_DOT_FILES) && f[0] == '.') goto next_file; if ((flags & SKIP_HIDDEN_FILES) && (fData.dwFileAttributes & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM))) goto next_file; if (fData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { if (f[0] == '.' && (!f[1] || (f[1] == '.' && !f[2]))) goto next_file; /* Ignore "." and ".." directories. */ if (fData.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) { /* Avoid a potentially recursive directory tree. */ warn_on_reparse_point(f); goto next_file; } if ((flags & DEPTH_TEST) && depth >= max_depth) goto next_file; if (is_exclude_dir(f)) goto next_file; /* Found a directory to process. */ push_ss(&dir_stack, f); goto next_file; } if (flags & PRINT_DIRS_ONLY) goto next_file; if ((flags & MIN_SIZE_TEST) && high_low_lt(fData.nFileSizeHigh, fData.nFileSizeLow, min_size_hl.high, min_size_hl.low)) goto next_file; if ((flags & MAX_SIZE_TEST) && high_low_gt(fData.nFileSizeHigh, fData.nFileSizeLow, max_size_hl.high, max_size_hl.low)) goto next_file; if (flags & MTIME_TEST) { time_t t = get_mtime_from_find_data(&fData); if ( ((flags & SKIP_OLD_FILES) && t < opt_N_time) || ((flags & SKIP_NEW_FILES) && t > opt_O_time)) goto next_file; } if ((flags & EXTENSION_TEST) && is_exclude_extension(f)) goto next_file; if (is_filename_excluded(f)) goto next_file; if (flags & MATCH_STR_TEST) if (!file_contains_match_str(f, &fData)) goto next_file; found_file(f); next_file: if (!FindNextFile(find_han, &fData)) { const DWORD err = GetLastError(); if (err != ERROR_NO_MORE_FILES) dirwarn("FindNextFile error on ", err); break; } } if (FindClose(find_han) == 0) dirwarn("FindClose error on ", GetLastError()); #else DIR *dirp; struct dirent *dp; struct stat statbuf; const char *f; int is_a_link; if ((dirp = do_opendir()) == NULL) { dirwarn("can't opendir ", errno); return; } while (1) { errno = 0; if ((dp = readdir(dirp)) == NULL) { if (errno) dirwarn("can't readdir ", errno); break; } f = dp->d_name; /* Ignore "." and ".." directories. */ if (f[0] == '.') if (!f[1] || (f[1] == '.' && !f[2]) || (flags & SKIP_DOT_FILES)) continue; #ifdef HAS_D_TYPE /* Use d_type to save calling stat() and lstat() when possible. */ switch (dp->d_type) { case DT_DIR: if (!(flags & (FOLLOW_LINKS | XDEV))) goto f_is_dir; break; case DT_FIFO: case DT_CHR: case DT_BLK: case DT_SOCK: if (!(flags & ANY_FILE)) /* We don't show non regular files. */ continue; /* No break; drop through to regular file logic. */ case DT_REG: if (!(flags & NEED_TO_CALL_STAT)) goto non_statbuf_tests; break; case DT_LNK: /* Must do a stat() to see what type of file it links to. */ case DT_UNKNOWN: default: /* Must do a stat() to see what type of file it is. */ break; case DT_WHT: /* "A whiteout is a directory entry that hides any identically named * objects in the lower layers. The semantics of a whiteout for a * given name is that a lookup on that name should return ENOENT." * So, qfind should (probably?) ignore them. */ continue; } #endif /* NB. In the case of symbolic links, qfind's file tests relate to * the file *linked to*, so use stat() and not lstat(). */ if (do_stat(f, &statbuf)) { stat_failed(f); continue; } if (S_ISDIR(statbuf.st_mode)) { if ((flags & XDEV) && statbuf.st_dev != start_dev) continue; #ifdef HAS_D_TYPE f_is_dir: #endif if ((flags & DEPTH_TEST) && depth >= max_depth) continue; if (is_exclude_dir(f)) continue; /* Need to determine if it's a link to a directory. */ #ifdef HAS_D_TYPE if (dp->d_type == DT_LNK) is_a_link = 1; else if (dp->d_type == DT_DIR) is_a_link = 0; else #endif { /* NB. we can't reuse statbuf here since if it is a link to a * directory, we care about the ino/dev of the directory itself * and not the link to it. */ struct stat lstatbuf; if (do_lstat(f, &lstatbuf)) { lstat_failed(f); continue; } is_a_link = !S_ISDIR(lstatbuf.st_mode); } if (is_a_link && !(flags & FOLLOW_LINKS)) continue; found_dir(f, &statbuf, is_a_link); continue; } if (!(S_ISREG(statbuf.st_mode) || (flags & ANY_FILE))) continue; if ((flags & MIN_SIZE_TEST) && (filesize_t) statbuf.st_size < min_size) continue; if ((flags & MAX_SIZE_TEST) && (filesize_t) statbuf.st_size > max_size) continue; if ((flags & SKIP_NON_READABLE_FILES) && !(statbuf.st_mode & S_IREAD)) continue; if (flags & MTIME_TEST) { time_t t = get_mtime_from_stat_data(&statbuf); if ( ((flags & SKIP_OLD_FILES) && t < opt_N_time) || ((flags & SKIP_NEW_FILES) && t > opt_O_time)) continue; } if (flags & ID_TEST) { id_t uid = statbuf.st_uid; id_t gid = statbuf.st_gid; if ( ((flags & UID_TEST_MATCH) && uid != opt_uid) || ((flags & UID_TEST_SKIP) && uid == opt_uid) || ((flags & GID_TEST_MATCH) && gid != opt_gid) || ((flags & GID_TEST_SKIP) && gid == opt_gid)) continue; } #ifdef HAS_D_TYPE non_statbuf_tests: #endif if (flags & PRINT_DIRS_ONLY) continue; if ((flags & EXTENSION_TEST) && is_exclude_extension(f)) continue; if (is_filename_excluded(f)) continue; if (flags & MATCH_STR_TEST) if (!file_contains_match_str(f, &statbuf)) continue; found_file(f); } if (closedir(dirp) == -1) dirwarn("can't closedir ", errno); #endif if (file_stack.total) { if (file_stack.total != EMPTY_STACK) print_file_stack(); clear_ss(&file_stack); flush_output(); } } /******************************************************************************/ /* NB. We reuse the dirname buffer for writing "{dirname}{f}\n". */ static void print_file_stack(void) { size_t i, this_dirname_len = dirname_len; /* NB. filename_cmp will be non-NULL here. */ sort_ss(&file_stack, 0, file_stack.total, filename_cmp); for (i = 0; i < file_stack.total; ++i) { const char *f = get_str(&file_stack, i); append_dirname(f, this_dirname_len); write_dirname(); } restore_dirname(this_dirname_len); } /******************************************************************************/ /* We want output to be flushed to the caller asap; but calling fflush() * after every set of fwrite()s can be overkill. */ static void flush_output(void) { static unsigned int n = 0; /* If any of these flags are set then always flush. */ #define FLUSH_FLAGS (MTIME_TEST | MATCH_STR_TEST | PRINT_DIRS_ONLY) if (n < 10 || (flags & FLUSH_FLAGS)) { ++n; if (fflush(stdout) != 0) { fprintf(stderr, "qfind: fflush error: %s\n", strerror(errno)); terminate(EXIT_FAILURE); } } /* Else leave stdio to write output when buffer is full. */ } /******************************************************************************/ /* In directories containing many thousands of files it will be faster to turn * off sorting and write the filenames as we get them. */ static void found_file(const char *f) { if (filename_cmp) { push_ss(&file_stack, f); } else { size_t this_dirname_len = dirname_len; append_dirname(f, this_dirname_len); write_dirname(); restore_dirname(this_dirname_len); file_stack.total = EMPTY_STACK; /* Indicate files found & printed. */ } } /******************************************************************************/ static void write_dirname(void) { size_t len, written; const char *str; const char sep = (flags & PRINT0) ? '\0' : '\n'; /* Replace trailing slash with the newline or null separator. */ dirname[dirname_len - 1] = sep; str = dirname; len = dirname_len; if (flags & PRINT_TAIL) { str += argv_dir_len; len -= argv_dir_len; } written = fwrite(str, 1, len, stdout); /* Check for an output error cf. "qfind | more". */ if (written != len) terminate(EXIT_FAILURE); } /******************************************************************************/ static void print_directory(void) { if (flags & PRINT_DIRS) { size_t this_dirname_len = dirname_len; char *slashp = NULL; if (dirname_len == 0) { /* "" -> "./" */ dirname_len = 2; dirname[0] = '.'; dirname[1] = slash; /* No need for a terminating '\0' here. */ } /* NB. write_dirname() assumes the final character of dirname is a * slash, and replaces it with a newline. */ if (!(flags & PRINT_DIR_TRAILING_SLASH)) slashp = remove_trailing_slash(); /* NB. dirname_len unchanged! */ if (!slashp) ++dirname_len; /* Print dirname's final character. */ write_dirname(); restore_dirname(this_dirname_len); if (slashp) *slashp = slash; /* Harmless if dirname_len is zero. */ } } /******************************************************************************/ #if USE_FINDFILE static HANDLE get_first_file(WIN32_FIND_DATA *fDatap) { char *dirname_end = dirname + dirname_len; HANDLE find_han; /* Append a "*" to dirname so it can be used as a glob pattern. */ dirname_end[0] = '*'; dirname_end[1] = '\0'; find_han = FindFirstFile(dirname, fDatap); /* Remove temporary "*" from dirname. */ dirname_end[0] = '\0'; return find_han; } /******************************************************************************/ /* XXX Reparse point directories probably ought to have their file contents * listed, but their subdirectories ignored. For now just ignore them. */ static void warn_on_reparse_point(const char *f) { size_t this_dirname_len = dirname_len; append_dirname(f, this_dirname_len); dirwarn("ignoring reparse point directory ", 0); restore_dirname(this_dirname_len); } #endif /******************************************************************************/ #if !USE_FINDFILE static DIR * do_opendir(void) { const char *d; if ((flags & NO_CHDIR) && dirname_len > 0) d = dirname; else d = "."; return opendir(d); } /******************************************************************************/ static int do_stat(const char *f, struct stat *statp) { return do_stat2(f, statp, stat); } /******************************************************************************/ static int do_lstat(const char *f, struct stat *statp) { return do_stat2(f, statp, lstat); } /******************************************************************************/ static int do_stat2(const char *f, struct stat *statp, stat_fn_t stat_func) { int result; if (flags & NO_CHDIR) { size_t this_dirname_len = dirname_len; append_dirname(f, this_dirname_len); dirname[dirname_len - 1] = '\0'; /* Erase trailing slash. */ result = stat_func(dirname, statp); restore_dirname(this_dirname_len); } else { result = stat_func(f, statp); } return result; } /******************************************************************************/ /* Some of the stat error cases are not relevent for the purposes that qfind is * intended for (listing regular files) and can obscure the more interesting * warnings. */ static void stat_failed(const char *f) { switch (errno) { case ENOENT: #ifdef ELOOP case ELOOP: #endif return; /* Not a stat() error we care about. */ } do_stat_failed(f, "stat"); } /******************************************************************************/ /* NB. lstat() is only called if stat() already succeeded which makes all * lstat() errors worth reporting. */ static void lstat_failed(const char *f) { do_stat_failed(f, "lstat"); } /******************************************************************************/ static void do_stat_failed(const char *f, const char *func) { if (flags & SILENT) return; fprintf(stderr, "qfind: can't %s %s%s: %s\n", func, dirname, f, strerror(errno)); } /******************************************************************************/ /* There are a number of reasons why a file's ctime is different to its mtime: * - the file has been the argument of one of various system calls that * affect the inode, but not the file contents eg. chmod(), link(), * rename(), unlink() etc. * - various file archive extraction programs such as unzip create files with * the mtime of the original archived file, but sets the ctime to the time * when it was created. */ static time_t get_mtime_from_stat_data(const struct stat *statp) { time_t mt = statp->st_mtime; if (flags & CONSIDER_CTIME) { time_t ct = statp->st_ctime; if (ct > mt) mt = ct; } return mt; } /******************************************************************************/ static char * get_cwd(void) { char *buf = NULL; size_t bufsize = 280; while (1) { buf = CPP_CAST(char *) Realloc(buf, bufsize); if (getcwd(buf, bufsize)) return buf; if (errno != ERANGE) { fprintf(stderr, "qfind: getcwd failed: %s\n", strerror(errno)); free(buf); terminate(EXIT_FAILURE); } bufsize *= 2; } } /******************************************************************************/ static void chdir_to_start_dir(void) { static int first_time = 1; if (first_time) { first_time = 0; return; /* We've yet to chdir away from starting directory. */ } if (chdir(start_dir)) { fprintf(stderr, "qfind: can't chdir back to %s: %s\n", start_dir, strerror(errno)); terminate(EXIT_FAILURE); } } /******************************************************************************/ static void close_cwd_fd(int fd) { if (fd >= 0) if (close(fd) != 0) dirwarn("can't close ", errno); } /******************************************************************************/ static void scan_dir2(const char *d, int *cwd_fdp, size_t old_dirname_len) { struct dir_info dinfo; clear_dir_info(&dinfo); if (flags & FOLLOW_LINKS) { get_dir_info(d, &dinfo); if (in_dirid_list(dinfo.di.ino, dinfo.di.dev)) return; if (!(flags & NO_CHDIR) && dinfo.is_a_link) if (!open_cwd(cwd_fdp, old_dirname_len)) return; } if (!(flags & NO_CHDIR)) if (!chdir_into(d)) return; if (flags & FOLLOW_LINKS) push_dirid_list(dinfo.di.ino, dinfo.di.dev); scan_dir(); /* recurse */ if (flags & FOLLOW_LINKS_ALL) pop_dirid_list(dinfo.di.ino, dinfo.di.dev); if (!(flags & NO_CHDIR)) chdir_up(*cwd_fdp); } /******************************************************************************/ static void clear_dir_info(struct dir_info *dinfop) { dinfop->di.ino = 0; dinfop->di.dev = 0; dinfop->is_a_link = 0; } /******************************************************************************/ static int open_cwd(int *cwd_fdp, size_t old_dirname_len) { if (*cwd_fdp == -1) { /* First time through for this dirname. */ if ((*cwd_fdp = open(".", OPEN_READONLY_FLAGS)) == -1) { /* Since we have yet to chdir() into dirname itself, the error * message should refer to the previous dirname. NB. As the open() * has failed, we are finished with the current dirname. */ restore_dirname(old_dirname_len); dirwarn("can't open ", errno); *cwd_fdp = -2; } } return *cwd_fdp >= 0; } /******************************************************************************/ static int chdir_into(const char *d) { if (chdir(d)) { dirwarn("can't chdir into ", errno); return 0; } return 1; } /******************************************************************************/ /* XXX No check is made to ensure that the directory we 'return' to is the * original parent directory. This can happen if we chdir() into an automounted * directory, in which case incorrect output (and errors) will be generated. * A (partial) solution to this problem is to use the -X option. */ static void chdir_up(int fd) { if (fd < 0) { if (chdir("..")) dirdie("can't chdir(\"..\") from ", errno); } else { #if !defined(WINDOWS) /* No fchdir() on Windows. */ if (fchdir(fd)) dirdie("can't fchdir back from ", errno); #endif } } /******************************************************************************/ static void dirdie(const char *msg, err_t err) { flags &= ~SILENT; dirwarn(msg, err); terminate(EXIT_FAILURE); } #endif /******************************************************************************/ /* NB. dirname is either "" (which corresponds to ".") or ends in a slash. * If the latter, we remove the slash prior to printing. */ static void dirwarn(const char *msg, err_t err) { char *slashp = NULL; const char *dir; if (flags & SILENT) return; if (*dirname) { slashp = remove_trailing_slash(); dir = dirname; } else dir = "."; if (err) fprintf(stderr, "qfind: %s%s: %s\n", msg, dir, get_error_msg(err)); else fprintf(stderr, "qfind: %s%s\n", msg, dir); if (slashp) *slashp = slash; } /******************************************************************************/ /* Remove any trailing slash from the dirname (except for the case where it's * a root directory). */ static char * remove_trailing_slash(void) { if (dirname_len > 0 && !( (dirname_len == 1 && dirname[0] == slash) #if defined(WINDOWS) || (dirname_len == 3 && dirname[1] == ':' && dirname[2] == slash) #endif )) { char *end = dirname + dirname_len - 1; if (*end == slash) { *end = '\0'; return end; } } return NULL; } /******************************************************************************/ static char * get_error_msg(err_t err) { #if USE_FINDFILE static char buf[200]; if (FormatMessage( /* NB. returns 0 on error. */ FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), /* Default language. */ buf, sizeof(buf) - 1, NULL )) { /* Chomp any trailing newline from the error message. Needed! */ size_t len = strlen(buf); if (len >= 1 && buf[len - 1] == '\n') { buf[len - 1] = '\0'; if (len >= 2 && buf[len - 2] == '\r') /* A CRLF newline. */ buf[len - 2] = '\0'; } } else { /* NB. err_t => DWORD => unsigned long */ sprintf(buf, "error code %lu (FormatMessage error %lu)", (unsigned long) err, (unsigned long) GetLastError()); } return (char *) buf; #else return strerror(err); #endif } /******************************************************************************/ /* By default we leave the OS to reclaim alloc'd memory on program exit. */ static void terminate(int status) { fflush(stdout); #ifdef QFIND_CLEAN_UP clean_up(); #endif exit(status); } /******************************************************************************/ static void die_msg(const char *msg) { fprintf(stderr, "qfind: %s\n", msg); terminate(EXIT_FAILURE); } /******************************************************************************/ static void die_bad_arg(const char *name, const char *arg) { fprintf(stderr, "qfind: bad %s argument '%s'\n", name, arg); terminate(EXIT_FAILURE); } /******************************************************************************/ #ifdef QFIND_CLEAN_UP static void clean_up(void) { free_ll(&xd); free_ll(&xD); free_ll(&xp); free_ll(&xv); free_ss(&dir_stack); free_ss(&file_stack); free(dirname); #if !USE_FINDFILE free(start_dir); free_dirid_list(); #endif free(stbm); } /******************************************************************************/ static void free_ll(struct lookup_list *llp) { free_ss(&llp->ss); } /******************************************************************************/ static void free_ss(struct strstack *ssp) { free(ssp->str.buf); free(ssp->offset.list); } /******************************************************************************/ #if !USE_FINDFILE static void free_dirid_list(void) { size_t i; for (i = 0; i < NELEMS(dirid_list); ++i) { free(dirid_list[i].list); } } #endif #endif /******************************************************************************/ #if USE_FINDFILE /* Need to convert between FILETIME's and time_t's. */ /* Taken from Perl's "win32/win32.c". */ #ifdef __GNUC__ #define Const64(x) x##LL #else #define Const64(x) x##i64 #endif /* Number of 100 nanosecond units from 1/1/1601 to 1/1/1970 */ #define EPOCH_BIAS Const64(116444736000000000) static time_t filetime_to_timet(const FILETIME *filetimep) { ULARGE_INTEGER li; li.LowPart = filetimep->dwLowDateTime; li.HighPart = filetimep->dwHighDateTime; li.QuadPart -= EPOCH_BIAS; /* XXX Assume time is after 1970! */ li.QuadPart /= 10000000; return (time_t) li.QuadPart; /* XXX Assume time is before 2038! */ } /******************************************************************************/ /* NB. it's possible to have a file whose LastWriteTime is older than its * CreationTime eg. a file extracted from an archive retains the 'lwt' from * the original file while its 'ct' is when it was extracted. */ static time_t get_mtime_from_find_data(const WIN32_FIND_DATA *fDatap) { time_t lwt = filetime_to_timet(&fDatap->ftLastWriteTime); if (flags & CONSIDER_CTIME) { time_t ct = filetime_to_timet(&fDatap->ftCreationTime); if (ct > lwt) lwt = ct; } return lwt; } /******************************************************************************/ /* NB. Win32's localtime() does not support negative time_t's. */ static BOOL filetime_from_time(PFILETIME pFileTime, time_t Time) { struct tm *pTM = localtime(&Time); SYSTEMTIME SystemTime; FILETIME LocalTime; if (pTM == NULL) return FALSE; SystemTime.wYear = pTM->tm_year + 1900; SystemTime.wMonth = pTM->tm_mon + 1; SystemTime.wDay = pTM->tm_mday; SystemTime.wHour = pTM->tm_hour; SystemTime.wMinute = pTM->tm_min; SystemTime.wSecond = pTM->tm_sec; SystemTime.wMilliseconds = 0; return SystemTimeToFileTime(&SystemTime, &LocalTime) && LocalFileTimeToFileTime(&LocalTime, pFileTime); } /******************************************************************************/ /* See Win32::UTCFileTime for the horrors of working with UTC times and the * time_t's returned by Win32's stat(). */ static time_t to_utc_time(time_t t) { FILETIME ft; time_t utc; if (!filetime_from_time(&ft, t)) { fprintf(stderr, "qfind: failed to convert time '%ld'\n", (long) t); terminate(EXIT_FAILURE); } utc = filetime_to_timet(&ft); return utc; } #endif /******************************************************************************/ static void init_ss(struct strstack *ssp) { ssp->str.buf = NULL; ssp->str.size = 0; ssp->offset.list = NULL; ssp->offset.max = 0; ssp->offset.next = 0; ssp->total = 0; } /******************************************************************************/ static inline void alloc_ss(struct strstack *ssp, size_t elements, size_t bufsize) { /* This assumes get_alloc_size()'s doubling nature. */ grow_ss(ssp, elements / 2, bufsize / 2); } /******************************************************************************/ /* Place a string on the end of a strstack. */ static void push_ss(struct strstack *ssp, const char *str) { const size_t size = strlen(str) + 1; grow_ss(ssp, 1, size); memcpy(ssp->str.buf + ssp->offset.next, str, size); ssp->offset.list[ssp->total++] = ssp->offset.next; ssp->offset.next += size; } /******************************************************************************/ #if !USE_FINDFILE /* Place a string and arbitrary data on the end of a strstack. */ static void push_ss_x(struct strstack *ssp, const char *str, const void *data_ptr, size_t data_size) { const size_t str_size = strlen(str) + 1; const size_t total_size = str_size + data_size; char *s; grow_ss(ssp, 1, total_size); s = ssp->str.buf + ssp->offset.next; memcpy(s, str, str_size); memcpy(s + str_size, data_ptr, data_size); ssp->offset.list[ssp->total++] = ssp->offset.next; ssp->offset.next += total_size; } #endif /******************************************************************************/ /* Ensure there is space in a strstack's buffers for additional strings. */ static void grow_ss(struct strstack *ssp, size_t elements, size_t size) { size_t needed; needed = elements + ssp->total; if (ssp->offset.max < needed) { ssp->offset.max = get_alloc_size(needed); ssp->offset.list = CPP_CAST(size_t *) Realloc(ssp->offset.list, ssp->offset.max * sizeof(ssp->offset.list[0])); } needed = size + ssp->offset.next; if (ssp->str.size < needed) { ssp->str.size = get_alloc_size(needed); ssp->str.buf = CPP_CAST(char *) Realloc(ssp->str.buf, ssp->str.size); } } /******************************************************************************/ static inline char * get_str(const struct strstack *ssp, size_t i) { return ssp->str.buf + ssp->offset.list[i]; } /******************************************************************************/ static void clear_ss(struct strstack *ssp) { ssp->total = 0; ssp->offset.next = 0; } /******************************************************************************/ static void sort_ss(struct strstack *ssp, size_t from, size_t n, strcmp_fn_t cmp) { if (n > 1) { sort_data.ssp = ssp; sort_data.cmp = cmp; qsort(ssp->offset.list + from, n, sizeof(ssp->offset.list[0]), cmp_ss); } } /******************************************************************************/ static void init_ll(struct lookup_list *xptr) { size_t i; init_ss(&xptr->ss); for (i = 0; i < NELEMS(xptr->firstchar); ++i) xptr->firstchar[i] = NONE; } /******************************************************************************/ /* Organize the strings in the stack to make their lookup in the main loop * of print_files() as efficient as possible. */ static void init_ll_firstchar(struct lookup_list *xptr) { size_t i; sort_ss(&xptr->ss, 0, xptr->ss.total, strcmp); remove_reps(xptr); for (i = 0; i < xptr->ss.total; ++i) { const char *s = get_str(&xptr->ss, i); int first = UCHAR(s[0]); if (xptr->firstchar[first] == NONE) xptr->firstchar[first] = i; } xptr->firstchar[0] = NONE; /* Ensure "" is not a match. */ } /******************************************************************************/ /* Remove repeated names by 'collapsing' the offset.list[]. * NB. Since the strings are sorted, repetitions will be next to each other. */ static void remove_reps(struct lookup_list *xptr) { if (xptr->ss.total > 1) { size_t i, new_total = 1; const char *cur_str = get_str(&xptr->ss, 0); for (i = 1; i < xptr->ss.total; ++i) { const size_t offset = xptr->ss.offset.list[i]; const char *str = xptr->ss.str.buf + offset; if (strNE(cur_str, str)) { xptr->ss.offset.list[new_total++] = offset; cur_str = str; } } xptr->ss.total = new_total; /* Update total to reflect new contents. */ } } /******************************************************************************/ /* Is a name in a lookup_list? */ static int lookup(const struct lookup_list *xptr, const char *name) { const char name_0 = name[0]; const size_t first_try = xptr->firstchar[UCHAR(name_0)]; size_t i; int cmp; if (first_try != NONE) { const char name_1 = name[1]; for (i = first_try; i < xptr->ss.total; ++i) { const char *s = get_str(&xptr->ss, i); if (s[0] != name_0) break; if (s[1] == name_1) { if (s[1] == '\0') return 1; cmp = strcmp(s + 2, name + 2); if (cmp == 0) return 1; if (cmp > 0) break; } else if (UCHAR(s[1]) > UCHAR(name_1)) break; } } return 0; } /******************************************************************************/ /* Is a name in a lookup_list (ignoring case distinctions)? * Assumes all strings in the lookup_list are already in lowercase. */ static int lookup_i(const struct lookup_list *xptr, const char *name) { const char lc_name_0 = lowercase(name[0]); const size_t first_try = xptr->firstchar[UCHAR(lc_name_0)]; size_t i; int cmp; if (first_try != NONE) { const char lc_name_1 = lowercase(name[1]); for (i = first_try; i < xptr->ss.total; ++i) { const char *s = get_str(&xptr->ss, i); /* NB. is lowercase */ if (s[0] != lc_name_0) break; if (s[1] == lc_name_1) { if (lc_name_1 == '\0') return 1; cmp = stri2cmp(s + 2, name + 2); if (cmp == 0) return 1; if (cmp > 0) break; } else if (UCHAR(s[1]) > UCHAR(lc_name_1)) break; } } return 0; } /******************************************************************************/ /* Should the given filename be excluded because of its extension? * Treats the extension case insensitively. */ static int is_exclude_extension(const char *f) { const char *s, *ext = NULL; for (s = f; *s; ) if (*s++ == '.') ext = s; if (ext && lookup_i(&xp, ext)) /* The file extension was found so we exclude it if we are _not_ * matching file extensions. */ return !match_extension; /* Exclude if we are matching file extensions. */ return match_extension; } /******************************************************************************/ /* Is the given name in the list of excluded directories? */ static int is_exclude_dir(const char *f) { return xd.ss.total && lookup(&xd, f); } /******************************************************************************/ /* Is dirname in the list of excluded paths? */ static int is_dirname_excluded(void) { return xD.ss.total && lookup(&xD, dirname); } /******************************************************************************/ /* Is the given filename in the list of excluded files? */ static int is_filename_excluded(const char *f) { return xv.ss.total && lookup(&xv, f); } /******************************************************************************/ /* Treating file extensions as case insensitive is probably the correct * approach even on platforms that are case sensitive. * Consider: "readme.txt" vs "readme.TXT" vs "readme.Txt". */ static void parse_ext_arg(char *arg, char opt) { /* NB. 'multipart' extensions such as "tar.gz" are not supported. * We could cater for "-p=.c:.h", but frankly it does not seem worth it * when it's easier to just type "-p=c:h". */ if (strchr(arg, '.')) die_msg("dotted extensions not supported"); if (xp.ss.total) { /* Not the first call to parse_ext_arg(). */ if (opt == 'p') { if (!match_extension) /* cf. "qfind -e=obj -p=c" */ clear_ss(&xp.ss); } else /* opt == 'e' */ if (match_extension) /* cf. "qfind -p=c -e=obj" */ return; /* -p overrides -e. */ } match_extension = (opt == 'p'); flags |= EXTENSION_TEST; lc_str(arg); /* Store the file extensions in lowercase. */ parse_name_list(arg, &xp); } /******************************************************************************/ /* Process -d/e/p's argument (eg. "=.git:CVS:RCS") and place the info in the * relevant lookup_list. */ static void parse_name_list(const char *arg, struct lookup_list *xptr) { size_t elements = 1, size = 1; const char *s; char *d; if (arg[0] == '=') ++arg; for (s = arg; *s; ++s) { ++size; if (*s == ':') ++elements; } /* Ensure the buffers in the xptr's strstack are large enough. */ grow_ss(&xptr->ss, elements, size); /* Copy the strings into the buffer and store their offsets. */ d = xptr->ss.str.buf + xptr->ss.offset.next; for (s = arg; 1; ++s) { if (*s == ':' || !*s) { *d++ = '\0'; xptr->ss.offset.list[xptr->ss.total++] = xptr->ss.offset.next; xptr->ss.offset.next = d - xptr->ss.str.buf; if (!*s) break; } else *d++ = *s; } } /******************************************************************************/ static void parse_time_arg(const char *arg, char opt) { static time_t time_now = 0; int time_is_since_now = 0; long l; time_t t; if (arg[0] == '=') ++arg; /* Provide a syntax to specify times relative to now. * So "-N=#60" filters files older than 60 secs since now. */ if (arg[0] == '#') { time_is_since_now = 1; ++arg; if (!time_now) time_now = time(NULL); } if (!parse_l(arg, &l)) die_bad_arg("time", arg); t = l; if (time_is_since_now) t = time_now - t; #if USE_FINDFILE else { /* Must convert the time_t from Win32's stat() into a UTC correct time_t * so comparisons's to the time values in a WIN32_FIND_DATA's are valid. * NB. to ensure comparisons are made at the 1 second granularity, * we compare time_t's and not FILETIMEs. */ t = to_utc_time(t); } #endif if (opt == 'N') { flags |= SKIP_OLD_FILES; opt_N_time = t; } else { /* 'O' */ flags |= SKIP_NEW_FILES; opt_O_time = t; } } /******************************************************************************/ /* Parse a string containing a file size. */ static void parse_size_arg(const char *arg, char opt) { #if USE_FINDFILE DWORD high, low; #else filesize_t size; #endif unsigned long ul; if (arg[0] == '=') ++arg; if (!parse_ul(arg, &ul)) die_bad_arg("size", arg); #if USE_FINDFILE high = 0; low = ul; #else size = ul; #endif if (opt == 'J') { flags |= MIN_SIZE_TEST_ALWAYS; #if USE_FINDFILE min_size_hl.high = high; min_size_hl.low = low; #else min_size = size; #endif } else { /* 'K' */ flags |= MAX_SIZE_TEST; #if USE_FINDFILE max_size_hl.high = high; max_size_hl.low = low; #else max_size = size; #endif } } /******************************************************************************/ /* Parse a string containing an integer >= 0. */ static void parse_depth_arg(const char *arg) { long l; if (arg[0] == '=') ++arg; if (!parse_l(arg, &l) || l < 0 || l > INT_MAX) die_bad_arg("depth", arg); max_depth = l; flags |= DEPTH_TEST; } /******************************************************************************/ #if !USE_FINDFILE static void parse_id_arg(const char *arg, char opt) { unsigned long ul; id_t id; if (arg[0] == '=') ++arg; if (!parse_ul(arg, &ul)) die_bad_arg("id", arg); id = ul; if (opt == 'u') { flags |= UID_TEST_MATCH; flags &= ~UID_TEST_SKIP; opt_uid = id; } else if (opt == 'U') { flags |= UID_TEST_SKIP; flags &= ~UID_TEST_MATCH; opt_uid = id; } else if (opt == 'g') { flags |= GID_TEST_MATCH; flags &= ~GID_TEST_SKIP; opt_gid = id; } else { /* 'G' */ flags |= GID_TEST_SKIP; flags &= ~GID_TEST_MATCH; opt_gid = id; } } /******************************************************************************/ /* When following links, we need to store the directory's ino/dev. */ static void found_dir(const char *str, const struct stat *statp, int is_a_link) { if (flags & FOLLOW_LINKS) push_dir_ss(str, statp, is_a_link); else push_ss(&dir_stack, str); } /******************************************************************************/ /* We store the directory's dir_info after its name in the dir_stack. */ static void push_dir_ss(const char *str, const struct stat *statp, int is_a_link) { struct dir_info dinfo; dinfo.di.ino = statp->st_ino; dinfo.di.dev = statp->st_dev; dinfo.is_a_link = is_a_link; push_ss_x(&dir_stack, str, &dinfo, sizeof(dinfo)); } /******************************************************************************/ /* Copy the dir_info stored after the directory name back to the caller. */ static void get_dir_info(const char *d, struct dir_info *dinfop) { const size_t d_size = strlen(d) + 1; memcpy(dinfop, d + d_size, sizeof(*dinfop)); } /******************************************************************************/ static void init_dirid_list(void) { size_t i; for (i = 0; i < NELEMS(dirid_list); ++i) { dirid_list[i].list = NULL; dirid_list[i].size = 0; dirid_list[i].total = 0; } } /******************************************************************************/ static void clear_dirid_list(void) { size_t i; for (i = 0; i < NELEMS(dirid_list); ++i) { dirid_list[i].total = 0; } } /******************************************************************************/ static void push_dirid_list(ino_t ino, dev_t dev) { struct dirids *disp = &dirid_list[ DIRID_LIST_IDX(ino, dev) ]; size_t needed = 1 + disp->total; if (disp->size < needed) { disp->size = get_alloc_size(needed); disp->list = CPP_CAST(struct dirid *) Realloc(disp->list, disp->size * sizeof(disp->list[0])); } disp->list[disp->total].ino = ino; disp->list[disp->total].dev = dev; ++disp->total; } /******************************************************************************/ static void pop_dirid_list(ino_t ino, dev_t dev) { UNUSED_ARG(dev); /* Due to definition of DIRID_LIST_IDX. */ --dirid_list[ DIRID_LIST_IDX(ino, dev) ].total; } /******************************************************************************/ static int in_dirid_list(ino_t ino, dev_t dev) { const struct dirids *disp = &dirid_list[ DIRID_LIST_IDX(ino, dev) ]; size_t i; #if defined(WINDOWS) /* Windows' stat() does not return a useful ino. For testing purposes, * it's best to pretend we didn't find a match. */ return 0; #endif for (i = 0; i < disp->total; ++i) if (disp->list[i].ino == ino && disp->list[i].dev == dev) return 1; return 0; } #endif /******************************************************************************/ /* The directory path names in xD are compared to dirname which will have a * trailing slash. This ensures the names in xD also have a trailing slash. */ static void fix_xD(void) { const size_t total = xD.ss.total; /* The total before adding more. */ char *buf; size_t i; if (!total) return; /* Use a temporary buffer to build 'fixed' path names. */ buf = CPP_CAST(char *) Malloc(xD.ss.offset.next + 2); for (i = 0; i < total; ++i) { char *orig = get_str(&xD.ss, i); char *s, *d = buf; for (s = orig; *s; ++s) { #if USE_FINDFILE if (*s == slosh) *s = slash; #endif *d++ = *s; } if (d > buf && *(d - 1) != slash) { *d++ = slash; *d = '\0'; push_ss(&xD.ss, buf); orig[0] = '\0'; /* Erase original string. */ } } free(buf); } /******************************************************************************/ /* Skip zero size files by setting a minimum size requirement of 1. */ static void skip_zero_size_files(const char opt) { if (flags & MIN_SIZE_TEST_ALWAYS) /* cf. "qfind -G=22 -z". */ return; flags |= (opt == 'z' ? MIN_SIZE_TEST_ALWAYS : MIN_SIZE_TEST_IF_FREE); #if USE_FINDFILE min_size_hl.high = 0; min_size_hl.low = 1; #else min_size = 1; #endif } /******************************************************************************/ static void parse_match_arg(const char *arg) { size_t len; if (arg[0] == '=') ++arg; if (!stbm) stbm = CPP_CAST(struct stbm_data *) Malloc(sizeof(*stbm)); len = strlen(arg); if (!len) die_msg("missing -M argument"); if (len > STBM_MATCH_STR_BUFSIZE - 1) die_msg("-M argument too long"); stbm->match_str_len = len; memcpy(stbm->match_str, arg, stbm->match_str_len + 1); flags |= MATCH_STR_TEST; } /******************************************************************************/ static void stbm_init(void) { const char *s, *end; unsigned int i; if (flags & NOCASE_MATCH_STR_TEST) lc_str(stbm->match_str); for (i = 0; i < NELEMS(stbm->skip_table); ++i) stbm->skip_table[i] = stbm->match_str_len; /* NB. >= 1 */ for (s = stbm->match_str, end = s + stbm->match_str_len - 1; s < end; ++s) { stbm->skip_table[UCHAR(*s)] = end - s; if (flags & NOCASE_MATCH_STR_TEST) stbm->skip_table[uppercase(*s)] = end - s; } stbm->md2 = stbm->skip_table[UCHAR(*end)]; stbm->skip_table[UCHAR(*end)] = 0; if (flags & NOCASE_MATCH_STR_TEST) stbm->skip_table[uppercase(*end)] = 0; /* Place sentinels at the end of the read buffer. */ stbm_setup_sentinels(STBM_READ_SIZE); } /******************************************************************************/ static int file_contains_match_str(const char *f, const filedata_t *fdp) { const unsigned int overlap = stbm->match_str_len - 1; char *read_buf, *search_buf, *end_ptr; size_t this_dirname_len = dirname_len; int bytes_read; int found = 0; fh_t fh; if (!file_size_ok(fdp)) return 0; /* File too small to contain match_str. */ /* Store the file's name in dirname[]. */ append_dirname(f, this_dirname_len); dirname[dirname_len - 1] = '\0'; /* Trim the trailing slash. */ fh = open_file(f); if (invalid_fh(fh)) { dirwarn("can't open ", get_err()); goto done; } read_buf = stbm->buffer + STBM_MATCH_STR_BUFSIZE; search_buf = read_buf; while (1) { if (!(bytes_read = fill_read_buffer(fh, read_buf))) break; end_ptr = read_buf + bytes_read; if (buffer_contains_match_str(search_buf, end_ptr - search_buf)) { found = 1; break; } search_buf = read_buf - overlap; /* Copy end of previous read to start of the next to ensure we find * matches sitting across a boundary. */ memcpy(search_buf, end_ptr - overlap, overlap); } if (!close_fh_ok(fh)) dirwarn("close error ", get_err()); done: restore_dirname(this_dirname_len); return found; } /******************************************************************************/ static int file_size_ok(const filedata_t *fdp) { size_t size; #if USE_FINDFILE const WIN32_FIND_DATA *fDatap = fdp; if (fDatap->nFileSizeHigh) /* The file's size is >= 4Tb > match_str_len. */ return 1; size = fDatap->nFileSizeLow; #else const struct stat *statp = fdp; size = statp->st_size; #endif if (size < stbm->match_str_len) return 0; return 1; } /******************************************************************************/ static fh_t open_file(const char *f) { int in_file_dir = QCODE(0, !(flags & NO_CHDIR)); const char *file_path = in_file_dir ? f : dirname; fh_t fh; #if USE_FINDFILE fh = CreateFile( file_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL); #else fh = open(file_path, OPEN_READONLY_FLAGS); #endif return fh; } /******************************************************************************/ static int invalid_fh(fh_t fh) { #if USE_FINDFILE return fh == INVALID_HANDLE_VALUE; #else return fh == -1; #endif } /******************************************************************************/ static int fill_read_buffer(fh_t fh, char *read_buf) { int bytes_read; #if USE_FINDFILE { DWORD n = 0; int ok = ReadFile(fh, read_buf, STBM_READ_SIZE, &n, NULL); bytes_read = ok ? (int) n : -1; } #else bytes_read = read(fh, read_buf, STBM_READ_SIZE); #endif if (bytes_read == 0) ; /* Reached EOF. */ else if (bytes_read < 0) { dirwarn("read error ", get_err()); bytes_read = 0; } else if (bytes_read != STBM_READ_SIZE) stbm_setup_sentinels(bytes_read); return bytes_read; } /******************************************************************************/ /* NB. gcc does not automatically inline this trivially inlinable function. */ static inline err_t get_err(void) { #if USE_FINDFILE return GetLastError(); #else return errno; #endif } /******************************************************************************/ static int close_fh_ok(fh_t fh) { #if USE_FINDFILE return CloseHandle(fh) != 0; #else return close(fh) == 0; #endif } /******************************************************************************/ static int buffer_contains_match_str(const char *buf, size_t buf_len) { if (flags & NOCASE_MATCH_STR_TEST) return stbm_find_nocase(buf, buf_len, stbm->match_str, stbm->match_str_len); else if (stbm->match_str_len == 1) return memchr(buf, UCHAR(stbm->match_str[0]), buf_len) != NULL; else return stbm_find(buf, buf_len, stbm->match_str, stbm->match_str_len); } /******************************************************************************/ /* The code below for the "Self-Tuning Boyer-Moore" string search is derived * from Grouse grep. Thanks & kudos to Brenton Hoff! * http://www.grouse.com.au/ggrep/stbm.c.html */ /* Some ideas come from Allan Dystrup's code for Tuned Boyer-Moore. * http://qualitycode.files.wordpress.com/2012/02/tbm-c.pdf */ /* Part of the speed of this version of STBM algorithm comes from not performing * any bounds checking in the skip loop stage on the 's' pointer. This means it * can go past the end of the buffer. To ensure we trigger the breakout * condition and prevent the pointer referencing unallocated memory, we fill the * end of the buffer with the last character in the stbm->match_str. * NB. skip_table[last_char_in_match_str] is zero. */ static void stbm_setup_sentinels(int n) { char *read_buf = stbm->buffer + STBM_MATCH_STR_BUFSIZE; char *end = read_buf + n; int last = UCHAR(stbm->match_str[stbm->match_str_len - 1]); memset(end, last, stbm->match_str_len); } /******************************************************************************/ /* Find the needle in the haystack! */ static int stbm_find(const char *haystack, size_t h_len, const char *needle, size_t n_len) { const char *s, *haystack_end, *needle_end; int i, neg_len, guard_offset; char guard_char; unsigned int skip; if (h_len < n_len) return 0; haystack_end = haystack + h_len; s = haystack + n_len - 1; needle_end = needle + n_len - 1; neg_len = - ((int) n_len); guard_offset = (n_len == 1) ? 0 : -1; guard_char = needle_end[guard_offset]; while (1) { skip = stbm->skip_table[UCHAR(*s)]; while (skip) { /* Use 3-fold loop unrolling for max speed. */ s += skip; skip = stbm->skip_table[UCHAR(*s)]; s += skip; skip = stbm->skip_table[UCHAR(*s)]; s += skip; skip = stbm->skip_table[UCHAR(*s)]; } if (s >= haystack_end) return 0; if (s[guard_offset] != guard_char) goto advance; for (i = guard_offset - 1; i > neg_len; --i) if (s[i] != needle_end[i]) goto mismatch; for (i = -1; i > guard_offset; --i) if (s[i] != needle_end[i]) goto mismatch; return 1; /* Match found at (s + neg_len + 1). */ mismatch: /* Use last mismatch as new guard character. */ guard_offset = i; guard_char = needle_end[guard_offset]; advance: s += stbm->md2; } } /******************************************************************************/ /* A case insensitive version of stbm_find(). * NB. The "needle" is assumed to be in lowercase. */ static int stbm_find_nocase(const char *haystack, size_t h_len, const char *needle, size_t n_len) { const char *s, *haystack_end, *needle_end; int i, neg_len, guard_offset; char guard_char; unsigned int skip; if (h_len < n_len) return 0; haystack_end = haystack + h_len; s = haystack + n_len - 1; needle_end = needle + n_len - 1; neg_len = - ((int) n_len); guard_offset = (n_len == 1) ? 0 : -1; guard_char = needle_end[guard_offset]; while (1) { skip = stbm->skip_table[UCHAR(*s)]; while (skip) { /* Use 3-fold loop unrolling for max speed. */ s += skip; skip = stbm->skip_table[UCHAR(*s)]; s += skip; skip = stbm->skip_table[UCHAR(*s)]; s += skip; skip = stbm->skip_table[UCHAR(*s)]; } if (s >= haystack_end) return 0; if (UCHAR(lowercase(s[guard_offset])) != UCHAR(guard_char)) goto advance; for (i = guard_offset - 1; i > neg_len; --i) if (UCHAR(lowercase(s[i])) != UCHAR(needle_end[i])) goto mismatch; for (i = -1; i > guard_offset; --i) if (UCHAR(lowercase(s[i])) != UCHAR(needle_end[i])) goto mismatch; return 1; /* Match found at (s + neg_len + 1). */ mismatch: /* Use last mismatch as new guard character. */ guard_offset = i; guard_char = needle_end[guard_offset]; advance: s += stbm->md2; } } /******************************************************************************/ #if USE_FINDFILE /* Compare a high/low integer cf. the file size in a WIN32_FIND_DATA. * Returns 1 if a > b, -1 if a < b, and 0 if a == b. */ static inline int high_low_cmp(DWORD ah, DWORD al, DWORD bh, DWORD bl) { int result; if (ah == bh) { if (al > bl) result = 1; else if (al < bl) result = -1; else result = 0; } else if (ah > bh) result = 1; else result = -1; return result; } #endif /******************************************************************************/ static void sort_filenames(struct strstack *ssp, size_t from, size_t n) { if (filename_cmp) sort_ss(ssp, from, n, filename_cmp); } /******************************************************************************/ /* Compare two strings (ignoring case differences) where the first string is * known to be in lower case. */ static int stri2cmp(const char *s1, const char *s2) { while (1) { const int c1 = UCHAR(*s1); const int c2 = lowercase(*s2); if (c1 != c2) return c1 - c2; if (c1 == '\0') return 0; ++s1; ++s2; } } /******************************************************************************/ /* Qsort comparison function for the strings inside a strstack. */ static int cmp_ss(const void *elem1, const void *elem2) { const size_t offset1 = *((const size_t *) elem1); const size_t offset2 = *((const size_t *) elem2); const char *s1 = sort_data.ssp->str.buf + offset1; const char *s2 = sort_data.ssp->str.buf + offset2; return sort_data.cmp(s1, s2); } /******************************************************************************/ #if !(UCHAR_MAX <= INT_MAX) #error The code assumes that UCHAR_MAX <= INT_MAX. /* ...in particular on machines where char and int are types of the same size, * the difference of two unsigned char values (including the sign bit) doesn't * fit in an int. */ #endif /* Perform a string comparison ignoring case except in a tie. * NB. This is different to the standard stricmp() and should not be used for * testing equivalence eg. mystricmp("ABC", "abc") != 0. */ static int mystricmp(const char *s1, const char *s2) { int result = 0; while (1) { const int c1 = UCHAR(*s1); const int c2 = UCHAR(*s2); if (c1 == c2) { if (!c1) break; } else { const int lc1 = to_lower(c1); const int lc2 = to_lower(c2); if (lc1 == lc2) { if (!result) result = c1 - c2; } else { result = lc1 - lc2; break; } } ++s1; ++s2; } return result; } /******************************************************************************/ /* A string comparison that considers numbers embedded within them. */ static int strnumcmp(const char *s1, const char *s2) { int case_diff = 0; while (1) { if (izdigit(*s1) && izdigit(*s2)) { int num_diff = 0, zeros = 0; /* Ignore leading zeros except in a tie. */ while (*s1 == '0' && izdigit(s1[1])) { ++s1; --zeros; } while (*s2 == '0' && izdigit(s2[1])) { ++s2; ++zeros; } while (1) { /* Both *s1 and *s2 are digits. */ if (!num_diff) num_diff = UCHAR(*s1) - UCHAR(*s2); ++s1; ++s2; if (!izdigit(*s1)) { if (izdigit(*s2)) /* # in s1 has less digits, so is < # in s2. */ return -1; if (num_diff) return num_diff; if (zeros) return zeros; break; } else if (!izdigit(*s2)) /* # in s1 has more digits, so is > # in s2. */ return 1; } } { const int c1 = UCHAR(*s1); const int c2 = UCHAR(*s2); if (c1 == c2) { if (!c1) return case_diff; } else { const int lc1 = to_lower(c1); const int lc2 = to_lower(c2); if (lc1 == lc2) { if (!case_diff) case_diff = c1 - c2; } else return lc1 - lc2; } ++s1; ++s2; } } /* Unreachable. */ } /******************************************************************************/ static void alloc_dirname(size_t bufsize) { dirname_bufsize = bufsize; dirname = CPP_CAST(char *) Malloc(dirname_bufsize); } /******************************************************************************/ /* Append to dirname the given string at the given position, plus add a * trailing slash. */ static void append_dirname(const char *d, size_t old_dirname_len) { const size_t d_len = strlen(d); size_t needed; dirname_len = old_dirname_len + d_len + 1; /* We also need space for the NUL char and get_first_file() * assumes additional space to append a "*". */ needed = dirname_len + 2; if (dirname_bufsize < needed) { dirname_bufsize = get_alloc_size(needed); dirname = CPP_CAST(char *) Realloc(dirname, dirname_bufsize); } memcpy(dirname + old_dirname_len, d, d_len); { /* Append the trailing slash. */ char *s = dirname + dirname_len; *s-- = '\0'; *s = slash; } } /******************************************************************************/ /* Lowercase a string in place. */ static void lc_str(char *str) { char *s; for (s = str; *s; ++s) *s = lowercase(*s); } /******************************************************************************/ #if USE_FINDFILE static void consistent_slashes(char *path) { char *s; for (s = path; *s; ++s) if (*s == slosh) *s = slash; } #endif /******************************************************************************/ /* Since append_dirname() appends a trailing slash, if the given DIR command * line argument either (a) already ends in a slash OR (b) is a bare drive * letter (eg. "D:"), then we need to chomp the trailing slash from dirname. * NB. dirname_len >= 2. */ static void chomp_dirname_trailing_slash(void) { if (dirname[dirname_len - 2] == slash #if defined(WINDOWS) || (dirname_len == 3 && dirname[1] == ':') #endif ) { dirname[--dirname_len] = '\0'; } } /******************************************************************************/ /* An empty DIR argument is taken to imply scan the start directory. */ static int set_dirname(const char *dir) { #if !USE_FINDFILE if (start_dir) /* NB. => (num_dir_args > 1 && !(flags & NO_CHDIR)) */ chdir_to_start_dir(); /* Ensures relative paths work. */ if (flags & (FOLLOW_LINKS | XDEV)) { const char *d = dir[0] ? dir : "."; struct stat st; if (stat(d, &st)) { fprintf(stderr, "qfind: can't stat %s: %s\n", d, strerror(errno)); return 0; } if (flags & FOLLOW_LINKS) { if (flags & FOLLOW_LINKS_ALL) clear_dirid_list(); else /* FOLLOW_LINKS_NO_REPS */ if (in_dirid_list(st.st_ino, st.st_dev)) return 0; push_dirid_list(st.st_ino, st.st_dev); } start_dev = st.st_dev; } #endif if (!dir[0]) { restore_dirname(0); /* dirname := "" */ } else { #if USE_FINDFILE /* Ensure no wildcard characters. */ if (strpbrk(dir, "*?")) { fprintf(stderr, "qfind: wildcards not supported: %s\n", dir); terminate(EXIT_FAILURE); } #endif append_dirname(dir, 0); /* dirname := "{dir}/" */ #if USE_FINDFILE consistent_slashes(dirname); #endif chomp_dirname_trailing_slash(); #if !USE_FINDFILE if (!(flags & NO_CHDIR)) if (!chdir_into(dir)) return 0; #endif } argv_dir_len = dirname_len; depth = 0; return 1; } /******************************************************************************/ static void restore_dirname(size_t len) { dirname[len] = '\0'; dirname_len = len; } /******************************************************************************/ /* XXX In theory, the arithmetic below could overflow. In practise, if this did * occur your filesystem would be so clogged as to make this bug in qfind the * least of your problems :-) */ static size_t get_alloc_size(size_t needed) { size_t alloc; if (needed == 0) { alloc = 8; } else { alloc = 2 * needed; if (alloc & 7) { alloc &= ~7u; alloc += 8; } } return alloc; } /******************************************************************************/ static void * Malloc(size_t size) { void *result = malloc(size); if (!result) out_of_memory(); return result; } /******************************************************************************/ static void * Realloc(void *ptr, size_t size) { void *result = realloc(ptr, size); if (!result) out_of_memory(); return result; } /******************************************************************************/ static void out_of_memory(void) { die_msg("Out of memory!"); } /******************************************************************************/ #if USE_FINDFILE #define F(s) s #define P(s) #else #define F(s) #define P(s) s #endif static void print_help(void) { puts( "Usage: qfind [OPTION...] [DIR...]\n" " Recursively prints out the regular files beneath a directory.\n" " If no DIR is specified, the current directory is assumed.\n" "OPTIONs:\n" P(" -a Print all non-directory files.\n") F(" -b Print backslashed path names.\n") " -B Print directory after its contents.\n" " -c -N/-O uses most-recent(ctime, mtime).\n" " -d=DIR Ignore files beneath the directory DIR.\n" " Multiple directories can be specified:\n" " qfind -d=DIR1:DIR2:DIR3 -d=DIR4\n" " -D=PATH Ignore files beneath the pathname PATH.\n" " Multiple paths can be specified:\n" " qfind -D=PATH1:PATH2 -D=PATH3\n" " -e=EXT Ignore files whose extension is EXT.\n" " Multiple extensions can be specified:\n" " qfind -e=EXT1:EXT2:EXT3 -e=EXT4\n" P(" -g=GID Only print files belonging to group GID.\n") P(" -G=GID Ignore files belonging to group GID.\n") " -i Sort the filenames ignoring case distinctions.\n" P(" -l Follow links (no repetitions).\n") P(" -L Follow links.\n") P(" -m Ignore directories on other filesystems.\n") " -n Sort the filenames wrt. embedded numbers.\n" " -o Ignore 'dot' files/directories.\n" " -p=EXT Only print files with extension EXT.\n" " This option overrides -e.\n" " Multiple extensions can be specified:\n" " qfind -p=EXT1:EXT2:EXT3 -p=EXT4\n" #ifdef HAS_D_TYPE P(" -r Only print readable files.\n") P(" -R Only print readable files (if stat info available).\n") #else P(" -r/R Only print readable files.\n") #endif " -s Silent. No error messages to STDERR.\n" F(" -t Truncate filenames ie. the classic 8.3 format.\n") P(" -u=UID Only print files belonging to user UID.\n") P(" -U=UID Ignore files belonging to user UID.\n") " -v=NAME Ignore files with the given NAME.\n" " Multiple filenames can be specified:\n" " qfind -v=NAME1:NAME2:NAME3 -v=NAME4\n" " -x Do not sort filenames.\n" F(" -y Ignore HIDDEN|SYSTEM files/directories.\n") #ifdef HAS_D_TYPE " -z Ignore zero size files.\n" " -Z Ignore zero size files (if stat info available).\n" #else " -z/Z Ignore zero size files.\n" #endif " -E=DEPTH Ignore directories deeper than DEPTH.\n" " -J=SIZE Ignore files smaller than SIZE bytes.\n" " -K=SIZE Ignore files larger than SIZE bytes.\n" " -M=STR Ignore files not containing STR.\n" " -N=TIME Ignore files older than TIME (as a time_t).\n" " -O=TIME Ignore files newer than TIME (as a time_t).\n" " -S Sort the filenames prior to printing.\n" " -T Do not print leading DIR part.\n" P(" -X Do not chdir.\n") " -Y Ignore case when using option -M.\n" " -0 Print filenames followed by null character.\n" " -: Print directories but not files.\n" " -+ Print both files and directories.\n" " -/ Print directories with a trailing slash.\n" " -V Print version information and exit.\n" " -h Print this help and exit.\n" " -- End options.\n" ); } #undef F #undef P /******************************************************************************/