emacs/lisp/eshell/em-glob.el

460 lines
17 KiB
EmacsLisp

;;; em-glob.el --- extended file name globbing -*- lexical-binding:t -*-
;; Copyright (C) 1999-2024 Free Software Foundation, Inc.
;; Author: John Wiegley <johnw@gnu.org>
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
;; The globbing code used by Eshell closely follows the syntax used by
;; zsh. Basically, here is a summary of examples:
;;
;; echo a* ; anything starting with 'a'
;; echo a#b ; zero or more 'a's, then 'b'
;; echo a##b ; one or more 'a's, then 'b'
;; echo a? ; a followed by any character
;; echo a*~ab ; 'a', then anything, but not 'ab'
;; echo c*~*~ ; all files beginning with 'c', except backups (*~)
;;
;; Recursive globbing is also supported:
;;
;; echo **/*.c ; all '.c' files at or under current directory
;; echo ***/*.c ; same as above, but traverse symbolic links
;;
;; Using argument predication, the recursive globbing syntax is
;; sufficient to replace the use of 'find <expr> | xargs <cmd>' in
;; most cases. For example, to change the readership of all files
;; belonging to 'johnw' in the '/tmp' directory or lower, use:
;;
;; chmod go-r /tmp/**/*(u'johnw')
;;
;; The glob above matches all of the files beneath '/tmp' that are
;; owned by the user 'johnw'. See [Value modifiers and predicates],
;; for more information about argument predication.
;;; Code:
(require 'esh-arg)
(require 'esh-module)
(require 'esh-util)
;;;###esh-module-autoload
(progn
(defgroup eshell-glob nil
"This module provides extended globbing syntax, similar to what is used
by zsh for filename generation."
:tag "Extended filename globbing"
:group 'eshell-module))
;;; User Variables:
;; NOTE(review): nothing in the visible part of this file runs this
;; hook; presumably the Eshell module loader does -- confirm.
(defcustom eshell-glob-load-hook nil
"A list of functions to run when `eshell-glob' is loaded."
:version "24.1" ; removed eshell-glob-initialize
:type 'hook
:group 'eshell-glob)
(defcustom eshell-glob-splice-results nil
"If non-nil, the results of glob patterns will be spliced in-place.
When splicing, the resulting command is as though the user typed
each result individually.  Otherwise, the glob results in a single
argument whose value is the list of matches."
:version "30.1"
:type 'boolean
:group 'eshell-glob)
(defcustom eshell-glob-include-dot-files nil
"If non-nil, glob patterns will match files beginning with a dot.
When nil, names beginning with a dot are excluded from the matches of a
glob component, unless that component itself begins with a dot."
:type 'boolean
:group 'eshell-glob)
(defcustom eshell-glob-include-dot-dot t
"If non-nil, glob patterns that match dots will match . and ..
When nil, the names . and .. are excluded from the matches of the last
component of a glob.  This only matters when dot files can match at
all; see `eshell-glob-include-dot-files'."
:type 'boolean
:group 'eshell-glob)
;; The default is the value of `eshell-under-windows-p', normalized to
;; t or nil by the double negation.
(defcustom eshell-glob-case-insensitive (not (not (eshell-under-windows-p)))
"If non-nil, glob pattern matching will ignore case.
`eshell-glob-entries' binds `case-fold-search' to this value while
matching file names."
:type 'boolean
:group 'eshell-glob)
(defcustom eshell-glob-show-progress nil
"If non-nil, display progress messages during a recursive glob.
The message shows the number of matches found so far and the directory
being scanned.
This option slows down recursive glob processing by quite a bit."
:type 'boolean
:group 'eshell-glob)
(defcustom eshell-error-if-no-glob nil
"If non-nil, it is an error for a glob pattern not to match.
This mimics the behavior of zsh if non-nil, but bash if nil.
When nil, `eshell-expand-glob' returns the glob text itself, without
text properties, if nothing matched."
:type 'boolean
:group 'eshell-glob)
(defcustom eshell-glob-chars-list '(?\] ?\[ ?* ?? ?~ ?\( ?\) ?| ?# ?^)
"List of additional characters used in extended globbing.
`eshell-parse-glob-chars' treats these characters as globbing
characters, and `eshell-glob-chars-regexp' builds its regexp from them."
:type '(repeat character)
:group 'eshell-glob)
(defcustom eshell-glob-translate-alist
'((?\] . "]")
(?\[ . "[")
(?^ . "^")
(?? . ".")
(?* . ".*")
(?~ . "~")
(?\( . "\\(")
(?\) . "\\)")
(?\| . "\\|")
;; `#' is a repetition operator.  When it is followed by one of
;; `*', `#', `+' or `?', both characters are consumed and become
;; `*', `+', `+' or `?' respectively; a lone `#' becomes `*'.
(?# . (lambda (str pos)
(if (and (< (1+ pos) (length str))
(memq (aref str (1+ pos)) '(?* ?# ?+ ??)))
(cons (if (eq (aref str (1+ pos)) ??)
"?"
(if (eq (aref str (1+ pos)) ?*)
"*" "+")) (+ pos 2))
(cons "*" (1+ pos))))))
"An alist for translation of extended globbing characters.
Each element has the form (CHAR . TRANSLATION).  If TRANSLATION is a
string, it replaces CHAR in the regexp built by `eshell-glob-regexp'.
If TRANSLATION is a function, `eshell-glob-regexp' calls it with two
arguments, the glob string and the position of CHAR in it.  The
function must return a cons cell (REGEXP . NEXT), where REGEXP is the
text to add to the regexp and NEXT is the position in the glob string
at which translation continues."
:type '(alist :key-type character
:value-type (choice string function))
:group 'eshell-glob)
;;; Functions:
(defun eshell-glob-initialize () ;Called from `eshell-mode' via intern-soft!
  "Set up extended globbing for the current buffer.
Prepend `eshell-glob-chars-list' to the buffer-local value of
`eshell-special-chars-outside-quoting' (when that variable is bound),
and install the buffer-local hooks that parse globbing characters and
keep the command position from being globbed."
  ;; The glob characters have to precede the existing special characters.
  (if (boundp 'eshell-special-chars-outside-quoting)
      (setq-local eshell-special-chars-outside-quoting
                  (append eshell-glob-chars-list
                          eshell-special-chars-outside-quoting)))
  (add-hook 'eshell-parse-argument-hook #'eshell-parse-glob-chars 90 t)
  (add-hook 'eshell-pre-rewrite-command-hook #'eshell-no-command-globbing
            nil t))
(defun eshell-no-command-globbing (terms)
  "Undo globbing of the command name, the first element of TERMS.
If the first element is an `eshell-expand-glob' form, optionally
wrapped in `eshell-splice-args', replace it in TERMS with the form's
argument.  TERMS is modified destructively; the return value is always
nil."
  (pcase (car terms)
    ((or `(eshell-expand-glob ,term)
         `(eshell-splice-args (eshell-expand-glob ,term)))
     (setcar terms term)))
  nil)
(defun eshell-add-glob-modifier ()
  "Register `eshell-expand-glob' as a modifier of the current argument.
If `eshell-glob-splice-results' is non-nil, also register
`eshell-splice-args' with depth 99."
  (if eshell-glob-splice-results
      (add-hook 'eshell-current-modifiers #'eshell-splice-args 99))
  (add-hook 'eshell-current-modifiers #'eshell-expand-glob))
(defun eshell-parse-glob-chars ()
  "Parse the globbing character at point, if there is one.
When the character after point is a member of `eshell-glob-chars-list',
add the glob modifier, move point past the character, and return it as
a one-character string whose `eshell-glob-char' text property is t.
Otherwise, return nil without moving point."
  (let ((char (char-after)))
    (when (memq char eshell-glob-chars-list)
      (eshell-add-glob-modifier)
      (let ((token (propertize (char-to-string char) 'eshell-glob-char t)))
        (forward-char)
        token))))
;; Cache for the function of the same name; nil until first use.
(defvar eshell-glob-chars-regexp nil)
(defsubst eshell-glob-chars-regexp ()
  "Return a regexp matching one or more globbing characters.
The regexp is built from `eshell-glob-chars-list' on first use and
cached buffer-locally in the variable `eshell-glob-chars-regexp'."
  (unless eshell-glob-chars-regexp
    (setq-local eshell-glob-chars-regexp
                (rx-to-string `(+ (any ,@eshell-glob-chars-list)) t)))
  eshell-glob-chars-regexp)
(defun eshell-parse-glob-string (glob)
  "Return a copy of GLOB with its globbing characters marked.
Each run of globbing characters gets the text property
`eshell-glob-char' with value t.  A backslash followed by any
character is an escape: the backslash is removed and the following
character is left unmarked."
  (let ((token-re (rx-to-string
                   `(or (seq (group-n 1 "\\") anychar)
                        (group-n 2 (regexp ,(eshell-glob-chars-regexp))))
                   t)))
    (with-temp-buffer
      (insert glob)
      (goto-char (point-min))
      (while (re-search-forward token-re nil t)
        (let ((escape-start (match-beginning 1)))
          (if escape-start
              ;; Drop the backslash; point ends up after the escaped
              ;; character, so it is never considered again.
              (delete-region escape-start (match-end 1))
            (when (match-beginning 2)
              (put-text-property (match-beginning 2) (match-end 2)
                                 'eshell-glob-char t)))))
      (buffer-string))))
;; Accumulator for matching file names.  `eshell-expand-glob' let-binds
;; it and `eshell-glob-entries' pushes matches onto it.
(defvar eshell-glob-matches)
;; Set to t by `eshell-glob-entries' once it has displayed a progress
;; message, so that `eshell-expand-glob' can clear the message afterwards.
(defvar message-shown)
;; Maps a recursive glob component (including its trailing slash) to the
;; marker symbol that `eshell-glob-convert' puts in its result for it.
(defvar eshell-glob-recursive-alist
'(("**/" . recurse)
("***/" . recurse-symlink)))
(defsubst eshell--glob-char-p (string index)
"Return the `eshell-glob-char' property of STRING at INDEX.
A non-nil value means the character there is a globbing character."
(get-text-property index 'eshell-glob-char string))
(defsubst eshell--contains-glob-char-p (string)
  "Return the position of the first globbing character in STRING.
That is the first character whose `eshell-glob-char' text property is
t.  Return nil if there is no such character."
  (let ((end (length string)))
    (text-property-any 0 end 'eshell-glob-char t string)))
(defun eshell--all-glob-chars-p (string)
  "Return t if STRING is non-empty and consists only of globbing characters.
A globbing character is one whose `eshell-glob-char' text property is t."
  (let ((len (length string)))
    (and (> len 0)
         (null (text-property-not-all 0 len 'eshell-glob-char t string)))))
(defun eshell-glob-regexp (pattern)
  "Convert glob-pattern PATTERN to a regular expression.
The basic syntax is:
  glob    regexp   meaning
  ----    ------   -------
  ?       .        matches any single character
  *       .*       matches any group of characters (or none)
  #       *        matches zero or more occurrences of preceding
  ##      +        matches one or more occurrences of preceding
  (x)     \\(x\\)    makes `x' a regular expression group
  |       \\|       boolean OR within an expression group
  [a-b]   [a-b]    matches a character or range
  [^a]    [^a]     excludes a character or range
This function only converts the characters in PATTERN that have the
text property `eshell-glob-char' set to t; the translation for each one
is taken from `eshell-glob-translate-alist'.  All other characters are
treated as literals.  The result is anchored at both ends.  See also
`eshell-parse-glob-chars' and `eshell-parse-glob-string'."
  (let ((pos 0)                         ; How much of PATTERN is handled.
        (pieces nil))                   ; Regexp fragments, most recent first.
    (while (string-match (eshell-glob-chars-regexp) pattern pos)
      (let* ((op-pos (match-beginning 0))
             (op (aref pattern op-pos))
             (next (1+ op-pos)))
        ;; Literal text between the previous operator and this one.
        (push (regexp-quote (substring pattern pos op-pos)) pieces)
        (if (eshell--glob-char-p pattern op-pos)
            (let ((translation
                   (cdr (or (assq op eshell-glob-translate-alist)
                            (error "Unrecognized globbing character `%c'"
                                   op)))))
              (if (stringp translation)
                  (push translation pieces)
                ;; A function translation also says where to resume.
                (let ((result (funcall translation pattern op-pos)))
                  (push (car result) pieces)
                  (setq next (cdr result)))))
          ;; A globbing character without the property is a literal.
          (push (regexp-quote (char-to-string op)) pieces))
        (setq pos next)))
    (push (regexp-quote (substring pattern pos)) pieces)
    (concat "\\`" (apply #'concat (nreverse pieces)) "\\'")))
(defun eshell-glob-p (pattern)
  "Return non-nil if PATTERN has any special glob characters.
The value is the position of the first one.  A \"~\" at the very start
of PATTERN does not count."
  (declare (obsolete nil "31.1"))
  (string-match (eshell-glob-chars-regexp)
                pattern
                ;; "~" is an infix globbing character, so a leading one
                ;; must be a literal; begin the search after it.
                (if (string-prefix-p "~" pattern) 1 0)))
(defun eshell-glob-convert-1 (glob &optional last)
  "Convert a GLOB matching a single element of a file name to regexps.
GLOB may end in a slash, which is ignored.  If LAST is non-nil, this
glob is the last element of a file name.

The result is a pair of regexps (INCLUDE . EXCLUDE), the first for file
names to include, and the second for ones to exclude.  EXCLUDE is nil
if nothing is to be excluded.

A negation like x~y is split at the first \"~\" that has the
`eshell-glob-char' text property and is neither the first nor the last
character of the element; x becomes INCLUDE and y becomes EXCLUDE."
  (let* ((glob-len (length glob))
         ;; We can't use `directory-file-name' because it strips away
         ;; text properties in the string.  The slash has to go before
         ;; looking for a negation: otherwise an element such as
         ;; \"x~y/\" is never split, and the slash would end up in the
         ;; exclusion pattern.
         (name (if (and (> glob-len 0)
                        (eq (aref glob (1- glob-len)) ?/))
                   (substring glob 0 (1- glob-len))
                 glob))
         (len (length name))
         (index 1)
         (incl name)
         excl)
    ;; Split the glob if it contains a negation like x~y.
    (while (and (not excl)
                (<= index len)
                (setq index (string-search "~" name index)))
      (if (or (not (eshell--glob-char-p name index))
              ;; A trailing "~" is a literal (e.g. backup files).
              (= (1+ index) len))
          (setq index (1+ index))
        (setq incl (substring name 0 index)
              excl (substring name (1+ index)))))
    (setq incl (eshell-glob-regexp incl)
          excl (and excl (eshell-glob-regexp excl)))
    (cond
     ;; Exclude dot files if requested.
     ((not (or eshell-glob-include-dot-files
               (string-prefix-p "." glob)))
      (setq excl (if excl
                     (concat "\\(\\`\\.\\|" excl "\\)")
                   "\\`\\.")))
     ;; Dot files may match; exclude "." and ".." from the last
     ;; element if requested.
     ((and last (not eshell-glob-include-dot-dot))
      (setq excl (if excl
                     (concat "\\(\\`\\.\\.?\\'\\|" excl "\\)")
                   "\\`\\.\\.?\\'"))))
    (cons incl excl)))
(defun eshell-glob-convert (glob)
  "Convert an Eshell glob-pattern GLOB to regexps.
The result is a list of three elements:
1. The base directory to search in.  Leading components of GLOB
   that contain no globbing characters are folded into it.
2. A list containing elements of the following forms:
   * Regexp pairs as generated by `eshell-glob-convert-1'.
   * `recurse', indicating that searches should recurse into
     subdirectories.
   * `recurse-symlink', like `recurse', but also following
     symlinks.
   Of several consecutive recursion markers, only the last is kept.
3. A boolean indicating whether to match directories only."
  (let* ((parts (eshell-split-filename glob))
         (dirs-only (string-suffix-p "/" glob))
         (base (if (and (cdr parts)
                        (file-name-absolute-p (car parts)))
                   (pop parts)
                 (file-name-as-directory ".")))
         (converted nil)
         (after-recursion nil))
    (while parts
      (let ((part (car parts)))
        ;; "~" is an infix globbing character, so one at the start of
        ;; a glob component must be a literal.
        (when (eq (aref part 0) ?~)
          (remove-text-properties 0 1 '(eshell-glob-char) part))
        (let ((recurse (cdr (assoc part eshell-glob-recursive-alist))))
          (cond
           ((and recurse
                 (eshell--all-glob-chars-p (string-trim-right part "/")))
            (if after-recursion
                (setcar converted recurse)
              (push recurse converted)
              (setq after-recursion t)))
           (t
            (if (or converted (eshell--contains-glob-char-p part))
                (push (eshell-glob-convert-1 part (null (cdr parts)))
                      converted)
              ;; No glob seen so far, so this component extends the
              ;; base directory instead.
              (setq base (concat base part)))
            (setq after-recursion nil)))))
      (setq parts (cdr parts)))
    (list base (nreverse converted) dirs-only)))
(defun eshell-expand-glob (glob)
  "Return a list of files matched by GLOB.
Each globbing character in GLOB should have a non-nil value for the text
property `eshell-glob-char' (e.g. by `eshell-parse-glob-chars') in order
for it to have syntactic meaning; otherwise, this function treats the
character literally.

The matches are sorted with `string<'.  If nothing matches, signal an
error when `eshell-error-if-no-glob' is non-nil and GLOB actually
contained a glob; otherwise return GLOB without text properties, as a
one-element list if `eshell-glob-splice-results' is non-nil.

This function is primarily intended for use within Eshell command
forms.  If you want to use an ordinary string as a glob, use
`eshell-extended-glob' instead."
  (let ((converted (eshell-glob-convert glob))
        eshell-glob-matches message-shown)
    (unwind-protect
        ;; Only search if converting GLOB actually produced some globs.
        ;; There can be none for remote file names using "~", like
        ;; "/ssh:remote:~/file.txt": during Eshell argument parsing, we
        ;; can't always be sure if the "~" is a home directory
        ;; reference or part of a glob (e.g. if the argument was
        ;; assembled from variables).
        (if (cadr converted)
            (apply #'eshell-glob-entries converted))
      (if message-shown
          (message nil)))
    (if eshell-glob-matches
        (sort eshell-glob-matches #'string<)
      (when (and eshell-error-if-no-glob (cadr converted))
        (error "No matches found: %s" glob))
      (let ((literal (substring-no-properties glob)))
        (if eshell-glob-splice-results
            (list literal)
          literal)))))
(defun eshell-extended-glob (glob)
  "Return a list of files matched by GLOB, an ordinary string.
GLOB is first parsed with `eshell-parse-glob-string', so a backslash
escapes the character that follows it.
If no files match, signal an error (if `eshell-error-if-no-glob'
is non-nil), or otherwise return GLOB itself.
This function almost fully supports zsh style filename generation
syntax.  Things that are not supported are:
  ^foo        for matching everything but foo
  (foo~bar)   tilde within a parenthesis group
  foo<1-10>   numeric ranges
  foo~x(a|b)  (a|b) will be interpreted as a predicate/modifier list
Mainly they are not supported because file matching is done with Emacs
regular expressions, and these cannot support the above constructs."
  (let ((parsed (eshell-parse-glob-string glob)))
    (eshell-expand-glob parsed)))
;; The glob "*" with its globbing character marked.  `eshell-glob-entries'
;; uses it as the pattern when a recursion marker is the last glob.
(defconst eshell--glob-anything (eshell-parse-glob-string "*"))
;; FIXME does this really need to abuse eshell-glob-matches, message-shown?
(defun eshell-glob-entries (path globs only-dirs)
"Match the entries in PATH against GLOBS.
GLOBS is a list of globs as converted by `eshell-glob-convert',
which see.
If ONLY-DIRS is non-nil, only match directories; otherwise, match
directories and files.
Matching names are pushed onto `eshell-glob-matches', prefixed with
PATH; the return value is not meaningful.  If a progress message is
displayed (see `eshell-glob-show-progress'), `message-shown' is set to
t.  Errors while listing PATH are ignored, in which case nothing in
PATH matches."
(let* ((entries (ignore-errors
(file-name-all-completions "" path)))
(case-fold-search eshell-glob-case-insensitive)
glob glob-remainder recurse-p)
;; A leading recursion marker applies to the glob that follows it.  If
;; nothing follows, match any name.
(if (rassq (car globs) eshell-glob-recursive-alist)
(setq recurse-p (car globs)
glob (or (cadr globs)
(eshell-glob-convert-1 eshell--glob-anything t))
glob-remainder (cddr globs))
(setq glob (car globs)
glob-remainder (cdr globs)))
(when (and recurse-p eshell-glob-show-progress)
(message "Building file list...%d so far: %s"
(length eshell-glob-matches) path)
(setq message-shown t))
;; Don't prefix the results with "./".
(when (equal path "./") (setq path ""))
(let ((incl (car glob))
(excl (cdr glob))
dirs rdirs)
(dolist (name entries)
(let* ((len (length name))
;; An entry ending in a slash is a directory.
(isdir (eq (aref name (1- len)) ?/))
pathname)
;; NAME matches if it is not excluded and it is included.
(when (let ((fname (directory-file-name name)))
(and (not (and excl (string-match excl fname)))
(string-match incl fname)))
(if glob-remainder
;; More globs follow, so only directories are of interest: they
;; are searched with the remaining globs below.
(when isdir
(push (concat path name) dirs))
;; This was the last glob, so NAME is a result.  When ONLY-DIRS
;; is non-nil, it must be a directory, and under plain `recurse'
;; it must not be a symlink.
(when (or (not only-dirs)
(and isdir
(not (and (eq recurse-p 'recurse)
(file-symlink-p
(directory-file-name
(concat path name)))))))
(push (concat path name) eshell-glob-matches))))
;; When recursing, remember every subdirectory, whether or not it
;; matched, except "./" and "../" and, under plain `recurse',
;; symlinks.
(when (and recurse-p isdir
(not (member name '("./" "../")))
(setq pathname (concat path name))
(not (and (eq recurse-p 'recurse)
(file-symlink-p
(directory-file-name pathname)))))
(push pathname rdirs))))
;; Descend into the matching directories with the remaining globs...
(dolist (dir (nreverse dirs))
(eshell-glob-entries dir glob-remainder only-dirs))
;; ...and into all subdirectories with the same globs, recursion marker
;; included.
(dolist (rdir (nreverse rdirs))
(eshell-glob-entries rdir globs only-dirs)))))
(provide 'em-glob)
;;; em-glob.el ends here