1693 lines
52 KiB
EmacsLisp
1693 lines
52 KiB
EmacsLisp
;;; rng-match.el --- matching of RELAX NG patterns against XML events -*- lexical-binding:t -*-
|
|
|
|
;; Copyright (C) 2003, 2007-2024 Free Software Foundation, Inc.
|
|
|
|
;; Author: James Clark
|
|
;; Keywords: text, hypermedia, languages, XML, RelaxNG
|
|
|
|
;; This file is part of GNU Emacs.
|
|
|
|
;; GNU Emacs is free software: you can redistribute it and/or modify
|
|
;; it under the terms of the GNU General Public License as published by
|
|
;; the Free Software Foundation, either version 3 of the License, or
|
|
;; (at your option) any later version.
|
|
|
|
;; GNU Emacs is distributed in the hope that it will be useful,
|
|
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
;; GNU General Public License for more details.
|
|
|
|
;; You should have received a copy of the GNU General Public License
|
|
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
;;; Commentary:
|
|
|
|
;; This uses the algorithm described in
|
|
;; http://www.thaiopensource.com/relaxng/derivative.html
|
|
;;
|
|
;; The schema to be used is contained in the variable
|
|
;; rng-current-schema. It has the form described in the file
|
|
;; rng-pttrn.el.
|
|
;;
|
|
;;; Code:
|
|
|
|
(require 'rng-pttrn)
|
|
(require 'rng-util)
|
|
(require 'rng-dt)
|
|
(eval-when-compile (require 'cl-lib))
|
|
|
|
(defvar rng-not-allowed-ipattern nil)
|
|
(defvar rng-empty-ipattern nil)
|
|
(defvar rng-text-ipattern nil)
|
|
|
|
(defvar rng-compile-table nil)
|
|
|
|
(defvar rng-being-compiled nil
|
|
"Contains a list of ref patterns currently being compiled.
|
|
Used to detect invalid recursive references.")
|
|
|
|
(defvar rng-ipattern-table nil)
|
|
|
|
(defvar rng-last-ipattern-index nil)
|
|
|
|
(defvar rng-match-state nil
|
|
"An ipattern representing the current state of validation.")
|
|
|
|
;;; Inline functions
|
|
|
|
(defsubst rng-update-match-state (new-state)
|
|
(if (eq new-state rng-not-allowed-ipattern)
|
|
(eq rng-match-state rng-not-allowed-ipattern)
|
|
(setq rng-match-state new-state)
|
|
t))
|
|
|
|
;;; Interned patterns
|
|
|
|
(cl-defstruct (rng--ipattern
|
|
(:constructor nil)
|
|
(:type vector)
|
|
(:copier nil)
|
|
(:constructor rng-make-ipattern
|
|
(type index name-class child nullable)))
|
|
type
|
|
index
|
|
name-class ;; Field also known as: `datatype' and `after'.
|
|
child ;; Field also known as: `value-object'.
|
|
nullable
|
|
(memo-text-typed 'unknown)
|
|
memo-map-start-tag-open-deriv
|
|
memo-map-start-attribute-deriv
|
|
memo-start-tag-close-deriv
|
|
memo-text-only-deriv
|
|
memo-mixed-text-deriv
|
|
memo-map-data-deriv
|
|
memo-end-tag-deriv)
|
|
|
|
;; I think depending on the value of `type' the two fields after `index'
|
|
;; are used sometimes for different purposes, hence the aliases here:
|
|
(defalias 'rng--ipattern-datatype 'rng--ipattern-name-class)
|
|
(defalias 'rng--ipattern-after 'rng--ipattern-name-class)
|
|
(defalias 'rng--ipattern-value-object 'rng--ipattern-child)
|
|
|
|
(defconst rng-memo-map-alist-max 10)
|
|
|
|
(defsubst rng-memo-map-get (key mm)
|
|
"Return the value associated with KEY in memo-map MM."
|
|
(let ((found (assoc key mm)))
|
|
(if found
|
|
(cdr found)
|
|
(and mm
|
|
(let ((head (car mm)))
|
|
(and (hash-table-p head)
|
|
(gethash key head)))))))
|
|
|
|
(defun rng-memo-map-add (key value mm &optional weakness)
|
|
"Associate KEY with VALUE in memo-map MM and return the new memo-map.
|
|
The new memo-map may or may not be a different object from MM.
|
|
|
|
Alists are better for small maps. Hash tables are better for large
|
|
maps. A memo-map therefore starts off as an alist and switches to a
|
|
hash table for large memo-maps. A memo-map is always a list. An empty
|
|
memo-map is represented by nil. A large memo-map is represented by a
|
|
list containing just a hash-table. A small memo map is represented by
|
|
a list whose cdr is an alist and whose car is the number of entries in
|
|
the alist. The complete memo-map can be passed to `assoc' without
|
|
problems: assoc ignores any members that are not cons cells. There is
|
|
therefore minimal overhead in successful lookups on small lists
|
|
\(which is the most common case)."
|
|
(if (null mm)
|
|
(list 1 (cons key value))
|
|
(let ((head (car mm)))
|
|
(cond ((hash-table-p head)
|
|
(puthash key value head)
|
|
mm)
|
|
((>= head rng-memo-map-alist-max)
|
|
(let ((ht (make-hash-table :test 'equal
|
|
:weakness weakness
|
|
:size (* 2 rng-memo-map-alist-max))))
|
|
(setq mm (cdr mm))
|
|
(while mm
|
|
(setq head (car mm))
|
|
(puthash (car head) (cdr head) ht)
|
|
(setq mm (cdr mm)))
|
|
(cons ht nil)))
|
|
(t (cons (1+ head)
|
|
(cons (cons key value)
|
|
(cdr mm))))))))
|
|
|
|
(defun rng-ipattern-maybe-init ()
|
|
(unless rng-ipattern-table
|
|
(setq rng-ipattern-table (make-hash-table :test 'equal))
|
|
(setq rng-last-ipattern-index -1)))
|
|
|
|
(defun rng-ipattern-clear ()
|
|
(when rng-ipattern-table
|
|
(clrhash rng-ipattern-table))
|
|
(setq rng-last-ipattern-index -1))
|
|
|
|
(defsubst rng-gen-ipattern-index ()
|
|
(setq rng-last-ipattern-index (1+ rng-last-ipattern-index)))
|
|
|
|
(defun rng-put-ipattern (key type name-class child nullable)
|
|
(let ((ipattern
|
|
(rng-make-ipattern type
|
|
(rng-gen-ipattern-index)
|
|
name-class
|
|
child
|
|
nullable)))
|
|
(puthash key ipattern rng-ipattern-table)
|
|
ipattern))
|
|
|
|
(defun rng-get-ipattern (key)
|
|
(gethash key rng-ipattern-table))
|
|
|
|
(or rng-not-allowed-ipattern
|
|
(setq rng-not-allowed-ipattern
|
|
(rng-make-ipattern 'not-allowed -3 nil nil nil)))
|
|
|
|
(or rng-empty-ipattern
|
|
(setq rng-empty-ipattern
|
|
(rng-make-ipattern 'empty -2 nil nil t)))
|
|
|
|
(or rng-text-ipattern
|
|
(setq rng-text-ipattern
|
|
(rng-make-ipattern 'text -1 nil nil t)))
|
|
|
|
(defconst rng-const-ipatterns
|
|
(list rng-not-allowed-ipattern
|
|
rng-empty-ipattern
|
|
rng-text-ipattern))
|
|
|
|
(defun rng-intern-after (child after)
|
|
(if (eq child rng-not-allowed-ipattern)
|
|
rng-not-allowed-ipattern
|
|
(let ((key (list 'after
|
|
(rng--ipattern-index child)
|
|
(rng--ipattern-index after))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'after
|
|
after
|
|
child
|
|
nil)))))
|
|
|
|
(defun rng-intern-attribute (name-class ipattern)
|
|
(if (eq ipattern rng-not-allowed-ipattern)
|
|
rng-not-allowed-ipattern
|
|
(let ((key (list 'attribute
|
|
name-class
|
|
(rng--ipattern-index ipattern))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'attribute
|
|
name-class
|
|
ipattern
|
|
nil)))))
|
|
|
|
(defun rng-intern-data (dt matches-anything)
|
|
(let ((key (list 'data dt)))
|
|
(or (rng-get-ipattern key)
|
|
(let ((ipattern (rng-put-ipattern key
|
|
'data
|
|
dt
|
|
nil
|
|
matches-anything)))
|
|
(setf (rng--ipattern-memo-text-typed ipattern)
|
|
(not matches-anything))
|
|
ipattern))))
|
|
|
|
(defun rng-intern-data-except (dt ipattern)
|
|
(let ((key (list 'data-except dt ipattern)))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'data-except
|
|
dt
|
|
ipattern
|
|
nil))))
|
|
|
|
(defun rng-intern-value (dt obj)
|
|
(let ((key (list 'value dt obj)))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'value
|
|
dt
|
|
obj
|
|
nil))))
|
|
|
|
(defun rng-intern-one-or-more (ipattern)
|
|
(or (rng-intern-one-or-more-shortcut ipattern)
|
|
(let ((key (cons 'one-or-more
|
|
(list (rng--ipattern-index ipattern)))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'one-or-more
|
|
nil
|
|
ipattern
|
|
(rng--ipattern-nullable ipattern))))))
|
|
|
|
(defun rng-intern-one-or-more-shortcut (ipattern)
|
|
(cond ((eq ipattern rng-not-allowed-ipattern)
|
|
rng-not-allowed-ipattern)
|
|
((eq ipattern rng-empty-ipattern)
|
|
rng-empty-ipattern)
|
|
((eq (rng--ipattern-type ipattern) 'one-or-more)
|
|
ipattern)
|
|
(t nil)))
|
|
|
|
(defun rng-intern-list (ipattern)
|
|
(if (eq ipattern rng-not-allowed-ipattern)
|
|
rng-not-allowed-ipattern
|
|
(let ((key (cons 'list
|
|
(list (rng--ipattern-index ipattern)))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'list
|
|
nil
|
|
ipattern
|
|
nil)))))
|
|
|
|
(defun rng-intern-group (ipatterns)
|
|
"Return an ipattern for the list of group members in IPATTERNS."
|
|
(or (rng-intern-group-shortcut ipatterns)
|
|
(let* ((tem (rng-normalize-group-list ipatterns))
|
|
(normalized (cdr tem)))
|
|
(or (rng-intern-group-shortcut normalized)
|
|
(let ((key (cons 'group
|
|
(mapcar #'rng--ipattern-index normalized))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'group
|
|
nil
|
|
normalized
|
|
(car tem))))))))
|
|
|
|
(defun rng-intern-group-shortcut (ipatterns)
|
|
"Try to shortcut interning a group list.
|
|
If successful, return the interned pattern. Otherwise return nil."
|
|
(while (and ipatterns
|
|
(eq (car ipatterns) rng-empty-ipattern))
|
|
(setq ipatterns (cdr ipatterns)))
|
|
(if ipatterns
|
|
(let ((ret (car ipatterns)))
|
|
(if (eq ret rng-not-allowed-ipattern)
|
|
rng-not-allowed-ipattern
|
|
(setq ipatterns (cdr ipatterns))
|
|
(while (and ipatterns ret)
|
|
(let ((tem (car ipatterns)))
|
|
(cond ((eq tem rng-not-allowed-ipattern)
|
|
(setq ret tem)
|
|
(setq ipatterns nil))
|
|
((eq tem rng-empty-ipattern)
|
|
(setq ipatterns (cdr ipatterns)))
|
|
(t
|
|
;; Stop here rather than continuing
|
|
;; looking for not-allowed patterns.
|
|
;; We do a complete scan elsewhere.
|
|
(setq ret nil)))))
|
|
ret))
|
|
rng-empty-ipattern))
|
|
|
|
(defun rng-normalize-group-list (ipatterns)
|
|
"Normalize a list containing members of a group.
|
|
Expands nested groups, removes empty members, handles notAllowed.
|
|
Returns a pair whose car says whether the list is nullable and whose
|
|
cdr is the normalized list."
|
|
(let ((nullable t)
|
|
(result nil)
|
|
member)
|
|
(while ipatterns
|
|
(setq member (car ipatterns))
|
|
(setq ipatterns (cdr ipatterns))
|
|
(when nullable
|
|
(setq nullable (rng--ipattern-nullable member)))
|
|
(cond ((eq (rng--ipattern-type member) 'group)
|
|
(setq result
|
|
(nconc (reverse (rng--ipattern-child member))
|
|
result)))
|
|
((eq member rng-not-allowed-ipattern)
|
|
(setq result (list rng-not-allowed-ipattern))
|
|
(setq ipatterns nil))
|
|
((not (eq member rng-empty-ipattern))
|
|
(setq result (cons member result)))))
|
|
(cons nullable (nreverse result))))
|
|
|
|
(defun rng-intern-interleave (ipatterns)
|
|
(or (rng-intern-group-shortcut ipatterns)
|
|
(let* ((tem (rng-normalize-interleave-list ipatterns))
|
|
(normalized (cdr tem)))
|
|
(or (rng-intern-group-shortcut normalized)
|
|
(let ((key (cons 'interleave
|
|
(mapcar #'rng--ipattern-index normalized))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'interleave
|
|
nil
|
|
normalized
|
|
(car tem))))))))
|
|
|
|
(defun rng-normalize-interleave-list (ipatterns)
|
|
"Normalize a list containing members of an interleave.
|
|
Expands nested groups, removes empty members, handles notAllowed.
|
|
Returns a pair whose car says whether the list is nullable and whose
|
|
cdr is the normalized list."
|
|
(let ((nullable t)
|
|
(result nil)
|
|
member)
|
|
(while ipatterns
|
|
(setq member (car ipatterns))
|
|
(setq ipatterns (cdr ipatterns))
|
|
(when nullable
|
|
(setq nullable (rng--ipattern-nullable member)))
|
|
(cond ((eq (rng--ipattern-type member) 'interleave)
|
|
(setq result
|
|
(append (rng--ipattern-child member)
|
|
result)))
|
|
((eq member rng-not-allowed-ipattern)
|
|
(setq result (list rng-not-allowed-ipattern))
|
|
(setq ipatterns nil))
|
|
((not (eq member rng-empty-ipattern))
|
|
(setq result (cons member result)))))
|
|
(cons nullable (sort result 'rng-compare-ipattern))))
|
|
|
|
;; Would be cleaner if this didn't modify IPATTERNS.
|
|
|
|
(defun rng-intern-choice (ipatterns)
|
|
"Return a choice ipattern for the list of choices in IPATTERNS.
|
|
May alter IPATTERNS."
|
|
(or (rng-intern-choice-shortcut ipatterns)
|
|
(let* ((tem (rng-normalize-choice-list ipatterns))
|
|
(normalized (cdr tem)))
|
|
(or (rng-intern-choice-shortcut normalized)
|
|
(rng-intern-choice1 normalized (car tem))))))
|
|
|
|
(defun rng-intern-optional (ipattern)
|
|
(cond ((rng--ipattern-nullable ipattern) ipattern)
|
|
((eq ipattern rng-not-allowed-ipattern) rng-empty-ipattern)
|
|
(t (rng-intern-choice1
|
|
;; This is sorted since the empty pattern
|
|
;; is before everything except not allowed.
|
|
;; It cannot have a duplicate empty pattern,
|
|
;; since it is not nullable.
|
|
(cons rng-empty-ipattern
|
|
(if (eq (rng--ipattern-type ipattern) 'choice)
|
|
(rng--ipattern-child ipattern)
|
|
(list ipattern)))
|
|
t))))
|
|
|
|
|
|
(defun rng-intern-choice1 (normalized nullable)
|
|
(let ((key (cons 'choice
|
|
(mapcar #'rng--ipattern-index normalized))))
|
|
(or (rng-get-ipattern key)
|
|
(rng-put-ipattern key
|
|
'choice
|
|
nil
|
|
normalized
|
|
nullable))))
|
|
|
|
(defun rng-intern-choice-shortcut (ipatterns)
|
|
"Try to shortcut interning a choice list.
|
|
If successful, return the interned pattern. Otherwise return nil."
|
|
(while (and ipatterns
|
|
(eq (car ipatterns)
|
|
rng-not-allowed-ipattern))
|
|
(setq ipatterns (cdr ipatterns)))
|
|
(if ipatterns
|
|
(let ((ret (car ipatterns)))
|
|
(setq ipatterns (cdr ipatterns))
|
|
(while (and ipatterns ret)
|
|
(or (eq (car ipatterns) rng-not-allowed-ipattern)
|
|
(eq (car ipatterns) ret)
|
|
(setq ret nil))
|
|
(setq ipatterns (cdr ipatterns)))
|
|
ret)
|
|
rng-not-allowed-ipattern))
|
|
|
|
(defun rng-normalize-choice-list (ipatterns)
|
|
"Normalize a list of choices.
|
|
Expands nested choices, removes not-allowed members, sorts by index
|
|
and removes duplicates. Return a pair whose car says whether the
|
|
list is nullable and whose cdr is the normalized list."
|
|
(let ((sorted t)
|
|
(nullable nil)
|
|
(head (cons nil ipatterns)))
|
|
(let ((tail head)
|
|
(final-tail nil)
|
|
(prev-index -100)
|
|
(cur ipatterns)
|
|
member)
|
|
;; the cdr of tail is always cur
|
|
(while cur
|
|
(setq member (car cur))
|
|
(or nullable
|
|
(setq nullable (rng--ipattern-nullable member)))
|
|
(cond ((eq (rng--ipattern-type member) 'choice)
|
|
(setq final-tail
|
|
(append (rng--ipattern-child member)
|
|
final-tail))
|
|
(setq cur (cdr cur))
|
|
(setq sorted nil)
|
|
(setcdr tail cur))
|
|
((eq member rng-not-allowed-ipattern)
|
|
(setq cur (cdr cur))
|
|
(setcdr tail cur))
|
|
(t
|
|
(if (and sorted
|
|
(let ((cur-index (rng--ipattern-index member)))
|
|
(if (>= prev-index cur-index)
|
|
(or (= prev-index cur-index) ; will remove it
|
|
(setq sorted nil)) ; won't remove it
|
|
(setq prev-index cur-index)
|
|
;; won't remove it
|
|
nil)))
|
|
(progn
|
|
;; remove it
|
|
(setq cur (cdr cur))
|
|
(setcdr tail cur))
|
|
;; don't remove it
|
|
(setq tail cur)
|
|
(setq cur (cdr cur))))))
|
|
(setcdr tail final-tail))
|
|
(setq head (cdr head))
|
|
(cons nullable
|
|
(if sorted
|
|
head
|
|
(seq-uniq (sort head 'rng-compare-ipattern) #'eq)))))
|
|
|
|
(defun rng-compare-ipattern (p1 p2)
|
|
(< (rng--ipattern-index p1)
|
|
(rng--ipattern-index p2)))
|
|
|
|
;;; Name classes
|
|
|
|
(defsubst rng-name-class-contains (nc nm)
|
|
(if (consp nc)
|
|
(equal nm nc)
|
|
(rng-name-class-contains1 nc nm)))
|
|
|
|
(defun rng-name-class-contains1 (nc nm)
|
|
(let ((type (aref nc 0)))
|
|
(cond ((eq type 'any-name) t)
|
|
((eq type 'any-name-except)
|
|
(not (rng-name-class-contains (aref nc 1) nm)))
|
|
((eq type 'ns-name)
|
|
(eq (car nm) (aref nc 1)))
|
|
((eq type 'ns-name-except)
|
|
(and (eq (car nm) (aref nc 1))
|
|
(not (rng-name-class-contains (aref nc 2) nm))))
|
|
((eq type 'choice)
|
|
(let ((choices (aref nc 1))
|
|
(ret nil))
|
|
(while choices
|
|
(if (rng-name-class-contains (car choices) nm)
|
|
(progn
|
|
(setq choices nil)
|
|
(setq ret t))
|
|
(setq choices (cdr choices))))
|
|
ret)))))
|
|
|
|
(defun rng-name-class-possible-names (nc accum)
|
|
"Return a list of possible names that nameclass NC can match.
|
|
|
|
Each possible name should be returned as a (NAMESPACE . LOCAL-NAME)
|
|
pair, where NAMESPACE is a symbol or nil and LOCAL-NAME is a string.
|
|
NAMESPACE, if nil, matches the absent namespace. ACCUM is a list of
|
|
names which should be appended to the returned list. The returned
|
|
list may contain duplicates."
|
|
(if (consp nc)
|
|
(cons nc accum)
|
|
(when (eq (aref nc 0) 'choice)
|
|
(let ((members (aref nc 1)) member)
|
|
(while members
|
|
(setq member (car members))
|
|
(setq accum
|
|
(if (consp member)
|
|
(cons member accum)
|
|
(rng-name-class-possible-names member
|
|
accum)))
|
|
(setq members (cdr members)))))
|
|
accum))
|
|
|
|
;;; Debugging utilities
|
|
|
|
(defun rng-ipattern-to-string (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(concat (rng-ipattern-to-string
|
|
(rng--ipattern-child ipattern))
|
|
" </> "
|
|
(rng-ipattern-to-string
|
|
(rng--ipattern-after ipattern))))
|
|
((eq type 'element)
|
|
(concat "element "
|
|
(rng-name-class-to-string
|
|
(rng--ipattern-name-class ipattern))
|
|
;; we can get cycles with elements so don't print it out
|
|
" {...}"))
|
|
((eq type 'attribute)
|
|
(concat "attribute "
|
|
(rng-name-class-to-string
|
|
(rng--ipattern-name-class ipattern))
|
|
" { "
|
|
(rng-ipattern-to-string
|
|
(rng--ipattern-child ipattern))
|
|
" } "))
|
|
((eq type 'empty) "empty")
|
|
((eq type 'text) "text")
|
|
((eq type 'not-allowed) "notAllowed")
|
|
((eq type 'one-or-more)
|
|
(concat (rng-ipattern-to-string
|
|
(rng--ipattern-child ipattern))
|
|
"+"))
|
|
((eq type 'choice)
|
|
(concat "("
|
|
(mapconcat 'rng-ipattern-to-string
|
|
(rng--ipattern-child ipattern)
|
|
" | ")
|
|
")"))
|
|
((eq type 'group)
|
|
(concat "("
|
|
(mapconcat 'rng-ipattern-to-string
|
|
(rng--ipattern-child ipattern)
|
|
", ")
|
|
")"))
|
|
((eq type 'interleave)
|
|
(concat "("
|
|
(mapconcat 'rng-ipattern-to-string
|
|
(rng--ipattern-child ipattern)
|
|
" & ")
|
|
")"))
|
|
(t (symbol-name type)))))
|
|
|
|
(defun rng-name-class-to-string (nc)
|
|
(if (consp nc)
|
|
(cdr nc)
|
|
(let ((type (aref nc 0)))
|
|
(cond ((eq type 'choice)
|
|
(mapconcat 'rng-name-class-to-string
|
|
(aref nc 1)
|
|
"|"))
|
|
(t (concat (symbol-name type) "*"))))))
|
|
|
|
|
|
;;; Compiling
|
|
|
|
(defun rng-compile-maybe-init ()
|
|
(unless rng-compile-table
|
|
(setq rng-compile-table (make-hash-table :test 'eq))))
|
|
|
|
(defun rng-compile-clear ()
|
|
(when rng-compile-table
|
|
(clrhash rng-compile-table)))
|
|
|
|
(defun rng-compile (pattern)
|
|
(or (gethash pattern rng-compile-table)
|
|
(let ((ipattern (apply (get (car pattern) 'rng-compile)
|
|
(cdr pattern))))
|
|
(puthash pattern ipattern rng-compile-table)
|
|
ipattern)))
|
|
|
|
(put 'empty 'rng-compile 'rng-compile-empty)
|
|
(put 'text 'rng-compile 'rng-compile-text)
|
|
(put 'not-allowed 'rng-compile 'rng-compile-not-allowed)
|
|
(put 'element 'rng-compile 'rng-compile-element)
|
|
(put 'attribute 'rng-compile 'rng-compile-attribute)
|
|
(put 'choice 'rng-compile 'rng-compile-choice)
|
|
(put 'optional 'rng-compile 'rng-compile-optional)
|
|
(put 'group 'rng-compile 'rng-compile-group)
|
|
(put 'interleave 'rng-compile 'rng-compile-interleave)
|
|
(put 'ref 'rng-compile 'rng-compile-ref)
|
|
(put 'one-or-more 'rng-compile 'rng-compile-one-or-more)
|
|
(put 'zero-or-more 'rng-compile 'rng-compile-zero-or-more)
|
|
(put 'mixed 'rng-compile 'rng-compile-mixed)
|
|
(put 'data 'rng-compile 'rng-compile-data)
|
|
(put 'data-except 'rng-compile 'rng-compile-data-except)
|
|
(put 'value 'rng-compile 'rng-compile-value)
|
|
(put 'list 'rng-compile 'rng-compile-list)
|
|
|
|
(defun rng-compile-not-allowed () rng-not-allowed-ipattern)
|
|
(defun rng-compile-empty () rng-empty-ipattern)
|
|
(defun rng-compile-text () rng-text-ipattern)
|
|
|
|
(defun rng-compile-element (name-class pattern)
|
|
;; don't intern
|
|
(rng-make-ipattern 'element
|
|
(rng-gen-ipattern-index)
|
|
(rng-compile-name-class name-class)
|
|
pattern ; compile lazily
|
|
nil))
|
|
|
|
(defun rng-element-get-child (element)
|
|
(let ((tem (rng--ipattern-child element)))
|
|
(if (vectorp tem)
|
|
tem
|
|
(setf (rng--ipattern-child element) (rng-compile tem)))))
|
|
|
|
(defun rng-compile-attribute (name-class pattern)
|
|
(rng-intern-attribute (rng-compile-name-class name-class)
|
|
(rng-compile pattern)))
|
|
|
|
(defun rng-compile-ref (pattern name)
|
|
(and (memq pattern rng-being-compiled)
|
|
(rng-compile-error "Reference loop on symbol %s" name))
|
|
(setq rng-being-compiled
|
|
(cons pattern rng-being-compiled))
|
|
(unwind-protect
|
|
(rng-compile pattern)
|
|
(setq rng-being-compiled
|
|
(cdr rng-being-compiled))))
|
|
|
|
(defun rng-compile-one-or-more (pattern)
|
|
(rng-intern-one-or-more (rng-compile pattern)))
|
|
|
|
(defun rng-compile-zero-or-more (pattern)
|
|
(rng-intern-optional
|
|
(rng-intern-one-or-more (rng-compile pattern))))
|
|
|
|
(defun rng-compile-optional (pattern)
|
|
(rng-intern-optional (rng-compile pattern)))
|
|
|
|
(defun rng-compile-mixed (pattern)
|
|
(rng-intern-interleave (cons rng-text-ipattern
|
|
(list (rng-compile pattern)))))
|
|
|
|
(defun rng-compile-list (pattern)
|
|
(rng-intern-list (rng-compile pattern)))
|
|
|
|
(defun rng-compile-choice (&rest patterns)
|
|
(rng-intern-choice (mapcar 'rng-compile patterns)))
|
|
|
|
(defun rng-compile-group (&rest patterns)
|
|
(rng-intern-group (mapcar 'rng-compile patterns)))
|
|
|
|
(defun rng-compile-interleave (&rest patterns)
|
|
(rng-intern-interleave (mapcar 'rng-compile patterns)))
|
|
|
|
(defun rng-compile-dt (name params)
|
|
(let ((rng-dt-error-reporter 'rng-compile-error))
|
|
(funcall (let ((uri (car name)))
|
|
(or (get uri 'rng-dt-compile)
|
|
(rng-compile-error "Unknown datatype library %s" uri)))
|
|
(cdr name)
|
|
params)))
|
|
|
|
(defun rng-compile-data (name params)
|
|
(let ((dt (rng-compile-dt name params)))
|
|
(rng-intern-data (cdr dt) (car dt))))
|
|
|
|
(defun rng-compile-data-except (name params pattern)
|
|
(rng-intern-data-except (cdr (rng-compile-dt name params))
|
|
(rng-compile pattern)))
|
|
|
|
(defun rng-compile-value (name str context)
|
|
(let* ((dt (cdr (rng-compile-dt name '())))
|
|
(rng-dt-namespace-context-getter (list 'identity context))
|
|
(obj (rng-dt-make-value dt str)))
|
|
(if obj
|
|
(rng-intern-value dt obj)
|
|
(rng-compile-error "Value %s is not a valid instance of the datatype %s"
|
|
str
|
|
name))))
|
|
|
|
(defun rng-compile-name-class (nc)
|
|
(let ((type (car nc)))
|
|
(cond ((eq type 'name) (nth 1 nc))
|
|
((eq type 'any-name) [any-name])
|
|
((eq type 'any-name-except)
|
|
(vector 'any-name-except
|
|
(rng-compile-name-class (nth 1 nc))))
|
|
((eq type 'ns-name)
|
|
(vector 'ns-name (nth 1 nc)))
|
|
((eq type 'ns-name-except)
|
|
(vector 'ns-name-except
|
|
(nth 1 nc)
|
|
(rng-compile-name-class (nth 2 nc))))
|
|
((eq type 'choice)
|
|
(vector 'choice
|
|
(mapcar 'rng-compile-name-class (cdr nc))))
|
|
(t (error "Bad name-class type %s" type)))))
|
|
|
|
;;; Searching patterns
|
|
|
|
;; We write this non-recursively to avoid hitting max-lisp-eval-depth
|
|
;; on large schemas.
|
|
|
|
(defun rng-map-element-attribute (function pattern accum &rest args)
|
|
(let ((searched (make-hash-table :test 'eq))
|
|
type todo patterns)
|
|
(while (progn
|
|
(setq type (car pattern))
|
|
(cond ((memq type '(element attribute))
|
|
(setq accum
|
|
(apply function
|
|
(cons pattern
|
|
(cons accum args))))
|
|
(setq pattern (nth 2 pattern)))
|
|
((eq type 'ref)
|
|
(setq pattern (nth 1 pattern))
|
|
(if (gethash pattern searched)
|
|
(setq pattern nil)
|
|
(puthash pattern t searched)))
|
|
((memq type '(choice group interleave))
|
|
(setq todo (cons (cdr pattern) todo))
|
|
(setq pattern nil))
|
|
((memq type '(one-or-more
|
|
zero-or-more
|
|
optional
|
|
mixed))
|
|
(setq pattern (nth 1 pattern)))
|
|
(t (setq pattern nil)))
|
|
(cond (pattern)
|
|
(patterns
|
|
(setq pattern (car patterns))
|
|
(setq patterns (cdr patterns))
|
|
t)
|
|
(todo
|
|
(setq patterns (car todo))
|
|
(setq todo (cdr todo))
|
|
(setq pattern (car patterns))
|
|
(setq patterns (cdr patterns))
|
|
t))))
|
|
accum))
|
|
|
|
(defun rng-find-element-content-pattern (pattern accum name)
|
|
(if (and (eq (car pattern) 'element)
|
|
(rng-search-name name (nth 1 pattern)))
|
|
(cons (rng-compile (nth 2 pattern)) accum)
|
|
accum))
|
|
|
|
(defun rng-search-name (name nc)
|
|
(let ((type (car nc)))
|
|
(cond ((eq type 'name)
|
|
(equal (cadr nc) name))
|
|
((eq type 'choice)
|
|
(let ((choices (cdr nc))
|
|
(found nil))
|
|
(while (and choices (not found))
|
|
(if (rng-search-name name (car choices))
|
|
(setq found t)
|
|
(setq choices (cdr choices))))
|
|
found))
|
|
(t nil))))
|
|
|
|
(defun rng-find-name-class-uris (nc accum)
|
|
(let ((type (car nc)))
|
|
(cond ((eq type 'name)
|
|
(rng-accum-namespace-uri (car (nth 1 nc)) accum))
|
|
((memq type '(ns-name ns-name-except))
|
|
(rng-accum-namespace-uri (nth 1 nc) accum))
|
|
((eq type 'choice)
|
|
(let ((choices (cdr nc)))
|
|
(while choices
|
|
(setq accum
|
|
(rng-find-name-class-uris (car choices) accum))
|
|
(setq choices (cdr choices))))
|
|
accum)
|
|
(t accum))))
|
|
|
|
(defun rng-accum-namespace-uri (ns accum)
|
|
(if (and ns (not (memq ns accum)))
|
|
(cons ns accum)
|
|
accum))
|
|
|
|
;;; Derivatives
|
|
|
|
(defun rng-ipattern-text-typed-p (ipattern)
|
|
(let ((memo (rng--ipattern-memo-text-typed ipattern)))
|
|
(if (eq memo 'unknown)
|
|
(setf (rng--ipattern-memo-text-typed ipattern)
|
|
(rng-ipattern-compute-text-typed-p ipattern))
|
|
memo)))
|
|
|
|
(defun rng-ipattern-compute-text-typed-p (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'choice)
|
|
(let ((cur (rng--ipattern-child ipattern))
|
|
(ret nil))
|
|
(while (and cur (not ret))
|
|
(if (rng-ipattern-text-typed-p (car cur))
|
|
(setq ret t)
|
|
(setq cur (cdr cur))))
|
|
ret))
|
|
((eq type 'group)
|
|
(let ((cur (rng--ipattern-child ipattern))
|
|
(ret nil)
|
|
member)
|
|
(while (and cur (not ret))
|
|
(setq member (car cur))
|
|
(if (rng-ipattern-text-typed-p member)
|
|
(setq ret t))
|
|
(setq cur
|
|
(and (rng--ipattern-nullable member)
|
|
(cdr cur))))
|
|
ret))
|
|
((eq type 'after)
|
|
(rng-ipattern-text-typed-p (rng--ipattern-child ipattern)))
|
|
(t (and (memq type '(value list data data-except)) t)))))
|
|
|
|
(defun rng-start-tag-open-deriv (ipattern nm)
|
|
(or (rng-memo-map-get
|
|
nm
|
|
(rng--ipattern-memo-map-start-tag-open-deriv ipattern))
|
|
(rng-ipattern-memo-start-tag-open-deriv
|
|
ipattern
|
|
nm
|
|
(rng-compute-start-tag-open-deriv ipattern nm))))
|
|
|
|
(defun rng-ipattern-memo-start-tag-open-deriv (ipattern nm deriv)
|
|
(or (memq ipattern rng-const-ipatterns)
|
|
(setf (rng--ipattern-memo-map-start-tag-open-deriv ipattern)
|
|
(rng-memo-map-add nm
|
|
deriv
|
|
(rng--ipattern-memo-map-start-tag-open-deriv
|
|
ipattern))))
|
|
deriv)
|
|
|
|
(defun rng-compute-start-tag-open-deriv (ipattern nm)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'choice)
|
|
(rng-transform-choice (lambda (p)
|
|
(rng-start-tag-open-deriv p nm))
|
|
ipattern))
|
|
((eq type 'element)
|
|
(if (rng-name-class-contains
|
|
(rng--ipattern-name-class ipattern)
|
|
nm)
|
|
(rng-intern-after (rng-element-get-child ipattern)
|
|
rng-empty-ipattern)
|
|
rng-not-allowed-ipattern))
|
|
((eq type 'group)
|
|
(rng-transform-group-nullable
|
|
(lambda (p) (rng-start-tag-open-deriv p nm))
|
|
'rng-cons-group-after
|
|
ipattern))
|
|
((eq type 'interleave)
|
|
(rng-transform-interleave-single
|
|
(lambda (p) (rng-start-tag-open-deriv p nm))
|
|
'rng-subst-interleave-after
|
|
ipattern))
|
|
((eq type 'one-or-more)
|
|
(let ((ip (rng-intern-optional ipattern)))
|
|
(rng-apply-after
|
|
(lambda (p) (rng-intern-group (list p ip)))
|
|
(rng-start-tag-open-deriv (rng--ipattern-child ipattern)
|
|
nm))))
|
|
((eq type 'after)
|
|
(let ((nip (rng--ipattern-after ipattern)))
|
|
(rng-apply-after
|
|
(lambda (p) (rng-intern-after p nip))
|
|
(rng-start-tag-open-deriv (rng--ipattern-child ipattern)
|
|
nm))))
|
|
(t rng-not-allowed-ipattern))))
|
|
|
|
(defun rng-start-attribute-deriv (ipattern nm)
|
|
(or (rng-memo-map-get
|
|
nm
|
|
(rng--ipattern-memo-map-start-attribute-deriv ipattern))
|
|
(rng-ipattern-memo-start-attribute-deriv
|
|
ipattern
|
|
nm
|
|
(rng-compute-start-attribute-deriv ipattern nm))))
|
|
|
|
(defun rng-ipattern-memo-start-attribute-deriv (ipattern nm deriv)
|
|
(or (memq ipattern rng-const-ipatterns)
|
|
(setf (rng--ipattern-memo-map-start-attribute-deriv ipattern)
|
|
(rng-memo-map-add
|
|
nm
|
|
deriv
|
|
(rng--ipattern-memo-map-start-attribute-deriv ipattern))))
|
|
deriv)
|
|
|
|
(defun rng-compute-start-attribute-deriv (ipattern nm)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'choice)
|
|
(rng-transform-choice (lambda (p)
|
|
(rng-start-attribute-deriv p nm))
|
|
ipattern))
|
|
((eq type 'attribute)
|
|
(if (rng-name-class-contains
|
|
(rng--ipattern-name-class ipattern)
|
|
nm)
|
|
(rng-intern-after (rng--ipattern-child ipattern)
|
|
rng-empty-ipattern)
|
|
rng-not-allowed-ipattern))
|
|
((eq type 'group)
|
|
(rng-transform-interleave-single
|
|
(lambda (p) (rng-start-attribute-deriv p nm))
|
|
'rng-subst-group-after
|
|
ipattern))
|
|
((eq type 'interleave)
|
|
(rng-transform-interleave-single
|
|
(lambda (p) (rng-start-attribute-deriv p nm))
|
|
'rng-subst-interleave-after
|
|
ipattern))
|
|
((eq type 'one-or-more)
|
|
(let ((ip (rng-intern-optional ipattern)))
|
|
(rng-apply-after
|
|
(lambda (p) (rng-intern-group (list p ip)))
|
|
(rng-start-attribute-deriv (rng--ipattern-child ipattern)
|
|
nm))))
|
|
((eq type 'after)
|
|
(let ((nip (rng--ipattern-after ipattern)))
|
|
(rng-apply-after
|
|
(lambda (p) (rng-intern-after p nip))
|
|
(rng-start-attribute-deriv (rng--ipattern-child ipattern)
|
|
nm))))
|
|
(t rng-not-allowed-ipattern))))
|
|
|
|
(defun rng-cons-group-after (x y)
|
|
(rng-apply-after (lambda (p) (rng-intern-group (cons p y)))
|
|
x))
|
|
|
|
(defun rng-subst-group-after (new old list)
|
|
(rng-apply-after (lambda (p)
|
|
(rng-intern-group (rng-substq p old list)))
|
|
new))
|
|
|
|
(defun rng-subst-interleave-after (new old list)
|
|
(rng-apply-after (lambda (p)
|
|
(rng-intern-interleave (rng-substq p old list)))
|
|
new))
|
|
|
|
(defun rng-apply-after (f ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(rng-intern-after
|
|
(rng--ipattern-child ipattern)
|
|
(funcall f (rng--ipattern-after ipattern))))
|
|
((eq type 'choice)
|
|
(rng-transform-choice (lambda (x) (rng-apply-after f x))
|
|
ipattern))
|
|
(t rng-not-allowed-ipattern))))
|
|
|
|
(defun rng-start-tag-close-deriv (ipattern)
|
|
(or (rng--ipattern-memo-start-tag-close-deriv ipattern)
|
|
(setf (rng--ipattern-memo-start-tag-close-deriv ipattern)
|
|
(rng-compute-start-tag-close-deriv ipattern))))
|
|
|
|
(defconst rng-transform-map
|
|
'((choice . rng-transform-choice)
|
|
(group . rng-transform-group)
|
|
(interleave . rng-transform-interleave)
|
|
(one-or-more . rng-transform-one-or-more)
|
|
(after . rng-transform-after-child)))
|
|
|
|
(defun rng-compute-start-tag-close-deriv (ipattern)
|
|
(let* ((type (rng--ipattern-type ipattern)))
|
|
(if (eq type 'attribute)
|
|
rng-not-allowed-ipattern
|
|
(let ((transform (assq type rng-transform-map)))
|
|
(if transform
|
|
(funcall (cdr transform)
|
|
'rng-start-tag-close-deriv
|
|
ipattern)
|
|
ipattern)))))
|
|
|
|
(defun rng-ignore-attributes-deriv (ipattern)
|
|
(let* ((type (rng--ipattern-type ipattern)))
|
|
(if (eq type 'attribute)
|
|
rng-empty-ipattern
|
|
(let ((transform (assq type rng-transform-map)))
|
|
(if transform
|
|
(funcall (cdr transform)
|
|
'rng-ignore-attributes-deriv
|
|
ipattern)
|
|
ipattern)))))
|
|
|
|
(defun rng-text-only-deriv (ipattern)
|
|
(or (rng--ipattern-memo-text-only-deriv ipattern)
|
|
(setf (rng--ipattern-memo-text-only-deriv ipattern)
|
|
(rng-compute-text-only-deriv ipattern))))
|
|
|
|
(defun rng-compute-text-only-deriv (ipattern)
|
|
(let* ((type (rng--ipattern-type ipattern)))
|
|
(if (eq type 'element)
|
|
rng-not-allowed-ipattern
|
|
(let ((transform (assq type
|
|
'((choice . rng-transform-choice)
|
|
(group . rng-transform-group)
|
|
(interleave . rng-transform-interleave)
|
|
(one-or-more . rng-transform-one-or-more)
|
|
(after . rng-transform-after-child)))))
|
|
(if transform
|
|
(funcall (cdr transform)
|
|
'rng-text-only-deriv
|
|
ipattern)
|
|
ipattern)))))
|
|
|
|
(defun rng-mixed-text-deriv (ipattern)
|
|
(or (rng--ipattern-memo-mixed-text-deriv ipattern)
|
|
(setf (rng--ipattern-memo-mixed-text-deriv ipattern)
|
|
(rng-compute-mixed-text-deriv ipattern))))
|
|
|
|
(defun rng-compute-mixed-text-deriv (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'text) ipattern)
|
|
((eq type 'after)
|
|
(rng-transform-after-child 'rng-mixed-text-deriv
|
|
ipattern))
|
|
((eq type 'choice)
|
|
(rng-transform-choice 'rng-mixed-text-deriv
|
|
ipattern))
|
|
((eq type 'one-or-more)
|
|
(rng-intern-group
|
|
(list (rng-mixed-text-deriv
|
|
(rng--ipattern-child ipattern))
|
|
(rng-intern-optional ipattern))))
|
|
((eq type 'group)
|
|
(rng-transform-group-nullable
|
|
'rng-mixed-text-deriv
|
|
(lambda (x y) (rng-intern-group (cons x y)))
|
|
ipattern))
|
|
((eq type 'interleave)
|
|
(rng-transform-interleave-single
|
|
'rng-mixed-text-deriv
|
|
(lambda (new old list) (rng-intern-interleave
|
|
(rng-substq new old list)))
|
|
ipattern))
|
|
((and (eq type 'data)
|
|
(not (rng--ipattern-memo-text-typed ipattern)))
|
|
ipattern)
|
|
(t rng-not-allowed-ipattern))))
|
|
|
|
(defun rng-end-tag-deriv (ipattern)
|
|
(or (rng--ipattern-memo-end-tag-deriv ipattern)
|
|
(setf (rng--ipattern-memo-end-tag-deriv ipattern)
|
|
(rng-compute-end-tag-deriv ipattern))))
|
|
|
|
(defun rng-compute-end-tag-deriv (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'choice)
|
|
(rng-intern-choice
|
|
(mapcar 'rng-end-tag-deriv
|
|
(rng--ipattern-child ipattern))))
|
|
((eq type 'after)
|
|
(if (rng--ipattern-nullable
|
|
(rng--ipattern-child ipattern))
|
|
(rng--ipattern-after ipattern)
|
|
rng-not-allowed-ipattern))
|
|
(t rng-not-allowed-ipattern))))
|
|
|
|
(defun rng-data-deriv (ipattern value)
|
|
(or (rng-memo-map-get value
|
|
(rng--ipattern-memo-map-data-deriv ipattern))
|
|
(and (rng-memo-map-get
|
|
(cons value (rng-namespace-context-get-no-trace))
|
|
(rng--ipattern-memo-map-data-deriv ipattern))
|
|
(rng-memo-map-get
|
|
(cons value (apply (car rng-dt-namespace-context-getter)
|
|
(cdr rng-dt-namespace-context-getter)))
|
|
(rng--ipattern-memo-map-data-deriv ipattern)))
|
|
(let* ((used-context (vector nil))
|
|
(rng-dt-namespace-context-getter
|
|
(cons 'rng-namespace-context-tracer
|
|
(cons used-context
|
|
rng-dt-namespace-context-getter)))
|
|
(deriv (rng-compute-data-deriv ipattern value)))
|
|
(rng-ipattern-memo-data-deriv ipattern
|
|
value
|
|
(aref used-context 0)
|
|
deriv))))
|
|
|
|
(defun rng-namespace-context-tracer (used getter &rest args)
|
|
(let ((context (apply getter args)))
|
|
(aset used 0 context)
|
|
context))
|
|
|
|
(defun rng-namespace-context-get-no-trace ()
|
|
(let ((tem rng-dt-namespace-context-getter))
|
|
(while (and tem (eq (car tem) 'rng-namespace-context-tracer))
|
|
(setq tem (cddr tem)))
|
|
(apply (car tem) (cdr tem))))
|
|
|
|
(defconst rng-memo-data-deriv-max-length 80
|
|
"Don't memoize data-derivs for values longer than this.")
|
|
|
|
(defun rng-ipattern-memo-data-deriv (ipattern value context deriv)
|
|
(or (memq ipattern rng-const-ipatterns)
|
|
(> (length value) rng-memo-data-deriv-max-length)
|
|
(setf (rng--ipattern-memo-map-data-deriv ipattern)
|
|
(rng-memo-map-add (if context (cons value context) value)
|
|
deriv
|
|
(rng--ipattern-memo-map-data-deriv ipattern)
|
|
t)))
|
|
deriv)
|
|
|
|
(defun rng-compute-data-deriv (ipattern value)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'text) ipattern)
|
|
((eq type 'choice)
|
|
(rng-transform-choice (lambda (p) (rng-data-deriv p value))
|
|
ipattern))
|
|
((eq type 'group)
|
|
(rng-transform-group-nullable
|
|
(lambda (p) (rng-data-deriv p value))
|
|
(lambda (x y) (rng-intern-group (cons x y)))
|
|
ipattern))
|
|
((eq type 'one-or-more)
|
|
(rng-intern-group (list (rng-data-deriv
|
|
(rng--ipattern-child ipattern)
|
|
value)
|
|
(rng-intern-optional ipattern))))
|
|
((eq type 'after)
|
|
(let ((child (rng--ipattern-child ipattern)))
|
|
(if (or (rng--ipattern-nullable
|
|
(rng-data-deriv child value))
|
|
(and (rng--ipattern-nullable child)
|
|
(string-blank-p value)))
|
|
(rng--ipattern-after ipattern)
|
|
rng-not-allowed-ipattern)))
|
|
((eq type 'data)
|
|
(if (rng-dt-make-value (rng--ipattern-datatype ipattern)
|
|
value)
|
|
rng-empty-ipattern
|
|
rng-not-allowed-ipattern))
|
|
((eq type 'data-except)
|
|
(if (and (rng-dt-make-value (rng--ipattern-datatype ipattern)
|
|
value)
|
|
(not (rng--ipattern-nullable
|
|
(rng-data-deriv
|
|
(rng--ipattern-child ipattern)
|
|
value))))
|
|
rng-empty-ipattern
|
|
rng-not-allowed-ipattern))
|
|
((eq type 'value)
|
|
(if (equal (rng-dt-make-value (rng--ipattern-datatype ipattern)
|
|
value)
|
|
(rng--ipattern-value-object ipattern))
|
|
rng-empty-ipattern
|
|
rng-not-allowed-ipattern))
|
|
((eq type 'list)
|
|
(let ((tokens (split-string value))
|
|
(state (rng--ipattern-child ipattern)))
|
|
(while (and tokens
|
|
(not (eq state rng-not-allowed-ipattern)))
|
|
(setq state (rng-data-deriv state (car tokens)))
|
|
(setq tokens (cdr tokens)))
|
|
(if (rng--ipattern-nullable state)
|
|
rng-empty-ipattern
|
|
rng-not-allowed-ipattern)))
|
|
;; don't think interleave can occur
|
|
;; since we do text-only-deriv first
|
|
(t rng-not-allowed-ipattern))))
|
|
|
|
(defun rng-transform-multi (f ipattern interner)
|
|
(let* ((members (rng--ipattern-child ipattern))
|
|
(transformed (mapcar f members)))
|
|
(if (rng-members-eq members transformed)
|
|
ipattern
|
|
(funcall interner transformed))))
|
|
|
|
(defun rng-transform-choice (f ipattern)
|
|
(rng-transform-multi f ipattern 'rng-intern-choice))
|
|
|
|
(defun rng-transform-group (f ipattern)
|
|
(rng-transform-multi f ipattern 'rng-intern-group))
|
|
|
|
(defun rng-transform-interleave (f ipattern)
|
|
(rng-transform-multi f ipattern 'rng-intern-interleave))
|
|
|
|
(defun rng-transform-one-or-more (f ipattern)
|
|
(let* ((child (rng--ipattern-child ipattern))
|
|
(transformed (funcall f child)))
|
|
(if (eq child transformed)
|
|
ipattern
|
|
(rng-intern-one-or-more transformed))))
|
|
|
|
(defun rng-transform-after-child (f ipattern)
|
|
(let* ((child (rng--ipattern-child ipattern))
|
|
(transformed (funcall f child)))
|
|
(if (eq child transformed)
|
|
ipattern
|
|
(rng-intern-after transformed
|
|
(rng--ipattern-after ipattern)))))
|
|
|
|
(defun rng-transform-interleave-single (f subster ipattern)
|
|
(let ((children (rng--ipattern-child ipattern))
|
|
found)
|
|
(while (and children (not found))
|
|
(let* ((child (car children))
|
|
(transformed (funcall f child)))
|
|
(if (eq transformed rng-not-allowed-ipattern)
|
|
(setq children (cdr children))
|
|
(setq found
|
|
(funcall subster
|
|
transformed
|
|
child
|
|
(rng--ipattern-child ipattern))))))
|
|
(or found
|
|
rng-not-allowed-ipattern)))
|
|
|
|
(defun rng-transform-group-nullable (f conser ipattern)
|
|
"Given a group x1,...,xn,y1,...,yn where the xs are all
|
|
nullable and y1 isn't, return a choice
|
|
(conser f(x1) x2,...,xm,y1,...,yn)
|
|
|(conser f(x2) x3,...,xm,y1,...,yn)
|
|
|...
|
|
|(conser f(xm) y1,...,yn)
|
|
|(conser f(y1) y2,...,yn)"
|
|
(rng-intern-choice
|
|
(rng-transform-group-nullable-gen-choices
|
|
f
|
|
conser
|
|
(rng--ipattern-child ipattern))))
|
|
|
|
(defun rng-transform-group-nullable-gen-choices (f conser members)
|
|
(let ((head (car members))
|
|
(tail (cdr members)))
|
|
(if tail
|
|
(cons (funcall conser (funcall f head) tail)
|
|
(if (rng--ipattern-nullable head)
|
|
(rng-transform-group-nullable-gen-choices f conser tail)
|
|
nil))
|
|
(list (funcall f head)))))
|
|
|
|
(defun rng-members-eq (list1 list2)
|
|
(while (and list1
|
|
list2
|
|
(eq (car list1) (car list2)))
|
|
(setq list1 (cdr list1))
|
|
(setq list2 (cdr list2)))
|
|
(and (null list1) (null list2)))
|
|
|
|
|
|
(defun rng-ipattern-after (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'choice)
|
|
(rng-transform-choice 'rng-ipattern-after ipattern))
|
|
((eq type 'after)
|
|
(rng--ipattern-after ipattern))
|
|
((eq type 'not-allowed)
|
|
ipattern)
|
|
(t (error "Internal error in rng-ipattern-after: unexpected type %s" type)))))
|
|
|
|
(defun rng-unknown-start-tag-open-deriv (ipattern)
|
|
(rng-intern-after (rng-compile rng-any-content) ipattern))
|
|
|
|
(defun rng-ipattern-optionalize-elements (ipattern)
|
|
(let* ((type (rng--ipattern-type ipattern))
|
|
(transform (assq type rng-transform-map)))
|
|
(cond (transform
|
|
(funcall (cdr transform)
|
|
'rng-ipattern-optionalize-elements
|
|
ipattern))
|
|
((eq type 'element)
|
|
(rng-intern-optional ipattern))
|
|
(t ipattern))))
|
|
|
|
(defun rng-ipattern-empty-before-p (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(eq (rng--ipattern-child ipattern) rng-empty-ipattern))
|
|
((eq type 'choice)
|
|
(let ((members (rng--ipattern-child ipattern))
|
|
(ret t))
|
|
(while (and members ret)
|
|
(or (rng-ipattern-empty-before-p (car members))
|
|
(setq ret nil))
|
|
(setq members (cdr members)))
|
|
ret))
|
|
(t nil))))
|
|
|
|
(defun rng-ipattern-possible-start-tags (ipattern accum)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(rng-ipattern-possible-start-tags
|
|
(rng--ipattern-child ipattern)
|
|
accum))
|
|
((memq type '(choice interleave))
|
|
(let ((members (rng--ipattern-child ipattern)))
|
|
(while members
|
|
(setq accum
|
|
(rng-ipattern-possible-start-tags (car members)
|
|
accum))
|
|
(setq members (cdr members))))
|
|
accum)
|
|
((eq type 'group)
|
|
(let ((members (rng--ipattern-child ipattern)))
|
|
(while members
|
|
(setq accum
|
|
(rng-ipattern-possible-start-tags (car members)
|
|
accum))
|
|
(setq members
|
|
(and (rng--ipattern-nullable (car members))
|
|
(cdr members)))))
|
|
accum)
|
|
((eq type 'element)
|
|
(if (eq (rng-element-get-child ipattern) rng-not-allowed-ipattern)
|
|
accum
|
|
(rng-name-class-possible-names
|
|
(rng--ipattern-name-class ipattern)
|
|
accum)))
|
|
((eq type 'one-or-more)
|
|
(rng-ipattern-possible-start-tags
|
|
(rng--ipattern-child ipattern)
|
|
accum))
|
|
(t accum))))
|
|
|
|
(defun rng-ipattern-start-tag-possible-p (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((memq type '(after one-or-more))
|
|
(rng-ipattern-start-tag-possible-p
|
|
(rng--ipattern-child ipattern)))
|
|
((memq type '(choice interleave))
|
|
(let ((members (rng--ipattern-child ipattern))
|
|
(possible nil))
|
|
(while (and members (not possible))
|
|
(setq possible
|
|
(rng-ipattern-start-tag-possible-p (car members)))
|
|
(setq members (cdr members)))
|
|
possible))
|
|
((eq type 'group)
|
|
(let ((members (rng--ipattern-child ipattern))
|
|
(possible nil))
|
|
(while (and members (not possible))
|
|
(setq possible
|
|
(rng-ipattern-start-tag-possible-p (car members)))
|
|
(setq members
|
|
(and (rng--ipattern-nullable (car members))
|
|
(cdr members))))
|
|
possible))
|
|
((eq type 'element)
|
|
(not (eq (rng-element-get-child ipattern)
|
|
rng-not-allowed-ipattern)))
|
|
(t nil))))
|
|
|
|
(defun rng-ipattern-possible-attributes (ipattern accum)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(rng-ipattern-possible-attributes (rng--ipattern-child ipattern)
|
|
accum))
|
|
((memq type '(choice interleave group))
|
|
(let ((members (rng--ipattern-child ipattern)))
|
|
(while members
|
|
(setq accum
|
|
(rng-ipattern-possible-attributes (car members)
|
|
accum))
|
|
(setq members (cdr members))))
|
|
accum)
|
|
((eq type 'attribute)
|
|
(rng-name-class-possible-names
|
|
(rng--ipattern-name-class ipattern)
|
|
accum))
|
|
((eq type 'one-or-more)
|
|
(rng-ipattern-possible-attributes
|
|
(rng--ipattern-child ipattern)
|
|
accum))
|
|
(t accum))))
|
|
|
|
(defun rng-ipattern-possible-values (ipattern accum)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(rng-ipattern-possible-values (rng--ipattern-child ipattern)
|
|
accum))
|
|
((eq type 'choice)
|
|
(let ((members (rng--ipattern-child ipattern)))
|
|
(while members
|
|
(setq accum
|
|
(rng-ipattern-possible-values (car members)
|
|
accum))
|
|
(setq members (cdr members))))
|
|
accum)
|
|
((eq type 'value)
|
|
(let ((value-object (rng--ipattern-value-object ipattern)))
|
|
(if (stringp value-object)
|
|
(cons value-object accum)
|
|
accum)))
|
|
(t accum))))
|
|
|
|
(defun rng-ipattern-required-element (ipattern)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((memq type '(after one-or-more))
|
|
(rng-ipattern-required-element (rng--ipattern-child ipattern)))
|
|
((eq type 'choice)
|
|
(let* ((members (rng--ipattern-child ipattern))
|
|
(required (rng-ipattern-required-element (car members))))
|
|
(while (and required
|
|
(setq members (cdr members)))
|
|
(unless (equal required
|
|
(rng-ipattern-required-element (car members)))
|
|
(setq required nil)))
|
|
required))
|
|
((eq type 'group)
|
|
(let ((members (rng--ipattern-child ipattern))
|
|
required)
|
|
(while (and (not (setq required
|
|
(rng-ipattern-required-element
|
|
(car members))))
|
|
(rng--ipattern-nullable (car members))
|
|
(setq members (cdr members))))
|
|
required))
|
|
((eq type 'interleave)
|
|
(let ((members (rng--ipattern-child ipattern))
|
|
required)
|
|
(while members
|
|
(let ((tem (rng-ipattern-required-element (car members))))
|
|
(cond ((not tem)
|
|
(setq members (cdr members)))
|
|
((not required)
|
|
(setq required tem)
|
|
(setq members (cdr members)))
|
|
((equal required tem)
|
|
(setq members (cdr members)))
|
|
(t
|
|
(setq required nil)
|
|
(setq members nil)))))
|
|
required))
|
|
((eq type 'element)
|
|
(let ((nc (rng--ipattern-name-class ipattern)))
|
|
(and (consp nc)
|
|
(not (eq (rng-element-get-child ipattern)
|
|
rng-not-allowed-ipattern))
|
|
nc))))))
|
|
|
|
(defun rng-ipattern-required-attributes (ipattern accum)
|
|
(let ((type (rng--ipattern-type ipattern)))
|
|
(cond ((eq type 'after)
|
|
(rng-ipattern-required-attributes (rng--ipattern-child ipattern)
|
|
accum))
|
|
((memq type '(interleave group))
|
|
(let ((members (rng--ipattern-child ipattern)))
|
|
(while members
|
|
(setq accum
|
|
(rng-ipattern-required-attributes (car members)
|
|
accum))
|
|
(setq members (cdr members))))
|
|
accum)
|
|
((eq type 'choice)
|
|
(let ((members (rng--ipattern-child ipattern))
|
|
in-all in-this new-in-all)
|
|
(setq in-all
|
|
(rng-ipattern-required-attributes (car members)
|
|
nil))
|
|
(while (and in-all (setq members (cdr members)))
|
|
(setq in-this
|
|
(rng-ipattern-required-attributes (car members) nil))
|
|
(setq new-in-all nil)
|
|
(while in-this
|
|
(when (member (car in-this) in-all)
|
|
(setq new-in-all
|
|
(cons (car in-this) new-in-all)))
|
|
(setq in-this (cdr in-this)))
|
|
(setq in-all new-in-all))
|
|
(append in-all accum)))
|
|
((eq type 'attribute)
|
|
(let ((nc (rng--ipattern-name-class ipattern)))
|
|
(if (consp nc)
|
|
(cons nc accum)
|
|
accum)))
|
|
((eq type 'one-or-more)
|
|
(rng-ipattern-required-attributes (rng--ipattern-child ipattern)
|
|
accum))
|
|
(t accum))))
|
|
|
|
(defun rng-compile-error (&rest args)
|
|
(signal 'rng-compile-error
|
|
(list (apply #'format-message args))))
|
|
|
|
(define-error 'rng-compile-error "Incorrect schema" 'rng-error)
|
|
|
|
;;; External API
|
|
|
|
(defsubst rng-match-state () rng-match-state)
|
|
|
|
(defsubst rng-set-match-state (state)
|
|
(setq rng-match-state state))
|
|
|
|
(defsubst rng-match-state-equal (state)
|
|
(eq state rng-match-state))
|
|
|
|
(defun rng-schema-changed ()
|
|
(rng-ipattern-clear)
|
|
(rng-compile-clear))
|
|
|
|
(defun rng-match-init-buffer ()
|
|
(make-local-variable 'rng-compile-table)
|
|
(make-local-variable 'rng-ipattern-table)
|
|
(make-local-variable 'rng-last-ipattern-index))
|
|
|
|
(defun rng-match-start-document ()
|
|
(rng-ipattern-maybe-init)
|
|
(rng-compile-maybe-init)
|
|
(add-hook 'rng-schema-change-hook 'rng-schema-changed nil t)
|
|
(setq rng-match-state (rng-compile rng-current-schema)))
|
|
|
|
(defun rng-match-start-tag-open (name)
|
|
(rng-update-match-state (rng-start-tag-open-deriv rng-match-state
|
|
name)))
|
|
|
|
(defun rng-match-attribute-name (name)
|
|
(rng-update-match-state (rng-start-attribute-deriv rng-match-state
|
|
name)))
|
|
|
|
(defun rng-match-attribute-value (value)
|
|
(rng-update-match-state (rng-data-deriv rng-match-state
|
|
value)))
|
|
|
|
(defun rng-match-element-value (value)
|
|
(and (rng-update-match-state (rng-text-only-deriv rng-match-state))
|
|
(rng-update-match-state (rng-data-deriv rng-match-state
|
|
value))))
|
|
|
|
(defun rng-match-start-tag-close ()
|
|
(rng-update-match-state (rng-start-tag-close-deriv rng-match-state)))
|
|
|
|
(defun rng-match-mixed-text ()
|
|
(rng-update-match-state (rng-mixed-text-deriv rng-match-state)))
|
|
|
|
(defun rng-match-end-tag ()
|
|
(rng-update-match-state (rng-end-tag-deriv rng-match-state)))
|
|
|
|
(defun rng-match-after ()
|
|
(rng-update-match-state
|
|
(rng-ipattern-after rng-match-state)))
|
|
|
|
(defun rng-match-out-of-context-start-tag-open (name)
|
|
(let* ((found (rng-map-element-attribute 'rng-find-element-content-pattern
|
|
rng-current-schema
|
|
nil
|
|
name))
|
|
(content-pattern (if found
|
|
(rng-intern-choice found)
|
|
rng-not-allowed-ipattern)))
|
|
(rng-update-match-state
|
|
(rng-intern-after content-pattern rng-match-state))))
|
|
|
|
(defun rng-match-possible-namespace-uris ()
|
|
"Return a list of all the namespace URIs used in the current schema.
|
|
The absent URI is not included, so the result is always a list of symbols."
|
|
(rng-map-element-attribute (lambda (pattern accum)
|
|
(rng-find-name-class-uris (nth 1 pattern)
|
|
accum))
|
|
rng-current-schema
|
|
nil))
|
|
|
|
(defun rng-match-unknown-start-tag-open ()
|
|
(rng-update-match-state
|
|
(rng-unknown-start-tag-open-deriv rng-match-state)))
|
|
|
|
(defun rng-match-optionalize-elements ()
|
|
(rng-update-match-state
|
|
(rng-ipattern-optionalize-elements rng-match-state)))
|
|
|
|
(defun rng-match-ignore-attributes ()
|
|
(rng-update-match-state
|
|
(rng-ignore-attributes-deriv rng-match-state)))
|
|
|
|
(defun rng-match-text-typed-p ()
|
|
(rng-ipattern-text-typed-p rng-match-state))
|
|
|
|
(defun rng-match-empty-content ()
|
|
(if (rng-match-text-typed-p)
|
|
(rng-match-element-value "")
|
|
(rng-match-end-tag)))
|
|
|
|
(defun rng-match-empty-before-p ()
|
|
"Return non-nil if what can be matched before an end-tag is empty.
|
|
In other words, return non-nil if the pattern for what can be matched
|
|
for an end-tag is equivalent to empty."
|
|
(rng-ipattern-empty-before-p rng-match-state))
|
|
|
|
(defun rng-match-infer-start-tag-namespace (local-name)
|
|
(let ((ncs (rng-ipattern-possible-start-tags rng-match-state nil))
|
|
(nc nil)
|
|
(ns nil))
|
|
(while ncs
|
|
(setq nc (car ncs))
|
|
(if (and (equal (cdr nc) local-name)
|
|
(symbolp (car nc)))
|
|
(cond ((not ns)
|
|
;; first possible namespace
|
|
(setq ns (car nc))
|
|
(setq ncs (cdr ncs)))
|
|
((equal ns (car nc))
|
|
;; same as first namespace
|
|
(setq ncs (cdr ncs)))
|
|
(t
|
|
;; more than one possible namespace
|
|
(setq ns nil)
|
|
(setq ncs nil)))
|
|
(setq ncs (cdr ncs))))
|
|
ns))
|
|
|
|
(defun rng-match-nullable-p ()
|
|
(rng--ipattern-nullable rng-match-state))
|
|
|
|
(defun rng-match-possible-start-tag-names ()
|
|
"Return a list of possible names that would be valid for start-tags.
|
|
|
|
Each possible name is returned as a (NAMESPACE . LOCAL-NAME) pair,
|
|
where NAMESPACE is a symbol or nil (meaning the absent namespace) and
|
|
LOCAL-NAME is a string. The returned list may contain duplicates."
|
|
(rng-ipattern-possible-start-tags rng-match-state nil))
|
|
|
|
;; This is no longer used. It might be useful so leave it in for now.
|
|
(defun rng-match-start-tag-possible-p ()
|
|
"Return non-nil if a start-tag is possible."
|
|
(rng-ipattern-start-tag-possible-p rng-match-state))
|
|
|
|
(defun rng-match-possible-attribute-names ()
|
|
"Return a list of possible names that would be valid for attributes.
|
|
|
|
See the function `rng-match-possible-start-tag-names' for
|
|
more information."
|
|
(rng-ipattern-possible-attributes rng-match-state nil))
|
|
|
|
(defun rng-match-possible-value-strings ()
|
|
"Return a list of strings that would be valid as content.
|
|
The list may contain duplicates. Typically, the list will not
|
|
be exhaustive."
|
|
(rng-ipattern-possible-values rng-match-state nil))
|
|
|
|
(defun rng-match-required-element-name ()
|
|
"Return the name of an element which must occur, or nil if none."
|
|
(rng-ipattern-required-element rng-match-state))
|
|
|
|
(defun rng-match-required-attribute-names ()
|
|
"Return a list of names of attributes which must all occur."
|
|
(rng-ipattern-required-attributes rng-match-state nil))
|
|
|
|
(defmacro rng-match-save (&rest body)
|
|
(declare (indent 0) (debug t))
|
|
(let ((state (make-symbol "state")))
|
|
`(let ((,state rng-match-state))
|
|
(unwind-protect
|
|
(progn ,@body)
|
|
(setq rng-match-state ,state)))))
|
|
|
|
(defmacro rng-match-with-schema (schema &rest body)
|
|
(declare (indent 1) (debug t))
|
|
`(let ((rng-current-schema ,schema)
|
|
rng-match-state
|
|
rng-compile-table
|
|
rng-ipattern-table
|
|
rng-last-ipattern-index)
|
|
(rng-ipattern-maybe-init)
|
|
(rng-compile-maybe-init)
|
|
(setq rng-match-state (rng-compile rng-current-schema))
|
|
,@body))
|
|
|
|
(provide 'rng-match)
|
|
|
|
;;; rng-match.el ends here
|