3534 lines
124 KiB
EmacsLisp
3534 lines
124 KiB
EmacsLisp
;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler -*- lexical-binding: t; -*-
|
||
|
||
;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2007, 2009-2024 Free
|
||
;; Software Foundation, Inc.
|
||
|
||
;; Author: David Ponce <david@dponce.com>
|
||
;; Created: 30 January 2002
|
||
;; Keywords: syntax
|
||
|
||
;; This file is part of GNU Emacs.
|
||
|
||
;; GNU Emacs is free software: you can redistribute it and/or modify
|
||
;; it under the terms of the GNU General Public License as published by
|
||
;; the Free Software Foundation, either version 3 of the License, or
|
||
;; (at your option) any later version.
|
||
|
||
;; GNU Emacs is distributed in the hope that it will be useful,
|
||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
;; GNU General Public License for more details.
|
||
|
||
;; You should have received a copy of the GNU General Public License
|
||
;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
|
||
|
||
;;; Commentary:
|
||
;;
|
||
;; Grammar compiler that produces Wisent's LALR automatons.
|
||
;;
|
||
;; Wisent (the European Bison ;-) is an Elisp implementation of the
|
||
;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
|
||
;; code of GNU Bison 1.28 & 1.31.
|
||
;;
|
||
;; For more details on the basic concepts for understanding Wisent,
|
||
;; read the Bison manual ;)
|
||
;;
|
||
;; For more details on Wisent itself read the Wisent manual.
|
||
|
||
;;; Code:
|
||
(require 'semantic/wisent)
|
||
(eval-when-compile (require 'cl-lib))
|
||
(require 'subr-x) ; `string-pad'
|
||
|
||
;;;; -------------------
|
||
;;;; Misc. useful things
|
||
;;;; -------------------
|
||
|
||
;; As much as possible I would like to keep the name of global
|
||
;; variables used in Bison without polluting too much the Elisp global
|
||
;; name space. Elisp dynamic binding allows that ;-)
|
||
|
||
;; Here are simple macros to easily define and use set of variables
|
||
;; bound locally, without all these "reference to free variable"
|
||
;; compiler warnings!
|
||
|
||
(eval-when-compile
|
||
(defun wisent-context-name (name)
|
||
"Return the context name from NAME."
|
||
(if (and name (symbolp name))
|
||
(intern (format "wisent-context-%s" name))
|
||
(error "Invalid context name: %S" name)))
|
||
|
||
(defun wisent-context-bindings (name)
|
||
"Return the variables in context NAME."
|
||
(symbol-value (wisent-context-name name))))
|
||
|
||
(defmacro wisent-defcontext (name &rest vars)
|
||
"Define a context NAME that will bind variables VARS."
|
||
(declare (indent 1))
|
||
(declare-function wisent-context-name nil (name))
|
||
(let* ((context (wisent-context-name name))
|
||
(declarations (mapcar (lambda (v) (list 'defvar v)) vars)))
|
||
`(progn
|
||
,@declarations
|
||
(eval-when-compile
|
||
(defvar ,context ',vars)))))
|
||
|
||
(defmacro wisent-with-context (name &rest body)
|
||
"Bind variables in context NAME then eval BODY."
|
||
(declare (indent 1))
|
||
(declare-function wisent-context-bindings nil (name))
|
||
`(dlet ,(wisent-context-bindings name)
|
||
,@body))
|
||
|
||
(defsubst wisent-pad-string (s n &optional left)
|
||
"Fill string S with spaces.
|
||
Return a new string of at least N characters. Insert spaces on right.
|
||
If optional LEFT is non-nil insert spaces on left."
|
||
(declare (obsolete string-pad "29.1"))
|
||
(string-pad s n nil left))
|
||
|
||
|
||
;;;; ------------------------
|
||
;;;; Environment dependencies
|
||
;;;; ------------------------
|
||
|
||
;; FIXME: Use bignums or bool-vectors?
|
||
|
||
(defconst wisent-BITS-PER-WORD (logcount most-positive-fixnum))
|
||
|
||
(defsubst wisent-WORDSIZE (n)
|
||
"(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
|
||
(/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
|
||
|
||
(defsubst wisent-SETBIT (x i)
|
||
"X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
|
||
(let ((k (/ i wisent-BITS-PER-WORD)))
|
||
(aset x k (logior (aref x k)
|
||
(ash 1 (% i wisent-BITS-PER-WORD))))))
|
||
|
||
(defsubst wisent-RESETBIT (x i)
|
||
"X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
|
||
(let ((k (/ i wisent-BITS-PER-WORD)))
|
||
(aset x k (logand (aref x k)
|
||
(lognot (ash 1 (% i wisent-BITS-PER-WORD)))))))
|
||
|
||
(defsubst wisent-BITISSET (x i)
|
||
"(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
|
||
(not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
|
||
(ash 1 (% i wisent-BITS-PER-WORD))))))
|
||
|
||
(defvar wisent-debug-flag nil
|
||
"Non-nil means enable some debug stuff.")
|
||
|
||
;;;; --------------
|
||
;;;; Logging/Output
|
||
;;;; --------------
|
||
(defconst wisent-log-buffer-name "*wisent-log*"
|
||
"Name of the log buffer.")
|
||
|
||
(defvar wisent-new-log-flag nil
|
||
"Non-nil means to start a new report.")
|
||
|
||
(defcustom wisent-verbose-flag nil
|
||
"Non-nil means to report verbose information on generated parser."
|
||
:group 'wisent
|
||
:type 'boolean)
|
||
|
||
(defun wisent-toggle-verbose-flag ()
|
||
"Toggle whether to report verbose information on generated parser."
|
||
(interactive)
|
||
(setq wisent-verbose-flag (not wisent-verbose-flag))
|
||
(when (called-interactively-p 'interactive)
|
||
(message "Verbose report %sabled"
|
||
(if wisent-verbose-flag "en" "dis"))))
|
||
|
||
(defmacro wisent-log-buffer ()
|
||
"Return the log buffer.
|
||
Its name is defined in constant `wisent-log-buffer-name'."
|
||
'(get-buffer-create wisent-log-buffer-name))
|
||
|
||
(defmacro wisent-clear-log ()
|
||
"Delete the entire contents of the log buffer."
|
||
'(with-current-buffer (wisent-log-buffer)
|
||
(erase-buffer)))
|
||
|
||
(defun wisent-source ()
|
||
"Return the current source file name or nil."
|
||
(let ((source (macroexp-file-name)))
|
||
(if source
|
||
(file-relative-name source))))
|
||
|
||
(defun wisent-new-log ()
|
||
"Start a new entry into the log buffer."
|
||
(setq wisent-new-log-flag nil)
|
||
(let ((text (format "\n\n*** Wisent %s - %s\n\n"
|
||
(or (wisent-source) (buffer-name))
|
||
(format-time-string "%Y-%m-%d %R"))))
|
||
(with-current-buffer (wisent-log-buffer)
|
||
(goto-char (point-max))
|
||
(insert text))))
|
||
|
||
(defsubst wisent-log (&rest args)
|
||
"Insert text into the log buffer.
|
||
`format-message' is applied to ARGS and the result string is inserted into the
|
||
log buffer returned by the function `wisent-log-buffer'."
|
||
(and wisent-new-log-flag (wisent-new-log))
|
||
(with-current-buffer (wisent-log-buffer)
|
||
(insert (apply #'format-message args))))
|
||
|
||
(defconst wisent-log-file "wisent.output"
|
||
"The log file.
|
||
Used when running without interactive terminal.")
|
||
|
||
(defun wisent-append-to-log-file ()
|
||
"Append contents of logging buffer to `wisent-log-file'."
|
||
(if (get-buffer wisent-log-buffer-name)
|
||
(condition-case err
|
||
(with-current-buffer (wisent-log-buffer)
|
||
(widen)
|
||
(if (> (point-max) (point-min))
|
||
(write-region (point-min) (point-max)
|
||
wisent-log-file t)))
|
||
(error
|
||
(message "*** %s" (error-message-string err))))))
|
||
|
||
;;;; -----------------------------------
|
||
;;;; Representation of the grammar rules
|
||
;;;; -----------------------------------
|
||
|
||
;; ntokens is the number of tokens, and nvars is the number of
|
||
;; variables (nonterminals). nsyms is the total number, ntokens +
|
||
;; nvars.
|
||
|
||
;; Each symbol (either token or variable) receives a symbol number.
|
||
;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
|
||
;; for variables. Symbol number zero is the end-of-input token. This
|
||
;; token is counted in ntokens.
|
||
|
||
;; The rules receive rule numbers 1 to nrules in the order they are
|
||
;; written. Actions and guards are accessed via the rule number.
|
||
|
||
;; The rules themselves are described by three arrays: rrhs, rlhs and
|
||
;; ritem. rlhs[R] is the symbol number of the left hand side of rule
|
||
;; R. The right hand side is stored as symbol numbers in a portion of
|
||
;; ritem. rrhs[R] contains the index in ritem of the beginning of the
|
||
;; portion for rule R.
|
||
|
||
;; The length of the portion is one greater than the number of symbols
|
||
;; in the rule's right hand side. The last element in the portion
|
||
;; contains minus R, which identifies it as the end of a portion and
|
||
;; says which rule it is for.
|
||
|
||
;; The portions of ritem come in order of increasing rule number and
|
||
;; are followed by an element which is nil to mark the end. nitems is
|
||
;; the total length of ritem, not counting the final nil. Each
|
||
;; element of ritem is called an "item" and its index in ritem is an
|
||
;; item number.
|
||
|
||
;; Item numbers are used in the finite state machine to represent
|
||
;; places that parsing can get to.
|
||
|
||
;; The vector rprec contains for each rule, the item number of the
|
||
;; symbol giving its precedence level to this rule. The precedence
|
||
;; level and associativity of each symbol is recorded in respectively
|
||
;; the properties 'wisent--prec and 'wisent--assoc.
|
||
|
||
;; Precedence levels are assigned in increasing order starting with 1
|
||
;; so that numerically higher precedence values mean tighter binding
|
||
;; as they ought to. nil as a symbol or rule's precedence means none
|
||
;; is assigned.
|
||
|
||
(defcustom wisent-state-table-size 1009
|
||
"The size of the state table."
|
||
:type 'integer
|
||
:group 'wisent)
|
||
|
||
;; These variables only exist locally in the function
|
||
;; `wisent-compile-grammar' and are shared by all other nested
|
||
;; callees.
|
||
(wisent-defcontext compile-grammar
|
||
F LA LAruleno accessing-symbol conflicts consistent default-prec
|
||
derives err-table fderives final-state first-reduction first-shift
|
||
first-state firsts from-state goto-map includes itemset nitemset
|
||
kernel-base kernel-end kernel-items last-reduction last-shift
|
||
last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
|
||
nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
|
||
reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
|
||
rcode ruleset rulesetsize shift-symbol shift-table shiftset
|
||
src-count src-total start-table state-table tags this-state to-state
|
||
tokensetsize ;; nb of words req. to hold a bit for each rule
|
||
varsetsize ;; nb of words req. to hold a bit for each variable
|
||
error-token-number start-symbol token-list var-list
|
||
N P V V1 nuseless-nonterminals nuseless-productions
|
||
ptable ;; symbols & characters properties
|
||
)
|
||
|
||
(defmacro wisent-ISTOKEN (s)
|
||
"Return non-nil if item number S defines a token (terminal).
|
||
That is if S < `ntokens'."
|
||
`(< ,s ntokens))
|
||
|
||
(defmacro wisent-ISVAR(s)
|
||
"Return non-nil if item number S defines a nonterminal.
|
||
That is if S >= `ntokens'."
|
||
`(>= ,s ntokens))
|
||
|
||
(defsubst wisent-tag (s)
|
||
"Return printable form of item number S."
|
||
(wisent-item-to-string (aref tags s)))
|
||
|
||
;; Symbol and character properties
|
||
|
||
(defsubst wisent-put (object propname value)
|
||
"Store OBJECT's PROPNAME property with value VALUE.
|
||
Use `eq' to locate OBJECT."
|
||
(let ((entry (assq object ptable)))
|
||
(or entry (setq entry (list object) ptable (cons entry ptable)))
|
||
(setcdr entry (plist-put (cdr entry) propname value))))
|
||
|
||
(defsubst wisent-get (object propname)
|
||
"Return the value of OBJECT's PROPNAME property.
|
||
Use `eq' to locate OBJECT."
|
||
(plist-get (cdr (assq object ptable)) propname))
|
||
|
||
(defsubst wisent-item-number (x)
|
||
"Return the item number of symbol X."
|
||
(wisent-get x 'wisent--item-no))
|
||
|
||
(defsubst wisent-set-item-number (x n)
|
||
"Set the item number of symbol X to N."
|
||
(wisent-put x 'wisent--item-no n))
|
||
|
||
(defsubst wisent-assoc (x)
|
||
"Return the associativity of symbol X."
|
||
(wisent-get x 'wisent--assoc))
|
||
|
||
(defsubst wisent-set-assoc (x a)
|
||
"Set the associativity of symbol X to A."
|
||
(wisent-put x 'wisent--assoc a))
|
||
|
||
(defsubst wisent-prec (x)
|
||
"Return the precedence level of symbol X."
|
||
(wisent-get x 'wisent--prec))
|
||
|
||
(defsubst wisent-set-prec (x p)
|
||
"Set the precedence level of symbol X to P."
|
||
(wisent-put x 'wisent--prec p))
|
||
|
||
;;;; ----------------------------------------------------------
|
||
;;;; Type definitions for nondeterministic finite state machine
|
||
;;;; ----------------------------------------------------------
|
||
|
||
;; These type definitions are used to represent a nondeterministic
|
||
;; finite state machine that parses the specified grammar. This
|
||
;; information is generated by the function `wisent-generate-states'.
|
||
|
||
;; Each state of the machine is described by a set of items --
|
||
;; particular positions in particular rules -- that are the possible
|
||
;; places where parsing could continue when the machine is in this
|
||
;; state. These symbols at these items are the allowable inputs that
|
||
;; can follow now.
|
||
|
||
;; A core represents one state. States are numbered in the number
|
||
;; field. When `wisent-generate-states' is finished, the starting
|
||
;; state is state 0 and `nstates' is the number of states. (A
|
||
;; transition to a state whose state number is `nstates' indicates
|
||
;; termination.) All the cores are chained together and `first-state'
|
||
;; points to the first one (state 0).
|
||
|
||
;; For each state there is a particular symbol which must have been
|
||
;; the last thing accepted to reach that state. It is the
|
||
;; accessing-symbol of the core.
|
||
|
||
;; Each core contains a vector of `nitems' items which are the indices
|
||
;; in the `ritems' vector of the items that are selected in this
|
||
;; state.
|
||
|
||
;; The link field is used for chaining buckets that hash states by
|
||
;; their itemsets. This is for recognizing equivalent states and
|
||
;; combining them when the states are generated.
|
||
|
||
;; The two types of transitions are shifts (push the lookahead token
|
||
;; and read another) and reductions (combine the last n things on the
|
||
;; stack via a rule, replace them with the symbol that the rule
|
||
;; derives, and leave the lookahead token alone). When the states are
|
||
;; generated, these transitions are represented in two other lists.
|
||
|
||
;; Each shifts structure describes the possible shift transitions out
|
||
;; of one state, the state whose number is in the number field. The
|
||
;; shifts structures are linked through next and first-shift points to
|
||
;; them. Each contains a vector of numbers of the states that shift
|
||
;; transitions can go to. The accessing-symbol fields of those
|
||
;; states' cores say what kind of input leads to them.
|
||
|
||
;; A shift to state zero should be ignored. Conflict resolution
|
||
;; deletes shifts by changing them to zero.
|
||
|
||
;; Each reductions structure describes the possible reductions at the
|
||
;; state whose number is in the number field. The data is a list of
|
||
;; nreds rules, represented by their rule numbers. `first-reduction'
|
||
;; points to the list of these structures.
|
||
|
||
;; Conflict resolution can decide that certain tokens in certain
|
||
;; states should explicitly be errors (for implementing %nonassoc).
|
||
;; For each state, the tokens that are errors for this reason are
|
||
;; recorded in an errs structure, which has the state number in its
|
||
;; number field. The rest of the errs structure is full of token
|
||
;; numbers.
|
||
|
||
;; There is at least one shift transition present in state zero. It
|
||
;; leads to a next-to-final state whose accessing-symbol is the
|
||
;; grammar's start symbol. The next-to-final state has one shift to
|
||
;; the final state, whose accessing-symbol is zero (end of input).
|
||
;; The final state has one shift, which goes to the termination state
|
||
;; (whose number is `nstates'-1).
|
||
;; The reason for the extra state at the end is to placate the
|
||
;; parser's strategy of making all decisions one token ahead of its
|
||
;; actions.
|
||
|
||
;; FIXME: Use `wisent-' prefix to fix namespace pollution!
|
||
|
||
(cl-defstruct (core
|
||
(:constructor make-core ()))
|
||
next ; -> core
|
||
link ; -> core
|
||
(number 0)
|
||
(accessing-symbol 0)
|
||
(nitems 0)
|
||
(items [0]))
|
||
|
||
(cl-defstruct (shifts
|
||
(:constructor make-shifts ()))
|
||
next ; -> shifts
|
||
(number 0)
|
||
(nshifts 0)
|
||
(shifts [0]))
|
||
|
||
(cl-defstruct (reductions
|
||
(:constructor make-reductions ()))
|
||
next ; -> reductions
|
||
(number 0)
|
||
(nreds 0)
|
||
(rules [0]))
|
||
|
||
(cl-defstruct (errs
|
||
(:constructor make-errs ()))
|
||
(nerrs 0)
|
||
(errs [0]))
|
||
|
||
;;;; --------------------------------------------------------
|
||
;;;; Find unreachable terminals, nonterminals and productions
|
||
;;;; --------------------------------------------------------
|
||
|
||
(defun wisent-bits-equal (L R n)
|
||
"Visit L and R and return non-nil if their first N elements are `='.
|
||
L and R must be vectors of integers."
|
||
(let* ((i (1- n))
|
||
(iseq t))
|
||
(while (and iseq (natnump i))
|
||
(setq iseq (= (aref L i) (aref R i))
|
||
i (1- i)))
|
||
iseq))
|
||
|
||
(defun wisent-nbits (i)
|
||
"Return number of bits set in integer I."
|
||
(let ((count 0))
|
||
(while (not (zerop i))
|
||
;; i ^= (i & ((unsigned) (-(int) i)))
|
||
(setq i (logxor i (logand i (- i)))
|
||
count (1+ count)))
|
||
count))
|
||
|
||
(defun wisent-bits-size (S n)
|
||
"In vector S count the total of bits set in first N elements.
|
||
S must be a vector of integers."
|
||
(let* ((i (1- n))
|
||
(count 0))
|
||
(while (natnump i)
|
||
(setq count (+ count (wisent-nbits (aref S i)))
|
||
i (1- i)))
|
||
count))
|
||
|
||
(defun wisent-useful-production (i N0)
|
||
"Return non-nil if production I is in useful set N0."
|
||
(let* ((useful t)
|
||
(r (aref rrhs i))
|
||
n)
|
||
(while (and useful (> (setq n (aref ritem r)) 0))
|
||
(if (wisent-ISVAR n)
|
||
(setq useful (wisent-BITISSET N0 (- n ntokens))))
|
||
(setq r (1+ r)))
|
||
useful))
|
||
|
||
(defun wisent-useless-nonterminals ()
|
||
"Find out which nonterminals are used."
|
||
(let (Np Ns i n break)
|
||
;; N is set as built. Np is set being built this iteration. P is
|
||
;; set of all productions which have a RHS all in N.
|
||
(setq n (wisent-WORDSIZE nvars)
|
||
Np (make-vector n 0))
|
||
|
||
;; The set being computed is a set of nonterminals which can
|
||
;; derive the empty string or strings consisting of all
|
||
;; terminals. At each iteration a nonterminal is added to the set
|
||
;; if there is a production with that nonterminal as its LHS for
|
||
;; which all the nonterminals in its RHS are already in the set.
|
||
;; Iterate until the set being computed remains unchanged. Any
|
||
;; nonterminals not in the set at that point are useless in that
|
||
;; they will never be used in deriving a sentence of the language.
|
||
|
||
;; This iteration doesn't use any special traversal over the
|
||
;; productions. A set is kept of all productions for which all
|
||
;; the nonterminals in the RHS are in useful. Only productions
|
||
;; not in this set are scanned on each iteration. At the end,
|
||
;; this set is saved to be used when finding useful productions:
|
||
;; only productions in this set will appear in the final grammar.
|
||
|
||
(while (not break)
|
||
(setq i (1- n))
|
||
(while (natnump i)
|
||
;; Np[i] = N[i]
|
||
(aset Np i (aref N i))
|
||
(setq i (1- i)))
|
||
|
||
(setq i 1)
|
||
(while (<= i nrules)
|
||
(if (not (wisent-BITISSET P i))
|
||
(when (wisent-useful-production i N)
|
||
(wisent-SETBIT Np (- (aref rlhs i) ntokens))
|
||
(wisent-SETBIT P i)))
|
||
(setq i (1+ i)))
|
||
(if (wisent-bits-equal N Np n)
|
||
(setq break t)
|
||
(setq Ns Np
|
||
Np N
|
||
N Ns)))
|
||
(setq N Np)))
|
||
|
||
(defun wisent-inaccessible-symbols ()
|
||
"Find out which productions are reachable and which symbols are used."
|
||
;; Starting with an empty set of productions and a set of symbols
|
||
;; which only has the start symbol in it, iterate over all
|
||
;; productions until the set of productions remains unchanged for an
|
||
;; iteration. For each production which has a LHS in the set of
|
||
;; reachable symbols, add the production to the set of reachable
|
||
;; productions, and add all of the nonterminals in the RHS of the
|
||
;; production to the set of reachable symbols.
|
||
|
||
;; Consider only the (partially) reduced grammar which has only
|
||
;; nonterminals in N and productions in P.
|
||
|
||
;; The result is the set P of productions in the reduced grammar,
|
||
;; and the set V of symbols in the reduced grammar.
|
||
|
||
;; Although this algorithm also computes the set of terminals which
|
||
;; are reachable, no terminal will be deleted from the grammar. Some
|
||
;; terminals might not be in the grammar but might be generated by
|
||
;; semantic routines, and so the user might want them available with
|
||
;; specified numbers. (Is this true?) However, the non reachable
|
||
;; terminals are printed (if running in verbose mode) so that the
|
||
;; user can know.
|
||
(let (Vp Vs Pp i tt r n m break)
|
||
(setq n (wisent-WORDSIZE nsyms)
|
||
m (wisent-WORDSIZE (1+ nrules))
|
||
Vp (make-vector n 0)
|
||
Pp (make-vector m 0))
|
||
|
||
;; If the start symbol isn't useful, then nothing will be useful.
|
||
(when (wisent-BITISSET N (- start-symbol ntokens))
|
||
(wisent-SETBIT V start-symbol)
|
||
(while (not break)
|
||
(setq i (1- n))
|
||
(while (natnump i)
|
||
(aset Vp i (aref V i))
|
||
(setq i (1- i)))
|
||
(setq i 1)
|
||
(while (<= i nrules)
|
||
(when (and (not (wisent-BITISSET Pp i))
|
||
(wisent-BITISSET P i)
|
||
(wisent-BITISSET V (aref rlhs i)))
|
||
(setq r (aref rrhs i))
|
||
(while (natnump (setq tt (aref ritem r)))
|
||
(if (or (wisent-ISTOKEN tt)
|
||
(wisent-BITISSET N (- tt ntokens)))
|
||
(wisent-SETBIT Vp tt))
|
||
(setq r (1+ r)))
|
||
(wisent-SETBIT Pp i))
|
||
(setq i (1+ i)))
|
||
(if (wisent-bits-equal V Vp n)
|
||
(setq break t)
|
||
(setq Vs Vp
|
||
Vp V
|
||
V Vs))))
|
||
(setq V Vp)
|
||
|
||
;; Tokens 0, 1 are internal to Wisent. Consider them useful.
|
||
(wisent-SETBIT V 0) ;; end-of-input token
|
||
(wisent-SETBIT V 1) ;; error token
|
||
(setq P Pp)
|
||
|
||
(setq nuseless-productions (- nrules (wisent-bits-size P m))
|
||
nuseless-nonterminals nvars
|
||
i ntokens)
|
||
(while (< i nsyms)
|
||
(if (wisent-BITISSET V i)
|
||
(setq nuseless-nonterminals (1- nuseless-nonterminals)))
|
||
(setq i (1+ i)))
|
||
|
||
;; A token that was used in %prec should not be warned about.
|
||
(setq i 1)
|
||
(while (<= i nrules)
|
||
(if (aref rprec i)
|
||
(wisent-SETBIT V1 (aref rprec i)))
|
||
(setq i (1+ i)))
|
||
))
|
||
|
||
(defun wisent-reduce-grammar-tables ()
|
||
"Disable useless productions."
|
||
(if (> nuseless-productions 0)
|
||
(let ((pn 1))
|
||
(while (<= pn nrules)
|
||
(aset ruseful pn (wisent-BITISSET P pn))
|
||
(setq pn (1+ pn))))))
|
||
|
||
(defun wisent-nonterminals-reduce ()
|
||
"Remove useless nonterminals."
|
||
(let (i n r item nontermmap tags-sorted)
|
||
;; Map the nonterminals to their new index: useful first, useless
|
||
;; afterwards. Kept for later report.
|
||
(setq nontermmap (make-vector nvars 0)
|
||
n ntokens
|
||
i ntokens)
|
||
(while (< i nsyms)
|
||
(when (wisent-BITISSET V i)
|
||
(aset nontermmap (- i ntokens) n)
|
||
(setq n (1+ n)))
|
||
(setq i (1+ i)))
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(unless (wisent-BITISSET V i)
|
||
(aset nontermmap (- i ntokens) n)
|
||
(setq n (1+ n)))
|
||
(setq i (1+ i)))
|
||
;; Shuffle elements of tables indexed by symbol number
|
||
(setq tags-sorted (make-vector nvars nil)
|
||
i ntokens)
|
||
(while (< i nsyms)
|
||
(setq n (aref nontermmap (- i ntokens)))
|
||
(aset tags-sorted (- n ntokens) (aref tags i))
|
||
(setq i (1+ i)))
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(aset tags i (aref tags-sorted (- i ntokens)))
|
||
(setq i (1+ i)))
|
||
;; Replace all symbol numbers in valid data structures.
|
||
(setq i 1)
|
||
(while (<= i nrules)
|
||
(aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
|
||
(setq i (1+ i)))
|
||
(setq r 0)
|
||
(while (setq item (aref ritem r))
|
||
(if (wisent-ISVAR item)
|
||
(aset ritem r (aref nontermmap (- item ntokens))))
|
||
(setq r (1+ r)))
|
||
(setq start-symbol (aref nontermmap (- start-symbol ntokens))
|
||
nsyms (- nsyms nuseless-nonterminals)
|
||
nvars (- nvars nuseless-nonterminals))
|
||
))
|
||
|
||
(defun wisent-total-useless ()
|
||
"Report number of useless nonterminals and productions."
|
||
(let* ((src (wisent-source))
|
||
(src (if src (concat " in " src) ""))
|
||
(msg (format "Grammar%s contains" src)))
|
||
(if (> nuseless-nonterminals 0)
|
||
(setq msg (format "%s %d useless nonterminal%s"
|
||
msg nuseless-nonterminals
|
||
(if (> nuseless-nonterminals 0) "s" ""))))
|
||
(if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
|
||
(setq msg (format "%s and" msg)))
|
||
(if (> nuseless-productions 0)
|
||
(setq msg (format "%s %d useless rule%s"
|
||
msg nuseless-productions
|
||
(if (> nuseless-productions 0) "s" ""))))
|
||
(message msg)))
|
||
|
||
(defun wisent-reduce-grammar ()
|
||
"Find unreachable terminals, nonterminals and productions."
|
||
;; Allocate the global sets used to compute the reduced grammar
|
||
(setq N (make-vector (wisent-WORDSIZE nvars) 0)
|
||
P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
|
||
V (make-vector (wisent-WORDSIZE nsyms) 0)
|
||
V1 (make-vector (wisent-WORDSIZE nsyms) 0)
|
||
nuseless-nonterminals 0
|
||
nuseless-productions 0)
|
||
|
||
(wisent-useless-nonterminals)
|
||
(wisent-inaccessible-symbols)
|
||
|
||
(when (> (+ nuseless-nonterminals nuseless-productions) 0)
|
||
(wisent-total-useless)
|
||
(or (wisent-BITISSET N (- start-symbol ntokens))
|
||
(error "Start symbol `%s' does not derive any sentence"
|
||
(wisent-tag start-symbol)))
|
||
(wisent-reduce-grammar-tables)
|
||
(if (> nuseless-nonterminals 0)
|
||
(wisent-nonterminals-reduce))))
|
||
|
||
(defun wisent-print-useless ()
|
||
"Output the detailed results of the reductions."
|
||
(let (i b r)
|
||
(when (> nuseless-nonterminals 0)
|
||
;; Useless nonterminals have been moved after useful ones.
|
||
(wisent-log "\n\nUseless nonterminals:\n\n")
|
||
(setq i 0)
|
||
(while (< i nuseless-nonterminals)
|
||
(wisent-log " %s\n" (wisent-tag (+ nsyms i)))
|
||
(setq i (1+ i))))
|
||
(setq b nil
|
||
i 0)
|
||
(while (< i ntokens)
|
||
(unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
|
||
(or b
|
||
(wisent-log "\n\nTerminals which are not used:\n\n"))
|
||
(setq b t)
|
||
(wisent-log " %s\n" (wisent-tag i)))
|
||
(setq i (1+ i)))
|
||
(when (> nuseless-productions 0)
|
||
(wisent-log "\n\nUseless rules:\n\n")
|
||
(setq i 1)
|
||
(while (<= i nrules)
|
||
(unless (aref ruseful i)
|
||
(wisent-log "#%s " (string-pad (format "%d" i) 4))
|
||
(wisent-log "%s:" (wisent-tag (aref rlhs i)))
|
||
(setq r (aref rrhs i))
|
||
(while (natnump (aref ritem r))
|
||
(wisent-log " %s" (wisent-tag (aref ritem r)))
|
||
(setq r (1+ r)))
|
||
(wisent-log ";\n"))
|
||
(setq i (1+ i))))
|
||
(if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
|
||
(wisent-log "\n\n"))
|
||
))
|
||
|
||
;;;; -----------------------------
|
||
;;;; Match rules with nonterminals
|
||
;;;; -----------------------------
|
||
|
||
(defun wisent-set-derives ()
|
||
"Find, for each variable (nonterminal), which rules can derive it.
|
||
It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
|
||
a list of rule numbers, terminated with -1."
|
||
(let (i lhs p q dset delts)
|
||
(setq dset (make-vector nvars nil)
|
||
delts (make-vector (1+ nrules) 0))
|
||
(setq p 0 ;; p = delts
|
||
i nrules)
|
||
(while (> i 0)
|
||
(when (aref ruseful i)
|
||
(setq lhs (aref rlhs i))
|
||
;; p->next = dset[lhs];
|
||
;; p->value = i;
|
||
(aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
|
||
(aset dset (- lhs ntokens) p) ;; dset[lhs] = p
|
||
(setq p (1+ p)) ;; p++
|
||
)
|
||
(setq i (1- i)))
|
||
|
||
(setq derives (make-vector nvars nil)
|
||
i ntokens)
|
||
|
||
(while (< i nsyms)
|
||
(setq q nil
|
||
p (aref dset (- i ntokens))) ;; p = dset[i]
|
||
|
||
(while p
|
||
(setq p (aref delts p)
|
||
q (cons (car p) q) ;;q++ = p->value
|
||
p (cdr p))) ;; p = p->next
|
||
(setq q (nreverse (cons -1 q))) ;; *q++ = -1
|
||
(aset derives (- i ntokens) q) ;; derives[i] = q
|
||
(setq i (1+ i)))
|
||
))
|
||
|
||
;;;; --------------------------------------------------------
|
||
;;;; Find which nonterminals can expand into the null string.
|
||
;;;; --------------------------------------------------------
|
||
|
||
(defun wisent-print-nullable ()
|
||
"Print NULLABLE."
|
||
(let (i)
|
||
(wisent-log "NULLABLE\n")
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(wisent-log "\t%s: %s\n" (wisent-tag i)
|
||
(if (aref nullable (- i ntokens))
|
||
"yes" : "no"))
|
||
(setq i (1+ i)))
|
||
(wisent-log "\n\n")))
|
||
|
||
(defun wisent-set-nullable ()
|
||
"Set up NULLABLE.
|
||
A vector saying which nonterminals can expand into the null string.
|
||
NULLABLE[i - NTOKENS] is nil if symbol I can do so."
|
||
(let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
|
||
(setq squeue (make-vector nvars 0)
|
||
rcount (make-vector (1+ nrules) 0)
|
||
rsets (make-vector nvars nil) ;; - ntokens
|
||
relts (make-vector (+ nitems nvars 1) nil)
|
||
nullable (make-vector nvars nil)) ;; - ntokens
|
||
(setq s1 0 s2 0 ;; s1 = s2 = squeue
|
||
p 0 ;; p = relts
|
||
ruleno 1)
|
||
(while (<= ruleno nrules)
|
||
(when (aref ruseful ruleno)
|
||
(if (> (aref ritem (aref rrhs ruleno)) 0)
|
||
(progn
|
||
;; This rule has a non empty RHS.
|
||
(setq any-tokens nil
|
||
r (aref rrhs ruleno))
|
||
(while (> (aref ritem r) 0)
|
||
(if (wisent-ISTOKEN (aref ritem r))
|
||
(setq any-tokens t))
|
||
(setq r (1+ r)))
|
||
|
||
;; This rule has only nonterminals: schedule it for the
|
||
;; second pass.
|
||
(unless any-tokens
|
||
(setq r (aref rrhs ruleno))
|
||
(while (> (setq item (aref ritem r)) 0)
|
||
(aset rcount ruleno (1+ (aref rcount ruleno)))
|
||
;; p->next = rsets[item];
|
||
;; p->value = ruleno;
|
||
(aset relts p (cons ruleno (aref rsets (- item ntokens))))
|
||
;; rsets[item] = p;
|
||
(aset rsets (- item ntokens) p)
|
||
(setq p (1+ p)
|
||
r (1+ r)))))
|
||
;; This rule has an empty RHS.
|
||
;; assert (ritem[rrhs[ruleno]] == -ruleno)
|
||
(when (and (aref ruseful ruleno)
|
||
(setq item (aref rlhs ruleno))
|
||
(not (aref nullable (- item ntokens))))
|
||
(aset nullable (- item ntokens) t)
|
||
(aset squeue s2 item)
|
||
(setq s2 (1+ s2)))
|
||
)
|
||
)
|
||
(setq ruleno (1+ ruleno)))
|
||
|
||
(while (< s1 s2)
|
||
;; p = rsets[*s1++]
|
||
(setq p (aref rsets (- (aref squeue s1) ntokens))
|
||
s1 (1+ s1))
|
||
(while p
|
||
(setq p (aref relts p)
|
||
ruleno (car p)
|
||
p (cdr p)) ;; p = p->next
|
||
;; if (--rcount[ruleno] == 0)
|
||
(when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
|
||
(setq item (aref rlhs ruleno))
|
||
(aset nullable (- item ntokens) t)
|
||
(aset squeue s2 item)
|
||
(setq s2 (1+ s2)))))
|
||
|
||
(if wisent-debug-flag
|
||
(wisent-print-nullable))
|
||
))
|
||
|
||
;;;; -----------
|
||
;;;; Subroutines
|
||
;;;; -----------
|
||
|
||
(defun wisent-print-fderives ()
|
||
"Print FDERIVES."
|
||
(let (i j rp)
|
||
(wisent-log "\n\n\nFDERIVES\n")
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(wisent-log "\n\n%s derives\n\n" (wisent-tag i))
|
||
(setq rp (aref fderives (- i ntokens))
|
||
j 0)
|
||
(while (<= j nrules)
|
||
(if (wisent-BITISSET rp j)
|
||
(wisent-log " %d\n" j))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-set-fderives ()
|
||
"Set up FDERIVES.
|
||
An NVARS by NRULES matrix of bits indicating which rules can help
|
||
derive the beginning of the data for each nonterminal. For example,
|
||
if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
|
||
of the rules for deriving symbol 8 is rule 4, then the
|
||
[5 - NTOKENS, 4] bit in FDERIVES is set."
|
||
(let (i j k)
|
||
(setq fderives (make-vector nvars nil))
|
||
(setq i 0)
|
||
(while (< i nvars)
|
||
(aset fderives i (make-vector rulesetsize 0))
|
||
(setq i (1+ i)))
|
||
|
||
(wisent-set-firsts)
|
||
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(setq j ntokens)
|
||
(while (< j nsyms)
|
||
;; if (BITISSET (FIRSTS (i), j - ntokens))
|
||
(when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
|
||
(setq k (aref derives (- j ntokens)))
|
||
(while (> (car k) 0) ;; derives[j][k] > 0
|
||
;; SETBIT (FDERIVES (i), derives[j][k]);
|
||
(wisent-SETBIT (aref fderives (- i ntokens)) (car k))
|
||
(setq k (cdr k))))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))
|
||
|
||
(if wisent-debug-flag
|
||
(wisent-print-fderives))
|
||
))
|
||
|
||
(defun wisent-print-firsts ()
|
||
"Print FIRSTS."
|
||
(let (i j v)
|
||
(wisent-log "\n\n\nFIRSTS\n\n")
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
|
||
(setq v (aref firsts (- i ntokens))
|
||
j 0)
|
||
(while (< j nvars)
|
||
(if (wisent-BITISSET v j)
|
||
(wisent-log "\t\t%d (%s)\n"
|
||
(+ j ntokens) (wisent-tag (+ j ntokens))))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-TC (R n)
|
||
"Transitive closure.
|
||
Given R an N by N matrix of bits, modify its contents to be the
|
||
transitive closure of what was given."
|
||
(let (i j k)
|
||
;; R (J, I) && R (I, K) => R (J, K).
|
||
;; I *must* be the outer loop.
|
||
(setq i 0)
|
||
(while (< i n)
|
||
(setq j 0)
|
||
(while (< j n)
|
||
(when (wisent-BITISSET (aref R j) i)
|
||
(setq k 0)
|
||
(while (< k n)
|
||
(if (wisent-BITISSET (aref R i) k)
|
||
(wisent-SETBIT (aref R j) k))
|
||
(setq k (1+ k))))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-RTC (R n)
|
||
"Reflexive Transitive Closure.
|
||
Same as `wisent-TC' and then set all the bits on the diagonal of R, an
|
||
N by N matrix of bits."
|
||
(let (i)
|
||
(wisent-TC R n)
|
||
(setq i 0)
|
||
(while (< i n)
|
||
(wisent-SETBIT (aref R i) i)
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-set-firsts ()
|
||
"Set up FIRSTS.
|
||
An NVARS by NVARS bit matrix indicating which items can represent the
|
||
beginning of the input corresponding to which other items. For
|
||
example, if some rule expands symbol 5 into the sequence of symbols 8
|
||
3 20, the symbol 8 can be the beginning of the data for symbol 5, so
|
||
the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
|
||
(let (row symbol sp rowsize i)
|
||
(setq rowsize (wisent-WORDSIZE nvars)
|
||
varsetsize rowsize
|
||
firsts (make-vector nvars nil)
|
||
i 0)
|
||
(while (< i nvars)
|
||
(aset firsts i (make-vector rowsize 0))
|
||
(setq i (1+ i)))
|
||
|
||
(setq row 0 ;; row = firsts
|
||
i ntokens)
|
||
(while (< i nsyms)
|
||
(setq sp (aref derives (- i ntokens)))
|
||
(while (>= (car sp) 0)
|
||
(setq symbol (aref ritem (aref rrhs (car sp)))
|
||
sp (cdr sp))
|
||
(when (wisent-ISVAR symbol)
|
||
(setq symbol (- symbol ntokens))
|
||
(wisent-SETBIT (aref firsts row) symbol)
|
||
))
|
||
(setq row (1+ row)
|
||
i (1+ i)))
|
||
|
||
(wisent-RTC firsts nvars)
|
||
|
||
(if wisent-debug-flag
|
||
(wisent-print-firsts))
|
||
))
|
||
|
||
(defun wisent-initialize-closure (n)
|
||
"Allocate the ITEMSET and RULESET vectors.
|
||
And precompute useful data so that `wisent-closure' can be called.
|
||
N is the number of elements to allocate for ITEMSET."
|
||
(setq itemset (make-vector n 0)
|
||
rulesetsize (wisent-WORDSIZE (1+ nrules))
|
||
ruleset (make-vector rulesetsize 0))
|
||
|
||
(wisent-set-fderives))
|
||
|
||
(defun wisent-print-closure ()
|
||
"Print ITEMSET."
|
||
(let (i)
|
||
(wisent-log "\n\nclosure n = %d\n\n" nitemset)
|
||
(setq i 0) ;; isp = itemset
|
||
(while (< i nitemset)
|
||
(wisent-log " %d\n" (aref itemset i))
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-closure (core n)
|
||
"Set up RULESET and ITEMSET for the transitions out of CORE state.
|
||
Given a vector of item numbers items, of length N, set up RULESET and
|
||
ITEMSET to indicate what rules could be run and which items could be
|
||
accepted when those items are the active ones.
|
||
|
||
RULESET contains a bit for each rule. `wisent-closure' sets the bits
|
||
for all rules which could potentially describe the next input to be
|
||
read.
|
||
|
||
ITEMSET is a vector of item numbers; NITEMSET is the number of items
|
||
in ITEMSET. `wisent-closure' places there the indices of all items
|
||
which represent units of input that could arrive next."
|
||
(let (c r v symbol ruleno itemno)
|
||
(if (zerop n)
|
||
(progn
|
||
(setq r 0
|
||
v (aref fderives (- start-symbol ntokens)))
|
||
(while (< r rulesetsize)
|
||
;; ruleset[r] = FDERIVES (start-symbol)[r];
|
||
(aset ruleset r (aref v r))
|
||
(setq r (1+ r)))
|
||
)
|
||
(fillarray ruleset 0)
|
||
(setq c 0)
|
||
(while (< c n)
|
||
(setq symbol (aref ritem (aref core c)))
|
||
(when (wisent-ISVAR symbol)
|
||
(setq r 0
|
||
v (aref fderives (- symbol ntokens)))
|
||
(while (< r rulesetsize)
|
||
;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
|
||
(aset ruleset r (logior (aref ruleset r) (aref v r)))
|
||
(setq r (1+ r))))
|
||
(setq c (1+ c)))
|
||
)
|
||
(setq nitemset 0
|
||
c 0
|
||
ruleno 0
|
||
r (* rulesetsize wisent-BITS-PER-WORD))
|
||
(while (< ruleno r)
|
||
(when (wisent-BITISSET ruleset ruleno)
|
||
(setq itemno (aref rrhs ruleno))
|
||
(while (and (< c n) (< (aref core c) itemno))
|
||
(aset itemset nitemset (aref core c))
|
||
(setq nitemset (1+ nitemset)
|
||
c (1+ c)))
|
||
(aset itemset nitemset itemno)
|
||
(setq nitemset (1+ nitemset)))
|
||
(setq ruleno (1+ ruleno)))
|
||
|
||
(while (< c n)
|
||
(aset itemset nitemset (aref core c))
|
||
(setq nitemset (1+ nitemset)
|
||
c (1+ c)))
|
||
|
||
(if wisent-debug-flag
|
||
(wisent-print-closure))
|
||
))
|
||
|
||
;;;; --------------------------------------------------
|
||
;;;; Generate the nondeterministic finite state machine
|
||
;;;; --------------------------------------------------
|
||
|
||
(defun wisent-allocate-itemsets ()
|
||
"Allocate storage for itemsets."
|
||
(let (symbol i count symbol-count)
|
||
;; Count the number of occurrences of all the symbols in RITEMS.
|
||
;; Note that useless productions (hence useless nonterminals) are
|
||
;; browsed too, hence we need to allocate room for _all_ the
|
||
;; symbols.
|
||
(setq count 0
|
||
symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
|
||
i 0)
|
||
(while (setq symbol (aref ritem i))
|
||
(when (> symbol 0)
|
||
(setq count (1+ count))
|
||
(aset symbol-count symbol (1+ (aref symbol-count symbol))))
|
||
(setq i (1+ i)))
|
||
;; See comments before `wisent-new-itemsets'. All the vectors of
|
||
;; items live inside kernel-items. The number of active items
|
||
;; after some symbol cannot be more than the number of times that
|
||
;; symbol appears as an item, which is symbol-count[symbol]. We
|
||
;; allocate that much space for each symbol.
|
||
(setq kernel-base (make-vector nsyms nil)
|
||
kernel-items (make-vector count 0)
|
||
count 0
|
||
i 0)
|
||
(while (< i nsyms)
|
||
(aset kernel-base i count)
|
||
(setq count (+ count (aref symbol-count i))
|
||
i (1+ i)))
|
||
(setq shift-symbol symbol-count
|
||
kernel-end (make-vector nsyms nil))
|
||
))
|
||
|
||
(defun wisent-allocate-storage ()
|
||
"Allocate storage for the state machine."
|
||
(wisent-allocate-itemsets)
|
||
(setq shiftset (make-vector nsyms 0)
|
||
redset (make-vector (1+ nrules) 0)
|
||
state-table (make-vector wisent-state-table-size nil)))
|
||
|
||
(defun wisent-new-itemsets ()
|
||
"Find which symbols can be shifted in the current state.
|
||
And for each one record which items would be active after that shift.
|
||
Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
|
||
symbols that can be shifted. For each symbol in the grammar,
|
||
KERNEL-BASE[symbol] points to a vector of item numbers activated if
|
||
that symbol is shifted, and KERNEL-END[symbol] points after the end of
|
||
that vector."
|
||
(let (i shiftcount isp ksp symbol)
|
||
(fillarray kernel-end nil)
|
||
(setq shiftcount 0
|
||
isp 0)
|
||
(while (< isp nitemset)
|
||
(setq i (aref itemset isp)
|
||
isp (1+ isp)
|
||
symbol (aref ritem i))
|
||
(when (> symbol 0)
|
||
(setq ksp (aref kernel-end symbol))
|
||
(when (not ksp)
|
||
;; shift-symbol[shiftcount++] = symbol;
|
||
(aset shift-symbol shiftcount symbol)
|
||
(setq shiftcount (1+ shiftcount)
|
||
ksp (aref kernel-base symbol)))
|
||
;; *ksp++ = i + 1;
|
||
(aset kernel-items ksp (1+ i))
|
||
(setq ksp (1+ ksp))
|
||
(aset kernel-end symbol ksp)))
|
||
(setq nshifts shiftcount)))
|
||
|
||
(defun wisent-new-state (symbol)
|
||
"Create a new state for those items, if necessary.
|
||
SYMBOL is the core accessing-symbol.
|
||
Subroutine of `wisent-get-state'."
|
||
(let (n p isp1 isp2 iend items)
|
||
(setq isp1 (aref kernel-base symbol)
|
||
iend (aref kernel-end symbol)
|
||
n (- iend isp1)
|
||
p (make-core)
|
||
items (make-vector n 0))
|
||
(setf (core-accessing-symbol p) symbol)
|
||
(setf (core-number p) nstates)
|
||
(setf (core-nitems p) n)
|
||
(setf (core-items p) items)
|
||
(setq isp2 0) ;; isp2 = p->items
|
||
(while (< isp1 iend)
|
||
;; *isp2++ = *isp1++;
|
||
(aset items isp2 (aref kernel-items isp1))
|
||
(setq isp1 (1+ isp1)
|
||
isp2 (1+ isp2)))
|
||
(setf (core-next last-state) p)
|
||
(setq last-state p
|
||
nstates (1+ nstates))
|
||
p))
|
||
|
||
(defun wisent-get-state (symbol)
|
||
"Find the state we would get to by shifting SYMBOL.
|
||
Return the state number for the state we would get to (from the
|
||
current state) by shifting SYMBOL. Create a new state if no
|
||
equivalent one exists already. Used by `wisent-append-states'."
|
||
(let (key isp1 isp2 iend sp sp2 found n)
|
||
(setq isp1 (aref kernel-base symbol)
|
||
iend (aref kernel-end symbol)
|
||
n (- iend isp1)
|
||
key 0)
|
||
;; Add up the target state's active item numbers to get a hash key
|
||
(while (< isp1 iend)
|
||
(setq key (+ key (aref kernel-items isp1))
|
||
isp1 (1+ isp1)))
|
||
(setq key (% key wisent-state-table-size)
|
||
sp (aref state-table key))
|
||
(if sp
|
||
(progn
|
||
(setq found nil)
|
||
(while (not found)
|
||
(when (= (core-nitems sp) n)
|
||
(setq found t
|
||
isp1 (aref kernel-base symbol)
|
||
;; isp2 = sp->items;
|
||
sp2 (core-items sp)
|
||
isp2 0)
|
||
|
||
(while (and found (< isp1 iend))
|
||
;; if (*isp1++ != *isp2++)
|
||
(if (not (= (aref kernel-items isp1)
|
||
(aref sp2 isp2)))
|
||
(setq found nil))
|
||
(setq isp1 (1+ isp1)
|
||
isp2 (1+ isp2))))
|
||
(if (not found)
|
||
(if (core-link sp)
|
||
(setq sp (core-link sp))
|
||
;; sp = sp->link = new-state(symbol)
|
||
(setq sp (setf (core-link sp) (wisent-new-state symbol))
|
||
found t)))))
|
||
;; bucket is empty
|
||
;; state-table[key] = sp = new-state(symbol)
|
||
(setq sp (wisent-new-state symbol))
|
||
(aset state-table key sp))
|
||
;; return (sp->number);
|
||
(core-number sp)))
|
||
|
||
(defun wisent-append-states ()
|
||
"Find or create the core structures for states.
|
||
Use the information computed by `wisent-new-itemsets' to find the
|
||
state numbers reached by each shift transition from the current state.
|
||
SHIFTSET is set up as a vector of state numbers of those states."
|
||
(let (i j symbol)
|
||
;; First sort shift-symbol into increasing order
|
||
(setq i 1)
|
||
(while (< i nshifts)
|
||
(setq symbol (aref shift-symbol i)
|
||
j i)
|
||
(while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
|
||
(aset shift-symbol j (aref shift-symbol (1- j)))
|
||
(setq j (1- j)))
|
||
(aset shift-symbol j symbol)
|
||
(setq i (1+ i)))
|
||
(setq i 0)
|
||
(while (< i nshifts)
|
||
(setq symbol (aref shift-symbol i))
|
||
(aset shiftset i (wisent-get-state symbol))
|
||
(setq i (1+ i)))
|
||
))
|
||
|
||
(defun wisent-initialize-states ()
|
||
"Initialize states."
|
||
(let ((p (make-core)))
|
||
(setq first-state p
|
||
last-state p
|
||
this-state p
|
||
nstates 1)))
|
||
|
||
(defun wisent-save-shifts ()
|
||
"Save the NSHIFTS of SHIFTSET into the current linked list."
|
||
(let (p i shifts)
|
||
(setq p (make-shifts)
|
||
shifts (make-vector nshifts 0)
|
||
i 0)
|
||
(setf (shifts-number p) (core-number this-state))
|
||
(setf (shifts-nshifts p) nshifts)
|
||
(setf (shifts-shifts p) shifts)
|
||
(while (< i nshifts)
|
||
;; (p->shifts)[i] = shiftset[i];
|
||
(aset shifts i (aref shiftset i))
|
||
(setq i (1+ i)))
|
||
|
||
(setf (if last-shift
|
||
(shifts-next last-shift)
|
||
first-shift)
|
||
p)
|
||
(setq last-shift p)))
|
||
|
||
(defun wisent-insert-start-shift ()
|
||
"Create the next-to-final state.
|
||
That is the state to which a shift has already been made in the
|
||
initial state. Subroutine of `wisent-augment-automaton'."
|
||
(let (statep sp)
|
||
(setq statep (make-core))
|
||
(setf (core-number statep) nstates)
|
||
(setf (core-accessing-symbol statep) start-symbol)
|
||
(setf (core-next last-state) statep)
|
||
(setq last-state statep)
|
||
;; Make a shift from this state to (what will be) the final state.
|
||
(setq sp (make-shifts))
|
||
(setf (shifts-number sp) nstates)
|
||
(setq nstates (1+ nstates))
|
||
(setf (shifts-nshifts sp) 1)
|
||
(setf (shifts-shifts sp) (vector nstates))
|
||
(setf (shifts-next last-shift) sp)
|
||
(setq last-shift sp)))
|
||
|
||
(defun wisent-augment-automaton ()
|
||
"Set up initial and final states as parser wants them.
|
||
Make sure that the initial state has a shift that accepts the
|
||
grammar's start symbol and goes to the next-to-final state, which has
|
||
a shift going to the final state, which has a shift to the termination
|
||
state. Create such states and shifts if they don't happen to exist
|
||
already."
|
||
(let (i k statep sp sp2 sp1 shifts)
|
||
(setq sp first-shift)
|
||
(if sp
|
||
(progn
|
||
(if (zerop (shifts-number sp))
|
||
(progn
|
||
(setq k (shifts-nshifts sp)
|
||
statep (core-next first-state))
|
||
;; The states reached by shifts from first-state are
|
||
;; numbered 1...K. Look for one reached by
|
||
;; START-SYMBOL.
|
||
(while (and (< (core-accessing-symbol statep) start-symbol)
|
||
(< (core-number statep) k))
|
||
(setq statep (core-next statep)))
|
||
(if (= (core-accessing-symbol statep) start-symbol)
|
||
(progn
|
||
;; We already have a next-to-final state. Make
|
||
;; sure it has a shift to what will be the final
|
||
;; state.
|
||
(setq k (core-number statep))
|
||
(while (and sp (< (shifts-number sp) k))
|
||
(setq sp1 sp
|
||
sp (shifts-next sp)))
|
||
(if (and sp (= (shifts-number sp) k))
|
||
(progn
|
||
(setq i (shifts-nshifts sp)
|
||
sp2 (make-shifts)
|
||
shifts (make-vector (1+ i) 0))
|
||
(setf (shifts-number sp2) k)
|
||
(setf (shifts-nshifts sp2) (1+ i))
|
||
(setf (shifts-shifts sp2) shifts)
|
||
(aset shifts 0 nstates)
|
||
(while (> i 0)
|
||
;; sp2->shifts[i] = sp->shifts[i - 1];
|
||
(aset shifts i (aref (shifts-shifts sp) (1- i)))
|
||
(setq i (1- i)))
|
||
;; Patch sp2 into the chain of shifts in
|
||
;; place of sp, following sp1.
|
||
(setf (shifts-next sp2) (shifts-next sp))
|
||
(setf (shifts-next sp1) sp2)
|
||
(if (eq sp last-shift)
|
||
(setq last-shift sp2))
|
||
)
|
||
(setq sp2 (make-shifts))
|
||
(setf (shifts-number sp2) k)
|
||
(setf (shifts-nshifts sp2) 1)
|
||
(setf (shifts-shifts sp2) (vector nstates))
|
||
;; Patch sp2 into the chain of shifts between
|
||
;; sp1 and sp.
|
||
(setf (shifts-next sp2) sp)
|
||
(setf (shifts-next sp1) sp2)
|
||
(if (not sp)
|
||
(setq last-shift sp2))
|
||
)
|
||
)
|
||
;; There is no next-to-final state as yet.
|
||
;; Add one more shift in FIRST-SHIFT, going to the
|
||
;; next-to-final state (yet to be made).
|
||
(setq sp first-shift
|
||
sp2 (make-shifts)
|
||
i (shifts-nshifts sp)
|
||
shifts (make-vector (1+ i) 0))
|
||
(setf (shifts-nshifts sp2) (1+ i))
|
||
(setf (shifts-shifts sp2) shifts)
|
||
;; Stick this shift into the vector at the proper place.
|
||
(setq statep (core-next first-state)
|
||
k 0
|
||
i 0)
|
||
(while (< i (shifts-nshifts sp))
|
||
(when (and (> (core-accessing-symbol statep) start-symbol)
|
||
(= i k))
|
||
(aset shifts k nstates)
|
||
(setq k (1+ k)))
|
||
(aset shifts k (aref (shifts-shifts sp) i))
|
||
(setq statep (core-next statep))
|
||
(setq i (1+ i)
|
||
k (1+ k)))
|
||
(when (= i k)
|
||
(aset shifts k nstates)
|
||
(setq k (1+ k)))
|
||
;; Patch sp2 into the chain of shifts in place of
|
||
;; sp, at the beginning.
|
||
(setf (shifts-next sp2) (shifts-next sp))
|
||
(setq first-shift sp2)
|
||
(if (eq last-shift sp)
|
||
(setq last-shift sp2))
|
||
;; Create the next-to-final state, with shift to
|
||
;; what will be the final state.
|
||
(wisent-insert-start-shift)))
|
||
;; The initial state didn't even have any shifts. Give it
|
||
;; one shift, to the next-to-final state.
|
||
(setq sp (make-shifts))
|
||
(setf (shifts-nshifts sp) 1)
|
||
(setf (shifts-shifts sp) (vector nstates))
|
||
;; Patch sp into the chain of shifts at the beginning.
|
||
(setf (shifts-next sp) first-shift)
|
||
(setq first-shift sp)
|
||
;; Create the next-to-final state, with shift to what will
|
||
;; be the final state.
|
||
(wisent-insert-start-shift)))
|
||
;; There are no shifts for any state. Make one shift, from the
|
||
;; initial state to the next-to-final state.
|
||
(setq sp (make-shifts))
|
||
(setf (shifts-nshifts sp) 1)
|
||
(setf (shifts-shifts sp) (vector nstates))
|
||
;; Initialize the chain of shifts with sp.
|
||
(setq first-shift sp
|
||
last-shift sp)
|
||
;; Create the next-to-final state, with shift to what will be
|
||
;; the final state.
|
||
(wisent-insert-start-shift))
|
||
;; Make the final state--the one that follows a shift from the
|
||
;; next-to-final state. The symbol for that shift is 0
|
||
;; (end-of-file).
|
||
(setq statep (make-core))
|
||
(setf (core-number statep) nstates)
|
||
(setf (core-next last-state) statep)
|
||
(setq last-state statep)
|
||
;; Make the shift from the final state to the termination state.
|
||
(setq sp (make-shifts))
|
||
(setf (shifts-number sp) nstates)
|
||
(setq nstates (1+ nstates))
|
||
(setf (shifts-nshifts sp) 1)
|
||
(setf (shifts-shifts sp) (vector nstates))
|
||
(setf (shifts-next last-shift) sp)
|
||
(setq last-shift sp)
|
||
;; Note that the variable FINAL-STATE refers to what we sometimes
|
||
;; call the termination state.
|
||
(setq final-state nstates)
|
||
;; Make the termination state.
|
||
(setq statep (make-core))
|
||
(setf (core-number statep) nstates)
|
||
(setq nstates (1+ nstates))
|
||
(setf (core-next last-state) statep)
|
||
(setq last-state statep)))
|
||
|
||
(defun wisent-save-reductions ()
|
||
"Make a reductions structure.
|
||
Find which rules can be used for reduction transitions from the
|
||
current state and make a reductions structure for the state to record
|
||
their rule numbers."
|
||
(let (i item count p rules)
|
||
;; Find and count the active items that represent ends of rules.
|
||
(setq count 0
|
||
i 0)
|
||
(while (< i nitemset)
|
||
(setq item (aref ritem (aref itemset i)))
|
||
(when (< item 0)
|
||
(aset redset count (- item))
|
||
(setq count (1+ count)))
|
||
(setq i (1+ i)))
|
||
;; Make a reductions structure and copy the data into it.
|
||
(when (> count 0)
|
||
(setq p (make-reductions)
|
||
rules (make-vector count 0))
|
||
(setf (reductions-number p) (core-number this-state))
|
||
(setf (reductions-nreds p) count)
|
||
(setf (reductions-rules p) rules)
|
||
(setq i 0)
|
||
(while (< i count)
|
||
;; (p->rules)[i] = redset[i]
|
||
(aset rules i (aref redset i))
|
||
(setq i (1+ i)))
|
||
(setf (if last-reduction
|
||
(reductions-next last-reduction)
|
||
first-reduction)
|
||
p)
|
||
(setq last-reduction p))))
|
||
|
||
(defun wisent-generate-states ()
|
||
"Compute the nondeterministic finite state machine from the grammar."
|
||
(wisent-allocate-storage)
|
||
(wisent-initialize-closure nitems)
|
||
(wisent-initialize-states)
|
||
(while this-state
|
||
;; Set up RULESET and ITEMSET for the transitions out of this
|
||
;; state. RULESET gets a 1 bit for each rule that could reduce
|
||
;; now. ITEMSET gets a vector of all the items that could be
|
||
;; accepted next.
|
||
(wisent-closure (core-items this-state) (core-nitems this-state))
|
||
;; Record the reductions allowed out of this state.
|
||
(wisent-save-reductions)
|
||
;; Find the itemsets of the states that shifts can reach.
|
||
(wisent-new-itemsets)
|
||
;; Find or create the core structures for those states.
|
||
(wisent-append-states)
|
||
;; Create the shifts structures for the shifts to those states,
|
||
;; now that the state numbers transitioning to are known.
|
||
(if (> nshifts 0)
|
||
(wisent-save-shifts))
|
||
;; States are queued when they are created; process them all.
|
||
(setq this-state (core-next this-state)))
|
||
;; Set up initial and final states as parser wants them.
|
||
(wisent-augment-automaton))
|
||
|
||
;;;; ---------------------------
|
||
;;;; Compute look-ahead criteria
|
||
;;;; ---------------------------
|
||
|
||
;; Compute how to make the finite state machine deterministic; find
|
||
;; which rules need lookahead in each state, and which lookahead
|
||
;; tokens they accept.
|
||
|
||
;; `wisent-lalr', the entry point, builds these data structures:
|
||
|
||
;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
|
||
;; which accepts a variable (a nonterminal). NGOTOS is the number of
|
||
;; such transitions.
|
||
;; FROM-STATE[t] is the state number which a transition leads from and
|
||
;; TO-STATE[t] is the state number it leads to.
|
||
;; All the transitions that accept a particular variable are grouped
|
||
;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
|
||
;; TO-STATE of the first of them.
|
||
|
||
;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
|
||
;; to do in state s.
|
||
|
||
;; LARULENO is a vector which records the rules that need lookahead in
|
||
;; various states. The elements of LARULENO that apply to state s are
|
||
;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
|
||
;; of LARULENO is a rule number.
|
||
|
||
;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
|
||
;; specify both a rule and a state where the rule might be applied.
|
||
;; LA is a LR by NTOKENS matrix of bits.
|
||
;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
|
||
;; appropriate state when the next token is symbol i.
|
||
;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
|
||
|
||
(wisent-defcontext digraph
|
||
INDEX R VERTICES
|
||
infinity top)
|
||
|
||
(defun wisent-traverse (i)
|
||
"Traverse I."
|
||
(let (j k height Ri Fi break)
|
||
(setq top (1+ top)
|
||
height top)
|
||
(aset VERTICES top i) ;; VERTICES[++top] = i
|
||
(aset INDEX i top) ;; INDEX[i] = height = top
|
||
|
||
(setq Ri (aref R i))
|
||
(when Ri
|
||
(setq j 0)
|
||
(while (>= (aref Ri j) 0)
|
||
(if (zerop (aref INDEX (aref Ri j)))
|
||
(wisent-traverse (aref Ri j)))
|
||
;; if (INDEX[i] > INDEX[R[i][j]])
|
||
(if (> (aref INDEX i) (aref INDEX (aref Ri j)))
|
||
;; INDEX[i] = INDEX[R[i][j]];
|
||
(aset INDEX i (aref INDEX (aref Ri j))))
|
||
(setq Fi (aref F i)
|
||
k 0)
|
||
(while (< k tokensetsize)
|
||
;; F (i)[k] |= F (R[i][j])[k];
|
||
(aset Fi k (logior (aref Fi k)
|
||
(aref (aref F (aref Ri j)) k)))
|
||
(setq k (1+ k)))
|
||
(setq j (1+ j))))
|
||
|
||
(when (= (aref INDEX i) height)
|
||
(setq break nil)
|
||
(while (not break)
|
||
(setq j (aref VERTICES top) ;; j = VERTICES[top--]
|
||
top (1- top))
|
||
(aset INDEX j infinity)
|
||
(if (= i j)
|
||
(setq break t)
|
||
(setq k 0)
|
||
(while (< k tokensetsize)
|
||
;; F (j)[k] = F (i)[k];
|
||
(aset (aref F j) k (aref (aref F i) k))
|
||
(setq k (1+ k))))))
|
||
))
|
||
|
||
(defun wisent-digraph (relation)
|
||
"Digraph RELATION."
|
||
(wisent-with-context digraph
|
||
(setq infinity (+ ngotos 2)
|
||
INDEX (make-vector (1+ ngotos) 0)
|
||
VERTICES (make-vector (1+ ngotos) 0)
|
||
top 0
|
||
R relation)
|
||
(let ((i 0))
|
||
(while (< i ngotos)
|
||
(if (and (= (aref INDEX i) 0) (aref R i))
|
||
(wisent-traverse i))
|
||
(setq i (1+ i))))))
|
||
|
||
(defun wisent-set-state-table ()
|
||
"Build state table."
|
||
(let (sp)
|
||
(setq state-table (make-vector nstates nil)
|
||
sp first-state)
|
||
(while sp
|
||
(aset state-table (core-number sp) sp)
|
||
(setq sp (core-next sp)))))
|
||
|
||
(defun wisent-set-accessing-symbol ()
|
||
"Build accessing symbol table."
|
||
(let (sp)
|
||
(setq accessing-symbol (make-vector nstates 0)
|
||
sp first-state)
|
||
(while sp
|
||
(aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
|
||
(setq sp (core-next sp)))))
|
||
|
||
(defun wisent-set-shift-table ()
|
||
"Build shift table."
|
||
(let (sp)
|
||
(setq shift-table (make-vector nstates nil)
|
||
sp first-shift)
|
||
(while sp
|
||
(aset shift-table (shifts-number sp) sp)
|
||
(setq sp (shifts-next sp)))))
|
||
|
||
(defun wisent-set-reduction-table ()
|
||
"Build reduction table."
|
||
(let (rp)
|
||
(setq reduction-table (make-vector nstates nil)
|
||
rp first-reduction)
|
||
(while rp
|
||
(aset reduction-table (reductions-number rp) rp)
|
||
(setq rp (reductions-next rp)))))
|
||
|
||
(defun wisent-set-maxrhs ()
|
||
"Setup MAXRHS length."
|
||
(let (i len max)
|
||
(setq len 0
|
||
max 0
|
||
i 0)
|
||
(while (aref ritem i)
|
||
(if (> (aref ritem i) 0)
|
||
(setq len (1+ len))
|
||
(if (> len max)
|
||
(setq max len))
|
||
(setq len 0))
|
||
(setq i (1+ i)))
|
||
(setq maxrhs max)))
|
||
|
||
(defun wisent-initialize-LA ()
|
||
"Set up LA."
|
||
(let (i j k count rp sp np v)
|
||
(setq consistent (make-vector nstates nil)
|
||
lookaheads (make-vector (1+ nstates) 0)
|
||
count 0
|
||
i 0)
|
||
(while (< i nstates)
|
||
(aset lookaheads i count)
|
||
(setq rp (aref reduction-table i)
|
||
sp (aref shift-table i))
|
||
;; if (rp &&
|
||
;; (rp->nreds > 1
|
||
;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
|
||
(if (and rp
|
||
(or (> (reductions-nreds rp) 1)
|
||
(and sp
|
||
(not (wisent-ISVAR
|
||
(aref accessing-symbol
|
||
(aref (shifts-shifts sp) 0)))))))
|
||
(setq count (+ count (reductions-nreds rp)))
|
||
(aset consistent i t))
|
||
|
||
(when sp
|
||
(setq k 0
|
||
j (shifts-nshifts sp)
|
||
v (shifts-shifts sp))
|
||
(while (< k j)
|
||
(when (= (aref accessing-symbol (aref v k))
|
||
error-token-number)
|
||
(aset consistent i nil)
|
||
(setq k j)) ;; break
|
||
(setq k (1+ k))))
|
||
(setq i (1+ i)))
|
||
|
||
(aset lookaheads nstates count)
|
||
|
||
(if (zerop count)
|
||
(progn
|
||
(setq LA (make-vector 1 nil)
|
||
LAruleno (make-vector 1 0)
|
||
lookback (make-vector 1 nil)))
|
||
(setq LA (make-vector count nil)
|
||
LAruleno (make-vector count 0)
|
||
lookback (make-vector count nil)))
|
||
(setq i 0 j (length LA))
|
||
(while (< i j)
|
||
(aset LA i (make-vector tokensetsize 0))
|
||
(setq i (1+ i)))
|
||
|
||
(setq np 0
|
||
i 0)
|
||
(while (< i nstates)
|
||
(when (not (aref consistent i))
|
||
(setq rp (aref reduction-table i))
|
||
(when rp
|
||
(setq j 0
|
||
k (reductions-nreds rp)
|
||
v (reductions-rules rp))
|
||
(while (< j k)
|
||
(aset LAruleno np (aref v j))
|
||
(setq np (1+ np)
|
||
j (1+ j)))))
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-set-goto-map ()
|
||
"Set up GOTO-MAP."
|
||
(let (sp i j symbol k temp-map state1 state2 v)
|
||
(setq goto-map (make-vector (1+ nvars) 0)
|
||
temp-map (make-vector (1+ nvars) 0))
|
||
|
||
(setq ngotos 0
|
||
sp first-shift)
|
||
(while sp
|
||
(setq i (1- (shifts-nshifts sp))
|
||
v (shifts-shifts sp))
|
||
(while (>= i 0)
|
||
(setq symbol (aref accessing-symbol (aref v i)))
|
||
(if (wisent-ISTOKEN symbol)
|
||
(setq i 0) ;; break
|
||
(setq ngotos (1+ ngotos))
|
||
;; goto-map[symbol]++;
|
||
(aset goto-map (- symbol ntokens)
|
||
(1+ (aref goto-map (- symbol ntokens)))))
|
||
(setq i (1- i)))
|
||
(setq sp (shifts-next sp)))
|
||
|
||
(setq k 0
|
||
i ntokens
|
||
j 0)
|
||
(while (< i nsyms)
|
||
(aset temp-map j k)
|
||
(setq k (+ k (aref goto-map j))
|
||
i (1+ i)
|
||
j (1+ j)))
|
||
(setq i ntokens
|
||
j 0)
|
||
(while (< i nsyms)
|
||
(aset goto-map j (aref temp-map j))
|
||
(setq i (1+ i)
|
||
j (1+ j)))
|
||
;; goto-map[nsyms] = ngotos;
|
||
;; temp-map[nsyms] = ngotos;
|
||
(aset goto-map j ngotos)
|
||
(aset temp-map j ngotos)
|
||
|
||
(setq from-state (make-vector ngotos 0)
|
||
to-state (make-vector ngotos 0)
|
||
sp first-shift)
|
||
(while sp
|
||
(setq state1 (shifts-number sp)
|
||
v (shifts-shifts sp)
|
||
i (1- (shifts-nshifts sp)))
|
||
(while (>= i 0)
|
||
(setq state2 (aref v i)
|
||
symbol (aref accessing-symbol state2))
|
||
(if (wisent-ISTOKEN symbol)
|
||
(setq i 0) ;; break
|
||
;; k = temp-map[symbol]++;
|
||
(setq k (aref temp-map (- symbol ntokens)))
|
||
(aset temp-map (- symbol ntokens) (1+ k))
|
||
(aset from-state k state1)
|
||
(aset to-state k state2))
|
||
(setq i (1- i)))
|
||
(setq sp (shifts-next sp)))
|
||
))
|
||
|
||
(defun wisent-map-goto (state symbol)
|
||
"Map a STATE/SYMBOL pair into its numeric representation."
|
||
(let (high low middle s result)
|
||
;; low = goto-map[symbol];
|
||
;; high = goto-map[symbol + 1] - 1;
|
||
(setq low (aref goto-map (- symbol ntokens))
|
||
high (1- (aref goto-map (- (1+ symbol) ntokens))))
|
||
(while (and (not result) (<= low high))
|
||
(setq middle (/ (+ low high) 2)
|
||
s (aref from-state middle))
|
||
(cond
|
||
((= s state)
|
||
(setq result middle))
|
||
((< s state)
|
||
(setq low (1+ middle)))
|
||
(t
|
||
(setq high (1- middle)))))
|
||
(or result
|
||
(error "Internal error in `wisent-map-goto'"))
|
||
))
|
||
|
||
(defun wisent-initialize-F ()
|
||
"Set up F."
|
||
(let (i j k sp edge rowp rp reads nedges stateno symbol v break)
|
||
(setq F (make-vector ngotos nil)
|
||
i 0)
|
||
(while (< i ngotos)
|
||
(aset F i (make-vector tokensetsize 0))
|
||
(setq i (1+ i)))
|
||
|
||
(setq reads (make-vector ngotos nil)
|
||
edge (make-vector (1+ ngotos) 0)
|
||
nedges 0
|
||
rowp 0 ;; rowp = F
|
||
i 0)
|
||
(while (< i ngotos)
|
||
(setq stateno (aref to-state i)
|
||
sp (aref shift-table stateno))
|
||
(when sp
|
||
(setq k (shifts-nshifts sp)
|
||
v (shifts-shifts sp)
|
||
j 0
|
||
break nil)
|
||
(while (and (not break) (< j k))
|
||
;; symbol = accessing-symbol[sp->shifts[j]];
|
||
(setq symbol (aref accessing-symbol (aref v j)))
|
||
(if (wisent-ISVAR symbol)
|
||
(setq break t) ;; break
|
||
(wisent-SETBIT (aref F rowp) symbol)
|
||
(setq j (1+ j))))
|
||
|
||
(while (< j k)
|
||
;; symbol = accessing-symbol[sp->shifts[j]];
|
||
(setq symbol (aref accessing-symbol (aref v j)))
|
||
(when (aref nullable (- symbol ntokens))
|
||
(aset edge nedges (wisent-map-goto stateno symbol))
|
||
(setq nedges (1+ nedges)))
|
||
(setq j (1+ j)))
|
||
|
||
(when (> nedges 0)
|
||
;; reads[i] = rp = NEW2(nedges + 1, short);
|
||
(setq rp (make-vector (1+ nedges) 0)
|
||
j 0)
|
||
(aset reads i rp)
|
||
(while (< j nedges)
|
||
;; rp[j] = edge[j];
|
||
(aset rp j (aref edge j))
|
||
(setq j (1+ j)))
|
||
(aset rp nedges -1)
|
||
(setq nedges 0)))
|
||
(setq rowp (1+ rowp))
|
||
(setq i (1+ i)))
|
||
(wisent-digraph reads)
|
||
))
|
||
|
||
(defun wisent-add-lookback-edge (stateno ruleno gotono)
|
||
"Add a lookback edge.
|
||
STATENO, RULENO, GOTONO are self-explanatory."
|
||
(let (i k found)
|
||
(setq i (aref lookaheads stateno)
|
||
k (aref lookaheads (1+ stateno))
|
||
found nil)
|
||
(while (and (not found) (< i k))
|
||
(if (= (aref LAruleno i) ruleno)
|
||
(setq found t)
|
||
(setq i (1+ i))))
|
||
|
||
(or found
|
||
(error "Internal error in `wisent-add-lookback-edge'"))
|
||
|
||
;; value . next
|
||
;; lookback[i] = (gotono . lookback[i])
|
||
(aset lookback i (cons gotono (aref lookback i)))))
|
||
|
||
(defun wisent-transpose (R-arg n)
|
||
"Return the transpose of R-ARG, of size N.
|
||
Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
|
||
a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
|
||
terminated list of the I such as NUM is in R-ARG[I]."
|
||
(let (i j new-R end-R nedges v sp)
|
||
(setq new-R (make-vector n nil)
|
||
end-R (make-vector n nil)
|
||
nedges (make-vector n 0))
|
||
|
||
;; Count.
|
||
(setq i 0)
|
||
(while (< i n)
|
||
(setq v (aref R-arg i))
|
||
(when v
|
||
(setq j 0)
|
||
(while (>= (aref v j) 0)
|
||
(aset nedges (aref v j) (1+ (aref nedges (aref v j))))
|
||
(setq j (1+ j))))
|
||
(setq i (1+ i)))
|
||
|
||
;; Allocate.
|
||
(setq i 0)
|
||
(while (< i n)
|
||
(when (> (aref nedges i) 0)
|
||
(setq sp (make-vector (1+ (aref nedges i)) 0))
|
||
(aset sp (aref nedges i) -1)
|
||
(aset new-R i sp)
|
||
(aset end-R i 0))
|
||
(setq i (1+ i)))
|
||
|
||
;; Store.
|
||
(setq i 0)
|
||
(while (< i n)
|
||
(setq v (aref R-arg i))
|
||
(when v
|
||
(setq j 0)
|
||
(while (>= (aref v j) 0)
|
||
(aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
|
||
(aset end-R (aref v j) (1+ (aref end-R (aref v j))))
|
||
(setq j (1+ j))))
|
||
(setq i (1+ i)))
|
||
|
||
new-R))
|
||
|
||
(defun wisent-build-relations ()
|
||
"Build relations."
|
||
(let (i j k rulep rp sp length nedges done state1 stateno
|
||
symbol1 symbol2 edge states v)
|
||
(setq includes (make-vector ngotos nil)
|
||
edge (make-vector (1+ ngotos) 0)
|
||
states (make-vector (1+ maxrhs) 0)
|
||
i 0)
|
||
|
||
(while (< i ngotos)
|
||
(setq nedges 0
|
||
state1 (aref from-state i)
|
||
symbol1 (aref accessing-symbol (aref to-state i))
|
||
rulep (aref derives (- symbol1 ntokens)))
|
||
|
||
(while (> (car rulep) 0)
|
||
(aset states 0 state1)
|
||
(setq length 1
|
||
stateno state1
|
||
rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
|
||
(while (> (aref ritem rp) 0) ;; *rp > 0
|
||
(setq symbol2 (aref ritem rp)
|
||
sp (aref shift-table stateno)
|
||
k (shifts-nshifts sp)
|
||
v (shifts-shifts sp)
|
||
j 0)
|
||
(while (< j k)
|
||
(setq stateno (aref v j))
|
||
(if (= (aref accessing-symbol stateno) symbol2)
|
||
(setq j k) ;; break
|
||
(setq j (1+ j))))
|
||
;; states[length++] = stateno;
|
||
(aset states length stateno)
|
||
(setq length (1+ length))
|
||
(setq rp (1+ rp)))
|
||
|
||
(if (not (aref consistent stateno))
|
||
(wisent-add-lookback-edge stateno (car rulep) i))
|
||
|
||
(setq length (1- length)
|
||
done nil)
|
||
(while (not done)
|
||
(setq done t
|
||
rp (1- rp))
|
||
(when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
|
||
;; stateno = states[--length];
|
||
(setq length (1- length)
|
||
stateno (aref states length))
|
||
(aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
|
||
(setq nedges (1+ nedges))
|
||
(if (aref nullable (- (aref ritem rp) ntokens))
|
||
(setq done nil))))
|
||
(setq rulep (cdr rulep)))
|
||
|
||
(when (> nedges 0)
|
||
(setq v (make-vector (1+ nedges) 0)
|
||
j 0)
|
||
(aset includes i v)
|
||
(while (< j nedges)
|
||
(aset v j (aref edge j))
|
||
(setq j (1+ j)))
|
||
(aset v nedges -1))
|
||
(setq i (1+ i)))
|
||
|
||
(setq includes (wisent-transpose includes ngotos))
|
||
))
|
||
|
||
(defun wisent-compute-FOLLOWS ()
|
||
"Compute follows."
|
||
(wisent-digraph includes))
|
||
|
||
(defun wisent-compute-lookaheads ()
|
||
"Compute lookaheads."
|
||
(let (i j n v1 v2 sp)
|
||
(setq n (aref lookaheads nstates)
|
||
i 0)
|
||
(while (< i n)
|
||
(setq sp (aref lookback i))
|
||
(while sp
|
||
(setq v1 (aref LA i)
|
||
v2 (aref F (car sp))
|
||
j 0)
|
||
(while (< j tokensetsize)
|
||
;; LA (i)[j] |= F (sp->value)[j]
|
||
(aset v1 j (logior (aref v1 j) (aref v2 j)))
|
||
(setq j (1+ j)))
|
||
(setq sp (cdr sp)))
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-lalr ()
|
||
"Make the nondeterministic finite state machine deterministic."
|
||
(setq tokensetsize (wisent-WORDSIZE ntokens))
|
||
(wisent-set-state-table)
|
||
(wisent-set-accessing-symbol)
|
||
(wisent-set-shift-table)
|
||
(wisent-set-reduction-table)
|
||
(wisent-set-maxrhs)
|
||
(wisent-initialize-LA)
|
||
(wisent-set-goto-map)
|
||
(wisent-initialize-F)
|
||
(wisent-build-relations)
|
||
(wisent-compute-FOLLOWS)
|
||
(wisent-compute-lookaheads))
|
||
|
||
;;;; -----------------------------------------------
|
||
;;;; Find and resolve or report look-ahead conflicts
|
||
;;;; -----------------------------------------------
|
||
|
||
(defsubst wisent-log-resolution (state LAno token resolution)
|
||
"Log a shift-reduce conflict resolution.
|
||
In specified STATE between rule pointed by lookahead number LANO and
|
||
TOKEN, resolved as RESOLUTION."
|
||
(if (or wisent-verbose-flag wisent-debug-flag)
|
||
(wisent-log
|
||
"Conflict in state %d between rule %d and token %s resolved as %s.\n"
|
||
state (aref LAruleno LAno) (wisent-tag token) resolution)))
|
||
|
||
(defun wisent-flush-shift (state token)
|
||
"Turn off the shift recorded in the specified STATE for TOKEN.
|
||
Used when we resolve a shift-reduce conflict in favor of the reduction."
|
||
(let (shiftp i k v)
|
||
(when (setq shiftp (aref shift-table state))
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0)
|
||
(while (< i k)
|
||
(if (and (not (zerop (aref v i)))
|
||
(= token (aref accessing-symbol (aref v i))))
|
||
(aset v i 0))
|
||
(setq i (1+ i))))))
|
||
|
||
(defun wisent-resolve-sr-conflict (state lookaheadnum)
|
||
"Attempt to resolve shift-reduce conflict for one rule.
|
||
Resolve by means of precedence declarations. The conflict occurred in
|
||
specified STATE for the rule pointed by the lookahead symbol
|
||
LOOKAHEADNUM. It has already been checked that the rule has a
|
||
precedence. A conflict is resolved by modifying the shift or reduce
|
||
tables so that there is no longer a conflict."
|
||
(let (i redprec errp errs nerrs token sprec sassoc)
|
||
;; Find the rule to reduce by to get precedence of reduction
|
||
(setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
|
||
redprec (wisent-prec token)
|
||
errp (make-errs)
|
||
errs (make-vector ntokens 0)
|
||
nerrs 0
|
||
i 0)
|
||
(setf (errs-errs errp) errs)
|
||
(while (< i ntokens)
|
||
(setq token (aref tags i))
|
||
(when (and (wisent-BITISSET (aref LA lookaheadnum) i)
|
||
(wisent-BITISSET lookaheadset i)
|
||
(setq sprec (wisent-prec token)))
|
||
;; Shift-reduce conflict occurs for token number I and it has
|
||
;; a precedence. The precedence of shifting is that of token
|
||
;; I.
|
||
(cond
|
||
((< sprec redprec)
|
||
(wisent-log-resolution state lookaheadnum i "reduce")
|
||
;; Flush the shift for this token
|
||
(wisent-RESETBIT lookaheadset i)
|
||
(wisent-flush-shift state i)
|
||
)
|
||
((> sprec redprec)
|
||
(wisent-log-resolution state lookaheadnum i "shift")
|
||
;; Flush the reduce for this token
|
||
(wisent-RESETBIT (aref LA lookaheadnum) i)
|
||
)
|
||
(t
|
||
;; Matching precedence levels.
|
||
;; For left association, keep only the reduction.
|
||
;; For right association, keep only the shift.
|
||
;; For nonassociation, keep neither.
|
||
(setq sassoc (wisent-assoc token))
|
||
(cond
|
||
((eq sassoc 'right)
|
||
(wisent-log-resolution state lookaheadnum i "shift"))
|
||
((eq sassoc 'left)
|
||
(wisent-log-resolution state lookaheadnum i "reduce"))
|
||
((eq sassoc 'nonassoc)
|
||
(wisent-log-resolution state lookaheadnum i "an error"))
|
||
)
|
||
(when (not (eq sassoc 'right))
|
||
;; Flush the shift for this token
|
||
(wisent-RESETBIT lookaheadset i)
|
||
(wisent-flush-shift state i))
|
||
(when (not (eq sassoc 'left))
|
||
;; Flush the reduce for this token
|
||
(wisent-RESETBIT (aref LA lookaheadnum) i))
|
||
(when (eq sassoc 'nonassoc)
|
||
;; Record an explicit error for this token
|
||
(aset errs nerrs i)
|
||
(setq nerrs (1+ nerrs)))
|
||
)))
|
||
(setq i (1+ i)))
|
||
(when (> nerrs 0)
|
||
(setf (errs-nerrs errp) nerrs)
|
||
(aset err-table state errp))
|
||
))
|
||
|
||
(defun wisent-set-conflicts (state)
|
||
"Find and attempt to resolve conflicts in specified STATE."
|
||
(let (i j k v shiftp symbol)
|
||
(unless (aref consistent state)
|
||
(fillarray lookaheadset 0)
|
||
|
||
(when (setq shiftp (aref shift-table state))
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0)
|
||
(while (and (< i k)
|
||
(wisent-ISTOKEN
|
||
(setq symbol (aref accessing-symbol (aref v i)))))
|
||
(or (zerop (aref v i))
|
||
(wisent-SETBIT lookaheadset symbol))
|
||
(setq i (1+ i))))
|
||
|
||
;; Loop over all rules which require lookahead in this state
|
||
;; first check for shift-reduce conflict, and try to resolve
|
||
;; using precedence
|
||
(setq i (aref lookaheads state)
|
||
k (aref lookaheads (1+ state)))
|
||
(while (< i k)
|
||
(when (aref rprec (aref LAruleno i))
|
||
(setq v (aref LA i)
|
||
j 0)
|
||
(while (< j tokensetsize)
|
||
(if (zerop (logand (aref v j) (aref lookaheadset j)))
|
||
(setq j (1+ j))
|
||
;; if (LA (i)[j] & lookaheadset[j])
|
||
(wisent-resolve-sr-conflict state i)
|
||
(setq j tokensetsize)))) ;; break
|
||
(setq i (1+ i)))
|
||
|
||
;; Loop over all rules which require lookahead in this state
|
||
;; Check for conflicts not resolved above.
|
||
(setq i (aref lookaheads state))
|
||
(while (< i k)
|
||
(setq v (aref LA i)
|
||
j 0)
|
||
(while (< j tokensetsize)
|
||
;; if (LA (i)[j] & lookaheadset[j])
|
||
(if (not (zerop (logand (aref v j) (aref lookaheadset j))))
|
||
(aset conflicts state t))
|
||
(setq j (1+ j)))
|
||
(setq j 0)
|
||
(while (< j tokensetsize)
|
||
;; lookaheadset[j] |= LA (i)[j];
|
||
(aset lookaheadset j (logior (aref lookaheadset j)
|
||
(aref v j)))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))
|
||
)))
|
||
|
||
(defun wisent-resolve-conflicts ()
|
||
"Find and resolve conflicts."
|
||
(let (i)
|
||
(setq conflicts (make-vector nstates nil)
|
||
shiftset (make-vector tokensetsize 0)
|
||
lookaheadset (make-vector tokensetsize 0)
|
||
err-table (make-vector nstates nil)
|
||
i 0)
|
||
(while (< i nstates)
|
||
(wisent-set-conflicts i)
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-count-sr-conflicts (state)
|
||
"Count the number of shift/reduce conflicts in specified STATE."
|
||
(let (i j k shiftp symbol v)
|
||
(setq src-count 0
|
||
shiftp (aref shift-table state))
|
||
(when shiftp
|
||
(fillarray shiftset 0)
|
||
(fillarray lookaheadset 0)
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0)
|
||
(while (< i k)
|
||
(when (not (zerop (aref v i)))
|
||
(setq symbol (aref accessing-symbol (aref v i)))
|
||
(if (wisent-ISVAR symbol)
|
||
(setq i k) ;; break
|
||
(wisent-SETBIT shiftset symbol)))
|
||
(setq i (1+ i)))
|
||
|
||
(setq k (aref lookaheads (1+ state))
|
||
i (aref lookaheads state))
|
||
(while (< i k)
|
||
(setq v (aref LA i)
|
||
j 0)
|
||
(while (< j tokensetsize)
|
||
;; lookaheadset[j] |= LA (i)[j]
|
||
(aset lookaheadset j (logior (aref lookaheadset j)
|
||
(aref v j)))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))
|
||
|
||
(setq k 0)
|
||
(while (< k tokensetsize)
|
||
;; lookaheadset[k] &= shiftset[k];
|
||
(aset lookaheadset k (logand (aref lookaheadset k)
|
||
(aref shiftset k)))
|
||
(setq k (1+ k)))
|
||
|
||
(setq i 0)
|
||
(while (< i ntokens)
|
||
(if (wisent-BITISSET lookaheadset i)
|
||
(setq src-count (1+ src-count)))
|
||
(setq i (1+ i))))
|
||
src-count))
|
||
|
||
(defun wisent-count-rr-conflicts (state)
|
||
"Count the number of reduce/reduce conflicts in specified STATE."
|
||
(let (i j count n m)
|
||
(setq rrc-count 0
|
||
m (aref lookaheads state)
|
||
n (aref lookaheads (1+ state)))
|
||
(when (>= (- n m) 2)
|
||
(setq i 0)
|
||
(while (< i ntokens)
|
||
(setq count 0
|
||
j m)
|
||
(while (< j n)
|
||
(if (wisent-BITISSET (aref LA j) i)
|
||
(setq count (1+ count)))
|
||
(setq j (1+ j)))
|
||
|
||
(if (>= count 2)
|
||
(setq rrc-count (1+ rrc-count)))
|
||
(setq i (1+ i))))
|
||
rrc-count))
|
||
|
||
(defcustom wisent-expected-conflicts nil
|
||
"If non-nil suppress the warning about shift/reduce conflicts.
|
||
It is a decimal integer N that says there should be no warning if
|
||
there are N shift/reduce conflicts and no reduce/reduce conflicts. A
|
||
warning is given if there are either more or fewer conflicts, or if
|
||
there are any reduce/reduce conflicts."
|
||
:group 'wisent
|
||
:type '(choice (const nil) integer))
|
||
(make-obsolete-variable 'wisent-expected-conflicts
|
||
"use %expectedconflicts in the .wy file instead"
|
||
"27.1")
|
||
|
||
(defun wisent-total-conflicts ()
|
||
"Report the total number of conflicts."
|
||
(let* ((src (wisent-source))
|
||
(symbol
|
||
;; Source files may specify how many expected conflicts
|
||
;; there are. If the number is the expected number, don't
|
||
;; output warnings.
|
||
(and src
|
||
(intern (format "wisent-%s--expected-conflicts"
|
||
(replace-regexp-in-string "\\.el\\'" "" src))))))
|
||
(when (or (not (zerop rrc-total))
|
||
(and (not (zerop src-total))
|
||
(not (= src-total (or wisent-expected-conflicts 0)))
|
||
(or (null symbol)
|
||
(not (boundp symbol))
|
||
(not (equal (symbol-value symbol) src-total)))))
|
||
(let* ((src (if src (concat " in " src) ""))
|
||
(msg (format "Grammar%s contains" src)))
|
||
(when (and (> src-total 0))
|
||
(setq msg (format "%s %d shift/reduce conflict%s"
|
||
msg src-total (if (> src-total 1)
|
||
"s" ""))))
|
||
(if (and (> src-total 0) (> rrc-total 0))
|
||
(setq msg (format "%s and" msg)))
|
||
(if (> rrc-total 0)
|
||
(setq msg (format "%s %d reduce/reduce conflict%s"
|
||
msg rrc-total (if (> rrc-total 1)
|
||
"s" ""))))
|
||
(message msg)))))
|
||
|
||
(defun wisent-print-conflicts ()
|
||
"Report conflicts."
|
||
(let (i)
|
||
(setq src-total 0
|
||
rrc-total 0
|
||
i 0)
|
||
(while (< i nstates)
|
||
(when (aref conflicts i)
|
||
(wisent-count-sr-conflicts i)
|
||
(wisent-count-rr-conflicts i)
|
||
(setq src-total (+ src-total src-count)
|
||
rrc-total (+ rrc-total rrc-count))
|
||
(when (or wisent-verbose-flag wisent-debug-flag)
|
||
(wisent-log "State %d contains" i)
|
||
(if (> src-count 0)
|
||
(wisent-log " %d shift/reduce conflict%s"
|
||
src-count (if (> src-count 1) "s" "")))
|
||
|
||
(if (and (> src-count 0) (> rrc-count 0))
|
||
(wisent-log " and"))
|
||
|
||
(if (> rrc-count 0)
|
||
(wisent-log " %d reduce/reduce conflict%s"
|
||
rrc-count (if (> rrc-count 1) "s" "")))
|
||
|
||
(wisent-log ".\n")))
|
||
(setq i (1+ i)))
|
||
(wisent-total-conflicts)))
|
||
|
||
;;;; --------------------------------------
|
||
;;;; Report information on generated parser
|
||
;;;; --------------------------------------
|
||
(defun wisent-print-grammar ()
|
||
"Print grammar."
|
||
(let (i j r break left-count right-count)
|
||
|
||
(wisent-log "\n\nGrammar\n\n Number, Rule\n")
|
||
(setq i 1)
|
||
(while (<= i nrules)
|
||
;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
|
||
(when (aref ruseful i)
|
||
(wisent-log " %s %s ->"
|
||
(string-pad (number-to-string i) 6)
|
||
(wisent-tag (aref rlhs i)))
|
||
(setq r (aref rrhs i))
|
||
(if (> (aref ritem r) 0)
|
||
(while (> (aref ritem r) 0)
|
||
(wisent-log " %s" (wisent-tag (aref ritem r)))
|
||
(setq r (1+ r)))
|
||
(wisent-log " /* empty */"))
|
||
(wisent-log "\n"))
|
||
(setq i (1+ i)))
|
||
|
||
(wisent-log "\n\nTerminals, with rules where they appear\n\n")
|
||
(wisent-log "%s (-1)\n" (wisent-tag 0))
|
||
(setq i 1)
|
||
(while (< i ntokens)
|
||
(wisent-log "%s (%d)" (wisent-tag i) i)
|
||
(setq j 1)
|
||
(while (<= j nrules)
|
||
(setq r (aref rrhs j)
|
||
break nil)
|
||
(while (and (not break) (> (aref ritem r) 0))
|
||
(if (setq break (= (aref ritem r) i))
|
||
(wisent-log " %d" j)
|
||
(setq r (1+ r))))
|
||
(setq j (1+ j)))
|
||
(wisent-log "\n")
|
||
(setq i (1+ i)))
|
||
|
||
(wisent-log "\n\nNonterminals, with rules where they appear\n\n")
|
||
(setq i ntokens)
|
||
(while (< i nsyms)
|
||
(setq left-count 0
|
||
right-count 0
|
||
j 1)
|
||
(while (<= j nrules)
|
||
(if (= (aref rlhs j) i)
|
||
(setq left-count (1+ left-count)))
|
||
(setq r (aref rrhs j)
|
||
break nil)
|
||
(while (and (not break) (> (aref ritem r) 0))
|
||
(if (= (aref ritem r) i)
|
||
(setq right-count (1+ right-count)
|
||
break t)
|
||
(setq r (1+ r))))
|
||
(setq j (1+ j)))
|
||
(wisent-log "%s (%d)\n " (wisent-tag i) i)
|
||
(when (> left-count 0)
|
||
(wisent-log " on left:")
|
||
(setq j 1)
|
||
(while (<= j nrules)
|
||
(if (= (aref rlhs j) i)
|
||
(wisent-log " %d" j))
|
||
(setq j (1+ j))))
|
||
(when (> right-count 0)
|
||
(if (> left-count 0)
|
||
(wisent-log ","))
|
||
(wisent-log " on right:")
|
||
(setq j 1)
|
||
(while (<= j nrules)
|
||
(setq r (aref rrhs j)
|
||
break nil)
|
||
(while (and (not break) (> (aref ritem r) 0))
|
||
(if (setq break (= (aref ritem r) i))
|
||
(wisent-log " %d" j)
|
||
(setq r (1+ r))))
|
||
(setq j (1+ j))))
|
||
(wisent-log "\n")
|
||
(setq i (1+ i)))
|
||
))
|
||
|
||
(defun wisent-print-reductions (state)
|
||
"Print reductions on STATE."
|
||
(let (i j k v symbol m n defaulted
|
||
default-LA default-rule cmax count shiftp errp nodefault)
|
||
(setq nodefault nil
|
||
i 0)
|
||
(fillarray shiftset 0)
|
||
|
||
(setq shiftp (aref shift-table state))
|
||
(when shiftp
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0)
|
||
(while (< i k)
|
||
(when (not (zerop (aref v i)))
|
||
(setq symbol (aref accessing-symbol (aref v i)))
|
||
(if (wisent-ISVAR symbol)
|
||
(setq i k) ;; break
|
||
;; If this state has a shift for the error token, don't
|
||
;; use a default rule.
|
||
(if (= symbol error-token-number)
|
||
(setq nodefault t))
|
||
(wisent-SETBIT shiftset symbol)))
|
||
(setq i (1+ i))))
|
||
|
||
(setq errp (aref err-table state))
|
||
(when errp
|
||
(setq k (errs-nerrs errp)
|
||
v (errs-errs errp)
|
||
i 0)
|
||
(while (< i k)
|
||
(if (not (zerop (setq symbol (aref v i))))
|
||
(wisent-SETBIT shiftset symbol))
|
||
(setq i (1+ i))))
|
||
|
||
(setq m (aref lookaheads state)
|
||
n (aref lookaheads (1+ state)))
|
||
|
||
(cond
|
||
((and (= (- n m) 1) (not nodefault))
|
||
(setq default-rule (aref LAruleno m)
|
||
v (aref LA m)
|
||
k 0)
|
||
(while (< k tokensetsize)
|
||
(aset lookaheadset k (logand (aref v k)
|
||
(aref shiftset k)))
|
||
(setq k (1+ k)))
|
||
|
||
(setq i 0)
|
||
(while (< i ntokens)
|
||
(if (wisent-BITISSET lookaheadset i)
|
||
(wisent-log " %s\t[reduce using rule %d (%s)]\n"
|
||
(wisent-tag i) default-rule
|
||
(wisent-tag (aref rlhs default-rule))))
|
||
(setq i (1+ i)))
|
||
(wisent-log " $default\treduce using rule %d (%s)\n\n"
|
||
default-rule
|
||
(wisent-tag (aref rlhs default-rule)))
|
||
)
|
||
((>= (- n m) 1)
|
||
(setq cmax 0
|
||
default-LA -1
|
||
default-rule 0)
|
||
(when (not nodefault)
|
||
(setq i m)
|
||
(while (< i n)
|
||
(setq v (aref LA i)
|
||
count 0
|
||
k 0)
|
||
(while (< k tokensetsize)
|
||
;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
|
||
(aset lookaheadset k
|
||
(logand (aref v k)
|
||
(lognot (aref shiftset k))))
|
||
(setq k (1+ k)))
|
||
(setq j 0)
|
||
(while (< j ntokens)
|
||
(if (wisent-BITISSET lookaheadset j)
|
||
(setq count (1+ count)))
|
||
(setq j (1+ j)))
|
||
(if (> count cmax)
|
||
(setq cmax count
|
||
default-LA i
|
||
default-rule (aref LAruleno i)))
|
||
(setq k 0)
|
||
(while (< k tokensetsize)
|
||
(aset shiftset k (logior (aref shiftset k)
|
||
(aref lookaheadset k)))
|
||
(setq k (1+ k)))
|
||
(setq i (1+ i))))
|
||
|
||
(fillarray shiftset 0)
|
||
|
||
(when shiftp
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0)
|
||
(while (< i k)
|
||
(when (not (zerop (aref v i)))
|
||
(setq symbol (aref accessing-symbol (aref v i)))
|
||
(if (wisent-ISVAR symbol)
|
||
(setq i k) ;; break
|
||
(wisent-SETBIT shiftset symbol)))
|
||
(setq i (1+ i))))
|
||
|
||
(setq i 0)
|
||
(while (< i ntokens)
|
||
(setq defaulted nil
|
||
count (if (wisent-BITISSET shiftset i) 1 0)
|
||
j m)
|
||
(while (< j n)
|
||
(when (wisent-BITISSET (aref LA j) i)
|
||
(if (zerop count)
|
||
(progn
|
||
(if (not (= j default-LA))
|
||
(wisent-log
|
||
" %s\treduce using rule %d (%s)\n"
|
||
(wisent-tag i) (aref LAruleno j)
|
||
(wisent-tag (aref rlhs (aref LAruleno j))))
|
||
(setq defaulted t))
|
||
(setq count (1+ count)))
|
||
(if defaulted
|
||
(wisent-log
|
||
" %s\treduce using rule %d (%s)\n"
|
||
(wisent-tag i) (aref LAruleno default-LA)
|
||
(wisent-tag (aref rlhs (aref LAruleno default-LA)))))
|
||
(setq defaulted nil)
|
||
(wisent-log
|
||
" %s\t[reduce using rule %d (%s)]\n"
|
||
(wisent-tag i) (aref LAruleno j)
|
||
(wisent-tag (aref rlhs (aref LAruleno j))))))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))
|
||
|
||
(if (>= default-LA 0)
|
||
(wisent-log
|
||
" $default\treduce using rule %d (%s)\n"
|
||
default-rule
|
||
(wisent-tag (aref rlhs default-rule))))
|
||
))))
|
||
|
||
(defun wisent-print-actions (state)
|
||
"Print actions on STATE."
|
||
(let (i j k v state1 symbol shiftp errp redp rule nerrs break)
|
||
(setq shiftp (aref shift-table state)
|
||
redp (aref reduction-table state)
|
||
errp (aref err-table state))
|
||
(if (and (not shiftp) (not redp))
|
||
(if (= final-state state)
|
||
(wisent-log " $default\taccept\n")
|
||
(wisent-log " NO ACTIONS\n"))
|
||
(if (not shiftp)
|
||
(setq i 0
|
||
k 0)
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0
|
||
break nil)
|
||
(while (and (not break) (< i k))
|
||
(if (zerop (setq state1 (aref v i)))
|
||
(setq i (1+ i))
|
||
(setq symbol (aref accessing-symbol state1))
|
||
;; The following line used to be turned off.
|
||
(if (wisent-ISVAR symbol)
|
||
(setq break t) ;; break
|
||
(wisent-log " %s\tshift, and go to state %d\n"
|
||
(wisent-tag symbol) state1)
|
||
(setq i (1+ i)))))
|
||
(if (> i 0)
|
||
(wisent-log "\n")))
|
||
|
||
(when errp
|
||
(setq nerrs (errs-nerrs errp)
|
||
v (errs-errs errp)
|
||
j 0)
|
||
(while (< j nerrs)
|
||
(if (aref v j)
|
||
(wisent-log " %s\terror (nonassociative)\n"
|
||
(wisent-tag (aref v j))))
|
||
(setq j (1+ j)))
|
||
(if (> j 0)
|
||
(wisent-log "\n")))
|
||
|
||
(cond
|
||
((and (aref consistent state) redp)
|
||
(setq rule (aref (reductions-rules redp) 0)
|
||
symbol (aref rlhs rule))
|
||
(wisent-log " $default\treduce using rule %d (%s)\n\n"
|
||
rule (wisent-tag symbol))
|
||
)
|
||
(redp
|
||
(wisent-print-reductions state)
|
||
))
|
||
|
||
(when (< i k)
|
||
(setq v (shifts-shifts shiftp))
|
||
(while (< i k)
|
||
(when (setq state1 (aref v i))
|
||
(setq symbol (aref accessing-symbol state1))
|
||
(wisent-log " %s\tgo to state %d\n"
|
||
(wisent-tag symbol) state1))
|
||
(setq i (1+ i)))
|
||
(wisent-log "\n"))
|
||
)))
|
||
|
||
(defun wisent-print-core (state)
|
||
"Print STATE core."
|
||
(let (i k rule statep sp sp1)
|
||
(setq statep (aref state-table state)
|
||
k (core-nitems statep))
|
||
(when (> k 0)
|
||
(setq i 0)
|
||
(while (< i k)
|
||
;; sp1 = sp = ritem + statep->items[i];
|
||
(setq sp1 (aref (core-items statep) i)
|
||
sp sp1)
|
||
(while (> (aref ritem sp) 0)
|
||
(setq sp (1+ sp)))
|
||
|
||
(setq rule (- (aref ritem sp)))
|
||
(wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
|
||
|
||
(setq sp (aref rrhs rule))
|
||
(while (< sp sp1)
|
||
(wisent-log "%s " (wisent-tag (aref ritem sp)))
|
||
(setq sp (1+ sp)))
|
||
(wisent-log ".")
|
||
(while (> (aref ritem sp) 0)
|
||
(wisent-log " %s" (wisent-tag (aref ritem sp)))
|
||
(setq sp (1+ sp)))
|
||
(wisent-log " (rule %d)\n" rule)
|
||
(setq i (1+ i)))
|
||
(wisent-log "\n"))))
|
||
|
||
(defun wisent-print-state (state)
|
||
"Print information on STATE."
|
||
(wisent-log "\n\nstate %d\n\n" state)
|
||
(wisent-print-core state)
|
||
(wisent-print-actions state))
|
||
|
||
(defun wisent-print-states ()
|
||
"Print information on states."
|
||
(let ((i 0))
|
||
(while (< i nstates)
|
||
(wisent-print-state i)
|
||
(setq i (1+ i)))))
|
||
|
||
(defun wisent-print-results ()
|
||
"Print information on generated parser.
|
||
Report detailed information if `wisent-verbose-flag' or
|
||
`wisent-debug-flag' are non-nil."
|
||
(when (or wisent-verbose-flag wisent-debug-flag)
|
||
(wisent-print-useless))
|
||
(wisent-print-conflicts)
|
||
(when (or wisent-verbose-flag wisent-debug-flag)
|
||
(wisent-print-grammar)
|
||
(wisent-print-states))
|
||
;; Append output to log file when running in batch mode
|
||
(when noninteractive
|
||
(wisent-append-to-log-file)
|
||
(wisent-clear-log)))
|
||
|
||
;;;; ---------------------------------
|
||
;;;; Build the generated parser tables
|
||
;;;; ---------------------------------
|
||
|
||
(defun wisent-action-row (state actrow)
|
||
"Figure out the actions for the specified STATE.
|
||
Decide what to do for each type of token if seen as the lookahead
|
||
token in specified state. The value returned is used as the default
|
||
action for the state. In addition, ACTROW is filled with what to do
|
||
for each kind of token, index by symbol number, with nil meaning do
|
||
the default action. The value `error', means this situation is an
|
||
error. The parser recognizes this value specially.
|
||
|
||
This is where conflicts are resolved. The loop over lookahead rules
|
||
considered lower-numbered rules last, and the last rule considered
|
||
that likes a token gets to handle it."
|
||
(let (i j k m n v default-rule nreds rule max count
|
||
shift-state symbol redp shiftp errp nodefault)
|
||
|
||
(fillarray actrow nil)
|
||
|
||
(setq default-rule 0
|
||
nodefault nil ;; nil inhibit having any default reduction
|
||
nreds 0
|
||
m 0
|
||
n 0
|
||
redp (aref reduction-table state))
|
||
|
||
(when redp
|
||
(setq nreds (reductions-nreds redp))
|
||
(when (>= nreds 1)
|
||
;; loop over all the rules available here which require
|
||
;; lookahead
|
||
(setq m (aref lookaheads state)
|
||
n (aref lookaheads (1+ state))
|
||
i (1- n))
|
||
(while (>= i m)
|
||
;; and find each token which the rule finds acceptable to
|
||
;; come next
|
||
(setq j 0)
|
||
(while (< j ntokens)
|
||
;; and record this rule as the rule to use if that token
|
||
;; follows.
|
||
(if (wisent-BITISSET (aref LA i) j)
|
||
(aset actrow j (- (aref LAruleno i)))
|
||
)
|
||
(setq j (1+ j)))
|
||
(setq i (1- i)))))
|
||
|
||
;; Now see which tokens are allowed for shifts in this state. For
|
||
;; them, record the shift as the thing to do. So shift is
|
||
;; preferred to reduce.
|
||
(setq shiftp (aref shift-table state))
|
||
(when shiftp
|
||
(setq k (shifts-nshifts shiftp)
|
||
v (shifts-shifts shiftp)
|
||
i 0)
|
||
(while (< i k)
|
||
(setq shift-state (aref v i))
|
||
(if (zerop shift-state)
|
||
nil ;; continue
|
||
(setq symbol (aref accessing-symbol shift-state))
|
||
(if (wisent-ISVAR symbol)
|
||
(setq i k) ;; break
|
||
(aset actrow symbol shift-state)
|
||
;; Do not use any default reduction if there is a shift
|
||
;; for error
|
||
(if (= symbol error-token-number)
|
||
(setq nodefault t))))
|
||
(setq i (1+ i))))
|
||
|
||
;; See which tokens are an explicit error in this state (due to
|
||
;; %nonassoc). For them, record error as the action.
|
||
(setq errp (aref err-table state))
|
||
(when errp
|
||
(setq k (errs-nerrs errp)
|
||
v (errs-errs errp)
|
||
i 0)
|
||
(while (< i k)
|
||
(aset actrow (aref v i) wisent-error-tag)
|
||
(setq i (1+ i))))
|
||
|
||
;; Now find the most common reduction and make it the default
|
||
;; action for this state.
|
||
(when (and (>= nreds 1) (not nodefault))
|
||
(if (aref consistent state)
|
||
(setq default-rule (- (aref (reductions-rules redp) 0)))
|
||
(setq max 0
|
||
i m)
|
||
(while (< i n)
|
||
(setq count 0
|
||
rule (- (aref LAruleno i))
|
||
j 0)
|
||
(while (< j ntokens)
|
||
(if (and (numberp (aref actrow j))
|
||
(= (aref actrow j) rule))
|
||
(setq count (1+ count)))
|
||
(setq j (1+ j)))
|
||
(if (> count max)
|
||
(setq max count
|
||
default-rule rule))
|
||
(setq i (1+ i)))
|
||
;; actions which match the default are replaced with zero,
|
||
;; which means "use the default"
|
||
(when (> max 0)
|
||
(setq j 0)
|
||
(while (< j ntokens)
|
||
(if (and (numberp (aref actrow j))
|
||
(= (aref actrow j) default-rule))
|
||
(aset actrow j nil))
|
||
(setq j (1+ j)))
|
||
)))
|
||
|
||
;; If have no default rule, if this is the final state the default
|
||
;; is accept else it is an error. So replace any action which
|
||
;; says "error" with "use default".
|
||
(when (zerop default-rule)
|
||
(if (= final-state state)
|
||
(setq default-rule wisent-accept-tag)
|
||
(setq j 0)
|
||
(while (< j ntokens)
|
||
(if (eq (aref actrow j) wisent-error-tag)
|
||
(aset actrow j nil))
|
||
(setq j (1+ j)))
|
||
(setq default-rule wisent-error-tag)))
|
||
default-rule))
|
||
|
||
(defconst wisent-default-tag 'default
|
||
"Tag used in an action table to indicate a default action.")
|
||
|
||
;; These variables only exist locally in the function
|
||
;; `wisent-state-actions' and are shared by all other nested callees.
|
||
(wisent-defcontext semantic-actions
|
||
;; Uninterned symbols used in code generation.
|
||
stack sp gotos state
|
||
;; Name of the current semantic action
|
||
NAME)
|
||
|
||
(defun wisent-state-actions ()
|
||
"Figure out the actions for every state.
|
||
Return the action table."
|
||
;; Store the semantic action obarray in (unused) RCODE[0].
|
||
(aset rcode 0 (obarray-make 13))
|
||
(let (i j action-table actrow action)
|
||
(setq action-table (make-vector nstates nil)
|
||
actrow (make-vector ntokens nil)
|
||
i 0)
|
||
(wisent-with-context semantic-actions
|
||
(setq stack (make-symbol "stack")
|
||
sp (make-symbol "sp")
|
||
gotos (make-symbol "gotos")
|
||
state (make-symbol "state"))
|
||
(while (< i nstates)
|
||
(setq action (wisent-action-row i actrow))
|
||
;; Translate a reduction into semantic action
|
||
(and (integerp action) (< action 0)
|
||
(setq action (wisent-semantic-action (- action))))
|
||
(aset action-table i (list (cons wisent-default-tag action)))
|
||
(setq j 0)
|
||
(while (< j ntokens)
|
||
(when (setq action (aref actrow j))
|
||
;; Translate a reduction into semantic action
|
||
(and (integerp action) (< action 0)
|
||
(setq action (wisent-semantic-action (- action))))
|
||
(aset action-table i (cons (cons (aref tags j) action)
|
||
(aref action-table i)))
|
||
)
|
||
(setq j (1+ j)))
|
||
(aset action-table i (nreverse (aref action-table i)))
|
||
(setq i (1+ i)))
|
||
action-table)))
|
||
|
||
(defun wisent-goto-actions ()
|
||
"Figure out what to do after reducing with each rule.
|
||
Depending on the saved state from before the beginning of parsing the
|
||
data that matched this rule. Return the goto table."
|
||
(let (i j m n symbol state goto-table)
|
||
(setq goto-table (make-vector nstates nil)
|
||
i ntokens)
|
||
(while (< i nsyms)
|
||
(setq symbol (- i ntokens)
|
||
m (aref goto-map symbol)
|
||
n (aref goto-map (1+ symbol))
|
||
j m)
|
||
(while (< j n)
|
||
(setq state (aref from-state j))
|
||
(aset goto-table state
|
||
(cons (cons (aref tags i) (aref to-state j))
|
||
(aref goto-table state)))
|
||
(setq j (1+ j)))
|
||
(setq i (1+ i)))
|
||
goto-table))
|
||
|
||
(defsubst wisent-quote-p (sym)
|
||
"Return non-nil if SYM is bound to the `quote' function."
|
||
(condition-case nil
|
||
(eq (indirect-function sym)
|
||
(indirect-function 'quote))
|
||
(error nil)))
|
||
|
||
(defsubst wisent-backquote-p (sym)
|
||
"Return non-nil if SYM is bound to the `backquote' function."
|
||
(condition-case nil
|
||
(eq (indirect-function sym)
|
||
(indirect-function 'backquote))
|
||
(error nil)))
|
||
|
||
(defun wisent-check-$N (x m)
|
||
"Return non-nil if X is a valid $N or $regionN symbol.
|
||
That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
|
||
Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
|
||
(when (symbolp x)
|
||
(let* ((n (symbol-name x))
|
||
(i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
|
||
(string-to-number (match-string 2 n)))))
|
||
(when i
|
||
(if (and (>= i 1) (<= i m))
|
||
t
|
||
(message
|
||
"*** In %s, %s might be a free variable (rule has %s)"
|
||
NAME x (format (cond ((< m 1) "no component")
|
||
((= m 1) "%d component")
|
||
("%d components"))
|
||
m))
|
||
nil)))))
|
||
|
||
(defun wisent-semantic-action-expand-body (body n &optional found)
|
||
"Parse BODY of semantic action.
|
||
N is the maximum number of $N variables that can be referenced in
|
||
BODY. Warn on references out of permitted range.
|
||
Optional argument FOUND is the accumulated list of $N references
|
||
encountered so far.
|
||
Return a cons (FOUND . XBODY), where FOUND is the list of $N
|
||
references found in BODY, and XBODY is BODY expression with
|
||
`backquote' forms expanded."
|
||
(if (not (listp body))
|
||
;; BODY is an atom, no expansion needed
|
||
(progn
|
||
(if (wisent-check-$N body n)
|
||
;; Accumulate $i symbol
|
||
(cl-pushnew body found :test #'equal))
|
||
(cons found body))
|
||
;; BODY is a list, expand inside it
|
||
(let (xbody sexpr)
|
||
;; If backquote expand it first
|
||
(if (wisent-backquote-p (car body))
|
||
(setq body (macroexpand body)))
|
||
(while body
|
||
(setq sexpr (car body)
|
||
body (cdr body))
|
||
(cond
|
||
;; Function call excepted quote expression
|
||
((and (consp sexpr)
|
||
(not (wisent-quote-p (car sexpr))))
|
||
(setq sexpr (wisent-semantic-action-expand-body sexpr n found)
|
||
found (car sexpr)
|
||
sexpr (cdr sexpr)))
|
||
;; $i symbol
|
||
((wisent-check-$N sexpr n)
|
||
;; Accumulate $i symbol
|
||
(cl-pushnew sexpr found :test #'equal))
|
||
)
|
||
;; Accumulate expanded forms
|
||
(setq xbody (nconc xbody (list sexpr))))
|
||
(cons found xbody))))
|
||
|
||
(defun wisent-semantic-action (r)
|
||
"Set up the Elisp function for semantic action at rule R.
|
||
On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
|
||
body of the semantic action, N is the maximum number of values
|
||
available in the parser's stack, NTERM is the nonterminal the semantic
|
||
action belongs to, and I is the index of the semantic action inside
|
||
NTERM definition. Return the semantic action symbol.
|
||
The semantic action function accepts three arguments:
|
||
|
||
- the state/value stack
|
||
- the top-of-stack index
|
||
- the goto table
|
||
|
||
And returns the updated top-of-stack index."
|
||
(if (not (aref ruseful r))
|
||
(aset rcode r nil)
|
||
(let* ((actn (aref rcode r))
|
||
(n (aref actn 1)) ; nb of val avail. in stack
|
||
(NAME (apply #'format "%s:%d" (aref actn 2)))
|
||
(form (wisent-semantic-action-expand-body (aref actn 0) n))
|
||
($l (car form)) ; list of $vars used in body
|
||
(form (cdr form)) ; expanded form of body
|
||
(nt (aref rlhs r)) ; nonterminal item no.
|
||
(bl nil) ; `let*' binding list
|
||
$v i j)
|
||
|
||
;; Compute $N and $regionN bindings
|
||
(setq i n)
|
||
(while (> i 0)
|
||
(setq j (1+ (* 2 (- n i))))
|
||
;; Only bind $regionI if used in action
|
||
(setq $v (intern (format "$region%d" i)))
|
||
(if (memq $v $l)
|
||
(setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
|
||
;; Only bind $I if used in action
|
||
(setq $v (intern (format "$%d" i)))
|
||
(if (memq $v $l)
|
||
(setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
|
||
(setq i (1- i)))
|
||
|
||
;; Compute J, the length of rule's RHS. It will give the
|
||
;; current parser state at STACK[SP - 2*J], and where to push
|
||
;; the new semantic value and the next state, respectively at:
|
||
;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
|
||
;; the maximum number of values available in the stack, is equal
|
||
;; to J. But, for mid-rule actions, N is the number of rule
|
||
;; elements before the action and J is always 0 (empty rule).
|
||
(setq i (aref rrhs r)
|
||
j 0)
|
||
(while (> (aref ritem i) 0)
|
||
(setq j (1+ j)
|
||
i (1+ i)))
|
||
|
||
;; Create the semantic action symbol.
|
||
(setq actn (intern NAME (aref rcode 0)))
|
||
|
||
;; Store source code in function cell of the semantic action
|
||
;; symbol. It will be byte-compiled at automaton's compilation
|
||
;; time. Using a byte-compiled automaton can significantly
|
||
;; speed up parsing!
|
||
(fset actn
|
||
`(lambda (,stack ,sp ,gotos)
|
||
(let* (,@bl
|
||
($region
|
||
,(cond
|
||
((= n 1)
|
||
(if (assq '$region1 bl)
|
||
'$region1
|
||
`(cdr (aref ,stack (1- ,sp)))))
|
||
((> n 1)
|
||
`(wisent-production-bounds
|
||
,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
|
||
($action ,NAME)
|
||
($nterm ',(aref tags nt))
|
||
,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
|
||
(,state (cdr (assq $nterm
|
||
(aref ,gotos
|
||
(aref ,stack ,sp))))))
|
||
(setq ,sp (+ ,sp 2))
|
||
;; push semantic value
|
||
(aset ,stack (1- ,sp) (cons ,form $region))
|
||
;; push next state
|
||
(aset ,stack ,sp ,state)
|
||
;; return new top of stack
|
||
,sp)))
|
||
|
||
;; Return the semantic action symbol
|
||
actn)))
|
||
|
||
;;;; ----------------------------
|
||
;;;; Build parser LALR automaton.
|
||
;;;; ----------------------------
|
||
|
||
(defun wisent-parser-automaton ()
|
||
"Compute and return LALR(1) automaton from GRAMMAR.
|
||
GRAMMAR is in internal format. GRAM/ACTS are grammar rules
|
||
in internal format. STARTS defines the start symbols."
|
||
;; Check for useless stuff
|
||
(wisent-reduce-grammar)
|
||
|
||
(wisent-set-derives)
|
||
(wisent-set-nullable)
|
||
;; convert to nondeterministic finite state machine.
|
||
(wisent-generate-states)
|
||
;; make it deterministic.
|
||
(wisent-lalr)
|
||
;; Find and record any conflicts: places where one token of
|
||
;; lookahead is not enough to disambiguate the parsing. Also
|
||
;; resolve s/r conflicts based on precedence declarations.
|
||
(wisent-resolve-conflicts)
|
||
(wisent-print-results)
|
||
|
||
(vector (wisent-state-actions) ; action table
|
||
(wisent-goto-actions) ; goto table
|
||
start-table ; start symbols
|
||
(aref rcode 0) ; sem. action symbol obarray
|
||
)
|
||
)
|
||
|
||
;;;; -------------------
|
||
;;;; Parse input grammar
|
||
;;;; -------------------
|
||
|
||
(defconst wisent-reserved-symbols (list wisent-error-term)
|
||
"The list of reserved symbols.
|
||
Also all symbols starting with a character defined in
|
||
`wisent-reserved-capitals' are reserved for internal use.")
|
||
|
||
(defconst wisent-reserved-capitals '(?\$ ?\@)
|
||
"The list of reserved capital letters.
|
||
All symbol starting with one of these letters are reserved for
|
||
internal use.")
|
||
|
||
(defconst wisent-starts-nonterm '$STARTS
|
||
"Main start symbol.
|
||
It gives the rules for start symbols.")
|
||
|
||
(defvar wisent-single-start-flag nil
|
||
"Non-nil means allows only one start symbol like in Bison.
|
||
That is don't add extra start rules to the grammar. This is
|
||
useful to compare the Wisent's generated automaton with the Bison's
|
||
one.")
|
||
|
||
(defsubst wisent-ISVALID-VAR (x)
|
||
"Return non-nil if X is a character or an allowed symbol."
|
||
(and x (symbolp x)
|
||
(not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
|
||
(not (memq x wisent-reserved-symbols))))
|
||
|
||
(defsubst wisent-ISVALID-TOKEN (x)
|
||
"Return non-nil if X is a character or an allowed symbol."
|
||
(or (characterp x)
|
||
(wisent-ISVALID-VAR x)))
|
||
|
||
(defun wisent-push-token (symbol &optional nocheck)
|
||
"Push a new SYMBOL in the list of tokens.
|
||
Bypass checking if NOCHECK is non-nil."
|
||
;; Check
|
||
(or nocheck (wisent-ISVALID-TOKEN symbol)
|
||
(error "Invalid terminal symbol: %S" symbol))
|
||
(if (memq symbol token-list)
|
||
(message "*** duplicate terminal `%s' ignored" symbol)
|
||
;; Set up properties
|
||
(wisent-set-prec symbol nil)
|
||
(wisent-set-assoc symbol nil)
|
||
(wisent-set-item-number symbol ntokens)
|
||
;; Add
|
||
(setq ntokens (1+ ntokens)
|
||
token-list (cons symbol token-list))))
|
||
|
||
(defun wisent-push-var (symbol &optional nocheck)
|
||
"Push a new SYMBOL in the list of nonterminals.
|
||
Bypass checking if NOCHECK is non-nil."
|
||
;; Check
|
||
(unless nocheck
|
||
(or (wisent-ISVALID-VAR symbol)
|
||
(error "Invalid nonterminal symbol: %S" symbol))
|
||
(if (memq symbol var-list)
|
||
(error "Nonterminal `%s' already defined" symbol)))
|
||
;; Set up properties
|
||
(wisent-set-item-number symbol nvars)
|
||
;; Add
|
||
(setq nvars (1+ nvars)
|
||
var-list (cons symbol var-list)))
|
||
|
||
(defun wisent-parse-nonterminals (defs)
|
||
"Parse nonterminal definitions in DEFS.
|
||
Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
|
||
respectively rule precedence level, semantic action code and
|
||
usefulness flag. Return a list of rules of the form (LHS . RHS) where
|
||
LHS and RHS are respectively the Left Hand Side and Right Hand Side of
|
||
the rule."
|
||
(setq rprec nil
|
||
rcode nil
|
||
nitems 0
|
||
nrules 0)
|
||
(let (def nonterm rlist rule rules rhs rest item items
|
||
rhl plevel semact @n @count iactn)
|
||
(setq @count 0)
|
||
(while defs
|
||
(setq def (car defs)
|
||
defs (cdr defs)
|
||
nonterm (car def)
|
||
rlist (cdr def)
|
||
iactn 0)
|
||
(or (consp rlist)
|
||
(error "Invalid nonterminal definition syntax: %S" def))
|
||
(while rlist
|
||
(setq rule (car rlist)
|
||
rlist (cdr rlist)
|
||
items (car rule)
|
||
rest (cdr rule)
|
||
rhl 0
|
||
rhs nil)
|
||
|
||
;; Check & count items
|
||
(setq nitems (1+ nitems)) ;; LHS item
|
||
(while items
|
||
(setq item (car items)
|
||
items (cdr items)
|
||
nitems (1+ nitems)) ;; RHS items
|
||
(if (listp item)
|
||
;; Mid-rule action
|
||
(progn
|
||
(setq @count (1+ @count)
|
||
@n (intern (format "@%d" @count)))
|
||
(wisent-push-var @n t)
|
||
;; Push a new empty rule with the mid-rule action
|
||
(setq semact (vector item rhl (list nonterm iactn))
|
||
iactn (1+ iactn)
|
||
plevel nil
|
||
rcode (cons semact rcode)
|
||
rprec (cons plevel rprec)
|
||
item @n ;; Replace action by @N nonterminal
|
||
rules (cons (list item) rules)
|
||
nitems (1+ nitems)
|
||
nrules (1+ nrules)))
|
||
;; Check terminal or nonterminal symbol
|
||
(cond
|
||
((or (memq item token-list) (memq item var-list)))
|
||
;; Create new literal character token
|
||
((characterp item) (wisent-push-token item t))
|
||
((error "Symbol `%s' is used, but is not defined as a token and has no rules"
|
||
item))))
|
||
(setq rhl (1+ rhl)
|
||
rhs (cons item rhs)))
|
||
|
||
;; Check & collect rule precedence level
|
||
(setq plevel (when (vectorp (car rest))
|
||
(setq item (car rest)
|
||
rest (cdr rest))
|
||
(if (and (= (length item) 1)
|
||
(memq (aref item 0) token-list)
|
||
(wisent-prec (aref item 0)))
|
||
(wisent-item-number (aref item 0))
|
||
(error "Invalid rule precedence level syntax: %S" item)))
|
||
rprec (cons plevel rprec))
|
||
|
||
;; Check & collect semantic action body
|
||
(setq semact (vector
|
||
(if rest
|
||
(if (cdr rest)
|
||
(error "Invalid semantic action syntax: %S" rest)
|
||
(car rest))
|
||
;; Give a default semantic action body: nil
|
||
;; for an empty rule or $1, the value of the
|
||
;; first symbol in the rule, otherwise.
|
||
(if (> rhl 0) '$1 '()))
|
||
rhl
|
||
(list nonterm iactn))
|
||
iactn (1+ iactn)
|
||
rcode (cons semact rcode))
|
||
(setq rules (cons (cons nonterm (nreverse rhs)) rules)
|
||
nrules (1+ nrules))))
|
||
|
||
(setq ruseful (make-vector (1+ nrules) t)
|
||
rprec (vconcat (cons nil (nreverse rprec)))
|
||
rcode (vconcat (cons nil (nreverse rcode))))
|
||
(nreverse rules)
|
||
))
|
||
|
||
(defun wisent-parse-grammar (grammar &optional start-list)
|
||
"Parse GRAMMAR and build a suitable internal representation.
|
||
Optional argument START-LIST defines the start symbols.
|
||
GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
|
||
|
||
TOKENS is a list of terminal symbols (tokens).
|
||
|
||
ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
|
||
describing the associativity of TOKENS. ASSOC-TYPE must be one of the
|
||
`default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
|
||
is `default-prec', ASSOC-VALUE must be nil or t (the default).
|
||
Otherwise it is a list of tokens which must have been previously
|
||
declared in TOKENS.
|
||
|
||
NONTERMS is the list of non terminal definitions (see function
|
||
`wisent-parse-nonterminals')."
|
||
(or (and (consp grammar) (> (length grammar) 2))
|
||
(error "Bad input grammar"))
|
||
|
||
(let (i r rhs pre dpre lst start-var assoc rules item
|
||
token var def tokens defs ep-token ep-var ep-def)
|
||
|
||
;; Built-in tokens
|
||
(setq ntokens 0 nvars 0)
|
||
(wisent-push-token wisent-eoi-term t)
|
||
(wisent-push-token wisent-error-term t)
|
||
|
||
;; Check/collect terminals
|
||
(setq lst (car grammar))
|
||
(while lst
|
||
(wisent-push-token (car lst))
|
||
(setq lst (cdr lst)))
|
||
|
||
;; Check/Set up tokens precedence & associativity
|
||
(setq lst (nth 1 grammar)
|
||
pre 0
|
||
defs nil
|
||
dpre nil
|
||
default-prec t)
|
||
(while lst
|
||
(setq def (car lst)
|
||
assoc (car def)
|
||
tokens (cdr def)
|
||
lst (cdr lst))
|
||
(if (eq assoc 'default-prec)
|
||
(progn
|
||
(or (null (cdr tokens))
|
||
(memq (car tokens) '(t nil))
|
||
(error "Invalid default-prec value: %S" tokens))
|
||
(setq default-prec (car tokens))
|
||
(if dpre
|
||
(message "*** redefining default-prec to %s"
|
||
default-prec))
|
||
(setq dpre t))
|
||
(or (memq assoc '(left right nonassoc))
|
||
(error "Invalid associativity syntax: %S" assoc))
|
||
(setq pre (1+ pre))
|
||
(while tokens
|
||
(setq token (car tokens)
|
||
tokens (cdr tokens))
|
||
(if (memq token defs)
|
||
(message "*** redefining precedence of `%s'" token))
|
||
(or (memq token token-list)
|
||
;; Define token not previously declared.
|
||
(wisent-push-token token))
|
||
(setq defs (cons token defs))
|
||
;; Record the precedence and associativity of the terminal.
|
||
(wisent-set-prec token pre)
|
||
(wisent-set-assoc token assoc))))
|
||
|
||
;; Check/Collect nonterminals
|
||
(setq lst (nthcdr 2 grammar)
|
||
defs nil)
|
||
(while lst
|
||
(setq def (car lst)
|
||
lst (cdr lst))
|
||
(or (consp def)
|
||
(error "Invalid nonterminal definition: %S" def))
|
||
(if (memq (car def) token-list)
|
||
(error "Nonterminal `%s' already defined as token" (car def)))
|
||
(wisent-push-var (car def))
|
||
(setq defs (cons def defs)))
|
||
(or defs
|
||
(error "No input grammar"))
|
||
(setq defs (nreverse defs))
|
||
|
||
;; Set up the start symbol.
|
||
(setq start-table nil)
|
||
(cond
|
||
|
||
;; 1. START-LIST is nil, the start symbol is the first
|
||
;; nonterminal defined in the grammar (Bison like).
|
||
((null start-list)
|
||
(setq start-var (caar defs)))
|
||
|
||
;; 2. START-LIST contains only one element, it is the start
|
||
;; symbol (Bison like).
|
||
((or wisent-single-start-flag (null (cdr start-list)))
|
||
(setq start-var (car start-list))
|
||
(or (assq start-var defs)
|
||
(error "Start symbol `%s' has no rule" start-var)))
|
||
|
||
;; 3. START-LIST contains more than one element. All defines
|
||
;; potential start symbols. One of them (the first one by
|
||
;; default) will be given at parse time to be the parser goal.
|
||
;; If `wisent-single-start-flag' is non-nil that feature is
|
||
;; disabled and the first nonterminal in START-LIST defines
|
||
;; the start symbol, like in case 2 above.
|
||
((not wisent-single-start-flag)
|
||
|
||
;; START-LIST is a list of nonterminals '(nt0 ... ntN).
|
||
;; Build and push ad hoc start rules in the grammar:
|
||
|
||
;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
|
||
;; ($nt1 (($$nt1 nt1) $2))
|
||
;; ...
|
||
;; ($ntN (($$ntN ntN) $2))
|
||
|
||
;; Where internal symbols $ntI and $$ntI are respectively
|
||
;; nonterminals and terminals.
|
||
|
||
;; The internal start symbol $STARTS is used to build the
|
||
;; LALR(1) automaton. The true default start symbol used by the
|
||
;; parser is the first nonterminal in START-LIST (nt0).
|
||
(setq start-var wisent-starts-nonterm
|
||
lst (nreverse start-list))
|
||
(while lst
|
||
(setq var (car lst)
|
||
lst (cdr lst))
|
||
(or (memq var var-list)
|
||
(error "Start symbol `%s' has no rule" var))
|
||
(unless (assq var start-table) ;; Ignore duplicates
|
||
;; For each nt start symbol
|
||
(setq ep-var (intern (format "$%s" var))
|
||
ep-token (intern (format "$$%s" var)))
|
||
(wisent-push-token ep-token t)
|
||
(wisent-push-var ep-var t)
|
||
(setq
|
||
;; Add entry (nt . $$nt) to start-table
|
||
start-table (cons (cons var ep-token) start-table)
|
||
;; Add rule ($nt (($$nt nt) $2))
|
||
defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
|
||
;; Add start rule (($nt) $1)
|
||
ep-def (cons (list (list ep-var) '$1) ep-def))
|
||
))
|
||
(wisent-push-var start-var t)
|
||
(setq defs (cons (cons start-var ep-def) defs))))
|
||
|
||
;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
|
||
(setq rules (wisent-parse-nonterminals defs))
|
||
|
||
;; Set up the terminal & nonterminal lists.
|
||
(setq nsyms (+ ntokens nvars)
|
||
token-list (nreverse token-list)
|
||
lst var-list
|
||
var-list nil)
|
||
(while lst
|
||
(setq var (car lst)
|
||
lst (cdr lst)
|
||
var-list (cons var var-list))
|
||
(wisent-set-item-number ;; adjust nonterminal item number to
|
||
var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
|
||
|
||
;; Store special item numbers
|
||
(setq error-token-number (wisent-item-number wisent-error-term)
|
||
start-symbol (wisent-item-number start-var))
|
||
|
||
;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
|
||
;; associated to item number I.
|
||
(setq tags (vconcat token-list var-list))
|
||
;; Set up RLHS RRHS & RITEM data structures from list of rules
|
||
;; (LHS . RHS) received from `wisent-parse-nonterminals'.
|
||
(setq rlhs (make-vector (1+ nrules) nil)
|
||
rrhs (make-vector (1+ nrules) nil)
|
||
ritem (make-vector (1+ nitems) nil)
|
||
i 0
|
||
r 1)
|
||
(while rules
|
||
(aset rlhs r (wisent-item-number (caar rules)))
|
||
(aset rrhs r i)
|
||
(setq rhs (cdar rules)
|
||
pre nil)
|
||
(while rhs
|
||
(setq item (wisent-item-number (car rhs)))
|
||
;; Get default precedence level of rule, that is the
|
||
;; precedence of the last terminal in it.
|
||
(and (wisent-ISTOKEN item)
|
||
default-prec
|
||
(setq pre item))
|
||
|
||
(aset ritem i item)
|
||
(setq i (1+ i)
|
||
rhs (cdr rhs)))
|
||
;; Setup the precedence level of the rule, that is the one
|
||
;; specified by %prec or the default one.
|
||
(and (not (aref rprec r)) ;; Already set by %prec
|
||
pre
|
||
(wisent-prec (aref tags pre))
|
||
(aset rprec r pre))
|
||
(aset ritem i (- r))
|
||
(setq i (1+ i)
|
||
r (1+ r))
|
||
(setq rules (cdr rules)))
|
||
))
|
||
|
||
;;;; ---------------------
|
||
;;;; Compile input grammar
|
||
;;;; ---------------------
|
||
|
||
(defun wisent--compile-grammar (grammar start-list)
|
||
"Compile the LALR(1) GRAMMAR.
|
||
|
||
GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
|
||
|
||
- TOKENS is a list of terminal symbols (tokens).
|
||
|
||
- ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
|
||
describing the associativity of TOKENS. ASSOC-TYPE must be one of
|
||
the `default-prec' `nonassoc', `left' or `right' symbols. When
|
||
ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
|
||
default). Otherwise it is a list of tokens which must have been
|
||
previously declared in TOKENS.
|
||
|
||
- NONTERMS is a list of nonterminal definitions.
|
||
|
||
Optional argument START-LIST specify the possible grammar start
|
||
symbols. This is a list of nonterminals which must have been
|
||
previously declared in GRAMMAR's NONTERMS form. By default, the start
|
||
symbol is the first nonterminal defined. When START-LIST contains
|
||
only one element, it is the start symbol. Otherwise, all elements are
|
||
possible start symbols, unless `wisent-single-start-flag' is non-nil.
|
||
In that case, the first element is the start symbol, and others are
|
||
ignored.
|
||
|
||
Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
|
||
where:
|
||
|
||
- ACTIONS is a state/token matrix telling the parser what to do at
|
||
every state based on the current lookahead token. That is shift,
|
||
reduce, accept or error.
|
||
|
||
- GOTOS is a state/nonterminal matrix telling the parser the next
|
||
state to go to after reducing with each rule.
|
||
|
||
- STARTS is an alist which maps the allowed start nonterminal symbols
|
||
to tokens that will be first shifted into the parser stack.
|
||
|
||
- FUNCTIONS is an obarray of semantic action symbols. Each symbol's
|
||
function definition is the semantic action lambda expression."
|
||
(if (wisent-automaton-p grammar)
|
||
grammar ;; Grammar already compiled just return it
|
||
(wisent-with-context compile-grammar
|
||
(let* ((gc-cons-threshold (max gc-cons-threshold 1000000)))
|
||
(garbage-collect)
|
||
(setq wisent-new-log-flag t)
|
||
;; Parse input grammar
|
||
(wisent-parse-grammar grammar start-list)
|
||
;; Generate the LALR(1) automaton
|
||
(wisent-parser-automaton)))))
|
||
|
||
;;;; --------------------------
|
||
;;;; Obsolete byte compile support
|
||
;;;; --------------------------
|
||
|
||
(require 'bytecomp)
|
||
|
||
(defun wisent-byte-compile-grammar (form)
|
||
"Byte compile the `wisent-compile-grammar' FORM.
|
||
Automatically called by the Emacs Lisp byte compiler as a
|
||
`byte-compile' handler."
|
||
(byte-compile-form
|
||
(macroexpand-all
|
||
(wisent-automaton-lisp-form (eval form t)))))
|
||
|
||
(defun wisent-compile-grammar (grammar &optional start-list)
|
||
;; This is kept for compatibility with FOO-wy.el files generated
|
||
;; with older Emacsen.
|
||
(declare (obsolete wisent-compiled-grammar "Mar 2021"))
|
||
(wisent--compile-grammar grammar start-list))
|
||
|
||
(put 'wisent-compile-grammar 'byte-compile #'wisent-byte-compile-grammar)
|
||
|
||
;;;; --------------------------
|
||
;;;; Byte compile input grammar
|
||
;;;; --------------------------
|
||
|
||
;; `wisent--compile-grammar' generates the actual parse table
|
||
;; we need at run-time, but in order to be able to compile the code it
|
||
;; contains, we need to "reify" it back into a piece of ELisp code
|
||
;; which (re)builds it.
|
||
;; This is needed for 2 reasons:
|
||
;; - The parse tables include an obarray and these don't survive the print+read
|
||
;; steps involved in generating a `.elc' file and reading it back in.
|
||
;; - Within the parse table vectors/obarrays we have ELisp functions which
|
||
;; we want to byte-compile, but if we were to just `quote' the table
|
||
;; we'd get them with the same non-compiled functions.
|
||
(defun wisent-automaton-lisp-form (automaton)
|
||
"Return a Lisp form that produces AUTOMATON.
|
||
See also `wisent-compile-grammar' for more details on AUTOMATON."
|
||
(or (wisent-automaton-p automaton)
|
||
(signal 'wrong-type-argument
|
||
(list 'wisent-automaton-p automaton)))
|
||
(let ((obn (make-symbol "ob")) ; Generated obarray name
|
||
(obv (aref automaton 3)) ; Semantic actions obarray
|
||
)
|
||
`(let ((,obn (obarray-make 13)))
|
||
;; Generate code to initialize the semantic actions obarray,
|
||
;; in local variable OBN.
|
||
,@(let (obcode)
|
||
(mapatoms
|
||
(lambda (s)
|
||
(setq obcode
|
||
(cons `(fset (intern ,(symbol-name s) ,obn)
|
||
#',(symbol-function s))
|
||
obcode)))
|
||
obv)
|
||
obcode)
|
||
;; Generate code to create the automaton.
|
||
(vector
|
||
;; In code generated to initialize the action table, take
|
||
;; care of symbols that are interned in the semantic actions
|
||
;; obarray.
|
||
(vector
|
||
,@(mapcar
|
||
;; Use name `st' rather than `state' since `state' is
|
||
;; defined as dynbound in `semantic-actions' context above :-( !
|
||
(lambda (st) ;; for each state
|
||
`(list
|
||
,@(mapcar
|
||
(lambda (tr) ;; for each transition
|
||
(let ((k (car tr)) ; token
|
||
(a (cdr tr))) ; action
|
||
(if (and (symbolp a)
|
||
(intern-soft (symbol-name a) obv))
|
||
`(cons ,(if (symbolp k) `(quote ,k) k)
|
||
(intern-soft ,(symbol-name a) ,obn))
|
||
`(quote ,tr))))
|
||
st)))
|
||
(aref automaton 0)))
|
||
;; The code of the goto table is unchanged.
|
||
,(aref automaton 1)
|
||
;; The code of the alist of start symbols is unchanged.
|
||
',(aref automaton 2)
|
||
;; The semantic actions obarray is in the local variable OBN.
|
||
,obn))))
|
||
|
||
(provide 'semantic/wisent/comp)
|
||
|
||
;; Disable messages with regards to lexical scoping, since this will
|
||
;; produce a bunch of 'lacks a prefix' warnings with the
|
||
;; `wisent-defcontext' trickery above.
|
||
|
||
;; Local variables:
|
||
;; byte-compile-warnings: (not lexical)
|
||
;; generated-autoload-load-name: "semantic/wisent/comp"
|
||
;; End:
|
||
|
||
;;; semantic/wisent/comp.el ends here
|