SGML parser library
Copyright (C) 1997 Michel Dagenais
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
For more information on this program, contact Michel Dagenais at
dagenais@vlsi.polymtl.ca or Electrical Eng. Dept., Ecole Polytechnique
P.O. Box 6079, Station A, Montreal, Quebec, Canada, H3C 3A7.
MODULE FSM;
IMPORT RefSeq, Atom, AtomRefTbl, DeepCopy;
IMPORT AtomPkl; <*NOWARN*> It is used for its side effect
REVEAL
T = BRANDED REF RECORD
firstNode, lastNode: Node;
nodes: RefSeq.T;
END;
Node = BRANDED REF RECORD
replacedBy: Node;
replaces: RefSeq.T;
next: AtomRefTbl.T;
else: Node;
skip: Node;
id: CARDINAL;
END;
As FSM are combined, nodes are merged together (one replacing the other).
The links to merged nodes are not updated until the end to insure a linear
time complexity. Each node remembers the nodes it replaces and
the node it is replaced by. The next
table stores the next state
for each accepted event. The else
at each node handles all events
not otherwise specified in the next
state table. The skip
points
to states which may be entered directly without any event.
PROCEDURE NewNode(): Node =
BEGIN
RETURN NEW(Node, else := NIL, skip := NIL, replacedBy := NIL, replaces :=
NEW(RefSeq.T).init(), next := NEW(AtomRefTbl.Default).init());
END NewNode;
PROCEDURE ReplaceNode(n2, n1: Node) =
BEGIN
<* ASSERT n2.replacedBy = NIL *>
n2.replacedBy := n1;
n1.replaces.addhi(n2);
END ReplaceNode;
A NIL type is used to set the skip
transition used for optional
or repeat constructs.
PROCEDURE New(VAR m: T; type: Atom.T) =
BEGIN
m := NEW(T, firstNode := NewNode(), lastNode := NewNode());
IF type = NIL THEN
m.firstNode.skip := m.lastNode;
ELSE
EVAL m.firstNode.next.put(type,m.lastNode);
END;
END New;
An edge which accepts any event, used for ELSE transitions in Or.
PROCEDURE NewElse(VAR m: T) =
BEGIN
m := NEW(T, firstNode := NewNode(), lastNode := NewNode());
m.firstNode.else := m.lastNode;
END NewElse;
expression for m1
followed by expression for m2
PROCEDURE Sequence(VAR m1, m2, result: T) =
BEGIN
ReplaceNode(m2.firstNode, m1.lastNode);
result := m1;
result.lastNode := m2.lastNode;
m1 := NIL;
m2 := NIL;
END Sequence;
expresion for m1
or expression for m2
PROCEDURE Or(VAR m1, m2, result: T) =
BEGIN
ReplaceNode(m2.firstNode, m1.firstNode);
ReplaceNode(m2.lastNode, m1.lastNode);
result := m1;
m1 := NIL;
m2 := NIL;
END Or;
Zero or one m
PROCEDURE Optional(VAR m, result: T) =
BEGIN
New(result,NIL);
ReplaceNode(m.firstNode, result.firstNode);
ReplaceNode(m.lastNode, result.lastNode);
m := NIL;
END Optional;
Zero or more m
PROCEDURE Repeat(VAR m, result: T) =
BEGIN
New(result,NIL);
ReplaceNode(m.firstNode, result.firstNode);
ReplaceNode(m.lastNode, result.firstNode);
m := NIL;
END Repeat;
PROCEDURE Copy(READONLY m: T; VAR result: T) =
BEGIN
result := DeepCopy.Copy(m);
END Copy;
Relink all the links in next
, else
, and skip
transitions to account
for merged (replaced) nodes.
PROCEDURE Wrap(VAR m: T) RAISES {Error} =
BEGIN
m.nodes := NEW(RefSeq.T).init();
RelinkNode(m,m.firstNode);
END Wrap;
PROCEDURE RelinkNode(m: T; n: Node) RAISES {Error} =
VAR
node, destNode, nextNode: Node;
iter := n.next.iterate();
type: Atom.T;
replaces := n.replaces;
tmp: REFANY;
BEGIN
destNode := n;
(* Store all the non replaced nodes in a sequence for easier acces later.*)
IF n.replacedBy = NIL THEN
n.id := m.nodes.size();
m.nodes.addhi(n);
END;
(* Determine the chain end, the node which really replaces this one
and all the nodes in between in the "replacedBy" chain. *)
WHILE destNode.replacedBy # NIL DO destNode := destNode.replacedBy; END;
(* Make all nodes in the chain point directly to the chain end as
the real "replacedBy". *)
node := n;
WHILE node.replacedBy # NIL DO
nextNode := node.replacedBy;
node.replacedBy := destNode;
node := nextNode;
END;
(* Mark the node as processed, as far as "replacedBy" is concerned. *)
n.replaces := NIL;
(* Prepare for the updated "next" table as links will be inserted from
all replaced nodes. *)
n.next := NEW(AtomRefTbl.Default).init();
(* Process all the replaced nodes *)
FOR i := 0 TO replaces.size() - 1 DO
node := replaces.get(i);
IF node.replaces # NIL THEN RelinkNode(m,node); END;
END;
(* Process the else node and update the else link *)
IF n.else # NIL THEN
IF n.else.replaces # NIL THEN RelinkNode(m,n.else); END;
IF n.else.replacedBy # NIL THEN n.else := n.else.replacedBy; END;
END;
(* Process the skip node and update the skip link *)
IF n.skip # NIL THEN
IF n.skip.replaces # NIL THEN RelinkNode(m,n.skip); END;
IF n.skip.replacedBy # NIL THEN n.skip := n.skip.replacedBy; END;
END;
(* Use the replacing node, if any, as new origin for the links.
Process the replacedBy node, and merge the "else" and "skip"
transitions. *)
IF n.replacedBy # NIL THEN
nextNode := n.replacedBy;
IF nextNode.replaces # NIL THEN RelinkNode(m,nextNode); END;
(* If two merged nodes both specify "else" or "skip" transitions,
there is ambiguity. Raise an error. *)
IF n.else # NIL THEN
IF nextNode.else # NIL THEN
RAISE Error("Too many ANY applicable");
ELSE
nextNode.else := n.else;
END;
END;
IF n.skip # NIL THEN
IF nextNode.skip # NIL THEN
RAISE Error("Too many optional/repeat applicable");
ELSE
nextNode.skip := n.skip;
END;
END;
ELSE
nextNode := n;
END;
(* Insert all the transitions from the "next" table into the new
"next" table with the next node properly updated. *)
WHILE iter.next(type,tmp) DO
node := tmp;
IF node.replaces # NIL THEN RelinkNode(m,node); END;
IF node.replacedBy # NIL THEN node := node.replacedBy; END;
(* Two merged nodes both specified a transition for the same event.
Raise an error. *)
IF nextNode.next.get(type,tmp) THEN
RAISE Error("Duplicate " & Atom.ToText(type));
END;
EVAL nextNode.next.put(type,node);
END;
END RelinkNode;
PROCEDURE StartNode(READONLY m: T): Node =
BEGIN
RETURN m.firstNode;
END StartNode;
PROCEDURE Enter(currNode: Node; type: Atom.T; VAR nextNode: Node): BOOLEAN =
VAR
tmp: REFANY;
BEGIN
LOOP
(* This is an acceptable transition from the current state. *)
IF currNode.next.get(type,tmp) THEN
nextNode := NARROW(tmp,Node);
RETURN TRUE;
END;
(* The else clause accepts anything. *)
IF currNode.else # NIL THEN
nextNode := currNode.else;
RETURN TRUE;
END;
(* Exit the current "optional" or "repeat" construct which cannot
accept the specified event. Perhaps the event is acceptable
further down in the FSM. *)
IF currNode.skip # NIL THEN
currNode := currNode.skip;
ELSE
RETURN FALSE;
END;
END;
END Enter;
PROCEDURE Exit(currNode: Node): BOOLEAN =
BEGIN
LOOP
(* The final FSM exit node was reached *)
IF currNode.next.size() = 0 AND currNode.else = NIL AND
currNode.skip = NIL THEN
RETURN TRUE;
END;
(* Exit the current Optional or Repeat construct to see if the final
FSM exit node may be reached. *)
IF currNode.skip # NIL THEN
currNode := currNode.skip;
ELSE
RETURN FALSE;
END;
END;
END Exit;
PROCEDURE Expect(currNode: Node): Atom.T =
VAR
type: Atom.T;
tmp: REFANY;
BEGIN
(* There is only one type of event accepted at this node. *)
IF currNode.next.size() = 1 AND currNode.else = NIL THEN
EVAL currNode.next.iterate().next(type,tmp);
RETURN type;
ELSE
RETURN NIL;
END;
END Expect;
PROCEDURE GetNodes(m: T; VAR first, last: Node): REF ARRAY OF Node =
VAR
a := NEW(REF ARRAY OF Node,m.nodes.size());
BEGIN
FOR i := 0 TO m.nodes.size() - 1 DO a[i] := m.nodes.get(i); END;
first := m.firstNode;
last := m.lastNode;
RETURN a;
END GetNodes;
PROCEDURE NodeId(<*UNUSED*>m: T; n: Node): CARDINAL =
BEGIN
RETURN n.id;
END NodeId;
PROCEDURE NodeContent(<*UNUSED*>m: T; n: Node; VAR id: CARDINAL;
VAR next: REF ARRAY OF Edge; VAR else, skip: Node) =
VAR
label: Atom.T;
destination: REFANY;
BEGIN
id := n.id;
else := n.else;
skip := n.skip;
next := NEW(REF ARRAY OF Edge,n.next.size());
WITH iter = n.next.iterate() DO
FOR i := 0 TO n.next.size() - 1 DO
EVAL iter.next(label,destination);
next[i] := Edge{label,destination};
END;
END;
END NodeContent;
BEGIN
END FSM.