MODULE---------------------------------------------------------------------------; IMPORT ASCII, Rd, TextSetDef, Text, Thread, Fmt; IMPORT ScanToken, ScanTokenSeq, SMsg AS Msg; SimpleScanner
CONST Backslash = '\\';---------------------------------------------------------------------------
REVEAL T = Public BRANDED "SimpleScanner Type 0.0" OBJECT (* public variables skipComments : BOOLEAN; default TRUE nestingComments : BOOLEAN; default TRUE oneLineComments : BOOLEAN; default TRUE commentOpenSym : TEXT; default `/*' commentCloseSym : TEXT; default `*/' lineCommentSym : TEXT; default '#' stringOpenSym : TEXT; default `"' stringCloseSym : TEXT; default `"' identSymbols : ASCII.Set; default ASCII.AlphaNumerics + {'_'} whiteSpace : ASCII.Set; default ASCII.Spaces specialSymbols : ASCII.Set; default DefaultSpecialChars compoundToken : TextSet.T; default {`:=', `<=', `>=', `->'} keywordToken : TextSet.T; default {} *) endOfFile : BOOLEAN; rd : Rd.T; line, col : CARDINAL; actLen : CARDINAL; actLine : TEXT; actData : TEXT; stack : ScanTokenSeq.T; METHODS nextLine() RAISES {Error} := NextLine; nextChar() : CHAR RAISES {Error} := NextChar; actChar() : CHAR RAISES {Error} := ActChar; pos() : TEXT := Pos; errorState() : TEXT := ErrorState; OVERRIDES init := Init; nextToken := NextToken; pushBack := PushBack; eof := Eof; END;---------------------------------------------------------------------------
PROCEDURE---------------------------------------------------------------------------Init (self : T; inputStream : Rd.T) : T = BEGIN self.skipComments := TRUE; self.nestingComments := TRUE; self.oneLineComments := TRUE; self.commentOpenSym := "/*"; self.commentCloseSym := "*/"; self.lineCommentSym := "#"; self.stringOpenSym := "\""; self.stringCloseSym := "\""; self.identSymbols := ASCII.AlphaNumerics + ASCII.Set{'_'}; self.whiteSpace := ASCII.Spaces; self.compoundToken := NEW(TextSetDef.T).init(); self.keywordToken := NEW(TextSetDef.T).init(); self.rd := inputStream; self.specialSymbols := DefaultSpecialChars; EVAL self.compoundToken.insert(":="); EVAL self.compoundToken.insert("<="); EVAL self.compoundToken.insert(">="); EVAL self.compoundToken.insert("->"); self.line := 0; self.col := 0; self.actLine := ""; self.actLen := 0; self.actData := ""; self.stack := NIL; (* allocate stack only on need *) RETURN self; END Init;
PROCEDURE---------------------------------------------------------------------------Pos (self : T) : TEXT = BEGIN RETURN "line " & Fmt.Int(self.line) & " column " & Fmt.Int(self.col); END Pos;
PROCEDURE---------------------------------------------------------------------------ErrorState (self : T) : TEXT = VAR c : CHAR; cs : TEXT; BEGIN TRY c := self.actChar(); cs := Text.FromChar(c); EXCEPT ELSE cs := "undefined"; END; RETURN "\n current symbol: `" & cs & "'" & "\n current line: `" & self.actLine & "'" & "\n accumulated data: `" & self.actData & "'"; END ErrorState;
PROCEDURE---------------------------------------------------------------------------NextLine (self : T) RAISES {Error} = BEGIN IF self.col >= Text.Length(self.actLine) THEN TRY self.actLine := Rd.GetLine(self.rd) & "\n"; self.col := 0; self.actLen := Text.Length(self.actLine); INC(self.line); EXCEPT Rd.EndOfFile => self.endOfFile := TRUE; | Rd.Failure => RAISE Error("error reading input stream"); | Thread.Alerted => RAISE Error("interrupted while scanning"); END; END; END NextLine;
PROCEDURE---------------------------------------------------------------------------ActChar (self : T) : CHAR RAISES {Error} = BEGIN IF self.col < self.actLen THEN RETURN Text.GetChar(self.actLine, self.col); ELSE RAISE Error(NIL); END; END ActChar;
PROCEDURE---------------------------------------------------------------------------NextChar (self : T) : CHAR RAISES {Error} = VAR c : CHAR; BEGIN WHILE (NOT self.endOfFile) AND (self.col >= self.actLen) DO NextLine(self); END; IF self.endOfFile THEN c := '?'; ELSE c := Text.GetChar(self.actLine, self.col); INC(self.col); END; RETURN c; END NextChar;
PROCEDURE---------------------------------------------------------------------------SkipWhiteSpace (self : T) RAISES {Error} = BEGIN WHILE (NOT self.endOfFile) AND (self.nextChar() IN self.whiteSpace) DO (* nothing *) END; IF self.col > 0 THEN DEC(self.col); END; END SkipWhiteSpace;
PROCEDURE---------------------------------------------------------------------------ReadSymbols (self : T; set : ASCII.Set) RAISES {Error} = VAR c := self.nextChar(); BEGIN WHILE (NOT self.endOfFile) AND (c IN set) DO self.actData := self.actData & Text.FromChar(c); c := self.nextChar(); END; IF self.col > 0 THEN DEC(self.col); ELSE RAISE Error("ReadSymbols: cannot push back character"); END; END ReadSymbols;
PROCEDURE---------------------------------------------------------------------------Adjust (self : T; token : TEXT) RAISES {Error} = BEGIN WITH sub = Text.Length(self.actData) - Text.Length(token) DO IF self.col >= sub THEN DEC(self.col, sub); ELSE RAISE Error("Adjust: cannot push back characters"); END; END; END Adjust;
PROCEDURE---------------------------------------------------------------------------Matches (self : T; token : TEXT) : BOOLEAN = BEGIN WITH tlen = Text.Length(token) DO WITH dlen = Text.Length(self.actData) DO IF tlen > dlen THEN RETURN FALSE; END; RETURN Text.Equal(Text.Sub(self.actData, 0, tlen), token); END; END; END Matches;
PROCEDURE---------------------------------------------------------------------------ReadString (self : T) RAISES {Error} = CONST OctalSet = ASCII.Set{'0'..'7'}; VAR s := ASCII.All; t := ASCII.Set{}; d : TEXT; BEGIN WITH slen = Text.Length(self.stringOpenSym) DO self.actData := Text.Sub(self.actData, slen); END; (* string open symbol stripped off *) IF Text.Length(self.actData) > 0 THEN (* check if the string end has already been read *) (* lots of special symbols may have been accumulated in actData by ReadSymbols, eg. "\\" or "\\\" *) (* We just throw them away (to be read again), because there is no other way to interpret the escape sequences in strings *) Adjust(self, ""); self.actData := ""; END; FOR i := 0 TO Text.Length(self.stringCloseSym) - 1 DO t := t + ASCII.Set{Text.GetChar(self.stringCloseSym, i)}; END; s := s - ASCII.Set{Backslash}; s := s - t; LOOP ReadSymbols(self, s); IF self.actChar() = Backslash THEN EVAL self.nextChar(); VAR n := self.nextChar(); BEGIN IF n = 'n' THEN n := ASCII.NL; ELSIF n = 'r' THEN n := ASCII.CR; ELSIF n = 't' THEN n := ASCII.HT; ELSIF n = 'h' THEN n := ASCII.BS; ELSIF n = 'g' THEN n := ASCII.BEL; ELSIF n = '[' THEN n := ASCII.ESC; ELSIF n IN OctalSet THEN VAR val := ORD(n) - ORD('0'); numOct := 1; BEGIN n := self.nextChar(); WHILE n IN OctalSet AND numOct < 3 DO INC(numOct); val := val * 8 + ORD(n) - ORD('0'); n := self.nextChar(); END; DEC(self.col); (* last char must be read again *) val := val MOD 256; n := VAL(val, CHAR); END; END; self.actData := self.actData & Text.FromChar(n); END; ELSE d := self.actData; self.actData := ""; ReadSymbols(self, t); IF Matches(self, self.stringCloseSym) THEN Adjust(self, self.stringCloseSym); self.actData := d; RETURN; ELSE self.actData := d & Text.Sub(self.actData, 0, 1); Adjust(self, "x"); END; END; END; END ReadString;
PROCEDURE---------------------------------------------------------------------------ReadOneLineComment (self : T) = BEGIN self.actData := Text.Sub(self.actLine, self.col); self.col := self.actLen + 1; END ReadOneLineComment;
PROCEDURE---------------------------------------------------------------------------ReadMultiLineComment (self : T) RAISES {Error} = VAR s := ASCII.All; t := ASCII.Set{}; d : TEXT; level := 1; BEGIN WITH slen = Text.Length(self.commentOpenSym) DO self.actData := Text.Sub(self.actData, slen); END; FOR i := 0 TO Text.Length(self.commentOpenSym) - 1 DO t := t + ASCII.Set{Text.GetChar(self.commentOpenSym, i)}; END; FOR i := 0 TO Text.Length(self.commentCloseSym) - 1 DO t := t + ASCII.Set{Text.GetChar(self.commentCloseSym, i)}; END; s := s - ASCII.Set{Backslash}; s := s - t; LOOP ReadSymbols(self, s); IF self.actChar() = Backslash THEN EVAL self.nextChar(); self.actData := self.actData & Text.FromChar(self.nextChar()); ELSE d := self.actData; self.actData := ""; ReadSymbols(self, t); IF Matches(self, self.commentCloseSym) THEN DEC(level); IF NOT self.nestingComments OR level = 0 THEN Adjust(self, self.commentCloseSym); self.actData := d; RETURN; ELSE Adjust(self, "x"); self.actData := d & Text.Sub(self.actData, 0, 1); END; ELSIF Matches(self, self.commentOpenSym) THEN INC(level); Adjust(self, self.commentOpenSym); self.actData := d & self.commentOpenSym; ELSE Adjust(self, "x"); self.actData := d & Text.Sub(self.actData, 0, 1); END; END; END; END ReadMultiLineComment;
PROCEDURE---------------------------------------------------------------------------NextToken (self : T) : Token RAISES {Error} = VAR token : Token; BEGIN Debug("scanner.nextToken"); (* if there are tokens on the stack, return the top one *) IF self.stack # NIL AND self.stack.size() > 0 THEN token := self.stack.remlo(); DebugToken(token); RETURN token; END; (* stack is empty *) token := NEW(Token); LOOP (* read until a complete syntactical element has been recognized *) SkipWhiteSpace(self); self.actData := ""; token.line := self.line; token.col := self.col; (* new token refers to correct position *) IF self.endOfFile THEN (* first check for end of file *) token.kind := ScanToken.Kind.EndOfFile; token.repr := ""; EXIT; (* no end of file *) ELSIF self.actChar() IN self.identSymbols THEN (* keyword or identifier found *) ReadSymbols(self, self.identSymbols); token.repr := self.actData; IF self.keywordToken.member(token.repr) THEN token.kind := ScanToken.Kind.Keyword; ELSE token.kind := ScanToken.Kind.Ident; END; EXIT; (* no end of file, no keyword, no identifier *) ELSIF self.actChar() IN self.specialSymbols THEN (* last chance: anything consisting of special symbols *) ReadSymbols(self, self.specialSymbols); IF Matches(self, self.lineCommentSym) AND self.oneLineComments THEN (* one line comment found *) ReadOneLineComment(self); IF NOT self.skipComments THEN (* only return it if comments are not to be skipped *) token.kind := ScanToken.Kind.Comment; token.repr := self.actData; EXIT; END; (* continue at start of loop *) ELSIF Matches(self, self.commentOpenSym) THEN (* multi line comment found *) ReadMultiLineComment(self); IF NOT self.skipComments THEN (* only return it if comments are not to be skipped *) token.kind := ScanToken.Kind.Comment; token.repr := self.actData; EXIT; END; (* continue at start of loop *) ELSE (* check for compound symbol *) VAR iter := self.compoundToken.iterate(); tok : TEXT; found := FALSE; BEGIN WHILE iter.next(tok) DO IF Matches(self, tok) THEN token.repr := tok; token.kind := ScanToken.Kind.CompoundSymbol; Adjust(self, tok); found := TRUE; EXIT; END; END; IF found THEN EXIT END; (* no compound token but special symbol *) IF Matches(self, self.stringOpenSym) THEN (* string found *) ReadString(self); token.repr := self.actData; token.kind := ScanToken.Kind.String; ELSE (* single special character found *) token.repr := Text.Sub(self.actData, 0, 1); token.kind := ScanToken.Kind.Other; Adjust(self, token.repr); END; EXIT; END; END; ELSE (* what we found does not belong to any defined lexical class *) VAR err := "Unexpected symbol at " & self.pos() & ": " & self.errorState(); BEGIN INC(self.col); RAISE Error(err); END; END; END; DebugToken(token); RETURN token; END NextToken;
PROCEDURE---------------------------------------------------------------------------PushBack (self : T; t : Token) = BEGIN Debug("scanner.pushBack"); DebugToken(t); IF self.stack = NIL THEN self.stack := NEW(ScanTokenSeq.T).init(5); END; self.stack.addlo(t); END PushBack;
PROCEDURE---------------------------------------------------------------------------Eof (self : T) : BOOLEAN = BEGIN RETURN self.endOfFile; END Eof;
PROCEDURE---------------------------------------------------------------------------Debug (msg : TEXT) = BEGIN IF debugScanner THEN IF Msg.dFlag THEN Msg.D(msg); ELSIF Msg.tFlag THEN Msg.T(msg); ELSE Msg.Debug(msg); END; END; END Debug;
PROCEDURE---------------------------------------------------------------------------DebugToken (token : ScanToken.T) = BEGIN IF debugScanner THEN ScanToken.DebugToken(token); END; END DebugToken;
BEGIN debugScanner := FALSE; END SimpleScanner.