MODULE---------------------------------------------------------------------------; IMPORT Rd, TextRd, FileRd, Text, TextSeq, TextTextTbl, Fmt, Pathname; IMPORT Glob, GlobTree, RegEx, TextUtils, SimpleScanner, ScanToken, PathRepr; IMPORT FindExpr, FindExprSeq; REVEAL T = Public BRANDED "FileClassification 0.0" OBJECT lhs : FindExprSeq.T; rhs : TextSeq.T; idirs : FindExprSeq.T; uppn : BOOLEAN; OVERRIDES init := Init; size := Size; addFromText := AddFromText; addFromFile := AddFromFile; patterns := Patterns; classSpecs := ClassSpecs; pattern := Pattern; classSpec := ClassSpec; ignoreDir := IgnoreDir; matches := Matches; match := Match; substClassSpec := SubstClassSpec; END; FileClassification
PROCEDURE---------------------------------------------------------------------------Init (self : T; usePosixPathnames := FALSE) : T = BEGIN self.idirs := NEW(FindExprSeq.T).init(); self.lhs := NEW(FindExprSeq.T).init(); self.rhs := NEW(TextSeq.T).init(); self.uppn := usePosixPathnames; RETURN self; END Init;
PROCEDURE---------------------------------------------------------------------------Size (self : T) : CARDINAL = BEGIN RETURN self.lhs.size(); END Size;
PROCEDURE---------------------------------------------------------------------------ParseRules (self : T; rd : Rd.T) RAISES {E} = VAR actToken : SimpleScanner.Token := NIL; scanner := NEW(SimpleScanner.T); subExprs := 0; defaultSeen := FALSE; (*-------------------------------------------------------------------------*) PROCEDURE ParseError(e : TEXT) RAISES {E} = VAR t : TEXT; BEGIN t := Fmt.F("parse error at token `%s': %s", actToken.repr, e); RAISE E(t); END ParseError; (*-------------------------------------------------------------------------*) PROCEDURE GetNext() RAISES {E} = BEGIN TRY actToken := scanner.nextToken(); EXCEPT SimpleScanner.Error(e) => ParseError("scanner error: " & e); END; END GetNext; (*-------------------------------------------------------------------------*) PROCEDURE Matches(t1 : TEXT; t2 : TEXT := NIL; kind := ScanToken.Kind.Keyword) : BOOLEAN RAISES {E} = (*-----------------------------------------------------------------------*) PROCEDURE Matches1(t : TEXT) : BOOLEAN = BEGIN RETURN actToken.kind = kind AND Text.Equal(actToken.repr, t); END Matches1; VAR ok := FALSE; memo := actToken; BEGIN IF Matches1(t1) THEN GetNext(); IF t2 = NIL THEN ok := TRUE; ELSE IF Matches1(t2) THEN GetNext(); ok := TRUE; ELSE scanner.pushBack(actToken); actToken := memo; END; END; END; RETURN ok; END Matches; (*-------------------------------------------------------------------------*) PROCEDURE MatchesSpecial(t1 : TEXT; t2 : TEXT := NIL) : BOOLEAN RAISES {E} = (*-----------------------------------------------------------------------*) PROCEDURE Matches1(t : TEXT) : BOOLEAN = BEGIN RETURN (actToken.kind = ScanToken.Kind.CompoundSymbol OR actToken.kind = ScanToken.Kind.Other) AND Text.Equal(actToken.repr, t); END Matches1; VAR ok := FALSE; memo := actToken; BEGIN IF Matches1(t1) THEN GetNext(); IF t2 = NIL THEN ok := TRUE; ELSE IF Matches1(t2) THEN GetNext(); ok := TRUE; ELSE scanner.pushBack(actToken); actToken := memo; END; END; END; RETURN ok; END MatchesSpecial; (*-------------------------------------------------------------------------*) PROCEDURE GetStringOrIdentifier() : TEXT RAISES {E} = VAR id := actToken.repr; BEGIN IF actToken.kind = ScanToken.Kind.String OR actToken.kind = ScanToken.Kind.Ident THEN GetNext(); RETURN id; END; ParseError("expected string or identifier"); <* NOWARN *> END GetStringOrIdentifier; (*-------------------------------------------------------------------------*) PROCEDURE Options(s : TEXT) : Glob.MatchOptions = BEGIN IF s # NIL AND Text.Length(s) > 0 THEN IF Text.GetChar(s, 0) = FindExpr.RegularExpressionPrefix THEN RETURN Glob.MatchOptions{Glob.MatchOption.UseSimpleRE}; END; END; RETURN Glob.MatchOptions{}; END Options; (*-------------------------------------------------------------------------*) PROCEDURE StripREPrefix(s : TEXT) : TEXT = BEGIN IF s # NIL AND Text.Length(s) > 0 THEN IF Text.GetChar(s, 0) = FindExpr.RegularExpressionPrefix THEN RETURN Text.Sub(s, 1); END; END; RETURN s; END StripREPrefix; (*-------------------------------------------------------------------------*) PROCEDURE ParseExpr() : FindExpr.T RAISES {E} = VAR start : FindExpr.T; BEGIN start := ParseTerm(); IF Matches("or") THEN RETURN GlobTree.Or(start, ParseExpr()); ELSIF MatchesSpecial(")") THEN IF subExprs > 0 THEN DEC(subExprs); RETURN start; ELSE ParseError("too many `)'"); END; END; RETURN start; END ParseExpr; (*-------------------------------------------------------------------------*) PROCEDURE ParseTerm() : FindExpr.T RAISES {E} = VAR start : FindExpr.T; BEGIN start := ParseFactor(); IF Matches("and") THEN RETURN GlobTree.And(start, ParseTerm()); ELSIF MatchesSpecial(")") THEN IF subExprs > 0 THEN DEC(subExprs); RETURN start; ELSE ParseError("too many `)'"); END; END; RETURN start; END ParseTerm; (*-------------------------------------------------------------------------*) PROCEDURE ParseFactor() : FindExpr.T RAISES {E} = VAR factor : TEXT; BEGIN IF Matches("true") THEN RETURN GlobTree.True; ELSIF Matches("false") THEN RETURN GlobTree.False; ELSIF Matches("not") THEN RETURN GlobTree.Not(ParseFactor()); ELSIF MatchesSpecial("(") THEN INC(subExprs); RETURN ParseExpr(); ELSIF MatchesSpecial(")") THEN ParseError("unexpected `)'"); ELSE factor := GetStringOrIdentifier(); RETURN GlobTree.Match(StripREPrefix(factor), <*NOWARN*> Options(factor)); END; END ParseFactor; (*-------------------------------------------------------------------------*) PROCEDURE ParseRule() RAISES {E} = VAR lhs : FindExpr.T; rhs : TEXT; BEGIN IF Matches("default") THEN IF NOT MatchesSpecial("=>") THEN ParseError("expected `=>'"); END; rhs := GetStringOrIdentifier(); IF rhs = NIL THEN ParseError("expected string or identifier"); END; IF defaultSeen THEN ParseError("multiple default declarations"); END; defaultSeen := TRUE; TRY lhs := FindExpr.New("\"*\""); EXCEPT ELSE END; self.lhs.addhi(lhs); self.rhs.addhi(rhs); ELSE lhs := ParseExpr(); IF lhs = NIL THEN ParseError("unparseable expression"); END; IF NOT MatchesSpecial("=>") THEN ParseError("expected `=>'"); END; IF Matches("ignoredir") OR Matches("skip") THEN self.idirs.addhi(lhs); ELSE IF Matches("ignore") THEN rhs := NIL; ELSE rhs := GetStringOrIdentifier(); IF rhs = NIL THEN ParseError("expected string or identifier"); END; END; self.lhs.addhi(lhs); self.rhs.addhi(rhs); END; END; END ParseRule; (*-------------------------------------------------------------------------*) PROCEDURE ParseAll() RAISES {E} = BEGIN WHILE NOT scanner.eof() DO ParseRule(); END; END ParseAll; (*-------------------------------------------------------------------------*) BEGIN (* New *) EVAL scanner.init(rd); scanner.skipComments := TRUE; scanner.nestingComments := TRUE; scanner.commentOpenSym := "(*"; scanner.commentCloseSym := "*)"; scanner.lineCommentSym := "#"; EVAL scanner.keywordToken.insert("default"); EVAL scanner.keywordToken.insert("ignore"); EVAL scanner.keywordToken.insert("ignoredir"); EVAL scanner.keywordToken.insert("skip"); EVAL scanner.keywordToken.insert("and"); EVAL scanner.keywordToken.insert("or"); EVAL scanner.keywordToken.insert("not"); EVAL scanner.keywordToken.insert("true"); EVAL scanner.keywordToken.insert("false"); EVAL scanner.compoundToken.insert("=>"); GetNext(); ParseAll(); IF NOT scanner.eof() THEN ParseError("premature end of file classification"); END; END ParseRules;
PROCEDURE---------------------------------------------------------------------------AddFromText (self : T; t : TEXT) RAISES {E} = VAR rd := TextRd.New(t); BEGIN ParseRules(self, rd); TRY Rd.Close(rd); EXCEPT ELSE END; (* we are not interested any more *) END AddFromText;
PROCEDURE---------------------------------------------------------------------------AddFromFile (self : T; fn : Pathname.T) RAISES {E} = VAR rd : FileRd.T; BEGIN TRY rd := FileRd.Open(fn); EXCEPT ELSE RAISE E("cannot open file " & fn); END; ParseRules(self, rd); TRY Rd.Close(rd); EXCEPT ELSE END; (* we are not interested any more *) END AddFromFile;
PROCEDURE---------------------------------------------------------------------------Patterns (self : T) : FindExprSeq.T = BEGIN RETURN self.lhs; END Patterns;
PROCEDURE---------------------------------------------------------------------------ClassSpecs (self : T) : TextSeq.T = BEGIN RETURN self.rhs; END ClassSpecs;
PROCEDURE---------------------------------------------------------------------------Pattern (self : T; n : CARDINAL) : FindExpr.T = BEGIN RETURN self.lhs.get(n); END Pattern;
PROCEDURE---------------------------------------------------------------------------ClassSpec (self : T; n : CARDINAL) : TEXT = BEGIN RETURN self.rhs.get(n); END ClassSpec;
PROCEDURE---------------------------------------------------------------------------IgnoreDir (self : T; dir : Pathname.T) : BOOLEAN = VAR match := FALSE; dirlast := Pathname.Last(dir); BEGIN FOR i := 0 TO self.idirs.size() - 1 DO WITH expr = self.idirs.get(i) DO TRY match := expr.test(dirlast); EXCEPT RegEx.Error => (* skip *) END; IF match THEN RETURN TRUE; END; END; END; RETURN FALSE; END IgnoreDir;
PROCEDURE---------------------------------------------------------------------------Matches (self : T; fn : Pathname.T) : INTEGER RAISES {E} = VAR rhs : TEXT; match : BOOLEAN; fnlast := Pathname.Last(fn); BEGIN FOR i := 0 TO self.lhs.size() - 1 DO WITH expr = self.lhs.get(i) DO IF expr = NIL THEN rhs := self.rhs.get(i); IF rhs = NIL THEN rhs := "NIL"; END; RAISE E("undefined expression for rule " & Fmt.Int(i) & ", rhs = `" & rhs & "'"); END; TRY match := expr.test(fnlast); EXCEPT RegEx.Error => (* skip *) END; IF match THEN IF self.rhs.get(i) = NIL THEN (* ignore rule *) RETURN -1; ELSE RETURN i; END; END; END; END; RETURN -1; END Matches;
PROCEDURE---------------------------------------------------------------------------Match (self : T; fn : Pathname.T; env : TextTextTbl.T := NIL) : TEXT RAISES {E} = VAR n : INTEGER; BEGIN n := Matches(self, fn); IF n = -1 THEN RETURN NIL; END; RETURN SubstClassSpec(self, n, fn, env); END Match;
PROCEDURE---------------------------------------------------------------------------SubstClassSpec (self : T; n : CARDINAL; fn : Pathname.T; env : TextTextTbl.T := NIL) : TEXT RAISES {E} = VAR res, spec : TEXT; BEGIN IF env = NIL THEN env := NEW(TextTextTbl.Default).init(); END; IF self.uppn THEN EVAL env.put("fn", PathRepr.Posix(fn)); EVAL env.put("dir", PathRepr.Posix(Pathname.Prefix(fn))); EVAL env.put("last", PathRepr.Posix(Pathname.Last(fn))); EVAL env.put("base", PathRepr.Posix(Pathname.Base(fn))); EVAL env.put("lastbase", PathRepr.Posix(Pathname.LastBase(fn))); EVAL env.put("ext", PathRepr.Posix(Pathname.LastExt(fn))); ELSE EVAL env.put("fn", fn); EVAL env.put("dir", Pathname.Prefix(fn)); EVAL env.put("last", Pathname.Last(fn)); EVAL env.put("base", Pathname.Base(fn)); EVAL env.put("lastbase", Pathname.LastBase(fn)); EVAL env.put("ext", Pathname.LastExt(fn)); END; IF n < 0 OR n >= self.rhs.size() THEN RAISE E("no such classification rule"); END; spec := self.rhs.get(n); IF spec = NIL THEN RETURN NIL; (* ignore rule *) END; TRY res := TextUtils.SubstituteVariables(spec, env); EXCEPT TextUtils.Error(e) => RAISE E("substitution failed: " & e); END; RETURN res; END SubstClassSpec;
PROCEDURE---------------------------------------------------------------------------New (t : TEXT; usePosixPathnames := FALSE) : T RAISES {E} = VAR fc := NEW(T).init(usePosixPathnames); BEGIN fc.addFromText(t); RETURN fc; END New;
PROCEDURERead (fn : Pathname.T; usePosixPathnames := FALSE) : T RAISES {E} = VAR fc := NEW(T).init(usePosixPathnames); BEGIN fc.addFromFile(fn); RETURN fc; END Read; BEGIN END FileClassification.