fsfind/src/FileClassification.m3


---------------------------------------------------------------------------
MODULE FileClassification;

IMPORT Rd, TextRd, FileRd, Text, TextSeq, TextTextTbl, Fmt, Pathname;
IMPORT Glob, GlobTree, RegEx, TextUtils, SimpleScanner, ScanToken, PathRepr;
IMPORT FindExpr, FindExprSeq;

REVEAL
  T = Public BRANDED "FileClassification 0.0" OBJECT
    lhs   : FindExprSeq.T;
    rhs   : TextSeq.T;
    idirs : FindExprSeq.T;
    uppn  : BOOLEAN;
  OVERRIDES
    init := Init;
    size := Size;
    addFromText := AddFromText;
    addFromFile := AddFromFile;
    patterns := Patterns;
    classSpecs := ClassSpecs;
    pattern := Pattern;
    classSpec := ClassSpec;
    ignoreDir := IgnoreDir;
    matches := Matches;
    match := Match;
    substClassSpec := SubstClassSpec;
  END;
---------------------------------------------------------------------------
PROCEDURE Init(self : T; usePosixPathnames := FALSE) : T =
  BEGIN
    self.idirs := NEW(FindExprSeq.T).init();
    self.lhs := NEW(FindExprSeq.T).init();
    self.rhs := NEW(TextSeq.T).init();
    self.uppn := usePosixPathnames;
    RETURN self;
  END Init;
---------------------------------------------------------------------------
PROCEDURE Size(self : T) : CARDINAL =
  BEGIN
    RETURN self.lhs.size();
  END Size;
---------------------------------------------------------------------------
PROCEDURE ParseRules(self : T; rd : Rd.T) RAISES {E} =
  VAR
    actToken    :  SimpleScanner.Token := NIL;
    scanner     := NEW(SimpleScanner.T);
    subExprs    := 0;
    defaultSeen := FALSE;

  (*-------------------------------------------------------------------------*)
  PROCEDURE ParseError(e : TEXT) RAISES {E} =
    VAR t : TEXT;
    BEGIN
      t := Fmt.F("parse error at token `%s': %s", actToken.repr, e);
      RAISE E(t);
    END ParseError;

  (*-------------------------------------------------------------------------*)
  PROCEDURE GetNext() RAISES {E} =
    BEGIN
      TRY
        actToken := scanner.nextToken();
      EXCEPT
        SimpleScanner.Error(e) => ParseError("scanner error: " & e);
      END;
    END GetNext;

  (*-------------------------------------------------------------------------*)
  PROCEDURE Matches(t1 : TEXT; t2 : TEXT := NIL;
                    kind := ScanToken.Kind.Keyword) : BOOLEAN
    RAISES {E} =

    (*-----------------------------------------------------------------------*)
    PROCEDURE Matches1(t : TEXT) : BOOLEAN =
      BEGIN
        RETURN actToken.kind = kind AND Text.Equal(actToken.repr, t);
      END Matches1;

    VAR ok := FALSE; memo := actToken;
    BEGIN
      IF Matches1(t1) THEN
        GetNext();
        IF t2 = NIL THEN
          ok := TRUE;
        ELSE
          IF Matches1(t2) THEN
            GetNext();
            ok := TRUE;
          ELSE
            scanner.pushBack(actToken);
            actToken := memo;
          END;
        END;
      END;
      RETURN ok;
    END Matches;

  (*-------------------------------------------------------------------------*)
  PROCEDURE MatchesSpecial(t1 : TEXT; t2 : TEXT := NIL) : BOOLEAN
    RAISES {E} =

    (*-----------------------------------------------------------------------*)
    PROCEDURE Matches1(t : TEXT) : BOOLEAN =
      BEGIN
        RETURN (actToken.kind = ScanToken.Kind.CompoundSymbol OR
                actToken.kind = ScanToken.Kind.Other) AND
               Text.Equal(actToken.repr, t);
      END Matches1;

    VAR ok := FALSE; memo := actToken;
    BEGIN
      IF Matches1(t1) THEN
        GetNext();
        IF t2 = NIL THEN
          ok := TRUE;
        ELSE
          IF Matches1(t2) THEN
            GetNext();
            ok := TRUE;
          ELSE
            scanner.pushBack(actToken);
            actToken := memo;
          END;
        END;
      END;
      RETURN ok;
    END MatchesSpecial;

  (*-------------------------------------------------------------------------*)
  PROCEDURE GetStringOrIdentifier() : TEXT RAISES {E} =
    VAR id := actToken.repr;
    BEGIN
      IF actToken.kind = ScanToken.Kind.String OR
         actToken.kind = ScanToken.Kind.Ident THEN
        GetNext();
        RETURN id;
      END;
      ParseError("expected string or identifier"); <* NOWARN *>
    END GetStringOrIdentifier;

  (*-------------------------------------------------------------------------*)
  PROCEDURE Options(s : TEXT) : Glob.MatchOptions =
    BEGIN
      IF s # NIL AND Text.Length(s) > 0 THEN
        IF Text.GetChar(s, 0) = FindExpr.RegularExpressionPrefix THEN
          RETURN Glob.MatchOptions{Glob.MatchOption.UseSimpleRE};
        END;
      END;
      RETURN Glob.MatchOptions{};
    END Options;

  (*-------------------------------------------------------------------------*)
  PROCEDURE StripREPrefix(s : TEXT) : TEXT =
    BEGIN
      IF s # NIL AND Text.Length(s) > 0 THEN
        IF Text.GetChar(s, 0) = FindExpr.RegularExpressionPrefix THEN
          RETURN Text.Sub(s, 1);
        END;
      END;
      RETURN s;
    END StripREPrefix;

  (*-------------------------------------------------------------------------*)
  PROCEDURE ParseExpr() : FindExpr.T RAISES {E} =
    VAR start : FindExpr.T;
    BEGIN
      start := ParseTerm();
      IF Matches("or") THEN
        RETURN GlobTree.Or(start, ParseExpr());
      ELSIF MatchesSpecial(")") THEN
        IF subExprs > 0 THEN
          DEC(subExprs);
          RETURN start;
        ELSE
          ParseError("too many `)'");
        END;
      END;
      RETURN start;
    END ParseExpr;

  (*-------------------------------------------------------------------------*)
  PROCEDURE ParseTerm() : FindExpr.T RAISES {E} =
    VAR start : FindExpr.T;
    BEGIN
      start := ParseFactor();
      IF Matches("and") THEN
        RETURN GlobTree.And(start, ParseTerm());
      ELSIF MatchesSpecial(")") THEN
        IF subExprs > 0 THEN
          DEC(subExprs);
          RETURN start;
        ELSE
          ParseError("too many `)'");
        END;
      END;
      RETURN start;
    END ParseTerm;

  (*-------------------------------------------------------------------------*)
  PROCEDURE ParseFactor() : FindExpr.T RAISES {E} =
    VAR factor : TEXT;
    BEGIN
      IF Matches("true") THEN
        RETURN GlobTree.True;
      ELSIF Matches("false") THEN
        RETURN GlobTree.False;
      ELSIF Matches("not") THEN
        RETURN GlobTree.Not(ParseFactor());
      ELSIF MatchesSpecial("(") THEN
        INC(subExprs);
        RETURN ParseExpr();
      ELSIF MatchesSpecial(")") THEN
        ParseError("unexpected `)'");
      ELSE
        factor := GetStringOrIdentifier();
        RETURN GlobTree.Match(StripREPrefix(factor),  <*NOWARN*>
                              Options(factor));
      END;
    END ParseFactor;

  (*-------------------------------------------------------------------------*)
  PROCEDURE ParseRule() RAISES {E} =
    VAR
      lhs : FindExpr.T;
      rhs : TEXT;
    BEGIN
      IF Matches("default") THEN
        IF NOT MatchesSpecial("=>") THEN
          ParseError("expected `=>'");
        END;
        rhs := GetStringOrIdentifier();
        IF rhs = NIL THEN
          ParseError("expected string or identifier");
        END;
        IF defaultSeen THEN
          ParseError("multiple default declarations");
        END;
        defaultSeen := TRUE;
        TRY
          lhs := FindExpr.New("\"*\"");
        EXCEPT ELSE END;
        self.lhs.addhi(lhs);
        self.rhs.addhi(rhs);
      ELSE
        lhs := ParseExpr();
        IF lhs = NIL THEN
          ParseError("unparseable expression");
        END;
        IF NOT MatchesSpecial("=>") THEN
          ParseError("expected `=>'");
        END;
        IF Matches("ignoredir") OR Matches("skip") THEN
          self.idirs.addhi(lhs);
        ELSE
          IF Matches("ignore") THEN
            rhs := NIL;
          ELSE
            rhs := GetStringOrIdentifier();
            IF rhs = NIL THEN
              ParseError("expected string or identifier");
            END;
          END;
          self.lhs.addhi(lhs);
          self.rhs.addhi(rhs);
        END;
      END;
    END ParseRule;

  (*-------------------------------------------------------------------------*)
  PROCEDURE ParseAll() RAISES {E} =
    BEGIN
      WHILE NOT scanner.eof() DO
        ParseRule();
      END;
    END ParseAll;

  (*-------------------------------------------------------------------------*)
  BEGIN (* New *)
    EVAL scanner.init(rd);
    scanner.skipComments    := TRUE;
    scanner.nestingComments := TRUE;
    scanner.commentOpenSym  := "(*";
    scanner.commentCloseSym := "*)";
    scanner.lineCommentSym  := "#";
    EVAL scanner.keywordToken.insert("default");
    EVAL scanner.keywordToken.insert("ignore");
    EVAL scanner.keywordToken.insert("ignoredir");
    EVAL scanner.keywordToken.insert("skip");
    EVAL scanner.keywordToken.insert("and");
    EVAL scanner.keywordToken.insert("or");
    EVAL scanner.keywordToken.insert("not");
    EVAL scanner.keywordToken.insert("true");
    EVAL scanner.keywordToken.insert("false");
    EVAL scanner.compoundToken.insert("=>");
    GetNext();
    ParseAll();
    IF NOT scanner.eof() THEN
      ParseError("premature end of file classification");
    END;
  END ParseRules;
---------------------------------------------------------------------------
PROCEDURE AddFromText(self : T; t : TEXT) RAISES {E} =
  VAR rd := TextRd.New(t);
  BEGIN
    ParseRules(self, rd);
    TRY
      Rd.Close(rd);
    EXCEPT ELSE END; (* we are not interested any more *)
  END AddFromText;
---------------------------------------------------------------------------
PROCEDURE AddFromFile(self : T; fn : Pathname.T) RAISES {E} =
  VAR rd : FileRd.T;
  BEGIN
    TRY
      rd := FileRd.Open(fn);
    EXCEPT ELSE
      RAISE E("cannot open file " & fn);
    END;
    ParseRules(self, rd);
    TRY
      Rd.Close(rd);
    EXCEPT ELSE END; (* we are not interested any more *)
  END AddFromFile;
---------------------------------------------------------------------------
PROCEDURE Patterns(self : T) : FindExprSeq.T =
  BEGIN
    RETURN self.lhs;
  END Patterns;
---------------------------------------------------------------------------
PROCEDURE ClassSpecs(self : T) : TextSeq.T =
  BEGIN
    RETURN self.rhs;
  END ClassSpecs;
---------------------------------------------------------------------------
PROCEDURE Pattern(self : T; n : CARDINAL) : FindExpr.T =
  BEGIN
    RETURN self.lhs.get(n);
  END Pattern;
---------------------------------------------------------------------------
PROCEDURE ClassSpec(self : T; n : CARDINAL) : TEXT =
  BEGIN
    RETURN self.rhs.get(n);
  END ClassSpec;
---------------------------------------------------------------------------
PROCEDURE IgnoreDir(self : T; dir : Pathname.T) : BOOLEAN =
  VAR
    match   := FALSE;
    dirlast := Pathname.Last(dir);
  BEGIN
    FOR i := 0 TO self.idirs.size() - 1 DO
      WITH expr = self.idirs.get(i) DO
        TRY
          match := expr.test(dirlast);
        EXCEPT
          RegEx.Error => (* skip *)
        END;
        IF match THEN
          RETURN TRUE;
        END;
      END;
    END;
    RETURN FALSE;
  END IgnoreDir;
---------------------------------------------------------------------------
PROCEDURE Matches(self : T; fn : Pathname.T) : INTEGER RAISES {E} =
  VAR
    rhs : TEXT;
    match : BOOLEAN;
    fnlast := Pathname.Last(fn);
  BEGIN
    FOR i := 0 TO self.lhs.size() - 1 DO
      WITH expr = self.lhs.get(i) DO
        IF expr = NIL THEN
          rhs := self.rhs.get(i);
          IF rhs = NIL THEN rhs := "NIL"; END;
          RAISE E("undefined expression for rule " &
                Fmt.Int(i) & ", rhs = `" & rhs & "'");
        END;
        TRY
          match := expr.test(fnlast);
        EXCEPT
          RegEx.Error => (* skip *)
        END;
        IF match THEN
          IF self.rhs.get(i) = NIL THEN
            (* ignore rule *)
            RETURN -1;
          ELSE
            RETURN i;
          END;
        END;
      END;
    END;
    RETURN -1;
  END Matches;
---------------------------------------------------------------------------
PROCEDURE Match(self : T; fn : Pathname.T;
                env : TextTextTbl.T := NIL) : TEXT RAISES {E} =
  VAR
    n : INTEGER;
  BEGIN
    n := Matches(self, fn);
    IF n = -1 THEN
      RETURN NIL;
    END;
    RETURN SubstClassSpec(self, n, fn, env);
  END Match;
---------------------------------------------------------------------------
PROCEDURE SubstClassSpec(self : T; n : CARDINAL; fn : Pathname.T;
                         env : TextTextTbl.T := NIL) : TEXT RAISES {E} =
  VAR res, spec : TEXT;
  BEGIN
    IF env = NIL THEN
      env := NEW(TextTextTbl.Default).init();
    END;
    IF self.uppn THEN
      EVAL env.put("fn", PathRepr.Posix(fn));
      EVAL env.put("dir", PathRepr.Posix(Pathname.Prefix(fn)));
      EVAL env.put("last", PathRepr.Posix(Pathname.Last(fn)));
      EVAL env.put("base", PathRepr.Posix(Pathname.Base(fn)));
      EVAL env.put("lastbase", PathRepr.Posix(Pathname.LastBase(fn)));
      EVAL env.put("ext", PathRepr.Posix(Pathname.LastExt(fn)));
    ELSE
      EVAL env.put("fn", fn);
      EVAL env.put("dir", Pathname.Prefix(fn));
      EVAL env.put("last", Pathname.Last(fn));
      EVAL env.put("base", Pathname.Base(fn));
      EVAL env.put("lastbase", Pathname.LastBase(fn));
      EVAL env.put("ext", Pathname.LastExt(fn));
    END;
    IF n < 0 OR n >= self.rhs.size() THEN
      RAISE E("no such classification rule");
    END;
    spec := self.rhs.get(n);
    IF spec = NIL THEN
      RETURN NIL; (* ignore rule *)
    END;
    TRY
      res := TextUtils.SubstituteVariables(spec, env);
    EXCEPT
      TextUtils.Error(e) => RAISE E("substitution failed: " & e);
    END;
    RETURN res;
  END SubstClassSpec;
---------------------------------------------------------------------------
PROCEDURE New(t : TEXT; usePosixPathnames := FALSE) : T RAISES {E} =
  VAR fc := NEW(T).init(usePosixPathnames);
  BEGIN
    fc.addFromText(t);
    RETURN fc;
  END New;
---------------------------------------------------------------------------
PROCEDURE Read(fn : Pathname.T; usePosixPathnames := FALSE) : T RAISES {E} =
  VAR fc := NEW(T).init(usePosixPathnames);
  BEGIN
    fc.addFromFile(fn);
    RETURN fc;
  END Read;

BEGIN
END FileClassification.

interface Glob is in:


interface GlobTree is in:


interface RegEx is in:


interface TextUtils is in: