m3core/src/text/String16.m3


 Copyright 1996-2000, Critical Mass, Inc.  All rights reserved. 
 See file COPYRIGHT-CMASS for details. 

UNSAFE MODULE String16;

IMPORT Word;

TYPE Ptr = UNTRACED REF WIDECHAR;

PROCEDURE Equal (a, b: ADDRESS;  len: CARDINAL): BOOLEAN =
Return TRUE if the strings of len characters starting at a and b have the same (case-sensitive) contents.
 PRE: a MOD ADRSIZE(WIDECHAR) = 0 AND b MOD ADRSIZE(WIDECHAR) = 0  
  
VAR pa := LOOPHOLE (a, Ptr);  pb := LOOPHOLE (b, Ptr);
  BEGIN
    IF (len = 0) THEN RETURN TRUE; END;
    IF (a = NIL) OR (b = NIL) THEN RETURN FALSE; END;
    WHILE (len > 0) AND (pa^ = pb^) DO
      DEC (len);  INC (pa, ADRSIZE (pa^));  INC (pb, ADRSIZE (pb^));
    END;
    RETURN (len = 0);
  END Equal;

PROCEDURE Compare (a: ADDRESS;  len_a: CARDINAL;
                   b: ADDRESS;  len_b: CARDINAL): [-1..1] =
Return -1 if string a occurs before string b, 0 if the strings are equal, and +1 if a occurs after b in lexicographic order.
 PRE: a MOD ADRSIZE(WIDECHAR) = 0 AND b MOD ADRSIZE(WIDECHAR) = 0  
  
CONST Map = ARRAY BOOLEAN OF [-1..+1] { +1, -1 };
  VAR
    pa  := LOOPHOLE (a, Ptr);
    pb  := LOOPHOLE (b, Ptr);
    len := MIN (len_a, len_b);
  BEGIN
    WHILE (len > 0) AND (pa^ = pb^) DO
      DEC (len);  INC (pa, ADRSIZE (pa^));  INC (pb, ADRSIZE (pb^));
    END;
    IF    (len # 0)       THEN RETURN Map [pa^ < pb^];
    ELSIF (len_a # len_b) THEN RETURN Map [len_a < len_b];
    ELSE                       RETURN 0;
    END;
  END Compare;

PROCEDURE Hash (a: ADDRESS;  len: CARDINAL;  initial: INTEGER): INTEGER =
Return a hash function of the contents of string a starting with the value initial.
 PRE: a MOD ADRSIZE(WIDECHAR) = 0  
  
VAR p := LOOPHOLE (a, Ptr);  result := initial;
  BEGIN
    IF (p = NIL) THEN RETURN result; END;
    WHILE (len > 0) DO
      result := Word.Xor (Word.LeftRotate (result, 13), ORD (p^));
      INC (p, ADRSIZE (p^));  DEC (len);
    END;
    RETURN result;
  END Hash;

PROCEDURE FindChar (a: ADDRESS;  len: CARDINAL;  c: WIDECHAR): INTEGER =
If c = a[i] for some i in [0~..~len-1], return the smallest such i; otherwise, return -1.
 PRE: a MOD ADRSIZE(WIDECHAR) = 0  
  
VAR p := LOOPHOLE (a, Ptr);  cnt := len;
  BEGIN
    IF (p = NIL) THEN RETURN -1; END;
    WHILE (cnt > 0) DO
      IF (p^ = c) THEN RETURN len - cnt; END;
      INC (p, ADRSIZE (p^));  DEC (cnt);
    END;
    RETURN -1;
  END FindChar;

PROCEDURE FindCharR (a: ADDRESS;  len: CARDINAL;  c: WIDECHAR): INTEGER =
If c = a[i] for some i in [0~..~len-1], return the largest such i; otherwise, return -1.
 PRE: a MOD ADRSIZE(WIDECHAR) = 0  
  
VAR p := LOOPHOLE (a, Ptr);
  BEGIN
    IF (p = NIL) THEN RETURN -1; END;
    INC (p, len * ADRSIZE (p^));
    WHILE (len > 0) DO
      DEC (p, ADRSIZE (p^));  DEC (len);
      IF (p^ = c) THEN RETURN len; END;
    END;
    RETURN -1;
  END FindCharR;

CONST WideCharBitsize = BITSIZE (WIDECHAR);
CONST WideCharsPerWord = BITSIZE (Word.T) DIV BITSIZE (WIDECHAR);
CONST CharMask = Word.Not (16_FF);
CONST CharMask1 = Word.Not (Word.Shift (16_FF, WideCharBitsize));
CONST CharMask2 = Word.Not (Word.Shift (16_FF, 2*WideCharBitsize));
CONST CharMask3 = Word.Not (Word.Shift (16_FF, 3*WideCharBitsize));
CONST CharsInWordMask = Word.And ( Word.And (CharMask, CharMask1)
                                 , Word.And (CharMask2, CharMask3)
                                 );

TYPE WordPtr = UNTRACED REF Word.T;

PROCEDURE HasWideChars (a: ADDRESS; len: CARDINAL): BOOLEAN =
Return ORD(a[i]) > LAST (CHAR), for some i in [0~..~len-1].
 PRE: a MOD ADRSIZE(WIDECHAR) = 0 
 This algorithm assumes ADRSIZE(Word.T) MOD ADRSIZE(WIDECHAR) = 0.   
 This algorithm assumes FIRST(CHAR) = 0 AND LAST(CHAR) = 16_FF.   
  
VAR p := LOOPHOLE (a, Ptr);
  BEGIN
    IF (p = NIL) THEN RETURN FALSE; END;
    (* Do non-word-aligned WIDECHARs at the left. *)
    WHILE Word.Mod (LOOPHOLE(p, Word.T), ADRSIZE (Word.T)) # 0 AND len > 0 DO
      IF Word.And (ORD (p^), CharMask) # 0 THEN RETURN TRUE; END;
      INC (p, ADRSIZE (p^));
      DEC (len);
    END;
    (* Do word-aligned WIDECHARs in the middle. *)
    WHILE len >= WideCharsPerWord DO
      IF Word.And (ORD (LOOPHOLE (p, WordPtr)^), CharsInWordMask) # 0 THEN
        RETURN TRUE;
      END;
      INC (p, ADRSIZE (Word.T));
      DEC (len, WideCharsPerWord);
    END;
    (* Do remaining non-word-aligned WIDECHARs at the right. *)
    WHILE len > 0 DO
      IF Word.And (ORD (p^), CharMask) # 0 THEN RETURN TRUE; END;
      INC (p, ADRSIZE (p^));
      DEC (len);
    END;
    RETURN FALSE;
  END HasWideChars;

PROCEDURE ArrayStart (READONLY a: ARRAY OF WIDECHAR): ADDRESS =
Returns the address of the first character of a if it is non-empty, otherwise returns NIL. WARNING: the returned address is only valid as long as a does not move. To prevent heap allocated arrays from moving, keep the returned address on the stack.
  BEGIN
    IF NUMBER (a) < 1 THEN RETURN NIL; END;
    RETURN ADR (a[0]);
  END ArrayStart;

BEGIN
END String16.