MODULE-------------------- TCP stuff --------------------; IMPORT ASCII, ConnRW, Date, Env, FloatMode, Fmt, IP, Lex, Rd, TextList, TCP, Text, TextRd, TextWr, Thread, Wr; <* FATAL FloatMode.Trap, Rd.Failure *> CONST ProxyVar = "http_proxy"; NoProxyVar = "no_proxy"; (* This is the HTTP marker for "end of this line *) EndOfRequest = "\r\n"; TYPE MethodType = {Head, Get, GetDebug, Post}; REVEAL T = ROOT BRANDED OBJECT host : TEXT; port : INTEGER; noProxy: TextList.T; END; VAR mu := NEW(MUTEX); DefaultServer: T := NIL; Web
PROCEDURE-------------------- Exported procedures --------------------ChannelPut (channel: TCP.T; text: TEXT) RAISES {Wr.Failure, Thread.Alerted} = VAR len := Text.Length(text); buf: ARRAY [0 .. 2047] OF CHAR; BEGIN <* ASSERT len < NUMBER(buf) *> Text.SetChars(buf, text); channel.put(SUBARRAY(buf, 0, len)); END ChannelPut; PROCEDURENoProxyMatch (host, suffix: TEXT): BOOLEAN = VAR hostLen := Text.Length(host); suffixLen := Text.Length(suffix); BEGIN IF hostLen < suffixLen THEN RETURN FALSE END; FOR i := 1 TO suffixLen DO IF Text.GetChar(host, hostLen - i) # Text.GetChar(suffix, suffixLen - i) THEN RETURN FALSE END; END; RETURN TRUE; END NoProxyMatch; PROCEDUREOpenTCPConnect (server: T; VAR url: TEXT): TCP.T RAISES {IP.Error, Thread.Alerted, Error} = VAR addr : IP.Address; channel : TCP.T; host := server.host; port := server.port; list := server.noProxy; urlHost, urlPath: TEXT := NIL; urlPort : INTEGER := 80; BEGIN URLHostPort(url, urlHost, urlPort, urlPath); IF server.host = NIL THEN host := urlHost; port := urlPort; url := urlPath; ELSE WHILE list # NIL DO IF NoProxyMatch(urlHost, Pop(list)) THEN host := urlHost; port := urlPort; url := urlPath; END; END; END; IF IP.GetHostByName(host, addr) THEN channel := TCP.Connect(IP.Endpoint{addr, port}); ELSE RAISE IP.Error(NIL); END; RETURN channel; END OpenTCPConnect;
port must be initialized to default value splits URL into host, port, pathname
PROCEDUREforceCache is meaningless on POST callsURLHostPort ( url : TEXT; VAR (* OUT *) host: TEXT; VAR (* IN/OUT *) port: INTEGER; VAR (* OUT *) path: TEXT ) RAISES {Thread.Alerted, Error} = VAR rd := TextRd.New(url); BEGIN TRY TRY EVAL Lex.Scan(rd, NonColon); (* throw away https:// *) Lex.Skip(rd, Seps); host := Lex.Scan(rd, NonColon); IF Rd.GetChar(rd) = ':' THEN (* port ; otherwise default *) port := Lex.Int(rd); ELSE Rd.UnGetChar(rd); END; path := Rd.GetText(rd, 300); EXCEPT Lex.Error => RAISE Error(Fmt.F("Lexing error parsing %s\n", url)); | Rd.EndOfFile => path := "/"; (* okay; probably just an https://host/ *) END; FINALLY Rd.Close(rd); END; END URLHostPort; PROCEDURESetup (proxyURL, noProxyList: TEXT := NIL): T RAISES {Error} = VAR server := NEW(T, port := 8080); rd : TextRd.T; txt, noProxy: TEXT; garbage : TEXT; <* FATAL Rd.Failure, Thread.Alerted *> BEGIN IF proxyURL = NIL THEN proxyURL := Env.Get(ProxyVar); END; IF proxyURL = NIL THEN proxyURL := DefaultProxyHost; END; IF proxyURL = NIL THEN server.host := NIL; ELSE URLHostPort(proxyURL, server.host, server.port, garbage); END; IF noProxyList = NIL THEN noProxy := Env.Get(NoProxyVar); IF noProxy = NIL THEN noProxy := DefaultNoProxyList; END; TRY rd := TextRd.New(noProxy); LOOP txt := Lex.Scan(rd, NoProxyChars); IF Text.Length(txt) = 0 THEN EXIT END; server.noProxy := TextList.Cons(txt, server.noProxy); Lex.Skip(rd, NoProxySeps); END; FINALLY Rd.Close(rd); END; END; RETURN server END Setup; PROCEDUREGet ( url : TEXT; VAR header: Header; READONLY requestFields: ARRAY OF TEXT := DefaultRequestFields; forceCache, debug: BOOLEAN := FALSE; server : T := NIL ): Rd.T RAISES {Error, Thread.Alerted, IP.Error} = BEGIN IF debug THEN RETURN InternalDoRequest(MethodType.GetDebug, url, NIL, header, requestFields, forceCache, server); ELSE RETURN InternalDoRequest(MethodType.Get, url, NIL, header, requestFields, forceCache, server); END; END Get;
PROCEDURE-------------------- Internal routine for Get, GetHead, Post --------------------Post ( url : TEXT; argString: TEXT; VAR header : Header; READONLY requestFields: ARRAY OF TEXT := DefaultRequestFields; server: T := NIL): Rd.T RAISES {Error, Thread.Alerted, IP.Error} = BEGIN RETURN InternalDoRequest(MethodType.Post, url, argString, header, requestFields, FALSE, server); END Post; PROCEDUREGetHead (url: TEXT; READONLY requestFields: ARRAY OF TEXT := DefaultRequestFields; forceCache: BOOLEAN := FALSE; server : T := NIL ): Rd.T RAISES {Error, Thread.Alerted, IP.Error} = VAR header: Header; BEGIN RETURN InternalDoRequest( MethodType.Head, url, NIL, header, requestFields, forceCache, server); END GetHead;
PROCEDURE-------------------- Fun with Lex --------------------InternalDoRequest ( method : MethodType; url : TEXT; postMethodArgs: TEXT := NIL; VAR header : Header; READONLY requestFields: ARRAY OF TEXT := DefaultRequestFields; forceCache: BOOLEAN := FALSE; server : T := NIL ): Rd.T RAISES {Error, Thread.Alerted, IP.Error} = VAR channel: TCP.T; rd : Rd.T; <* FATAL Wr.Failure *> BEGIN IF server = NIL THEN LOCK mu DO IF DefaultServer = NIL THEN DefaultServer := Setup() END; server := DefaultServer END; END; channel := OpenTCPConnect(server, url); CASE method OF | MethodType.Get, MethodType.GetDebug => ChannelPut(channel, Fmt.F("GET %s HTTP/1.0%s", url, EndOfRequest)); | MethodType.Post => ChannelPut(channel, Fmt.F("POST %s %s HTTP/1.0%s", url, postMethodArgs, EndOfRequest)); | MethodType.Head => ChannelPut(channel, Fmt.F("HEAD %s HTTP/1.0%s", url, EndOfRequest)); END; FOR i := 0 TO LAST(requestFields) DO ChannelPut(channel, Fmt.F("%s%s", requestFields[i], EndOfRequest)); END; IF forceCache THEN ChannelPut(channel, Fmt.F("Pragma: no-cache%s", EndOfRequest)); END; ChannelPut(channel, EndOfRequest); rd := ConnRW.NewRd (channel); IF method = MethodType.Head OR method = MethodType.GetDebug THEN RETURN rd; END; header := ParseHead(rd); RETURN rd; END InternalDoRequest;
CONST NonColon = SET OF CHAR{'\000'.. '~'} - SET OF CHAR{':', '/', '\n'}; ColonSpace = SET OF CHAR{' ', '\t', ':', ','}; Seps = SET OF CHAR{':', '/'}; NoProxySeps = SET OF CHAR{',', ' ', '\n'}; NoProxyChars = SET OF CHAR{'\000'.. '~'} - NoProxySeps;format is HTTP/1.0 200 OK. If header is gibberish, that means the server isn't responding.
PROCEDUREEnd of header marker is vague: should be \r\n\r\n, but experimentallySplitStatusLine (rd: Rd.T; VAR h: Header) RAISES {Thread.Alerted} = BEGIN TRY h.httpVersion := Lex.Scan(rd); Lex.Skip(rd); h.statusCode := Lex.Int(rd); Lex.Skip(rd); h.reason := Rd.GetLine(rd); Lex.Skip(rd); EXCEPT Rd.Failure, Lex.Error, Rd.EndOfFile => h.statusCode := 404; h.reason := "The information server either is not accessible or is refusing to serve the document to you."; END; END SplitStatusLine; PROCEDUREGetContentType (rd: Rd.T; VAR type: MIMEType; VAR subtype: TEXT) RAISES {Thread.Alerted, Error} = VAR t: TEXT; BEGIN TRY t := Lex.Scan(rd, SET OF CHAR{'A'.. 'Z', 'a'.. 'z'}); IF CIEqual(t, "text") THEN type := MIMEType.Text ELSIF CIEqual(t, "multipart") THEN type := MIMEType.Multipart; ELSIF CIEqual(t, "message") THEN type := MIMEType.Message; ELSIF CIEqual(t, "image") THEN type := MIMEType.Image; ELSIF CIEqual(t, "audio") THEN type := MIMEType.Audio; ELSIF CIEqual(t, "video") THEN type := MIMEType.Video; ELSIF CIEqual(t, "application") THEN type := MIMEType.Application; ELSIF CIEqual(Text.Sub(t, 0, 2), "X-") THEN type := MIMEType.Xperimental; ELSE RAISE Error( Fmt.F( "Unrecognized MIME type ``%s''in content-type field\n", t)); END; EVAL Rd.GetChar(rd); subtype := Lex.Scan(rd); EVAL Rd.GetLine(rd); EXCEPT Rd.Failure, Rd.EndOfFile => RAISE Error("Error in content-type field\n"); END; END GetContentType;
two or more carriage returns separated by linefeed
seems to be about
the best I can do.
PROCEDUREinternal ref; don't want slashParseHead (rd: Rd.T): Header RAISES {Error, Thread.Alerted} = VAR h : Header; label: TEXT; <* FATAL Rd.Failure *> BEGIN TRY SplitStatusLine(rd, h); LOOP label := Lex.Scan(rd, NonColon); Lex.Skip(rd, ColonSpace); IF label = NIL OR Text.Length(label) = 0 OR Text.GetChar(label, 0) = '\r' THEN Lex.Skip(rd); EXIT (* end of header, we hope *) ELSIF CIEqual(label, "Allowed") OR CIEqual(label, "Allow") THEN h.allowed := Rd.GetLine(rd); ELSIF CIEqual(label, "Public") THEN h.public := Rd.GetLine(rd); ELSIF CIEqual(label, "Content-Length") THEN h.contentLength := Lex.Int(rd); EVAL Rd.GetLine(rd); ELSIF CIEqual(label, "Content-Encoding") THEN h.encoding := Rd.GetLine(rd); ELSIF CIEqual(label, "Content-type") THEN GetContentType(rd, h.contentType, h.contentSubType); ELSIF CIEqual(label, "Date") THEN h.date := Rd.GetLine(rd); ELSIF CIEqual(label, "Expires") THEN h.expires := Rd.GetLine(rd); ELSIF CIEqual(label, "Last-Modified") THEN h.lastModified := Rd.GetLine(rd); ELSIF CIEqual(label, "Server") THEN h.server := Rd.GetLine(rd); ELSIF CIEqual(label, "MIME-version") THEN h.MIMEVersion := Rd.GetLine(rd); ELSIF CIEqual(label, "Title") THEN h.title := Rd.GetLine(rd); ELSIF CIEqual(label, "Location") THEN h.location := Rd.GetLine(rd); ELSE (* pitch 'em *) EVAL Rd.GetLine(rd); END; END; EXCEPT | Lex.Error, Rd.EndOfFile => RAISE Error("Error while parsing http header"); END; RETURN h; END ParseHead; PROCEDUREAbsoluteURL (url: TEXT; base: TEXT): TEXT = VAR i: INTEGER; BEGIN IF url = NIL OR Text.Empty(url) THEN RETURN NIL END; IF base = NIL OR Text.Empty(base) THEN RETURN url END; (* if the URL starts with foo:// or file:/, it's OK; otherwise, strip off the foo: *) i := Text.FindChar(url, ':'); IF i # -1 AND Text.Length(url) > i + 2 THEN IF Text.GetChar(url, i + 1) # '/' OR (Text.GetChar(url, i + 2) # '/' AND (NOT Text.Equal("file",Text.Sub(url,0,i)))) THEN url := Text.Sub(url, i + 1) END END; IF Text.GetChar(url, 0) = '/' THEN (* Relative to base's host. *) i := Text.FindChar(base, ':'); IF i # -1 AND Text.Equal("file",Text.Sub(base,0,i)) THEN url := "file:" & url; ELSE i := Text.FindChar(base, '/', i + 3); IF i = -1 THEN (* base doesn't end with a slash *) url := base & url ELSE url := Text.Sub(base, 0, i) & url END; END;
ELSIF Text.GetChar(url, 0) = '#' THEN url := base & url; ELSIF Text.FindChar(url, ':', 0) # -1 THEN (* Absolute form already. *) ELSE (* Relative to base's host and directory. *) i := Text.FindCharR(base, '/'); url := Text.Sub(base, 0, i) & "/" & url; END; (* Add trailing slash if need be. *) i := Text.FindChar(url, ':', 0); IF Text.FindChar(url, '/', i + 3) = -1 THEN url := url & "/"; END; RETURN url; END AbsoluteURL;Brute strength and ignorance.
PROCEDURE----------------------------- Date parsing -------EncodeURL (t: TEXT): TEXT RAISES {Thread.Alerted} = VAR c : CHAR; rd := TextRd.New(t); wr := TextWr.New(); <* FATAL Wr.Failure *> BEGIN TRY LOOP c := Rd.GetChar(rd); CASE c OF | ' ' => Wr.PutChar(wr, '+'); | '!' => Wr.PutText(wr, "%21"); | '"' => Wr.PutText(wr, "%22"); | '#' => Wr.PutText(wr, "%23"); | '$' => Wr.PutText(wr, "%24"); | '%' => Wr.PutText(wr, "%25"); | '&' => Wr.PutText(wr, "%26"); | '\'' => Wr.PutText(wr, "%27"); | '(' => Wr.PutText(wr, "%28"); | ')' => Wr.PutText(wr, "%29"); | '+' => Wr.PutText(wr, "%2B"); | ',' => Wr.PutText(wr, "%2C"); | '/' => Wr.PutText(wr, "%2F"); | ':' => Wr.PutText(wr, "%3A"); | ';' => Wr.PutText(wr, "%3B"); | '<' => Wr.PutText(wr, "%3C"); | '=' => Wr.PutText(wr, "%3D"); | '>' => Wr.PutText(wr, "%3E"); | '?' => Wr.PutText(wr, "%3F"); | '[' => Wr.PutText(wr, "%5B"); | '\\' => Wr.PutText(wr, "%5C"); | ']' => Wr.PutText(wr, "%5D"); | '^' => Wr.PutText(wr, "%5E"); | '{' => Wr.PutText(wr, "%7B"); | '|' => Wr.PutText(wr, "%7C"); | '}' => Wr.PutText(wr, "%7D"); | '~' => Wr.PutText(wr, "%7E"); ELSE Wr.PutChar(wr, c); END; END; EXCEPT Rd.EndOfFile => Rd.Close(rd); RETURN TextWr.ToText(wr); END; END EncodeURL;
Date format is : Monday, 12-Dec-94 04:02:35 GMT
PROCEDURE--------------Utility --------------------ToDate (t: HTMLDate): Date.T RAISES {Error, Thread.Alerted} = VAR d : Date.T; rd: Rd.T := NIL; BEGIN TRY rd := TextRd.New(t); d.weekDay := GetDay(Lex.Scan(rd)); (* includes the trailing comma *) Lex.Skip(rd); d.day := Lex.Int(rd); EVAL Rd.GetChar(rd); (* hyphen *) d.month := GetMonth(Lex.Scan(rd, SET OF CHAR{'A'.. 'Z', 'a'.. 'z'})); EVAL Rd.GetChar(rd); (* hyphen *) d.year := 1900 + Lex.Int(rd); Lex.Skip(rd); (* hyphen *) d.hour := Lex.Int(rd); EVAL Rd.GetChar(rd); d.minute := Lex.Int(rd); EVAL Rd.GetChar(rd); d.second := Lex.Int(rd); Lex.Skip(rd); d.zone := Lex.Scan(rd); (* time zone *) d.offset := 0; (* make sure it's GMT *) Rd.Close(rd); RETURN d; EXCEPT Rd.EndOfFile, Lex.Error => Rd.Close(rd); RAISE Error(Fmt.F("Error parsing date string %s\n", t)); | Error (e) => RAISE Error(Fmt.F("Error %s parsing date string %s\n", e, t)); END; END ToDate; PROCEDUREGetMonth (t: TEXT): Date.Month RAISES {Error} = CONST Months = ARRAY OF TEXT{"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; BEGIN FOR i := 0 TO LAST(Months) DO IF Text.Equal(t, Months[i]) THEN RETURN VAL(i, Date.Month); END; END; RAISE Error(Fmt.F("Can't parse month %s\n", t)); END GetMonth; PROCEDUREGetDay (t: TEXT): Date.WeekDay RAISES {Error} = CONST Days = ARRAY OF TEXT{"Sunday,", "Monday,", "Tuesday,", "Wednesday,", "Thursday,", "Friday,", "Saturday,"}; BEGIN IF t = NIL OR Text.Length(t) < 4 THEN RAISE Error("Empty or truncated day"); END; FOR i := 0 TO LAST(Days) DO IF Text.Equal(t, Days[i]) THEN RETURN VAL(i, Date.WeekDay); END; END; RAISE Error(Fmt.F("Can't parse day %s\n", t)); END GetDay;
PROCEDUREPop (VAR list: TextList.T): TEXT = VAR car := list.head; BEGIN list := list.tail; RETURN car; END Pop; PROCEDURECIEqual (t, u: TEXT): BOOLEAN RAISES {} = BEGIN IF Text.GetChar(t,0) # Text.GetChar(u,0) THEN RETURN FALSE END; VAR lt: CARDINAL := Text.Length(t); lu: CARDINAL := Text.Length(u); i : CARDINAL := 0; BEGIN IF lt = lu THEN WHILE i < lt DO IF ASCII.Upper[Text.GetChar(t,i)] # ASCII.Upper[Text.GetChar(u,i)] THEN RETURN FALSE ELSE INC(i) END; END; RETURN TRUE; ELSE RETURN FALSE END; END END CIEqual; BEGIN END Web.