primitives.lisp
  1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
2
3 ;;; Copyright (c) 2004-2010, Dr. Edmund Weitz. All rights reserved.
4
5 ;;; Redistribution and use in source and binary forms, with or without
6 ;;; modification, are permitted provided that the following conditions
7 ;;; are met:
8
9 ;;; * Redistributions of source code must retain the above copyright
10 ;;; notice, this list of conditions and the following disclaimer.
11
12 ;;; * Redistributions in binary form must reproduce the above
13 ;;; copyright notice, this list of conditions and the following
14 ;;; disclaimer in the documentation and/or other materials
15 ;;; provided with the distribution.
16
17 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
18 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 (in-package :url-rewrite)
30
31 (declaim (inline peek-char*))
32 (defun peek-char* ()
33 "PEEK-CHAR with input stream bound to *STANDARD-INPUT* and returning
34 NIL on EOF."
35 (peek-char nil nil nil))
36
37 (declaim (inline whitespacep))
38 (defun whitespacep (c)
39 "Checks whether C is a whitespace character."
40 (find c '(#\Space #\Tab #\Newline #\Linefeed #\Return #\Page)))
41
42 (declaim (inline letterp))
43 (defun letterp (c)
44 "Checks whether C is a character between A and Z
45 \(case-insensitive)."
46 (and (characterp c)
47 (or (char<= #\a c #\z)
48 (char<= #\A c #\Z))))
49
50 (declaim (inline name-char-p))
51 (defun name-char-p (c)
52 "Checks whether C is a name constituent character in the sense of
53 HTML."
54 (and (characterp c)
55 (or (letterp c)
56 (digit-char-p c)
57 (char= c #\-)
58 (char= c #\.))))
59
60 (defun comment-start-p ()
61 "Checks whether *STANDARD-OUTPUT* currently 'looks at' the string
62 \"--\". Will move the position within the stream by one unless the
63 first characters it sees is not a hyphen."
64 (unless (eql (peek-char*) #\-)
65 ;; if the first character isn't #\- we can return immediately
66 (return-from comment-start-p nil))
67 ;; otherwise read the #\- so we can check the next character
68 (read-char)
69 (eql (peek-char*) #\-))
70
71 (defun read-while (predicate &key (skip t) (write-through t))
72 "Reads characters from *STANDARD-INPUT* while PREDICATE returns a
73 true value for each character. Returns the string which was read
74 unless SKIP is true. Writes all characters read to *STANDARD-OUTPUT*
75 if WRITE-THROUGH is true. On EOF the string read so far is returned."
76 (let ((collector (or skip
77 (make-array 0
78 :element-type 'character
79 :fill-pointer t
80 :adjustable t))))
81 (handler-case
82 (loop while (funcall predicate (peek-char)) do
83 (let ((char (read-char)))
84 (when write-through
85 (write-char char))
86 (unless skip
87 (vector-push-extend char collector)))
88 finally (return (and (not skip) collector)))
89 (end-of-file ()
90 (and (not skip) collector)))))
91
92 (defun read-until (string &key (skip t) (write-through t))
93 "Reads characters from *STANDARD-INPUT* up to and including STRING.
94 Returns the string which was read \(excluding STRING) unless SKIP is
95 true. Writes all characters read to *STANDARD-OUTPUT* if
96 WRITE-THROUGH is true. On EOF the string read so far is returned."
97 (let* ((length (length string))
98 (offsets
99 ;; we first check whether some substring which starts
100 ;; STRING can be found again later in STRING - this is
101 ;; necessary because we only peek one character ahead
102 (cond ((gethash string *find-string-hash*))
103 (t (setf (gethash string *find-string-hash*)
104 ;; the resulting array of offsets is
105 ;; cached in *FIND-STRING-HASH* so we can
106 ;; use it again in case READ-UNTIL is
107 ;; called with the same STRING argument
108 (loop with offsets = (make-array length
109 :initial-element nil)
110 for i from 1 below length
111 ;; check if STRING starting from 0
112 ;; has something in common with
113 ;; STRING starting from I
114 for mismatch = (mismatch string string
115 :start1 i :test #'char=)
116 when (> mismatch i)
117 ;; if this is the case remember the
118 ;; length of the match plus the
119 ;; character which must follow in
120 ;; OFFSETS
121 do (push (cons (char string (- mismatch i))
122 (1+ (- mismatch i)))
123 (svref offsets i))
124 finally (return offsets))))))
125 (collector (or skip
126 (make-array 0
127 :element-type 'character
128 :fill-pointer t
129 :adjustable t))))
130 (handler-case
131 (loop for i = 0 then (cond (match (1+ i))
132 ;; if there is an offset (see above)
133 ;; we don't have to start from the
134 ;; beginning of STRING
135 ((cdr (assoc c (svref offsets i))))
136 (t 0))
137 for c = (peek-char)
138 for match = (char= c (char string i))
139 while (or (not match) (< (1+ i) length)) do
140 (cond (skip (read-char))
141 (t (vector-push-extend (read-char) collector)))
142 when write-through do
143 (write-char c)
144 finally (if write-through
145 (write-char (read-char))
146 (read-char))
147 (unless skip
148 ;; decrement the fill pointer because collector now also
149 ;; contains STRING itself
150 (decf (fill-pointer collector) (1- length)))
151 (return (and (not skip) collector)))
152 (end-of-file ()
153 (and (not skip) collector)))))
154