rfc2388.lisp
  1 ;;;; -*- mode: LISP; package: RFC2388 -*-
2 ;;;; Copyright (c) 2003 Janis Dzerins
3 ;;;; Modifications for TBNL Copyright (c) 2004 Michael Weber and Dr. Edmund Weitz
4 ;;;;
5 ;;;; Redistribution and use in source and binary forms, with or without
6 ;;;; modification, are permitted provided that the following conditions
7 ;;;; are met:
8 ;;;; 1. Redistributions of source code must retain the above copyright
9 ;;;; notice, this list of conditions and the following disclaimer.
10 ;;;; 2. Redistributions in binary form must reproduce the above copyright
11 ;;;; notice, this list of conditions and the following disclaimer in the
12 ;;;; documentation and/or other materials provided with the distribution.
13 ;;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
14 ;;;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
15 ;;;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
16 ;;;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
17 ;;;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
18 ;;;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
19 ;;;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
20 ;;;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 ;;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
22 ;;;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
24 #+xcvb (module (:depends-on ("packages")))
25
26 (in-package :rfc2388)
27
28
29
30 ;;; Utility functions
31
32
33 (defun lwsp-char-p (char)
34 "Returns true if CHAR is a linear-whitespace-char (LWSP-char). Either
35 space or tab, in short."
36 (or (char= char #\space)
37 (char= char #\tab)))
38
39
40 ;;; *** This actually belongs to RFC2046
41 ;;;
42 (defun read-until-next-boundary (stream boundary &optional discard out-stream)
43 "Reads from STREAM up to the next boundary. Returns two values: read
44 data (nil if DISCARD is true), and true if the boundary is not last
45 (i.e., there's more data)."
46 ;; Read until [CRLF]--boundary[--][transport-padding]CRLF
47 ;; States: 1 2 345 67 8 9 10
48 ;;
49 ;; *** This will WARN like crazy on some bad input -- should only do each
50 ;; warning once.
51
52 (let ((length (length boundary)))
53 (unless (<= 1 length 70)
54 (warn "Boundary has invalid length -- must be between 1 and 70, but is: ~S" length))
55 (when (lwsp-char-p (schar boundary (1- length)))
56 (warn "Boundary has trailing whitespace: ~S" boundary)))
57
58 (flet ((run (result)
59 "This one writes everything up to a boundary to RESULT stream,
60 and returns false if the closing delimiter has been read, and
61 true otherwise."
62 (let ((state 1)
63 (boundary-index 0)
64 (boundary-length (length boundary))
65 (closed nil)
66 (queued-chars (make-string 4))
67 (queue-index 0)
68 char
69 (leave-char nil))
70
71 (flet ((write-queued-chars ()
72 (dotimes (i queue-index)
73 (write-char (schar queued-chars i) result))
74 (setf queue-index 0))
75
76 (enqueue-char ()
77 (setf (schar queued-chars queue-index) char)
78 (incf queue-index)))
79
80 (loop
81
82 (if leave-char
83 (setq leave-char nil)
84 (setq char (read-char stream nil nil)))
85
86 (unless char
87 (setq closed t)
88 (return))
89
90 #-(and)
91 (format t "~&S:~D QI:~D BI:~2,'0D CH:~:[~;*~]~S~%"
92 state queue-index boundary-index leave-char char)
93
94 (case state
95 (1 ;; optional starting CR
96 (cond ((char= char #\return)
97 (enqueue-char)
98 (setq state 2))
99 ((char= char #\-)
100 (setq leave-char t
101 state 3))
102 (t
103 (write-char char result))))
104
105 (2 ;; optional starting LF
106 (cond ((char= char #\linefeed)
107 (enqueue-char)
108 (setq state 3))
109 (t
110 (write-queued-chars)
111 (setq leave-char t
112 state 1))))
113
114 (3 ;; first dash in dash-boundary
115 (cond ((char= char #\-)
116 (enqueue-char)
117 (setq state 4))
118 (t
119 (write-queued-chars)
120 (setq leave-char t
121 state 1))))
122
123 (4 ;; second dash in dash-boundary
124 (cond ((char= char #\-)
125 (enqueue-char)
126 (setq state 5))
127 (t
128 (write-queued-chars)
129 (setq leave-char t
130 state 1))))
131
132 (5 ;; boundary
133 (cond ((char= char (schar boundary boundary-index))
134 (incf boundary-index)
135 (when (= boundary-index boundary-length)
136 (setq state 6)))
137 (t
138 (write-queued-chars)
139 (write-sequence boundary result :end boundary-index)
140 (setq boundary-index 0
141 leave-char t
142 state 1))))
143
144 (6 ;; first dash in close-delimiter
145 (cond ((char= char #\-)
146 (setq state 7))
147 (t
148 (setq leave-char t)
149 (setq state 8))))
150
151 (7 ;; second dash in close-delimiter
152 (cond ((char= char #\-)
153 (setq closed t
154 state 8))
155 (t
156 ;; this is a strange situation -- only two dashes, linear
157 ;; whitespace or CR is allowed after boundary, but there was
158 ;; a single dash... One thing is clear -- this is not a
159 ;; close-delimiter. Hence this is garbage what we're looking
160 ;; at!
161 (warn "Garbage where expecting close-delimiter!")
162 (setq leave-char t)
163 (setq state 8))))
164
165 (8 ;; transport-padding (LWSP* == [#\space #\tab]*)
166 (cond ((lwsp-char-p char)
167 ;; ignore these
168 )
169 (t
170 (setq leave-char t)
171 (setq state 9))))
172
173 (9 ;; CR
174 (cond ((char= char #\return)
175 (setq state 10))
176 (t
177 (warn "Garbage where expecting CR!"))))
178
179 (10 ;; LF
180 (cond ((char= char #\linefeed)
181 ;; the end
182 (return))
183 (t
184 (warn "Garbage where expecting LF!")))))))
185 (not closed))))
186
187 (if discard
188 (let ((stream (make-broadcast-stream)))
189 (values nil (run stream)))
190 (let* ((stream (or out-stream (make-string-output-stream)))
191 (closed (run stream)))
192 (values (or out-stream (get-output-stream-string stream))
193 closed)))))
194
195
196 (defun make-tmp-file-name ()
197 (if (find-package :tbnl)
198 (funcall (find-symbol #.(string '#:make-tmp-file-name) :tbnl))
199 (error "WRITE-CONTENT-TO-FILE keyword argument to PARSE-MIME is supported in TBNL only at the moment.")))
200
201
202
203 ;;; Header parsing
204
205
206 (defstruct (header (:type list)
207 (:constructor make-header (name value parameters)))
208 name
209 value
210 parameters)
211
212
213 (defun skip-linear-whitespace (string &key (start 0) end)
214 "Returns the position of first non-linear-whitespace character in STRING
215 bound by START and END."
216 (position-if-not #'lwsp-char-p string :start start :end end))
217
218
219 (defgeneric parse-header (source &optional start-state)
220 (:documentation "Parses SOURCE and returs a single MIME header.
221
222 Header is a list of the form (NAME VALUE PARAMETERS), PARAMETERS
223 is a list of (NAME . VALUE)"))
224
225
226 (defmethod parse-header ((source string) &optional (start-state :name))
227 (with-input-from-string (in source)
228 (parse-header in start-state)))
229
230
231 ;;; *** I don't like this parser -- it will have to be rewritten when I
232 ;;; make my state-machine parser-generator macro!
233 ;;;
234 (defmethod parse-header ((stream stream) &optional (start-state :name))
235 "Returns a MIME part header, or NIL, if there is no header. Header is
236 terminated by CRLF."
237 (let ((state (ecase start-state
238 (:name 1)
239 (:value 2)
240 (:parameters 3)))
241 (result (make-string-output-stream))
242 char
243 (leave-char nil)
244 name
245 value
246 parameter-name
247 parameters)
248
249 (labels ((skip-lwsp (next-state)
250 (loop
251 do (setq char (read-char stream nil nil))
252 while (and char (lwsp-char-p char)))
253 (setq leave-char t
254 state next-state))
255
256 (collect-parameter ()
257 (push (cons parameter-name
258 (get-output-stream-string result))
259 parameters)
260 (setq parameter-name nil)
261 (skip-lwsp 3))
262
263 (token-end-char-p (char)
264 (or (char= char #\;)
265 (lwsp-char-p char))))
266
267 (loop
268
269 (if leave-char
270 (setq leave-char nil)
271 (setq char (read-char stream nil nil)))
272
273 ;; end of stream
274 (unless char
275 (return))
276
277 (when (char= #\return char)
278 (setq char (read-char stream nil nil))
279 (cond ((or (null char)
280 (char= #\linefeed char))
281 ;; CRLF ends the input
282 (return))
283 (t
284 (warn "LINEFEED without RETURN in header.")
285 (write-char #\return result)
286 (setq leave-char t))))
287
288 #-(and)
289 (format t "~&S:~,'0D CH:~:[~;*~]~S~%"
290 state leave-char char)
291
292 (ecase state
293 (1 ;; NAME
294 (cond ((char= char #\:)
295 ;; end of name
296 (setq name (get-output-stream-string result))
297 (skip-lwsp 2))
298 (t
299 (write-char char result))))
300
301 (2 ;; VALUE
302 (cond ((token-end-char-p char)
303 (setq value (get-output-stream-string result))
304 (skip-lwsp 3))
305 (t
306 (write-char char result))))
307
308 (3 ;; PARAMETER name
309 (cond ((char= #\= char)
310 (setq parameter-name (get-output-stream-string result)
311 state 4))
312 (t
313 (write-char char result))))
314
315 (4 ;; PARAMETER value start
316 (cond ((char= #\" char)
317 (setq state 5))
318 (t
319 (setq leave-char t
320 state 7))))
321
322 (5 ;; Quoted PARAMETER value
323 (cond ((char= #\" char)
324 (setq state 6))
325 (t
326 (write-char char result))))
327
328 (6 ;; End of quoted PARAMETER value
329 (cond ((token-end-char-p char)
330 (collect-parameter))
331 (t
332 ;; no space or semicolon after quoted parameter value
333 (setq leave-char t
334 state 3))))
335
336 (7 ;; Unquoted PARAMETER value
337 (cond ((token-end-char-p char)
338 (collect-parameter))
339 (t
340 (write-char char result))))))
341
342 (case state
343 (1
344 (setq name (get-output-stream-string result)))
345 (2
346 (setq value (get-output-stream-string result)))
347 ((3 4)
348 (let ((name (get-output-stream-string result)))
349 (unless (zerop (length name))
350 (warn "Parameter without value in header.")
351 (push (cons name nil) parameters))))
352 ((5 6 7)
353 (push (cons parameter-name (get-output-stream-string result)) parameters))))
354
355 (if (and (or (null name)
356 (zerop (length name)))
357 (null value)
358 (null parameters))
359 nil
360 (make-header name value parameters))))
361
362
363
364 ;;; _The_ MIME parsing
365
366
367 (defgeneric parse-mime (source boundary &key write-content-to-file)
368 (:documentation
369 "Parses MIME entities, returning them as a list. Each element in the
370 list is of form: (body headers), where BODY is the contents of MIME
371 part, and HEADERS are all headers for that part. BOUNDARY is a string
372 used to separate MIME entities."))
373
374
375 (defstruct (content-type (:type list)
376 (:constructor make-content-type (super sub)))
377 super
378 sub)
379
380
381 (defun parse-content-type (string)
382 "Returns content-type which is parsed from STRING."
383 (let ((sep-offset (position #\/ string))
384 (type (array-element-type string)))
385 (if (numberp sep-offset)
386 (make-content-type (make-array sep-offset
387 :element-type type
388 :displaced-to string)
389 (make-array (- (length string) (incf sep-offset))
390 :element-type type
391 :displaced-to string
392 :displaced-index-offset sep-offset))
393 (make-content-type string nil))))
394
395
396 (defun unparse-content-type (ct)
397 "Returns content-type CT in string representation."
398 (let ((super (content-type-super ct))
399 (sub (content-type-sub ct)))
400 (cond ((and super sub)
401 (concatenate 'string super "/" sub))
402 (t (or super "")))))
403
404 (defstruct (mime-part (:type list)
405 (:constructor make-mime-part (contents headers)))
406 contents
407 headers)
408
409
410 (defmethod parse-mime ((input string) separator &key (write-content-to-file t))
411 (with-input-from-string (stream input)
412 (parse-mime stream separator :write-content-to-file write-content-to-file)))
413
414
415 (defmethod parse-mime ((input stream) boundary &key (write-content-to-file t))
416 ;; Find the first boundary. Return immediately if it is also the last
417 ;; one.
418 (unless (nth-value 1 (read-until-next-boundary input boundary t))
419 (return-from parse-mime nil))
420
421 (let ((result ()))
422 (loop
423 (let ((headers (loop
424 for header = (parse-header input)
425 while header
426 when (string-equal "CONTENT-TYPE" (header-name header))
427 do (setf (header-value header) (parse-content-type (header-value header)))
428 collect header)))
429 (let ((file-name (get-file-name headers)))
430 (cond ((and write-content-to-file
431 file-name)
432 (let ((temp-file (make-tmp-file-name)))
433 (multiple-value-bind (text more)
434 (with-open-file (out-file (ensure-directories-exist temp-file)
435 :direction :output
436 ;; external format for faithful I/O
437 ;; see <http://cl-cookbook.sourceforge.net/io.html#faith>
438 #+(or :sbcl :lispworks :allegro :openmcl)
439 :external-format
440 #+sbcl :latin-1
441 #+:lispworks '(:latin-1 :eol-style :lf)
442 #+:allegro (excl:crlf-base-ef :latin1)
443 #+:openmcl '(:character-encoding :iso-8859-1
444 :line-termination :unix))
445 (read-until-next-boundary input boundary nil out-file))
446 (declare (ignore text))
447 (when (and (stringp file-name)
448 (plusp (length file-name)))
449 (push (make-mime-part temp-file headers) result))
450 (when (not more)
451 (return)))))
452 (t
453 (multiple-value-bind (text more)
454 (read-until-next-boundary input boundary)
455 (push (make-mime-part text headers) result)
456 (when (not more)
457 (return))))))))
458 (nreverse result)))
459
460
461 (defun find-header (label headers)
462 "Find header by label from set of headers."
463 (find label headers :key #'rfc2388:header-name :test #'string-equal))
464
465
466 (defun find-parameter (name params)
467 "Find header parameter by name from set of parameters."
468 (assoc name params :test #'string-equal))
469
470
471 (defun content-type (part &key as-string)
472 "Returns the Content-Type header of mime-part PART."
473 (let ((header (find-header "CONTENT-TYPE" (mime-part-headers part))))
474 (if header
475 (if as-string
476 (or (unparse-content-type (header-value header)) "")
477 (header-value header))
478 (when as-string ""))))
479
480
481 (defun find-content-disposition-header (headers)
482 (find-if (lambda (header)
483 (and (string-equal "CONTENT-DISPOSITION"
484 (rfc2388:header-name header))
485 (string-equal "FORM-DATA"
486 (rfc2388:header-value header))))
487 headers))
488
489
490 (defun get-file-name (headers)
491 (cdr (find-parameter "FILENAME"
492 (header-parameters (find-content-disposition-header headers)))))