xml.lisp
  1 ;;;; -*- mode: lisp -*-
2 ;;;;
3 ;;;; $Id: xml.lisp,v 1.14 2005/11/20 14:24:34 scaekenberghe Exp $
4 ;;;;
5 ;;;; This is a Common Lisp implementation of a basic but usable XML parser.
6 ;;;; The parser is non-validating and not complete (no CDATA).
7 ;;;; Namespace and entities are handled.
8 ;;;; The API into the parser is a pure functional parser hook model that comes from SSAX,
9 ;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net
10 ;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one.
11 ;;;;
12 ;;;; Copyright (C) 2002, 2003, 2004, 2005 Sven Van Caekenberghe, Beta Nine BVBA.
13 ;;;;
14 ;;;; You are granted the rights to distribute and use this software
15 ;;;; as governed by the terms of the Lisp Lesser General Public License
16 ;;;; (http://opensource.franz.com/preamble.html), also known as the LLGPL.
17
18 (in-package :s-xml)
19
20 ;;; error reporting
21
22 (define-condition xml-parser-error (error)
23 ((message :initarg :message :reader xml-parser-error-message)
24 (args :initarg :args :reader xml-parser-error-args)
25 (stream :initarg :stream :reader xml-parser-error-stream :initform nil))
26 (:report (lambda (condition stream)
27 (format stream
28 "XML parser ~?~@[ near stream position ~d~]."
29 (xml-parser-error-message condition)
30 (xml-parser-error-args condition)
31 (and (xml-parser-error-stream condition)
32 (file-position (xml-parser-error-stream condition))))))
33 (:documentation "Thrown by the XML parser to indicate errorneous input"))
34
35 (setf (documentation 'xml-parser-error-message 'function)
36 "Get the message from an XML parser error"
37 (documentation 'xml-parser-error-args 'function)
38 "Get the error arguments from an XML parser error"
39 (documentation 'xml-parser-error-stream 'function)
40 "Get the stream from an XML parser error")
41
42 (defun parser-error (message &optional args stream)
43 (make-condition 'xml-parser-error
44 :message message
45 :args args
46 :stream stream))
47
48 ;;; utilities
49
50 (defun whitespace-char-p (char)
51 "Is char an XML whitespace character ?"
52 (or (char= char #\space)
53 (char= char #\tab)
54 (char= char #\return)
55 (char= char #\linefeed)))
56
57 (defun identifier-char-p (char)
58 "Is char an XML identifier character ?"
59 (or (and (char<= #\A char) (char<= char #\Z))
60 (and (char<= #\a char) (char<= char #\z))
61 (and (char<= #\0 char) (char<= char #\9))
62 (char= char #\-)
63 (char= char #\_)
64 (char= char #\.)
65 (char= char #\:)))
66
67 (defun skip-whitespace (stream)
68 "Skip over XML whitespace in stream, return first non-whitespace
69 character which was peeked but not read, return nil on eof"
70 (loop
71 (let ((char (peek-char nil stream nil nil)))
72 (if (and char (whitespace-char-p char))
73 (read-char stream)
74 (return char)))))
75
76 (defun make-extendable-string (&optional (size 10))
77 "Make an extendable string which is a one-dimensional character
78 array which is adjustable and has a fill pointer"
79 (make-array size
80 :element-type 'character
81 :adjustable t
82 :fill-pointer 0))
83
84 (defun print-string-xml (string stream &key (start 0) end)
85 "Write the characters of string to stream using basic XML conventions"
86 (loop for offset upfrom start below (or end (length string))
87 for char = (char string offset)
88 do (case char
89 (#\& (write-string "&amp;" stream))
90 (#\< (write-string "&lt;" stream))
91 (#\> (write-string "&gt;" stream))
92 (#\" (write-string "&quot;" stream))
93 ((#\newline #\return #\tab) (write-char char stream))
94 (t (if (and (<= 32 (char-code char))
95 (<= (char-code char) 126))
96 (write-char char stream)
97 (progn
98 (write-string "&#x" stream)
99 (write (char-code char) :stream stream :base 16)
100 (write-char #\; stream)))))))
101
102 (defun make-standard-entities ()
103 "A hashtable mapping XML entity names to their replacement strings,
104 filled with the standard set"
105 (let ((entities (make-hash-table :test #'equal)))
106 (setf (gethash "amp" entities) (string #\&)
107 (gethash "quot" entities) (string #\")
108 (gethash "apos" entities) (string #\')
109 (gethash "lt" entities) (string #\<)
110 (gethash "gt" entities) (string #\>)
111 (gethash "nbsp" entities) (string #\space))
112 entities))
113
114 (defun resolve-entity (stream extendable-string entities &optional (entity (make-extendable-string)))
115 "Read and resolve an XML entity from stream, positioned after the '&' entity marker,
116 accepting &name; &#DEC; and &#xHEX; formats,
117 destructively modifying string, which is also returned,
118 destructively modifying entity, incorrect entity formats result in errors"
119 (loop
120 (let ((char (read-char stream nil nil)))
121 (cond ((null char) (error (parser-error "encountered eof before end of entity")))
122 ((char= #\; char) (return))
123 (t (vector-push-extend char entity)))))
124 (if (char= (char entity 0) #\#)
125 (let ((code (if (char= (char entity 1) #\x)
126 (parse-integer entity :start 2 :radix 16 :junk-allowed t)
127 (parse-integer entity :start 1 :radix 10 :junk-allowed t))))
128 (when (null code)
129 (error (parser-error "encountered incorrect entity &~s;" (list entity) stream)))
130 (vector-push-extend (code-char code) extendable-string))
131 (let ((value (gethash entity entities)))
132 (if value
133 (loop :for char :across value
134 :do (vector-push-extend char extendable-string))
135 (error (parser-error "encountered unknown entity &~s;" (list entity) stream)))))
136 extendable-string)
137
138 ;;; namespace support
139
140 (defvar *ignore-namespaces* nil
141 "When t, namespaces are ignored like in the old version of S-XML")
142
143 (defclass xml-namespace ()
144 ((uri :documentation "The URI used to identify this namespace"
145 :accessor get-uri
146 :initarg :uri)
147 (prefix :documentation "The preferred prefix assigned to this namespace"
148 :accessor get-prefix
149 :initarg :prefix
150 :initform nil)
151 (package :documentation "The Common Lisp package where this namespace's symbols are interned"
152 :accessor get-package
153 :initarg :package
154 :initform nil))
155 (:documentation "Describes an XML namespace and how it is handled"))
156
157 (defmethod print-object ((object xml-namespace) stream)
158 (print-unreadable-object (object stream :type t :identity t)
159 (format stream "~A - ~A" (get-prefix object) (get-uri object))))
160
161 (defvar *local-namespace* (make-instance 'xml-namespace
162 :uri "local"
163 :prefix ""
164 :package (find-package :keyword))
165 "The local (global default) XML namespace")
166
167 (defvar *xml-namespace* (make-instance 'xml-namespace
168 :uri "http://www.w3.org/XML/1998/namespace"
169 :prefix "xml"
170 :package (or (find-package :xml)
171 (make-package :xml :nicknames '("XML"))))
172 "REC-xml-names-19990114 says the prefix xml is bound to the namespace http://www.w3.org/XML/1998/namespace.")
173
174 (defvar *known-namespaces* (list *local-namespace* *xml-namespace*)
175 "The list of known/defined namespaces")
176
177 (defvar *namespaces* `(("xml" . ,*xml-namespace*) ("" . ,*local-namespace*))
178 "Ordered list of (prefix . XML-namespace) bindings currently in effect - special variable")
179
180 (defun find-namespace (uri)
181 "Find a registered XML namespace identified by uri"
182 (find uri *known-namespaces* :key #'get-uri :test #'string-equal))
183
184 (defun register-namespace (uri prefix package)
185 "Register a new or redefine an existing XML namespace defined by uri with prefix and package"
186 (let ((namespace (find-namespace uri)))
187 (if namespace
188 (setf (get-prefix namespace) prefix
189 (get-package namespace) (find-package package))
190 (push (setf namespace (make-instance 'xml-namespace
191 :uri uri
192 :prefix prefix
193 :package (find-package package)))
194 *known-namespaces*))
195 namespace))
196
197 (defun find-namespace-binding (prefix namespaces)
198 "Find the XML namespace currently bound to prefix in the namespaces bindings"
199 (cdr (assoc prefix namespaces :test #'string-equal)))
200
201 (defun split-identifier (identifier)
202 "Split an identifier 'prefix:name' and return (values prefix name)"
203 (when (symbolp identifier)
204 (setf identifier (symbol-name identifier)))
205 (let ((colon-position (position #\: identifier :test #'char=)))
206 (if colon-position
207 (values (subseq identifier 0 colon-position)
208 (subseq identifier (1+ colon-position)))
209 (values nil identifier))))
210
211 (defvar *require-existing-symbols* nil
212 "If t, each XML identifier must exist as symbol already")
213
214 (defvar *auto-export-symbols* t
215 "If t, export newly interned symbols form their packages")
216
217 (defun resolve-identifier (identifier namespaces &optional as-attribute)
218 "Resolve the string identifier in the list of namespace bindings"
219 (if *ignore-namespaces*
220 (intern identifier :keyword)
221 (flet ((intern-symbol (string package) ; intern string as a symbol in package
222 (if *require-existing-symbols*
223 (let ((symbol (find-symbol string package)))
224 (or symbol
225 (error "Symbol ~s does not exist in ~s" string package)))
226 (let ((symbol (intern string package)))
227 (when (and *auto-export-symbols*
228 (not (eql package (find-package :keyword))))
229 (export symbol package))
230 symbol))))
231 (multiple-value-bind (prefix name)
232 (split-identifier identifier)
233 (if (or (null prefix) (string= prefix "xmlns"))
234 (if as-attribute
235 (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*))
236 (let ((default-namespace (find-namespace-binding "" namespaces)))
237 (intern-symbol name (get-package default-namespace))))
238 (let ((namespace (find-namespace-binding prefix namespaces)))
239 (if namespace
240 (intern-symbol name (get-package namespace))
241 (error "namespace not found for prefix ~s" prefix))))))))
242
243 (defvar *auto-create-namespace-packages* t
244 "If t, new packages will be created for namespaces, if needed, named by the prefix")
245
246 (defun new-namespace (uri &optional prefix)
247 "Register a new namespace for uri and prefix, creating a package if necessary"
248 (if prefix
249 (register-namespace uri
250 prefix
251 (or (find-package prefix)
252 (if *auto-create-namespace-packages*
253 (make-package prefix :nicknames `(,(string-upcase prefix)))
254 (error "Cannot find or create package ~s" prefix))))
255 (let ((unique-name (loop :for i :upfrom 0
256 :do (let ((name (format nil "ns-~d" i)))
257 (when (not (find-package name))
258 (return name))))))
259 (register-namespace uri
260 unique-name
261 (if *auto-create-namespace-packages*
262 (make-package (string-upcase unique-name) :nicknames `(,unique-name))
263 (error "Cannot create package ~s" unique-name))))))
264
265 (defun extend-namespaces (attributes namespaces)
266 "Given possible 'xmlns[:prefix]' attributes, extend the namespaces bindings"
267 (unless *ignore-namespaces*
268 (let (default-namespace-uri)
269 (loop :for (key . value) :in attributes
270 :do (if (string= key "xmlns")
271 (setf default-namespace-uri value)
272 (multiple-value-bind (prefix name)
273 (split-identifier key)
274 (when (string= prefix "xmlns")
275 (let* ((uri value)
276 (prefix name)
277 (namespace (find-namespace uri)))
278 (unless namespace
279 (setf namespace (new-namespace uri prefix)))
280 (push `(,prefix . ,namespace) namespaces))))))
281 (when default-namespace-uri
282 (let ((namespace (find-namespace default-namespace-uri)))
283 (unless namespace
284 (setf namespace (new-namespace default-namespace-uri)))
285 (push `("" . ,namespace) namespaces)))))
286 namespaces)
287
288 (defun print-identifier (identifier stream &optional as-attribute)
289 "Print identifier on stream using namespace conventions"
290 (declare (ignore as-attribute) (special *namespaces*))
291 (if *ignore-namespaces*
292 (princ identifier stream)
293 (if (symbolp identifier)
294 (let ((package (symbol-package identifier))
295 (name (symbol-name identifier)))
296 (let* ((namespace (find package *known-namespaces* :key #'get-package))
297 (prefix (or (car (find namespace *namespaces* :key #'cdr))
298 (get-prefix namespace))))
299 (if (string= prefix "")
300 (princ name stream)
301 (format stream "~a:~a" prefix name))))
302 (princ identifier stream))))
303
304 ;;; the parser state
305
306 (defclass xml-parser-state ()
307 ((entities :documentation "A hashtable mapping XML entity names to their replacement stings"
308 :accessor get-entities
309 :initarg :entities
310 :initform (make-standard-entities))
311 (seed :documentation "The user seed object"
312 :accessor get-seed
313 :initarg :seed
314 :initform nil)
315 (buffer :documentation "The main reusable character buffer"
316 :accessor get-buffer
317 :initform (make-extendable-string))
318 (mini-buffer :documentation "The secondary, smaller reusable character buffer"
319 :accessor get-mini-buffer
320 :initform (make-extendable-string))
321 (new-element-hook :documentation "Called when new element starts"
322 ;; Handle the start of a new xml element with name and attributes,
323 ;; receiving seed from previous element (sibling or parent)
324 ;; return seed to be used for first child (content)
325 ;; or directly to finish-element-hook
326 :accessor get-new-element-hook
327 :initarg :new-element-hook
328 :initform #'(lambda (name attributes seed)
329 (declare (ignore name attributes))
330 seed))
331 (finish-element-hook :documentation "Called when element ends"
332 ;; Handle the end of an xml element with name and attributes,
333 ;; receiving parent-seed, the seed passed to us when this element started,
334 ;; i.e. passed to our corresponding new-element-hook
335 ;; and receiving seed from last child (content)
336 ;; or directly from new-element-hook
337 ;; return final seed for this element to next element (sibling or parent)
338 :accessor get-finish-element-hook
339 :initarg :finish-element-hook
340 :initform #'(lambda (name attributes parent-seed seed)
341 (declare (ignore name attributes parent-seed))
342 seed))
343 (text-hook :documentation "Called when text is found"
344 ;; Handle text in string, found as contents,
345 ;; receiving seed from previous element (sibling or parent),
346 ;; return final seed for this element to next element (sibling or parent)
347 :accessor get-text-hook
348 :initarg :text-hook
349 :initform #'(lambda (string seed)
350 (declare (ignore string))
351 seed)))
352 (:documentation "The XML parser state passed along all code making up the parser"))
353
354 (setf (documentation 'get-seed 'function)
355 "Get the initial user seed of an XML parser state"
356 (documentation 'get-entities 'function)
357 "Get the entities hashtable of an XML parser state"
358 (documentation 'get-new-element-hook 'function)
359 "Get the new element hook of an XML parser state"
360 (documentation 'get-finish-element-hook 'function)
361 "Get the finish element hook of an XML parser state"
362 (documentation 'get-text-hook 'function)
363 "Get the text hook of an XML parser state")
364
365 #-allegro
366 (setf (documentation '(setf get-seed) 'function)
367 "Set the initial user seed of an XML parser state"
368 (documentation '(setf get-entities) 'function)
369 "Set the entities hashtable of an XML parser state"
370 (documentation '(setf get-new-element-hook) 'function)
371 "Set the new element hook of an XML parser state"
372 (documentation '(setf get-finish-element-hook) 'function)
373 "Set the finish element hook of an XML parser state"
374 (documentation '(setf get-text-hook) 'function)
375 "Set the text hook of an XML parser state")
376
377 (defmethod get-mini-buffer :after ((state xml-parser-state))
378 "Reset and return the reusable mini buffer"
379 (with-slots (mini-buffer) state
380 (setf (fill-pointer mini-buffer) 0)))
381
382 (defmethod get-buffer :after ((state xml-parser-state))
383 "Reset and return the main reusable buffer"
384 (with-slots (buffer) state
385 (setf (fill-pointer buffer) 0)))
386
387 ;;; parser support
388
389 (defun parse-whitespace (stream extendable-string)
390 "Read and collect XML whitespace from stream in string which is
391 destructively modified, return first non-whitespace character which
392 was peeked but not read, return nil on eof"
393 (loop
394 (let ((char (peek-char nil stream nil nil)))
395 (if (and char (whitespace-char-p char))
396 (vector-push-extend (read-char stream) extendable-string)
397 (return char)))))
398
399 (defun parse-string (stream state &optional (string (make-extendable-string)))
400 "Read and return an XML string from stream, delimited by either
401 single or double quotes, the stream is expected to be on the opening
402 delimiter, at the end the closing delimiter is also read, entities
403 are resolved, eof before end of string is an error"
404 (let ((delimiter (read-char stream nil nil))
405 (char))
406 (when (or (null delimiter) (not (or (char= delimiter #\') (char= delimiter #\"))))
407 (error (parser-error "expected string delimiter" nil stream)))
408 (loop
409 (setf char (read-char stream nil nil))
410 (cond ((null char) (error (parser-error "encountered eof before end of string")))
411 ((char= char delimiter) (return))
412 ((char= char #\&) (resolve-entity stream string (get-entities state) (get-mini-buffer state)))
413 (t (vector-push-extend char string))))
414 string))
415
416 (defun parse-text (stream state extendable-string)
417 "Read and collect XML text from stream in string which is
418 destructively modified, the text ends with a '<', which is peeked and
419 returned, entities are resolved, eof is considered an error"
420 (let (char)
421 (loop
422 (setf char (peek-char nil stream nil nil))
423 (when (null char) (error (parser-error "encountered unexpected eof in text")))
424 (when (char= char #\<) (return))
425 (read-char stream)
426 (if (char= char #\&)
427 (resolve-entity stream extendable-string (get-entities state) (get-mini-buffer state))
428 (vector-push-extend char extendable-string)))
429 char))
430
431 (defun parse-identifier (stream &optional (identifier (make-extendable-string)))
432 "Read and returns an XML identifier from stream, positioned at the
433 start of the identifier, ending with the first non-identifier
434 character, which is peeked, the identifier is written destructively
435 into identifier which is also returned"
436 (loop
437 (let ((char (peek-char nil stream nil nil)))
438 (cond ((and char (identifier-char-p char))
439 (read-char stream)
440 (vector-push-extend char identifier))
441 (t
442 (return identifier))))))
443
444 (defun skip-comment (stream)
445 "Skip an XML comment in stream, positioned after the opening '<!--',
446 consumes the closing '-->' sequence, unexpected eof or a malformed
447 closing sequence result in a error"
448 (let ((dashes-to-read 2))
449 (loop
450 (if (zerop dashes-to-read) (return))
451 (let ((char (read-char stream nil nil)))
452 (if (null char)
453 (error (parser-error "encountered unexpected eof for comment")))
454 (if (char= char #\-)
455 (decf dashes-to-read)
456 (setf dashes-to-read 2)))))
457 (if (char/= (read-char stream nil nil) #\>)
458 (error (parser-error "expected > ending comment" nil stream))))
459
460 (defun read-cdata (stream state &optional (string (make-extendable-string)))
461 "Reads in the CDATA and calls the callback for CDATA if it exists"
462 ;; we already read the <![CDATA[ stuff
463 ;; continue to read until we hit ]]>
464 (let ((char #\space)
465 (last-3-characters (list #\[ #\A #\T))
466 (pattern (list #\> #\] #\])))
467 (loop
468 (setf char (read-char stream nil nil))
469 (when (null char) (error (parser-error "encountered unexpected eof in text")))
470 (push char last-3-characters)
471 (setf (cdddr last-3-characters) nil)
472 (cond
473 ((equal last-3-characters
474 pattern)
475 (setf (fill-pointer string)
476 (- (fill-pointer string) 2))
477 (setf (get-seed state)
478 (funcall (get-text-hook state)
479 (copy-seq string)
480 (get-seed state)))
481 (return-from read-cdata))
482 (t
483 (vector-push-extend char string))))))
484
485 (defun skip-special-tag (stream state)
486 "Skip an XML special tag (comments and processing instructions) in
487 stream, positioned after the opening '<', unexpected eof is an error"
488 ;; opening < has been read, consume ? or !
489 (read-char stream)
490 (let ((char (read-char stream nil nil)))
491 ;; see if we are dealing with a comment
492 (when (char= char #\-)
493 (setf char (read-char stream nil nil))
494 (when (char= char #\-)
495 (skip-comment stream)
496 (return-from skip-special-tag)))
497 ;; maybe we are dealing with CDATA?
498 (when (and (char= char #\[)
499 (loop :for pattern :across "CDATA["
500 :for char = (read-char stream nil nil)
501 :when (null char) :do
502 (error (parser-error "encountered unexpected eof in cdata"))
503 :always (char= char pattern)))
504 (read-cdata stream state (get-buffer state))
505 (return-from skip-special-tag))
506 ;; loop over chars, dealing with strings (skipping their content)
507 ;; and counting opening and closing < and > chars
508 (let ((taglevel 1)
509 (string-delimiter))
510 (loop
511 (when (zerop taglevel) (return))
512 (setf char (read-char stream nil nil))
513 (when (null char)
514 (error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream)))
515 (if string-delimiter
516 ;; inside a string we only look for a closing string delimiter
517 (when (char= char string-delimiter)
518 (setf string-delimiter nil))
519 ;; outside a string we count < and > and watch out for strings
520 (cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char))
521 ((char= char #\<) (incf taglevel))
522 ((char= char #\>) (decf taglevel))))))))
523
524 ;;; the XML parser proper
525
526 (defun parse-xml-element-attributes (stream state)
527 "Parse XML element attributes from stream positioned after the tag
528 identifier, returning the attributes as an assoc list, ending at
529 either a '>' or a '/' which is peeked and also returned"
530 (declare (special *namespaces*))
531 (let (char attributes)
532 (loop
533 ;; skip whitespace separating items
534 (setf char (skip-whitespace stream))
535 ;; start tag attributes ends with > or />
536 (when (and char (or (char= char #\>) (char= char #\/))) (return))
537 ;; read the attribute key
538 (let ((key (copy-seq (parse-identifier stream (get-mini-buffer state)))))
539 ;; skip separating whitespace
540 (setf char (skip-whitespace stream))
541 ;; require = sign (and consume it if present)
542 (if (and char (char= char #\=))
543 (read-char stream)
544 (error (parser-error "expected =" nil stream)))
545 ;; skip separating whitespace
546 (skip-whitespace stream)
547 ;; read the attribute value as a string
548 (push (cons key (copy-seq (parse-string stream state (get-buffer state))))
549 attributes)))
550 ;; return attributes peek char ending loop
551 (values attributes char)))
552
553 (defun parse-xml-element (stream state)
554 "Parse and return an XML element from stream, positioned after the opening '<'"
555 (declare (special *namespaces*))
556 ;; opening < has been read
557 (when (char= (peek-char nil stream nil nil) #\!)
558 (skip-special-tag stream state)
559 (return-from parse-xml-element))
560 (let (char buffer open-tag parent-seed has-children)
561 (setf parent-seed (get-seed state))
562 ;; read tag name (no whitespace between < and name ?)
563 (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state))))
564 ;; tag has been read, read attributes if any
565 (multiple-value-bind (attributes peeked-char)
566 (parse-xml-element-attributes stream state)
567 (let ((*namespaces* (extend-namespaces attributes *namespaces*)))
568 (setf open-tag (resolve-identifier open-tag *namespaces*)
569 attributes (loop :for (key . value) :in attributes
570 :collect (cons (resolve-identifier key *namespaces* t) value)))
571 (setf (get-seed state) (funcall (get-new-element-hook state)
572 open-tag attributes (get-seed state)))
573 (setf char peeked-char)
574 (when (char= char #\/)
575 ;; handle solitary tag of the form <tag .. />
576 (read-char stream)
577 (setf char (read-char stream nil nil))
578 (if (char= #\> char)
579 (progn
580 (setf (get-seed state) (funcall (get-finish-element-hook state)
581 open-tag attributes parent-seed (get-seed state)))
582 (return-from parse-xml-element))
583 (error (parser-error "expected >" nil stream))))
584 ;; consume >
585 (read-char stream)
586 (loop
587 (setf buffer (get-buffer state))
588 ;; read whitespace into buffer
589 (setf char (parse-whitespace stream buffer))
590 ;; see what ended the whitespace scan
591 (cond ((null char) (error (parser-error "encountered unexpected eof handling ~a" (list open-tag))))
592 ((char= char #\<)
593 ;; consume the <
594 (read-char stream)
595 (if (char= (peek-char nil stream nil nil) #\/)
596 (progn
597 ;; handle the matching closing tag </tag> and done
598 ;; if we read whitespace as this (leaf) element's contents, it is significant
599 (when (and (not has-children) (plusp (length buffer)))
600 (setf (get-seed state) (funcall (get-text-hook state)
601 (copy-seq buffer) (get-seed state))))
602 (read-char stream)
603 (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state))
604 *namespaces*)))
605 (unless (eq open-tag close-tag)
606 (error (parser-error "found <~a> not matched by </~a> but by <~a>"
607 (list open-tag open-tag close-tag) stream)))
608 (unless (char= (read-char stream nil nil) #\>)
609 (error (parser-error "expected >" nil stream)))
610 (setf (get-seed state) (funcall (get-finish-element-hook state)
611 open-tag attributes parent-seed (get-seed state))))
612 (return))
613 ;; handle child tag and loop, no hooks to call here
614 ;; whitespace between child elements is skipped
615 (progn
616 (setf has-children t)
617 (parse-xml-element stream state))))
618 (t
619 ;; no child tag, concatenate text to whitespace in buffer
620 ;; handle text content and loop
621 (setf char (parse-text stream state buffer))
622 (setf (get-seed state) (funcall (get-text-hook state)
623 (copy-seq buffer) (get-seed state))))))))))
624
625 (defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state)))
626 "Parse and return a toplevel XML element from stream, using parser state"
627 (loop
628 (let ((char (skip-whitespace stream)))
629 (when (null char) (return-from start-parse-xml))
630 ;; skip whitespace until start tag
631 (unless (char= char #\<)
632 (error (parser-error "expected <" nil stream)))
633 (read-char stream) ; consume peeked char
634 (setf char (peek-char nil stream nil nil))
635 (if (or (char= char #\!) (char= char #\?))
636 ;; deal with special tags
637 (skip-special-tag stream state)
638 (progn
639 ;; read the main element
640 (parse-xml-element stream state)
641 (return-from start-parse-xml (get-seed state)))))))
642
643 ;;;; eof