Scheme Parser Combinators
; Justin Pombrio
; Parser Combinator Library
; ~2006
; Parser
(define (add-rule! name pattern-f pattern-args func)
(hash-table/put! the-parser name (cons (lambda () (apply pattern-f pattern-args)) func)))
(define (parse string rule)
(set! parser-buffer (make-buffer string 0))
(parse-rule rule))
(define pass (lambda (x) x))
(define noise (lambda (x) 'succ))
; Fail / Succ
(define (fail? x) (eqv? x 'fail))
(define (succeed? x) (eqv? x 'succ))
; Buffer
(define (make-buffer string index) (cons index string))
(define (buffer-real-index buffer) (car buffer))
(define (buffer-set-index! buffer n) (set-car! buffer n))
(define (buffer-real-string buffer) (cdr buffer))
(define (buffer-string-length buffer) (- (string-length (buffer-real-string buffer))
(buffer-real-index buffer)))
(define (buffer-string buffer) (string-tail (buffer-real-string buffer)
(buffer-real-index buffer)))
(define (buffer-inc buffer n)
(if (< (buffer-string-length buffer) n)
(error "Buffer-increment - past end of string")
(make-buffer (buffer-real-string buffer)
(+ n (buffer-real-index buffer)))))
(define (buffer-substring buffer n)
(if (< (buffer-string-length buffer) n)
(error "Buffer-substring - past end of string")
(substring (buffer-real-string buffer)
(buffer-real-index buffer)
(+ (buffer-real-index buffer) n))))
; Global
(define the-parser (make-eqv-hash-table))
(define parser-buffer (make-buffer "" 0))
#| Buffer functions
- either return new offset or 'fail
|#
(define (buffer-many-not buffer chars)
(let ((offset (string-find-next-char-in-set (buffer-string buffer) chars)))
(cond ((false? offset)
(if (= 0 (buffer-string-length buffer)) 'fail (buffer-string-length buffer)))
((= 0 offset) 'fail)
(else offset))))
(define (buffer-one buffer chars)
(cond ((= 0 (buffer-string-length buffer))
'fail)
((char-set-member? chars (string-ref (buffer-substring buffer 1) 0))
1)))
(define (buffer-many buffer chars)
(buffer-many-not buffer (char-set-invert chars)))
(define (buffer-match-string buffer string)
(if (string-prefix? string (buffer-string buffer))
(string-length string)
'fail))
(define (buffer-match-strings buffer strings)
(cond ((null? strings)
'fail)
((string-prefix? (car strings) (buffer-string buffer))
(string-length (car strings)))
(else (buffer-match-strings buffer (cdr strings)))))
(define (buffer-match-until-string buffer string)
(let ((index (string-search-forward string (buffer-string buffer))))
(if (false? index) 'fail index)))
#| Match - (match buffer pattern)
- try match pattern
if fail: return 'fail
if succ: update index & return token
|#
(define (make-matcher buffer-func)
(lambda args
(let ((offset (apply buffer-func (cons parser-buffer args))))
(if (fail? offset)
'fail
(let ((return-string (buffer-substring parser-buffer offset)))
(set! parser-buffer (buffer-inc parser-buffer offset))
return-string)))))
(define match-one-chars (make-matcher buffer-one))
(define match-chars (make-matcher buffer-many))
(define match-until-chars (make-matcher buffer-many-not))
(define match-string (make-matcher buffer-match-string))
(define match-strings (make-matcher buffer-match-strings))
(define match-until-string (make-matcher buffer-match-until-string))
#| Rule - (rule 'name (lambda () <match>) (lambda (result) <use-result>))
- Attempt <match>
if succ: return (use-result)
if fail: 'fail
|#
(define (parse-rule name)
(if (not (hash-table/get the-parser name #f))
(error "Parser - rule not defined: " name))
(let ((match-func (car (hash-table/get the-parser name #f)))
(use-func (cdr (hash-table/get the-parser name #f))))
(let ((result (match-func)))
(if (fail? result)
'fail
(use-func result)))))
#| Complete - (complete rule)
- attempt rule
if fail or buffer non-empty: 'fail
else: result
|#
(define (parse-complete rule)
(let ((result (parse-rule rule)))
(cond ((fail? result) 'fail)
((not (= (buffer-string-length parser-buffer) 0)) 'fail)
(else result))))
#| Many - (many rule)
- repeat rule until it fails
return a list of results
- if rule is not applied, return '()
|#
(define (parse-many rule)
(define (aux results)
(let ((result (parse-rule rule)))
(if (fail? result)
results
(aux (cons result results)))))
(aux '()))
#| Try - (try . rules)
- Attempt rules in order
return first non-fail/succ result
or 'fail
|#
(define (parse-try . rules)
(if (null? rules)
'fail
(let ((attempt (parse-rule (car rules))))
(if (or (fail? attempt) (succeed? attempt))
(apply parse-try (cdr rules))
attempt))))
#| Seq - (seq . rules)
- save index
- attempt rules in order
if any fails: reset index
return 'fail
if there is a non-fail/succ result, return the last one
else return 'succ
|#
(define (parse-seq . rules)
(define (aux saved-index result rules)
(if (null? rules)
result
(let ((attempt (parse-rule (car rules))))
(cond ((fail? attempt)
(buffer-set-index! parser-buffer saved-index)
'fail)
((succeed? attempt)
(aux saved-index result (cdr rules)))
(else
(aux saved-index attempt (cdr rules)))))))
(aux (buffer-real-index parser-buffer) 'succ rules))
#| SeqList - (seq . rules)
- save index
- attempt rules in order
if any fails: reset index
return 'fail
if there are non-fail/succ results, return a list of them
else return 'succ
|#
(define (parse-seq-list . rules)
(define (aux saved-index result rules)
(if (null? rules)
result
(let ((attempt (parse-rule (car rules))))
(cond ((fail? attempt)
(buffer-set-index! parser-buffer saved-index)
'fail)
((succeed? attempt)
(aux saved-index result (cdr rules)))
(else
(aux saved-index (cons attempt result) (cdr rules)))))))
(let ((return (aux (buffer-real-index parser-buffer) null rules)))
(if (null? return)
'succ
return)))