Justin Pombrio

What we preceive as reality is a construct of the mind.

Scheme Parser Combinators

; Justin Pombrio
; Parser Combinator Library
; ~2006

; Parser

(define (add-rule! name pattern-f pattern-args func)
  (hash-table/put! the-parser name (cons (lambda () (apply pattern-f pattern-args)) func)))

(define (parse string rule)
  (set! parser-buffer (make-buffer string 0))
  (parse-rule rule))

(define pass (lambda (x) x))
(define noise (lambda (x) 'succ))

; Fail / Succ

(define (fail? x) (eqv? x 'fail))
(define (succeed? x) (eqv? x 'succ))

; Buffer

(define (make-buffer string index) (cons index string))
(define (buffer-real-index buffer) (car buffer))
(define (buffer-set-index! buffer n) (set-car! buffer n))
(define (buffer-real-string buffer) (cdr buffer))
(define (buffer-string-length buffer) (- (string-length (buffer-real-string buffer))
                                         (buffer-real-index buffer)))
(define (buffer-string buffer) (string-tail (buffer-real-string buffer)
                                            (buffer-real-index buffer)))
(define (buffer-inc buffer n)
  (if (< (buffer-string-length buffer) n)
      (error "Buffer-increment - past end of string")
      (make-buffer (buffer-real-string buffer)
                   (+ n (buffer-real-index buffer)))))
(define (buffer-substring buffer n)
  (if (< (buffer-string-length buffer) n)
      (error "Buffer-substring - past end of string")
      (substring (buffer-real-string buffer)
                 (buffer-real-index buffer)
                 (+ (buffer-real-index buffer) n))))

; Global

(define the-parser (make-eqv-hash-table))

(define parser-buffer (make-buffer "" 0))

#| Buffer functions
   - either return new offset or 'fail
|#

(define (buffer-many-not buffer chars)
  (let ((offset (string-find-next-char-in-set (buffer-string buffer) chars)))
    (cond ((false? offset)
           (if (= 0 (buffer-string-length buffer)) 'fail (buffer-string-length buffer)))     
          ((= 0 offset) 'fail)
          (else offset))))

(define (buffer-one buffer chars)
  (cond ((= 0 (buffer-string-length buffer))
         'fail)
        ((char-set-member? chars (string-ref (buffer-substring buffer 1) 0))
         1)))

(define (buffer-many buffer chars)
  (buffer-many-not buffer (char-set-invert chars)))

(define (buffer-match-string buffer string)
  (if (string-prefix? string (buffer-string buffer))
      (string-length string)
      'fail))

(define (buffer-match-strings buffer strings)
  (cond ((null? strings)
         'fail)
        ((string-prefix? (car strings) (buffer-string buffer))
         (string-length (car strings)))
        (else (buffer-match-strings buffer (cdr strings)))))

(define (buffer-match-until-string buffer string)
  (let ((index (string-search-forward string (buffer-string buffer))))
    (if (false? index) 'fail index)))

#| Match - (match buffer pattern)
         - try match pattern
            if fail: return 'fail
            if succ: update index & return token
|#

(define (make-matcher buffer-func)
  (lambda args
    (let ((offset (apply buffer-func (cons parser-buffer args))))
      (if (fail? offset)
          'fail
          (let ((return-string (buffer-substring parser-buffer offset)))
            (set! parser-buffer (buffer-inc parser-buffer offset))
            return-string)))))

(define match-one-chars (make-matcher buffer-one))
(define match-chars (make-matcher buffer-many))
(define match-until-chars (make-matcher buffer-many-not))
(define match-string (make-matcher buffer-match-string))
(define match-strings (make-matcher buffer-match-strings))
(define match-until-string (make-matcher buffer-match-until-string))

#| Rule - (rule 'name (lambda () <match>) (lambda (result) <use-result>))
        - Attempt <match>
           if succ: return (use-result)
           if fail: 'fail
|#

(define (parse-rule name)
  (if (not (hash-table/get the-parser name #f))
      (error "Parser - rule not defined: " name))
  (let ((match-func (car (hash-table/get the-parser name #f)))
        (use-func (cdr (hash-table/get the-parser name #f))))
    (let ((result (match-func)))
      (if (fail? result)
          'fail
          (use-func result)))))

#| Complete - (complete rule)
            - attempt rule
               if fail or buffer non-empty: 'fail
               else: result
|#

(define (parse-complete rule)
  (let ((result (parse-rule rule)))
    (cond ((fail? result) 'fail)
          ((not (= (buffer-string-length parser-buffer) 0)) 'fail)
          (else result))))

#| Many - (many rule)
        - repeat rule until it fails
           return a list of results
        - if rule is not applied, return '()
|#

(define (parse-many rule)
  (define (aux results)
    (let ((result (parse-rule rule)))
      (if (fail? result)
          results
          (aux (cons result results)))))
  (aux '()))

#| Try - (try . rules)
       - Attempt rules in order
          return first non-fail/succ result
          or 'fail
|#

(define (parse-try . rules)
  (if (null? rules)
      'fail
      (let ((attempt (parse-rule (car rules))))
        (if (or (fail? attempt) (succeed? attempt))
            (apply parse-try (cdr rules))
            attempt))))

#| Seq - (seq . rules)
       - save index
       - attempt rules in order
          if any fails: reset index
                        return 'fail
          if there is a non-fail/succ result, return the last one
          else return 'succ
|#

(define (parse-seq . rules)
  (define (aux saved-index result rules)
    (if (null? rules)
        result
        (let ((attempt (parse-rule (car rules))))
          (cond ((fail? attempt)
                 (buffer-set-index! parser-buffer saved-index)
                 'fail)
                ((succeed? attempt)
                 (aux saved-index result (cdr rules)))
                (else
                 (aux saved-index attempt (cdr rules)))))))
  (aux (buffer-real-index parser-buffer) 'succ rules))

#| SeqList - (seq . rules)
           - save index
           - attempt rules in order
              if any fails: reset index
                            return 'fail
              if there are non-fail/succ results, return a list of them
              else return 'succ
|#

(define (parse-seq-list . rules)
  (define (aux saved-index result rules)
    (if (null? rules)
        result
        (let ((attempt (parse-rule (car rules))))
          (cond ((fail? attempt)
                 (buffer-set-index! parser-buffer saved-index)
                 'fail)
                ((succeed? attempt)
                 (aux saved-index result (cdr rules)))
                (else
                 (aux saved-index (cons attempt result) (cdr rules)))))))
  (let ((return (aux (buffer-real-index parser-buffer) null rules)))
    (if (null? return)
        'succ
        return)))