(load-file "rep.so") (setq rep-dir (vc-call-backend 'Git 'root default-directory)) (setq test-parser (rep-new-parser (with-temp-buffer (insert-file-contents (expand-file-name "test.abnf" (expand-file-name "abnf grammars" (expand-file-name "grammar" rep-dir)))) (buffer-string)))) (defvar input nil "A vector that represents a testing input.") (setq input (vector 3 0 2 2 2 1 1 1 0 1)) (rep-recognize test-parser input) (rep-parse test-parser input) ;; (rep-parse-string ;; test-parser ;; "* title\nprice: 512\nnote: this is a test\n") ;; (rep-parse-string ;; test-parser ;; "183t3ru") ;; Below is an experimental tokenizer implemented in Emacs Lisp. ;; The tokens for the test grammar are as follows: ;; ;; 0. SP ;; 1. %xA ;; 2. TEXT ;; 3. %x2A ;; 4. note: ;; 5. price: ;; 6. DIGIT (defun example-tokenize (str) "Tokenize the string STR. The function returns a pair of vectors, one of positive integers and the other of corresponding spans in the original input. The span is represented as a cons-cell, whose `car' is the starting position in the input, and whose `cdr' is the length of the span. The tokens are as follows: 0. SP 1. %xA 2. TEXT 3. %x2A 4. note: 5. price: 6. DIGIT" (let ((i 0) (len (length str)) result result-spans) (while (< i len) (let ((element (aref str i))) (cond ((= element 32) (setq result (cons 0 result)) (setq result-spans (cons (cons i 1) result-spans)) (setq i (1+ i))) ((= element #xa) (setq result (cons 1 result)) (setq result-spans (cons (cons i 1) result-spans)) (setq i (1+ i))) ((and (= element #x2a) (or (= i 0) (= (aref str (1- i)) #xa))) (setq result (cons 3 result)) (setq result-spans (cons (cons i 1) result-spans)) (setq i (1+ i))) ((and (<= ?0 element) (<= element ?9)) (let ((j i) temp stop) (while (and (not stop) (< j len)) (setq temp (aref str j)) (cond ((and (<= ?0 temp) (<= temp ?9)) (setq j (min (1+ j) len))) ((setq stop t)))) (setq result (cons 6 result)) (setq result-spans (cons (cons i (- j i)) result-spans)) (setq i j))) ((and (= element ?n) (< (+ i 4) len) (= (aref str (+ i 1)) ?o) (= (aref str (+ i 2)) ?t) (= (aref str (+ i 3)) ?e) (= (aref str (+ i 4)) ?:)) (setq result (cons 4 result)) (setq result-spans (cons (cons i 5) result-spans)) (setq i (+ i 5))) ((and (= element ?p) (< (+ i 5) len) (= (aref str (+ i 1)) ?r) (= (aref str (+ i 2)) ?i) (= (aref str (+ i 3)) ?c) (= (aref str (+ i 4)) ?e) (= (aref str (+ i 5)) ?:)) (setq result (cons 5 result)) (setq result-spans (cons (cons i 6) result-spans)) (setq i (+ i 6))) ((setq result-spans (cons (cons i 1) result-spans)) (let ((j i) stop temp) (while (and (not stop) (< j len)) (setq temp (aref str j)) (cond ((= temp #xa) (setq stop t)) ((setq j (min (1+ j) len))))) (setq result (cons 2 result)) (setq result-spans (cons (cons i (- j i)) result-spans)) (setq i j)))))) (cons (apply #'vector (nreverse result)) (apply #'vector (nreverse result-spans))))) (defvar test-document (expand-file-name "test.document" (expand-file-name "test-data" rep-dir)) "A document for testing purposes.") (defvar input-spans nil "A vector that represents spans of tokens in the input.") (let ((result (example-tokenize (with-temp-buffer (insert-file-contents test-document) (buffer-substring-no-properties (point-min) (point-max)))))) (setq input (car result)) (setq input-spans (cdr result))) (rep-recognize test-parser input) (rep-parse test-parser input)