summary | refs | log | tree | commit | diff
path: root/src/test.el
diff options
context:
space:
mode:
Diffstat (limited to 'src/test.el')
-rw-r--r-- src/test.el 109
1 files changed, 109 insertions, 0 deletions
diff --git a/src/test.el b/src/test.el
index d106a03..a6f6e7a 100644
--- a/src/test.el
+++ b/src/test.el
@@ -26,3 +26,112 @@
;; (rep-parse-string
;; test-parser
;; "183t3ru")
+
+;; Below is an experimental tokenizer implemented in Emacs Lisp.
+
+;; The tokens for the test grammar are as follows:
+;;
+;; 0. SP
+;; 1. %xA
+;; 2. TEXT
+;; 3. %x2A
+;; 4. note:
+;; 5. price:
+;; 6. DIGIT
+
+(defun example-tokenize (str)
+  "Tokenize the string STR.
+Return a cons cell (TOKENS . SPANS) of two vectors of equal
+length.  TOKENS holds the token numbers listed below.  SPANS
+holds, for each token, a cons cell whose `car' is the starting
+position in STR and whose `cdr' is the length of the span.
+Both vectors are empty when STR is empty.
+
+The tokens are as follows:
+
+0. SP
+1. %xA
+2. TEXT
+3. %x2A
+4. note:
+5. price:
+6. DIGIT
+
+SP and %xA are single characters.  %x2A is only recognized at
+the start of a line; elsewhere it is TEXT.  DIGIT covers a
+maximal run of decimal digits.  TEXT covers everything else, up
+to but excluding the next %xA."
+  (let ((i 0) (len (length str)) result result-spans)
+    (while (< i len)
+      (let ((element (aref str i))
+            ;; J is one past the end of the token; the single-character
+            ;; tokens keep this default.
+            (j (1+ i))
+            token)
+        (cond
+         ;; SP, %xA and line-initial %x2A are single-character tokens.
+         ((= element 32) (setq token 0))
+         ((= element #xa) (setq token 1))
+         ((and (= element #x2a)
+               (or (= i 0) (= (aref str (1- i)) #xa)))
+          (setq token 3))
+         ;; DIGIT: extend J over the whole run of digits.
+         ((and (<= ?0 element) (<= element ?9))
+          (while (and (< j len)
+                      (<= ?0 (aref str j))
+                      (<= (aref str j) ?9))
+            (setq j (1+ j)))
+          (setq token 6))
+         ;; The keywords are matched literally, colon included, and only
+         ;; when they fit entirely inside STR.
+         ((and (= element ?n)
+               (< (+ i 4) len)
+               (= (aref str (+ i 1)) ?o)
+               (= (aref str (+ i 2)) ?t)
+               (= (aref str (+ i 3)) ?e)
+               (= (aref str (+ i 4)) ?:))
+          (setq token 4)
+          (setq j (+ i 5)))
+         ((and (= element ?p)
+               (< (+ i 5) len)
+               (= (aref str (+ i 1)) ?r)
+               (= (aref str (+ i 2)) ?i)
+               (= (aref str (+ i 3)) ?c)
+               (= (aref str (+ i 4)) ?e)
+               (= (aref str (+ i 5)) ?:))
+          (setq token 5)
+          (setq j (+ i 6)))
+         ;; Anything else starts a TEXT token.  The clause is a plain `t'
+         ;; so that matching it records nothing by itself; the span is
+         ;; pushed once, below, together with its token.
+         (t
+          (while (and (< j len) (/= (aref str j) #xa))
+            (setq j (1+ j)))
+          (setq token 2)))
+        ;; Exactly one token and one span per iteration keeps the two
+        ;; result vectors aligned.
+        (setq result (cons token result))
+        (setq result-spans (cons (cons i (- j i)) result-spans))
+        (setq i j)))
+    (cons (apply #'vector (nreverse result))
+          (apply #'vector (nreverse result-spans)))))
+
+(defvar test-document
+  (let ((data-dir (expand-file-name "test-data" rep-dir)))
+    (expand-file-name "test.document" data-dir))
+  "File name of the document used for testing.
+It is \"test.document\" in the \"test-data\" directory, resolved
+relative to `rep-dir'.")
+
+(defvar input-spans nil
+  "Vector of (START . LENGTH) spans, one per token of the input.")
+
+;; Tokenize the test document into `input' and `input-spans'.
+(let* ((text (with-temp-buffer
+               (insert-file-contents test-document)
+               (buffer-substring-no-properties (point-min) (point-max))))
+       (tokenized (example-tokenize text)))
+  (setq input (car tokenized) input-spans (cdr tokenized)))
+
+(rep-recognize test-parser input)
+(rep-parse test-parser input)