summaryrefslogtreecommitdiff
path: root/suffix tree
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2021-01-14 01:14:27 +0800
committerJSDurand <mmemmew@gmail.com>2021-01-14 01:14:27 +0800
commite2fe5882ad434b433bfba4529abdd2666d9c3d99 (patch)
tree815816360fb0bae6ad4c237e56c37360c3334893 /suffix tree
parent3666deaed5b0baf0a74f14db5872105c9e7865f9 (diff)
A temporary bug fix
There are more bugs unfortunately.
Diffstat (limited to 'suffix tree')
-rw-r--r--suffix tree/LCS.el31
-rw-r--r--suffix tree/generalized-suffix-tree.el122
-rw-r--r--suffix tree/gst test ground.txt176
-rw-r--r--suffix tree/suffix-tree files.zipbin14879 -> 0 bytes
4 files changed, 281 insertions, 48 deletions
diff --git a/suffix tree/LCS.el b/suffix tree/LCS.el
index 989c25e..41bcec0 100644
--- a/suffix tree/LCS.el
+++ b/suffix tree/LCS.el
@@ -24,9 +24,15 @@
;;;###autoload
(defun lcs-edge-length (node str-lens)
"Return the length of the edge going into NODE.
-Unlike `gst-edge-length', this does not need to know about POSITION
-and NUM, since it is assumed that the tree is already built
-before calling this function."
+
+Unlike `gst-edge-length', this does not need to know about
+POSITION and NUM, since it is assumed that the tree is already
+built before calling this function.
+
+If the node is a leaf, its end is denoted by infty, and we
+wouldn't know the exact length if we don't know the length of the
+string it corresponds to, so we need the argument STR-LENS to
+provide that information."
(let ((start (car (cdr node)))
(end (cond
((eq (car (cdr (cdr node))) 'infty)
@@ -47,7 +53,11 @@ before calling this function."
;;;###autoload
(defun lcs-get-env (node str-lens)
- "Return the information we want to keep on the stack."
+ "Return the information we want to keep from NODE on the stack.
+
+STR-LENS is here to provide information about the ending index of
+NODE if it is a lead. See the documentation for `lcs-edge-length'
+for more."
(let ((num (car node))
(start (car (cdr node)))
(end (cond
@@ -93,7 +103,10 @@ before calling this function."
;;;###autoload
(defun lcs-pretty-stack (stack)
- "Prettify a stack."
+ "Prettify STACK.
+
+Apply `lcs-pretty-stack-element' to each element, and concatenate
+them in a clean way."
(concat
"[ "
(mapconcat #'lcs-pretty-stack-element
@@ -146,7 +159,7 @@ Of course, without STRS, we don't even know what our strings are."
"Return the longest common substring of a list STRS of strings.
If JUST-LENGTH is non-nil, then this only returns the length of the
-longest common substring. Otherwise, it returns the length as well as the
+longest common substring. Otherwise, it returns the length as well as the
longest common substring itself."
(let* ((strs (mapcar (lambda (str)
(vconcat str (list -1)))
@@ -223,9 +236,9 @@ longest common substring itself."
current-children)
temp)
;; An alternative
- ;; (apply #'logior
- ;; (mapcar (lambda (num)
- ;; (gethash num bits-table))
+ ;; (apply #'logior
+ ;; (mapcar (lambda (num)
+ ;; (gethash num bits-table))
;; (hash-table-values current-children)))
))
(setq temp nil)
diff --git a/suffix tree/generalized-suffix-tree.el b/suffix tree/generalized-suffix-tree.el
index 227aea9..07d9b18 100644
--- a/suffix tree/generalized-suffix-tree.el
+++ b/suffix tree/generalized-suffix-tree.el
@@ -45,7 +45,9 @@ every number."
(defun gst-edge-length (node position num str-lens)
"Return the length of the edge into NODE.
See the comment above this function for the reason
-POSITION and NUM are here."
+POSITION and NUM are here.
+
+And STR-LENS is here so that we know the lengths of previous strings."
(let* ((node-active-num (car node))
(leafp (eq (car (cdr (cdr node))) 'infty))
(leaf-labels (cond (leafp (car (nthcdr 5 node))))))
@@ -83,10 +85,12 @@ LAST-ADDED is the number of elements already in the TREE."
;;;###autoload
(defun gst-add-leaf-label (tree leaf number start)
- "Add a label to LEAF."
+ "Add a label to LEAF in TREE.
+NUMBER and START represent the label."
(let* ((actual-node (gethash leaf tree))
(leaf-labels (cdr (cdr (cdr (cdr (cdr actual-node)))))))
- (cond ((consp leaf-labels)
+ (cond ((and (consp leaf-labels)
+ (not (eq (caaar leaf-labels) number)))
(setcar leaf-labels
(cons (list number start)
(car leaf-labels)))))))
@@ -106,7 +110,16 @@ This always returns NODE."
;;;###autoload
(defun gst-canonize (tree node position active-number active-edge-index active-length active-node
str-lens)
- "Walk down TREE to find the correct active point."
+ "Walk down TREE to find the correct active point.
+
+To be precise, we start from NODE, use POSITION, ACTIVE-NUMBER,
+and STR-LENS to calculate the length of the edge, and compare
+with ACTIVE-LENGTH. If it is less than that, then that means we
+shall continue walking down the tree, so we return t in the first
+element, while setting other variables to the correct values so
+that the caller of this function can use the updated values.
+
+ACTIVE-EDGE-INDEX is for the purpose of updating solely."
(let* ((actual-node (gethash node tree))
(node-edge-length (gst-edge-length actual-node position active-number
str-lens)))
@@ -128,23 +141,31 @@ This always returns NODE."
;;;###autoload
(defsubst gst-aref (strs num index)
- "Return the INDEX-th element in NUM-th element of STRS."
+ "Return the INDEX th element in NUM th element of STRS."
(aref (nth num strs) index))
;;;###autoload
(defun gst-extend-tree (tree last-added position remain
active-node active-number active-edge-index
active-length character strs num str-lens)
- "Extend a tree by CHARACTER in NUM-th string of STRS.
+ "Extend TREE by CHARACTER in NUM-th string of STRS.
The return value is
-(tree
+\(tree
last-added remain active-node
- active-number active-edge-index active-length)"
+ active-number active-edge-index active-length)
+
+Other parameters: LAST-ADDED, POSITION, REMAIN, ACTIVE-NODE,
+ACTIVE-NUMBER, ACTIVE-EDGE-INDEX, and ACTIVE-LENGTH have special
+meanings in the algorithm.
+
+STR-LENS can be supplied so that we don't have to calculate the
+lengths repeatedly."
(let* ((need-sl 0)
(remain (1+ remain))
- continue-p breakp)
+ continue-p breakp terminating-no-split)
(while (and (not breakp) (> remain 0))
(setq continue-p nil breakp nil)
+ (setq terminating-no-split nil)
(cond
((= active-length 0)
(setq active-edge-index position)
@@ -153,7 +174,7 @@ The return value is
(nxt (cond
(actual-node
(gethash (gst-aref strs active-number active-edge-index)
- (cadr (cdr (cdr (cdr actual-node)))))))))
+ (car (nthcdr 4 actual-node)))))))
(cond
((null nxt)
(let ((leaf (gst-new-node tree last-added num position nil
@@ -161,7 +182,7 @@ The return value is
(setq last-added leaf)
(puthash (gst-aref strs active-number active-edge-index)
leaf
- (cadr (cdr (cdr (cdr (gethash active-node tree))))))
+ (car (nthcdr 4 (gethash active-node tree))))
(setq need-sl (gst-add-suffix-link tree need-sl active-node)))
;; rule 2
)
@@ -188,14 +209,22 @@ The return value is
;; observation 1
(setq active-length (1+ active-length))
(setq need-sl (gst-add-suffix-link tree need-sl active-node))
- (setq breakp t)
- ;; add a label
+ ;; (setq breakp t)
+ ;; terminating symbol special handling
(cond
((eq character -1) ; terminating symbol
- (gst-add-leaf-label tree nxt num (- position remain -1))))))
+ (gst-add-leaf-label tree nxt num (- position remain -1))
+ (setq active-length (1- active-length))
+ ;; We don't want to split since this is a match. But
+ ;; we don't want to break since this is a
+ ;; terminating symbol. So we invent a new variable
+ ;; to do this.
+ (setq terminating-no-split t))
+ (t
+ (setq breakp t)))))
(cond
- (breakp)
- (t ;; splitting
+ ((or breakp terminating-no-split))
+ (t ; splitting
(let ((split (gst-new-node
tree last-added (car (gethash nxt tree)) (cadr (gethash nxt tree))
(+ (cadr (gethash nxt tree)) active-length))))
@@ -206,11 +235,14 @@ The return value is
(let ((leaf (gst-new-node tree last-added num position
nil (- position remain -1))))
(setq last-added leaf)
- (puthash character leaf
+ (puthash character
+ leaf
(cadddr (cdr (gethash split tree))))
(setcar (cdr (gethash nxt tree))
- (+ (cadr (gethash nxt tree))
- active-length))
+ (min
+ (+ (cadr (gethash nxt tree))
+ active-length)
+ (1- (nth (car (gethash nxt tree)) str-lens))))
(puthash (gst-aref strs
(car (gethash nxt tree))
(cadr (gethash nxt tree)))
@@ -258,9 +290,9 @@ lengths again."
(active-number index)
(active-edge-index 0)
(active-length 0)
- old-character result) ; temporary holder
+ result) ; temporary holder
(while character
- (setq old-character character)
+ ;; (setq old-character character)
(setq result (gst-extend-tree tree last-added position remain
active-node active-number active-edge-index
active-length character strs index
@@ -274,22 +306,24 @@ lengths again."
(setq active-length (pop result))
(setq position (1+ position))
(setq character (ignore-errors (gst-aref strs index position)))
- (cond
- ((characterp old-character)
- (insert (format "After adding character %c:\n" old-character))
- (gst-print-tree tree
- (append (seq-take strs index)
- (list (concat
- (seq-take (nth index strs) position)
- "$"))))
- (insert "\n\n"))
- ((= old-character -1)
- (insert (format "After adding character -1:\n"))
- (gst-print-tree tree
- (seq-take strs (1+ index)))
- (insert "\n\n")))))
+ ;; (cond
+ ;; ((characterp old-character)
+ ;; (insert (format "After adding character %c:\n" old-character))
+ ;; (gst-print-tree tree
+ ;; (append (seq-take strs index)
+ ;; (list (concat
+ ;; (seq-take (nth index strs) position)
+ ;; "$"))))
+ ;; (insert "\n\n"))
+ ;; ((= old-character -1)
+ ;; (insert (format "After adding character -1:\n"))
+ ;; (gst-print-tree tree
+ ;; (seq-take strs (1+ index)))
+ ;; (insert "\n\n")))
+ ))
(setq index (1+ index))
- (insert "\n \n"))
+ ;; (insert "\n \n")
+ )
tree))
;;; Printing
@@ -312,7 +346,10 @@ lengths again."
;;;###autoload
(defun gst-pretty-hash-table (table)
- "Only returns the useful parts from a table."
+ "Only return the useful parts from TABLE.
+
+To be precise, this returns the keys and values of the
+hash-table, in the form of an alist."
(let (keys-values-alist)
(maphash
(lambda (key value)
@@ -353,7 +390,13 @@ lengths again."
;;;###autoload
(defun gst-print-tree (tree strs)
"Print TREE with the aid of STRS."
- (let* ((symbol-tree (make-symbol "new-hierarchy")))
+ (let* ((symbol-tree (make-symbol "new-hierarchy"))
+ (strs (mapcar (lambda (str)
+ (mapcar (lambda (c)
+ (cond ((eq c -1) ?$)
+ (c)))
+ str))
+ strs)))
(set symbol-tree (hierarchy-new))
(maphash
(lambda (key value)
@@ -395,4 +438,5 @@ lengths again."
(t actual-str)))))))))
(provide 'generalized-suffix-tree)
-;;; generalized-suffiex-tree.el ends here
+
+;;; generalized-suffix-tree.el ends here
diff --git a/suffix tree/gst test ground.txt b/suffix tree/gst test ground.txt
index 2dad02e..84524cf 100644
--- a/suffix tree/gst test ground.txt
+++ b/suffix tree/gst test ground.txt
@@ -43,6 +43,8 @@ root
+Debugging...
+
After adding character i:
root
i$ (0 : 0)
@@ -12289,3 +12291,177 @@ root
+
+Debugging continues.
+
+Generalized suffix tree for: ido-completions, comint-completion-at-point, previous-completion, completion-flex--make-flex-pattern, eldoc-remove-command-completions:
+root
+ i
+ o
+ us-completion$ (2 : 4)
+ n
+ -
+ flex--make-flex-pattern$ (3 : 7)
+ at-point$ (1 : 14)
+ s$ (4 : 28)$ (0 : 11)
+ $ (2 : 16)
+ do-completions$ (0 : 0)
+ nt
+ $ (1 : 23)
+ -completion-at-point$ (1 : 3)
+ d
+ -completions$ (4 : 19)
+ o
+ c-remove-command-completions$ (4 : 2)
+ -completions$ (0 : 1)
+ o
+ m
+ int-completion-at-point$ (1 : 1)
+ pletion
+ -
+ flex--make-flex-pattern$ (3 : 1)
+ at-point$ (1 : 8)
+ s$ (4 : 22)$ (0 : 5)
+ $ (2 : 10)
+ mand-completions$ (4 : 14)
+ -completions$ (0 : 2)
+ n
+ -
+ flex--make-flex-pattern$ (3 : 8)
+ at-point$ (1 : 15)
+ s$ (4 : 29)$ (0 : 12)
+ $ (2 : 17)
+ int$ (1 : 22)
+ us-completion$ (2 : 5)
+ c-remove-command-completions$ (4 : 3)
+ ve-command-completions$ (4 : 9)
+ -
+ at-point$ (1 : 17)
+ com
+ mand-completions$ (4 : 12)
+ pletion
+ -at-point$ (1 : 6)
+ s$ (4 : 20)$ (0 : 3)
+ $ (2 : 8)
+ p
+ attern$ (3 : 26)
+ oint$ (1 : 20)
+ flex-
+ pattern$ (3 : 21)
+ -make-flex-pattern$ (3 : 10)
+ -make-flex-pattern$ (3 : 15)
+ make-flex-pattern$ (3 : 16)
+ remove-command-completions$ (4 : 5)
+ c
+ -remove-command-completions$ (4 : 4)
+ om
+ int-completion-at-point$ (1 : 0)
+ pletion
+ -
+ flex--make-flex-pattern$ (3 : 0)
+ at-point$ (1 : 7)
+ s$ (4 : 21)$ (0 : 4)
+ $ (2 : 9)
+ mand-completions$ (4 : 13)
+ m
+ int-completion-at-point$ (1 : 2)
+ pletion
+ -
+ flex--make-flex-pattern$ (3 : 2)
+ at-point$ (1 : 9)
+ s$ (4 : 23)$ (0 : 6)
+ $ (2 : 11)
+ a
+ nd-completions$ (4 : 16)
+ ke-flex-pattern$ (3 : 17)
+ ove-command-completions$ (4 : 8)
+ mand-completions$ (4 : 15)
+ p
+ oint$ (1 : 21)
+ letion
+ -
+ flex--make-flex-pattern$ (3 : 3)
+ at-point$ (1 : 10)
+ s$ (4 : 24)$ (0 : 7)
+ $ (2 : 12)
+ revious-completion$ (2 : 0)
+ attern$ (3 : 27)
+ l
+ doc-remove-command-completions$ (4 : 1)
+ e
+ x-
+ pattern$ (3 : 23)
+ -make-flex-pattern$ (3 : 12)
+ tion
+ -
+ flex--make-flex-pattern$ (3 : 4)
+ at-point$ (1 : 11)
+ s$ (4 : 25)$ (0 : 8)
+ $ (2 : 13)
+ e
+ vious-completion$ (2 : 2)
+ tion
+ -
+ flex--make-flex-pattern$ (3 : 5)
+ at-point$ (1 : 12)
+ s$ (4 : 26)$ (0 : 9)
+ $ (2 : 14)
+ x-
+ pattern$ (3 : 24)
+ -make-flex-pattern$ (3 : 13)
+ -
+ command-completions$ (4 : 11)
+ flex-pattern$ (3 : 20)
+ rn$ (3 : 31)
+ ldoc-remove-command-completions$ (4 : 0)
+ move-command-completions$ (4 : 7)
+ t
+ -
+ point$ (1 : 19)
+ completion-at-point$ (1 : 5)
+ ion
+ -
+ flex--make-flex-pattern$ (3 : 6)
+ at-point$ (1 : 13)
+ s$ (4 : 27)$ (0 : 10)
+ $ (2 : 15)
+ $ (1 : 25)
+ tern$ (3 : 29)
+ ern$ (3 : 30)
+ n
+ t
+ $ (1 : 24)
+ -completion-at-point$ (1 : 4)
+ s$ (4 : 30)$ (0 : 13)
+ -
+ flex--make-flex-pattern$ (3 : 9)
+ at-point$ (1 : 16)
+ $ (3 : 33)$ (2 : 18)
+ d-completions$ (4 : 18)
+ s
+ -completion$ (2 : 7)
+ $ (4 : 31)$ (0 : 14)
+ $ (4 : 32)$ (3 : 34)$ (2 : 19)$ (1 : 26)$ (0 : 15)
+ a
+ ke-flex-pattern$ (3 : 18)
+ t
+ tern$ (3 : 28)
+ -point$ (1 : 18)
+ nd-completions$ (4 : 17)
+ r
+ n$ (3 : 32)
+ e
+ move-command-completions$ (4 : 6)
+ vious-completion$ (2 : 1)
+ v
+ e-command-completions$ (4 : 10)
+ ious-completion$ (2 : 3)
+ us-completion$ (2 : 6)
+ flex-
+ pattern$ (3 : 22)
+ -make-flex-pattern$ (3 : 11)
+ x-
+ pattern$ (3 : 25)
+ -make-flex-pattern$ (3 : 14)
+ ke-flex-pattern$ (3 : 19)
+
diff --git a/suffix tree/suffix-tree files.zip b/suffix tree/suffix-tree files.zip
deleted file mode 100644
index 946352b..0000000
--- a/suffix tree/suffix-tree files.zip
+++ /dev/null
Binary files differ