summaryrefslogtreecommitdiff
path: root/suffix tree/generalized-suffix-tree.el
diff options
context:
space:
mode:
Diffstat (limited to 'suffix tree/generalized-suffix-tree.el')
-rw-r--r--suffix tree/generalized-suffix-tree.el57
1 files changed, 40 insertions, 17 deletions
diff --git a/suffix tree/generalized-suffix-tree.el b/suffix tree/generalized-suffix-tree.el
index 07d9b18..0435614 100644
--- a/suffix tree/generalized-suffix-tree.el
+++ b/suffix tree/generalized-suffix-tree.el
@@ -59,7 +59,7 @@ And STR-LENS is here so that we know the lengths of previous strings."
(caddr node))
(t (gst-min (caddr node) (1+ position))))
(cond
- ((and (= node-active-num num)
+ ((and (/= node-active-num num)
leafp)
(or (cadr (assoc num leaf-labels #'eq))
(cadr node)))
@@ -102,9 +102,11 @@ In particular, the node corresponding to NEED-SL in TREE
gets a suffix link pointing to NODE.
This always returns NODE."
- (cond
- ((and (> need-sl 1) (/= need-sl node))
- (setcar (cdr (cdr (cdr (gethash need-sl tree)))) node)))
+ (let ((suffix-link-cdr (nthcdr 3 (gethash need-sl tree))))
+ (cond
+ ((and (> need-sl 1) (/= need-sl node)
+ (= (car suffix-link-cdr) 0))
+ (setcar suffix-link-cdr node))))
node)
;;;###autoload
@@ -119,7 +121,7 @@ shall continue walking down the tree, so we return t in the first
element, while setting other variables to the correct values so
that the caller of this function can use the updated values.
-ACTIVE-EDGE-INDEX is for the purpose of updating solely."
+ACTIVE-EDGE-INDEX is present solely for the purpose of updating."
(let* ((actual-node (gethash node tree))
(node-edge-length (gst-edge-length actual-node position active-number
str-lens)))
@@ -177,13 +179,21 @@ lengths repeatedly."
(car (nthcdr 4 actual-node)))))))
(cond
((null nxt)
- (let ((leaf (gst-new-node tree last-added num position nil
- (- position remain -1))))
- (setq last-added leaf)
- (puthash (gst-aref strs active-number active-edge-index)
- leaf
- (car (nthcdr 4 (gethash active-node tree))))
- (setq need-sl (gst-add-suffix-link tree need-sl active-node)))
+ ;; We don't want many terminating characters branches.
+ (cond
+ ((and (eq character -1)
+ (eq (caddr actual-node) 'infty)
+ (eq (gst-aref strs (car actual-node)
+ (cadr actual-node))
+ -1)))
+ (t
+ (let ((leaf (gst-new-node tree last-added num position nil
+ (- position remain -1))))
+ (setq last-added leaf)
+ (puthash (gst-aref strs active-number active-edge-index)
+ leaf
+ (car (nthcdr 4 (gethash active-node tree))))
+ (setq need-sl (gst-add-suffix-link tree need-sl active-node)))))
;; rule 2
)
(t
@@ -219,7 +229,12 @@ lengths repeatedly."
;; we don't want to break since this is a
;; terminating symbol. So we invent a new variable
;; to do this.
- (setq terminating-no-split t))
+ (setq terminating-no-split t)
+ ;; But if remain = 1 then we do want to break.
+ (cond
+ ((eq remain 1)
+ (setq breakp t)))
+ )
(t
(setq breakp t)))))
(cond
@@ -266,7 +281,15 @@ lengths repeatedly."
(let ((slink (cadddr (gethash active-node tree))))
(cond ; follow the suffix link or go to root
((> slink 1) slink)
- (t 1))))))))))
+ (t 1)))))))))
+ ;; (cond ((eq character -1)
+ ;; (insert (format "Extending terminating character\n"))
+ ;; (insert "strs: " (format "%S" strs) "\n")
+ ;; (lcs-debug remain active-node active-edge-index
+ ;; active-number active-length num
+ ;; continue-p breakp terminating-no-split)
+ ;; (gst-print-tree tree strs)))
+ )
(list tree last-added remain active-node
active-number active-edge-index active-length)))
@@ -284,14 +307,14 @@ lengths again."
(setq last-added active-node)
(while (< index len)
(let* ((position 0)
- (character (ignore-errors (gst-aref strs index position)))
+ (character (gst-aref strs index position))
(remain 0)
(active-node 1) ; start from the root
- (active-number index)
+ (active-number index) ; root comes
(active-edge-index 0)
(active-length 0)
result) ; temporary holder
- (while character
+ (while (and (< position (nth index str-lens)) character)
;; (setq old-character character)
(setq result (gst-extend-tree tree last-added position remain
active-node active-number active-edge-index