diff options
Diffstat (limited to 'suffix tree/generalized-suffix-tree.el')
-rw-r--r-- | suffix tree/generalized-suffix-tree.el | 57 |
1 files changed, 40 insertions, 17 deletions
diff --git a/suffix tree/generalized-suffix-tree.el b/suffix tree/generalized-suffix-tree.el index 07d9b18..0435614 100644 --- a/suffix tree/generalized-suffix-tree.el +++ b/suffix tree/generalized-suffix-tree.el @@ -59,7 +59,7 @@ And STR-LENS is here so that we know the lengths of previous strings." (caddr node)) (t (gst-min (caddr node) (1+ position)))) (cond - ((and (= node-active-num num) + ((and (/= node-active-num num) leafp) (or (cadr (assoc num leaf-labels #'eq)) (cadr node))) @@ -102,9 +102,11 @@ In particular, the node corresponding to NEED-SL in TREE gets a suffix link pointing to NODE. This always returns NODE." - (cond - ((and (> need-sl 1) (/= need-sl node)) - (setcar (cdr (cdr (cdr (gethash need-sl tree)))) node))) + (let ((suffix-link-cdr (nthcdr 3 (gethash need-sl tree)))) + (cond + ((and (> need-sl 1) (/= need-sl node) + (= (car suffix-link-cdr) 0)) + (setcar suffix-link-cdr node)))) node) ;;;###autoload @@ -119,7 +121,7 @@ shall continue walking down the tree, so we return t in the first element, while setting other variables to the correct values so that the caller of this function can use the updated values. -ACTIVE-EDGE-INDEX is for the purpose of updating solely." +ACTIVE-EDGE-INDEX is present solely for the purpose of updating." (let* ((actual-node (gethash node tree)) (node-edge-length (gst-edge-length actual-node position active-number str-lens))) @@ -177,13 +179,21 @@ lengths repeatedly." (car (nthcdr 4 actual-node))))))) (cond ((null nxt) - (let ((leaf (gst-new-node tree last-added num position nil - (- position remain -1)))) - (setq last-added leaf) - (puthash (gst-aref strs active-number active-edge-index) - leaf - (car (nthcdr 4 (gethash active-node tree)))) - (setq need-sl (gst-add-suffix-link tree need-sl active-node))) + ;; We don't want many terminating characters branches. + (cond + ((and (eq character -1) + (eq (caddr actual-node) 'infty) + (eq (gst-aref strs (car actual-node) + (cadr actual-node)) + -1))) + (t + (let ((leaf (gst-new-node tree last-added num position nil + (- position remain -1)))) + (setq last-added leaf) + (puthash (gst-aref strs active-number active-edge-index) + leaf + (car (nthcdr 4 (gethash active-node tree)))) + (setq need-sl (gst-add-suffix-link tree need-sl active-node))))) ;; rule 2 ) (t @@ -219,7 +229,12 @@ lengths repeatedly." ;; we don't want to break since this is a ;; terminating symbol. So we invent a new variable ;; to do this. - (setq terminating-no-split t)) + (setq terminating-no-split t) + ;; But if remain = 1 then we do want to break. + (cond + ((eq remain 1) + (setq breakp t))) + ) (t (setq breakp t))))) (cond @@ -266,7 +281,15 @@ lengths repeatedly." (let ((slink (cadddr (gethash active-node tree)))) (cond ; follow the suffix link or go to root ((> slink 1) slink) - (t 1)))))))))) + (t 1))))))))) + ;; (cond ((eq character -1) + ;; (insert (format "Extending terminating character\n")) + ;; (insert "strs: " (format "%S" strs) "\n") + ;; (lcs-debug remain active-node active-edge-index + ;; active-number active-length num + ;; continue-p breakp terminating-no-split) + ;; (gst-print-tree tree strs))) + ) (list tree last-added remain active-node active-number active-edge-index active-length))) @@ -284,14 +307,14 @@ lengths again." (setq last-added active-node) (while (< index len) (let* ((position 0) - (character (ignore-errors (gst-aref strs index position))) + (character (gst-aref strs index position)) (remain 0) (active-node 1) ; start from the root - (active-number index) + (active-number index) ; root comes (active-edge-index 0) (active-length 0) result) ; temporary holder - (while character + (while (and (< position (nth index str-lens)) character) ;; (setq old-character character) (setq result (gst-extend-tree tree last-added position remain active-node active-number active-edge-index |