diff options
-rw-r--r-- | comb/suffix-tree.el | 138 |
1 files changed, 64 insertions, 74 deletions
diff --git a/comb/suffix-tree.el b/comb/suffix-tree.el index f7149da..af55ba9 100644 --- a/comb/suffix-tree.el +++ b/comb/suffix-tree.el @@ -1,22 +1,26 @@ ;;; suffiex-tree.el --- Ukkonen algorithm for building a suffix tree -*- lexical-binding: t; -*- +;;; Author: Durand +;;; Version: 0.0.1 -;; Our node is represented as a list with the following elements: +;;; Commentary: -;; start, -;; which is the starting index of the edge going from its parent -;; node -;; end, the index of the end index -;; suffix-link, the index of the node its suffix-link points to -;; children, -;; a hash-table of pairs of integers and indices of its -;; children +;; Our node is represented as a list of the following elements: + +;; start , which is the starting index of the edge going from its parent +;; node to this node +;; end , the index of the end of the above edge +;; suffix-link, the index of the node this node's suffix-link points to +;; children , a hash-table of pairs of integers and indices of its +;; children ;; To compute the length of the edge going into NODE we use: ;; (- (min end (1+ st-position)) start) ;; which is actually how far the position is on that edge, ;; if it is on that edge. +;;; Code: + ;;;###autoload (defun st-min (&rest args) "Return the minimum among ARGS. @@ -61,7 +65,7 @@ gets a suffix link pointing to NODE. This always returns NODE." (cond - ((> need-sl 0) + ((and (> need-sl 1) (/= need-sl node)) (setcar (cdr (cdr (gethash need-sl tree))) node))) node) @@ -83,15 +87,6 @@ This always returns NODE." active-length active-node))))) -;;; For review later -;;;###autoload -;; (defun st-init) -;; { -;; needSL = 0, last_added = 0, pos = -1, -;; remainder = 0, active_node = 0, active_e = 0, active_len = 0; -;; root = active_node = new_node(-1, -1); -;; } - ;;;###autoload (defun st-extend-tree (tree last-added position remain active-node active-edge-index @@ -104,6 +99,7 @@ The return value is (remain (1+ remain)) continue-p breakp) (while (and (not breakp) (> remain 0)) + (setq continue-p nil breakp nil) (cond ((= active-length 0) (setq active-edge-index position))) @@ -112,9 +108,6 @@ The return value is (actual-node (gethash (aref str active-edge-index) (cadr (cdr (cdr actual-node)))))))) - ;; (cond ((null actual-node) (user-error "active-node: %S" active-node))) - ;; (message "aei: %S" active-edge-index) - ;; (message "act: %S" (gethash active-node tree)) (cond ((null nxt) (let ((leaf (st-new-node tree last-added position))) @@ -122,9 +115,9 @@ The return value is (puthash (aref str active-edge-index) leaf (cadr (cdr (cdr (gethash active-node tree))))) - (setq need-sl (st-add-suffix-link tree need-sl active-node)) + (setq need-sl (st-add-suffix-link tree need-sl active-node))) ;; rule 2 - )) + ) (t (let* ((result (st-canonize tree nxt position active-edge-index @@ -181,10 +174,9 @@ The return value is (setq active-node (let ((slink (caddr (gethash active-node tree)))) (cond - ((> slink 0) slink) + ((> slink 1) slink) ; or root (t 1)))))))))) - ;; (message "remain is %d" remain) (list tree last-added remain active-node active-edge-index active-length))) @@ -239,26 +231,65 @@ The return value is (lambda (item) (cond ((= item 1) "root") (t (let ((node (gethash item tree))) - (substring str (car node) (cond ((integerp (cadr node)) (cadr node))))))))))) - - - - + (substring str (car node) (cond ((integerp (cadr node)) (cadr node)))) + ;; (format "%d): (%S, %S): (%d): \"%s\"" + ;; item + ;; (car node) + ;; (cadr node) + ;; (caddr node) + ;; (substring str (car node) (cond ((integerp (cadr node)) (cadr node)))) + ;; ) + ))))))) +;;; Some printing functions +(use-package "hierarchy" 'hierarchy) +(provide 'suffix-tree) +;;; suffiex-tree.el ends here +;;; archive +;; ;;;###autoload +;; (defvar st-root nil +;; "The root of the suffix tree.") +;; ;;;###autoload +;; (defvar st-position nil +;; "The current position in the string.") +;; ;;;###autoload +;; (defvar st-wait-for-link nil +;; "The node that is waiting to have a suffix link.") +;; ;;;###autoload +;; (defvar st-remain nil +;; "The number of remaining suffixes to insert.") +;; ;;;###autoload +;; (defvar st-active-node nil +;; "Where to insert a new suffix.") +;; ;;;###autoload +;; (defvar st-active-edge-index nil +;; "The index of the active edge.") +;; ;;;###autoload +;; (defvar st-active-length nil +;; "How far down is the active point down the active edge.") -;;; Some printing functions +;; (insert (format "after character %c, the tree becomes\n" character)) +;; (insert (format "string is %s\n" (substring str 0 (1+ position)))) +;; (insert (format "active-node: %d\n" active-node)) +;; (insert (format "active-edge: %c\n" (aref str active-edge-index))) +;; (insert (format "active-length: %d\n" active-length)) +;; (cond ((eq last-added 11) +;; (insert (format "slink: %d\n" (caddr (gethash active-node tree)))) +;; (insert (format "con: %S" continue-p)) +;; (insert (format "breakp: %S\n" breakp)))) +;; (st-print-tree tree (substring str 0 (1+ position))) +;; (insert "\n\n") -(use-package "hierarchy" 'hierarchy) ;; ;;;###autoload ;; (defvar st-output-buffer-name "*suffix-tree*" @@ -295,44 +326,3 @@ The return value is ;; )))) - - - - - - - - - -(provide 'suffix-tree) -;;; suffiex-tree.el ends here. - -;;; archive - -;; ;;;###autoload -;; (defvar st-root nil -;; "The root of the suffix tree.") - -;; ;;;###autoload -;; (defvar st-position nil -;; "The current position in the string.") - -;; ;;;###autoload -;; (defvar st-wait-for-link nil -;; "The node that is waiting to have a suffix link.") - -;; ;;;###autoload -;; (defvar st-remain nil -;; "The number of remaining suffixes to insert.") - -;; ;;;###autoload -;; (defvar st-active-node nil -;; "Where to insert a new suffix.") - -;; ;;;###autoload -;; (defvar st-active-edge-index nil -;; "The index of the active edge.") - -;; ;;;###autoload -;; (defvar st-active-length nil -;; "How far down is the active point down the active edge.") |