diff options
Diffstat (limited to 'suffix tree/suffix-tree.el')
-rw-r--r-- | suffix tree/suffix-tree.el | 397 |
1 files changed, 0 insertions, 397 deletions
diff --git a/suffix tree/suffix-tree.el b/suffix tree/suffix-tree.el deleted file mode 100644 index 0d929d0..0000000 --- a/suffix tree/suffix-tree.el +++ /dev/null @@ -1,397 +0,0 @@ -;;; suffiex-tree.el --- Ukkonen algorithm for building a suffix tree -*- lexical-binding: t; -*- - -;;; Author: Durand -;;; Version: 0.0.1 - -;;; Commentary: - -;; Our node is represented as a list of the following elements: - -;; start , which is the starting index of the edge going from its parent -;; node to this node -;; end , the index of the end of the above edge -;; suffix-link, the index of the node this node's suffix-link points to -;; children , a hash-table of pairs of integers and indices of its -;; children - -;; To compute the length of the edge going into NODE we use: -;; (- (min end (1+ position)) start) -;; which is actually how far the position is on that edge, -;; if it is on that edge. - -;;; Code: - -;;;###autoload -(defun st-min (&rest args) - "Return the minimum among ARGS. -If an argument is 'infty, then it is considered greater than -every number." - (apply #'min (delq nil - (mapcar - (lambda (arg) - (cond - ((number-or-marker-p arg) arg) - ((eq arg 'infty) nil))) - args)))) - -;;;###autoload -(defun st-edge-length (node position) - "Return the length of the edge into NODE. -See the comment above this function for the reason -POSITION is here." - (- (st-min (car (cdr node)) (1+ position)) - (car node))) - -;;;###autoload -(defun st-new-node (tree last-added start &optional end) - "Make a new node with START and END as the coming edge. -Then add the new node to TREE. -LAST-ADDED is the number of elements already in the TREE." - (let* ((end (or end 'infty) ;; 'infty represents the index of a leaf - ) - (suffix-link 0) ;; The suffix link is initially 0 - (new-node - (list start end - suffix-link - (make-hash-table)))) - (puthash (1+ last-added) new-node tree) - (1+ last-added))) - -;;;###autoload -(defun st-add-suffix-link (tree need-sl node) - "If NEED-SL is positive, then add the suffix link. -In particular, the node corresponding to NEED-SL in TREE -gets a suffix link pointing to NODE. - -This always returns NODE." - (cond - ((and (> need-sl 1) (/= need-sl node)) - (setcar (cdr (cdr (gethash need-sl tree))) node))) - node) - -;;;###autoload -(defun st-canonize (tree node position active-edge-index active-length active-node) - "Walk down TREE to find the correct active point." - (let ((node-edge-length (st-edge-length (gethash node tree) position))) - (cond - ((>= active-length node-edge-length) - (list t - (+ active-edge-index - node-edge-length) - (- active-length - node-edge-length) - node)) - (t - (list nil - active-edge-index - active-length - active-node))))) - -;;;###autoload -(defun st-extend-tree (tree last-added position remain - active-node active-edge-index - active-length character str) - "Extend a tree by CHARACTER. -The return value is -(tree last-added remain active-node - active-edge-index active-length)" - (let* ((need-sl 0) - (remain (1+ remain)) - continue-p breakp) - (while (and (not breakp) (> remain 0)) - (setq continue-p nil breakp nil) - (cond - ((= active-length 0) - (setq active-edge-index position))) - (let* ((actual-node (gethash active-node tree)) - (nxt (cond - (actual-node - (gethash (aref str active-edge-index) - (cadr (cdr (cdr actual-node)))))))) - (cond - ((null nxt) - (let ((leaf (st-new-node tree last-added position))) - (setq last-added leaf) - (puthash (aref str active-edge-index) - leaf - (cadr (cdr (cdr (gethash active-node tree))))) - (setq need-sl (st-add-suffix-link tree need-sl active-node))) - ;; rule 2 - ) - (t - (let* ((result (st-canonize - tree nxt position active-edge-index - active-length active-node))) - (cond - ((car result) - ;; observation 2 - (setq active-edge-index (cadr result)) - (setq active-length (caddr result)) - (setq active-node (cadr (cddr result))) - (setq continue-p t)) - (t - (cond - ((eq (aref str (+ active-length - (car (gethash nxt tree)))) - character) - ;; observation 1 - (setq active-length (1+ active-length)) - (setq need-sl (st-add-suffix-link tree need-sl active-node)) - (setq breakp t))) - (cond - (breakp) - (t ;; splitting - (let ((split (st-new-node - tree last-added (car (gethash nxt tree)) - (+ (car (gethash nxt tree)) active-length)))) - (setq last-added split) - (puthash - (aref str active-edge-index) - split (cadr (cdr (cdr (gethash active-node tree))))) - (let ((leaf (st-new-node tree last-added position))) - (setq last-added leaf) - (puthash character leaf - (cadddr (gethash split tree))) - (setcar (gethash nxt tree) - (+ (car (gethash nxt tree)) - active-length)) - (puthash (aref str (car (gethash nxt tree))) nxt - (cadddr (gethash split tree))) - ;; rule 2 - (setq need-sl - (st-add-suffix-link tree need-sl split))))))))))) - (cond - ((or continue-p breakp)) - (t - (setq remain (1- remain)) - (cond - ((and (eq active-node 1) ; root - (> active-length 0)) - (setq active-length (1- active-length)) - (setq active-edge-index - (1+ (- position remain)))) - (t - (setq active-node - (let ((slink (caddr (gethash active-node tree)))) - (cond - ((> slink 1) slink) - ; or root - (t 1)))))))))) - (list tree last-added remain active-node - active-edge-index active-length))) - -;;;###autoload -(defun st-build-for-str (str) - "Build the suffix tree for STR." - (let* ((position 0) - (character (ignore-error 'error (aref str position))) - (tree (make-hash-table)) - (last-added 0) - (remain 0) - (active-node (st-new-node tree last-added -1 -1)) - (active-edge-index 0) - (active-length 0) - result) - (setq last-added active-node) - (while character - (setq result (st-extend-tree tree last-added position remain - active-node active-edge-index - active-length character str)) - (setq tree (pop result)) - (setq last-added (pop result)) - (setq remain (pop result)) - (setq active-node (pop result)) - (setq active-edge-index (pop result)) - (setq active-length (pop result)) - (setq position (1+ position)) - (setq character (ignore-error 'error (aref str position)))) - tree)) - -;;; Some printing functions - -(require 'hierarchy) - -;;;###autoload -(defun st-print-str (str) - "Print generalized string" - (mapc (lambda (char) - (cond ((characterp char) (insert char)) - (t (insert ", ")))) - str)) - -;;;###autoload -(defun st-print-tree-for-str (str) - "Print the suffix tree for STR." - (let* ((symbol (make-symbol "test"))) - (set symbol (st-build-for-str str)) - (insert "suffix tree for strings: ") - (st-print-str str) - (insert "\n") - (st-print-tree (symbol-value symbol) str))) - -;;;###autoload -(defun st-reduce-index (node strs) - "Reduce the second element of the NODE." - (let* ((lengths (mapcar #'length strs)) - (start (car node)) - (end (cadr node)) - (first-greater-than-start - (let ((sum -1) (index 0) done result) - (while (and (not done) (< index (length strs))) - (cond - ((<= (+ sum (nth index lengths)) start) - (setq index (1+ index)) - (setq sum (+ sum (nth index lengths)))) - (t (setq result (+ sum (nth index lengths))) - (setq done t)))) - (cond ((null result) (setq result (1- (apply #'+ lengths))))) - result))) - (st-min end first-greater-than-start)) - ;; (let* ((start (car node)) - ;; (end (cadr node)) - ;; (index start) - ;; result stop) - ;; (while (and (not stop) (or (eq end 'infty) (<= index end))) - ;; (cond - ;; ((and (< index (length str)) (characterp (aref str index))) (setq index (1+ index))) - ;; (t (setq stop t) - ;; (setq result index)))) - ;; (cond ((and (numberp end) (= index (1+ end))) (setq result end))) - ;; (cond ((<= result start) (setq result 'end))) - ;; result) - ) - -;; ;;;###autoload -;; (defun st-build-generalized-tree (strs) -;; (let* ((copy strs) -;; (long-str -;; (let ((index 1) result) -;; (while (consp copy) -;; (setq result (vconcat result -;; (car copy) -;; (vector (- index)))) -;; (setq index (1+ index)) -;; (setq copy (cdr copy))) -;; result)) -;; (tree (make-symbol "tree"))) -;; (set tree (st-build-for-str long-str)))) - -;;;###autoload -(defun st-print-tree (tree str) - "Print TREE with the aid of STR." - (let* ((symbol-tree (make-symbol "test-tree"))) - (set symbol-tree (hierarchy-new)) - (maphash - (lambda (key value) - (hierarchy-add-tree - (symbol-value symbol-tree) key nil - (lambda (item) - (hash-table-values (cadr (cdr (cdr (gethash item tree)))))))) - tree) - (hierarchy-print (symbol-value symbol-tree) - (lambda (item) - (cond ((= item 1) "root") - (t (let* ((node (gethash item tree)) - ;; (reduced (st-reduce-index node str)) - ) - (substring str (car node) - (cond ((integerp (cadr node)) (cadr node)))) - ;; (cond ((eq reduced 'end) "$") - ;; (t (apply #'string - ;; (append - ;; (seq-subseq str (car node) - ;; reduced - ;; ;; (cond ((integerp (cadr node)) (cadr node))) - ;; ) - ;; nil)))) - - ;; (format "%d): (%S, %S): (%d): \"%s\"" - ;; item - ;; (car node) - ;; (cadr node) - ;; (caddr node) - ;; (substring str (car node) (cond ((integerp (cadr node)) (cadr node)))) - ;; ) - ))))))) - -(provide 'suffix-tree) -;;; suffiex-tree.el ends here - -;;; archive - -;; ;;;###autoload -;; (defvar st-root nil -;; "The root of the suffix tree.") - -;; ;;;###autoload -;; (defvar st-position nil -;; "The current position in the string.") - -;; ;;;###autoload -;; (defvar st-wait-for-link nil -;; "The node that is waiting to have a suffix link.") - -;; ;;;###autoload -;; (defvar st-remain nil -;; "The number of remaining suffixes to insert.") - -;; ;;;###autoload -;; (defvar st-active-node nil -;; "Where to insert a new suffix.") - -;; ;;;###autoload -;; (defvar st-active-edge-index nil -;; "The index of the active edge.") - -;; ;;;###autoload -;; (defvar st-active-length nil -;; "How far down is the active point down the active edge.") - -;; (insert (format "after character %c, the tree becomes\n" character)) -;; (insert (format "string is %s\n" (substring str 0 (1+ position)))) -;; (insert (format "active-node: %d\n" active-node)) -;; (insert (format "active-edge: %c\n" (aref str active-edge-index))) -;; (insert (format "active-length: %d\n" active-length)) -;; (cond ((eq last-added 11) -;; (insert (format "slink: %d\n" (caddr (gethash active-node tree)))) -;; (insert (format "con: %S" continue-p)) -;; (insert (format "breakp: %S\n" breakp)))) -;; (st-print-tree tree (substring str 0 (1+ position))) -;; (insert "\n\n") - - -;; ;;;###autoload -;; (defvar st-output-buffer-name "*suffix-tree*" -;; "The name of the buffer that contains the output.") - -;; ;;;###autoload -;; (defun st-print-tree (tree node str prefix) -;; "Print TREE in the dedicated output buffer starting at NODE. -;; PREFIX is used to recursively call this function. - -;; We need STR since the information we stored in the tree -;; only contains the index into STR, -;; for the sake of optimizations." -;; (with-temp-buffer-window st-output-buffer-name '((display-buffer-at-bottom)) nil -;; (st-print-tree-internal tree node str prefix))) - -;; ;;;###autoload -;; (defun st-print-tree-internal (tree node str prefix) -;; "The function that really prints out the data." -;; (let* ((node-data (gethash node tree)) -;; (start (car node-data)) -;; (end (cadr node-data)) -;; (edges (cdddr node-data)) -;; (node-num (format "(%d)" node)) -;; (node-label (format "%s-%s-%s" -;; prefix node-num -;; (substring str start end)))) -;; (prin1 node-label) -;; ;; print the first edge -;; (let* ((prefix-base (concat -;; prefix -;; (make-string (length node-label) 32))) -;; ;; TODO -;; )))) - - |