summaryrefslogtreecommitdiff
path: root/suffix tree/suffix-tree.el
diff options
context:
space:
mode:
Diffstat (limited to 'suffix tree/suffix-tree.el')
-rw-r--r--suffix tree/suffix-tree.el397
1 files changed, 0 insertions, 397 deletions
diff --git a/suffix tree/suffix-tree.el b/suffix tree/suffix-tree.el
deleted file mode 100644
index 0d929d0..0000000
--- a/suffix tree/suffix-tree.el
+++ /dev/null
@@ -1,397 +0,0 @@
-;;; suffiex-tree.el --- Ukkonen algorithm for building a suffix tree -*- lexical-binding: t; -*-
-
-;;; Author: Durand
-;;; Version: 0.0.1
-
-;;; Commentary:
-
-;; Our node is represented as a list of the following elements:
-
-;; start , which is the starting index of the edge going from its parent
-;; node to this node
-;; end , the index of the end of the above edge
-;; suffix-link, the index of the node this node's suffix-link points to
-;; children , a hash-table of pairs of integers and indices of its
-;; children
-
-;; To compute the length of the edge going into NODE we use:
-;; (- (min end (1+ position)) start)
-;; which is actually how far the position is on that edge,
-;; if it is on that edge.
-
-;;; Code:
-
-;;;###autoload
-(defun st-min (&rest args)
- "Return the minimum among ARGS.
-If an argument is 'infty, then it is considered greater than
-every number."
- (apply #'min (delq nil
- (mapcar
- (lambda (arg)
- (cond
- ((number-or-marker-p arg) arg)
- ((eq arg 'infty) nil)))
- args))))
-
-;;;###autoload
-(defun st-edge-length (node position)
- "Return the length of the edge into NODE.
-See the comment above this function for the reason
-POSITION is here."
- (- (st-min (car (cdr node)) (1+ position))
- (car node)))
-
-;;;###autoload
-(defun st-new-node (tree last-added start &optional end)
- "Make a new node with START and END as the coming edge.
-Then add the new node to TREE.
-LAST-ADDED is the number of elements already in the TREE."
- (let* ((end (or end 'infty) ;; 'infty represents the index of a leaf
- )
- (suffix-link 0) ;; The suffix link is initially 0
- (new-node
- (list start end
- suffix-link
- (make-hash-table))))
- (puthash (1+ last-added) new-node tree)
- (1+ last-added)))
-
-;;;###autoload
-(defun st-add-suffix-link (tree need-sl node)
- "If NEED-SL is positive, then add the suffix link.
-In particular, the node corresponding to NEED-SL in TREE
-gets a suffix link pointing to NODE.
-
-This always returns NODE."
- (cond
- ((and (> need-sl 1) (/= need-sl node))
- (setcar (cdr (cdr (gethash need-sl tree))) node)))
- node)
-
-;;;###autoload
-(defun st-canonize (tree node position active-edge-index active-length active-node)
- "Walk down TREE to find the correct active point."
- (let ((node-edge-length (st-edge-length (gethash node tree) position)))
- (cond
- ((>= active-length node-edge-length)
- (list t
- (+ active-edge-index
- node-edge-length)
- (- active-length
- node-edge-length)
- node))
- (t
- (list nil
- active-edge-index
- active-length
- active-node)))))
-
-;;;###autoload
-(defun st-extend-tree (tree last-added position remain
- active-node active-edge-index
- active-length character str)
- "Extend a tree by CHARACTER.
-The return value is
-(tree last-added remain active-node
- active-edge-index active-length)"
- (let* ((need-sl 0)
- (remain (1+ remain))
- continue-p breakp)
- (while (and (not breakp) (> remain 0))
- (setq continue-p nil breakp nil)
- (cond
- ((= active-length 0)
- (setq active-edge-index position)))
- (let* ((actual-node (gethash active-node tree))
- (nxt (cond
- (actual-node
- (gethash (aref str active-edge-index)
- (cadr (cdr (cdr actual-node))))))))
- (cond
- ((null nxt)
- (let ((leaf (st-new-node tree last-added position)))
- (setq last-added leaf)
- (puthash (aref str active-edge-index)
- leaf
- (cadr (cdr (cdr (gethash active-node tree)))))
- (setq need-sl (st-add-suffix-link tree need-sl active-node)))
- ;; rule 2
- )
- (t
- (let* ((result (st-canonize
- tree nxt position active-edge-index
- active-length active-node)))
- (cond
- ((car result)
- ;; observation 2
- (setq active-edge-index (cadr result))
- (setq active-length (caddr result))
- (setq active-node (cadr (cddr result)))
- (setq continue-p t))
- (t
- (cond
- ((eq (aref str (+ active-length
- (car (gethash nxt tree))))
- character)
- ;; observation 1
- (setq active-length (1+ active-length))
- (setq need-sl (st-add-suffix-link tree need-sl active-node))
- (setq breakp t)))
- (cond
- (breakp)
- (t ;; splitting
- (let ((split (st-new-node
- tree last-added (car (gethash nxt tree))
- (+ (car (gethash nxt tree)) active-length))))
- (setq last-added split)
- (puthash
- (aref str active-edge-index)
- split (cadr (cdr (cdr (gethash active-node tree)))))
- (let ((leaf (st-new-node tree last-added position)))
- (setq last-added leaf)
- (puthash character leaf
- (cadddr (gethash split tree)))
- (setcar (gethash nxt tree)
- (+ (car (gethash nxt tree))
- active-length))
- (puthash (aref str (car (gethash nxt tree))) nxt
- (cadddr (gethash split tree)))
- ;; rule 2
- (setq need-sl
- (st-add-suffix-link tree need-sl split)))))))))))
- (cond
- ((or continue-p breakp))
- (t
- (setq remain (1- remain))
- (cond
- ((and (eq active-node 1) ; root
- (> active-length 0))
- (setq active-length (1- active-length))
- (setq active-edge-index
- (1+ (- position remain))))
- (t
- (setq active-node
- (let ((slink (caddr (gethash active-node tree))))
- (cond
- ((> slink 1) slink)
- ; or root
- (t 1))))))))))
- (list tree last-added remain active-node
- active-edge-index active-length)))
-
-;;;###autoload
-(defun st-build-for-str (str)
- "Build the suffix tree for STR."
- (let* ((position 0)
- (character (ignore-error 'error (aref str position)))
- (tree (make-hash-table))
- (last-added 0)
- (remain 0)
- (active-node (st-new-node tree last-added -1 -1))
- (active-edge-index 0)
- (active-length 0)
- result)
- (setq last-added active-node)
- (while character
- (setq result (st-extend-tree tree last-added position remain
- active-node active-edge-index
- active-length character str))
- (setq tree (pop result))
- (setq last-added (pop result))
- (setq remain (pop result))
- (setq active-node (pop result))
- (setq active-edge-index (pop result))
- (setq active-length (pop result))
- (setq position (1+ position))
- (setq character (ignore-error 'error (aref str position))))
- tree))
-
-;;; Some printing functions
-
-(require 'hierarchy)
-
-;;;###autoload
-(defun st-print-str (str)
- "Print generalized string"
- (mapc (lambda (char)
- (cond ((characterp char) (insert char))
- (t (insert ", "))))
- str))
-
-;;;###autoload
-(defun st-print-tree-for-str (str)
- "Print the suffix tree for STR."
- (let* ((symbol (make-symbol "test")))
- (set symbol (st-build-for-str str))
- (insert "suffix tree for strings: ")
- (st-print-str str)
- (insert "\n")
- (st-print-tree (symbol-value symbol) str)))
-
-;;;###autoload
-(defun st-reduce-index (node strs)
- "Reduce the second element of the NODE."
- (let* ((lengths (mapcar #'length strs))
- (start (car node))
- (end (cadr node))
- (first-greater-than-start
- (let ((sum -1) (index 0) done result)
- (while (and (not done) (< index (length strs)))
- (cond
- ((<= (+ sum (nth index lengths)) start)
- (setq index (1+ index))
- (setq sum (+ sum (nth index lengths))))
- (t (setq result (+ sum (nth index lengths)))
- (setq done t))))
- (cond ((null result) (setq result (1- (apply #'+ lengths)))))
- result)))
- (st-min end first-greater-than-start))
- ;; (let* ((start (car node))
- ;; (end (cadr node))
- ;; (index start)
- ;; result stop)
- ;; (while (and (not stop) (or (eq end 'infty) (<= index end)))
- ;; (cond
- ;; ((and (< index (length str)) (characterp (aref str index))) (setq index (1+ index)))
- ;; (t (setq stop t)
- ;; (setq result index))))
- ;; (cond ((and (numberp end) (= index (1+ end))) (setq result end)))
- ;; (cond ((<= result start) (setq result 'end)))
- ;; result)
- )
-
-;; ;;;###autoload
-;; (defun st-build-generalized-tree (strs)
-;; (let* ((copy strs)
-;; (long-str
-;; (let ((index 1) result)
-;; (while (consp copy)
-;; (setq result (vconcat result
-;; (car copy)
-;; (vector (- index))))
-;; (setq index (1+ index))
-;; (setq copy (cdr copy)))
-;; result))
-;; (tree (make-symbol "tree")))
-;; (set tree (st-build-for-str long-str))))
-
-;;;###autoload
-(defun st-print-tree (tree str)
- "Print TREE with the aid of STR."
- (let* ((symbol-tree (make-symbol "test-tree")))
- (set symbol-tree (hierarchy-new))
- (maphash
- (lambda (key value)
- (hierarchy-add-tree
- (symbol-value symbol-tree) key nil
- (lambda (item)
- (hash-table-values (cadr (cdr (cdr (gethash item tree))))))))
- tree)
- (hierarchy-print (symbol-value symbol-tree)
- (lambda (item)
- (cond ((= item 1) "root")
- (t (let* ((node (gethash item tree))
- ;; (reduced (st-reduce-index node str))
- )
- (substring str (car node)
- (cond ((integerp (cadr node)) (cadr node))))
- ;; (cond ((eq reduced 'end) "$")
- ;; (t (apply #'string
- ;; (append
- ;; (seq-subseq str (car node)
- ;; reduced
- ;; ;; (cond ((integerp (cadr node)) (cadr node)))
- ;; )
- ;; nil))))
-
- ;; (format "%d): (%S, %S): (%d): \"%s\""
- ;; item
- ;; (car node)
- ;; (cadr node)
- ;; (caddr node)
- ;; (substring str (car node) (cond ((integerp (cadr node)) (cadr node))))
- ;; )
- )))))))
-
-(provide 'suffix-tree)
-;;; suffiex-tree.el ends here
-
-;;; archive
-
-;; ;;;###autoload
-;; (defvar st-root nil
-;; "The root of the suffix tree.")
-
-;; ;;;###autoload
-;; (defvar st-position nil
-;; "The current position in the string.")
-
-;; ;;;###autoload
-;; (defvar st-wait-for-link nil
-;; "The node that is waiting to have a suffix link.")
-
-;; ;;;###autoload
-;; (defvar st-remain nil
-;; "The number of remaining suffixes to insert.")
-
-;; ;;;###autoload
-;; (defvar st-active-node nil
-;; "Where to insert a new suffix.")
-
-;; ;;;###autoload
-;; (defvar st-active-edge-index nil
-;; "The index of the active edge.")
-
-;; ;;;###autoload
-;; (defvar st-active-length nil
-;; "How far down is the active point down the active edge.")
-
-;; (insert (format "after character %c, the tree becomes\n" character))
-;; (insert (format "string is %s\n" (substring str 0 (1+ position))))
-;; (insert (format "active-node: %d\n" active-node))
-;; (insert (format "active-edge: %c\n" (aref str active-edge-index)))
-;; (insert (format "active-length: %d\n" active-length))
-;; (cond ((eq last-added 11)
-;; (insert (format "slink: %d\n" (caddr (gethash active-node tree))))
-;; (insert (format "con: %S" continue-p))
-;; (insert (format "breakp: %S\n" breakp))))
-;; (st-print-tree tree (substring str 0 (1+ position)))
-;; (insert "\n\n")
-
-
-;; ;;;###autoload
-;; (defvar st-output-buffer-name "*suffix-tree*"
-;; "The name of the buffer that contains the output.")
-
-;; ;;;###autoload
-;; (defun st-print-tree (tree node str prefix)
-;; "Print TREE in the dedicated output buffer starting at NODE.
-;; PREFIX is used to recursively call this function.
-
-;; We need STR since the information we stored in the tree
-;; only contains the index into STR,
-;; for the sake of optimizations."
-;; (with-temp-buffer-window st-output-buffer-name '((display-buffer-at-bottom)) nil
-;; (st-print-tree-internal tree node str prefix)))
-
-;; ;;;###autoload
-;; (defun st-print-tree-internal (tree node str prefix)
-;; "The function that really prints out the data."
-;; (let* ((node-data (gethash node tree))
-;; (start (car node-data))
-;; (end (cadr node-data))
-;; (edges (cdddr node-data))
-;; (node-num (format "(%d)" node))
-;; (node-label (format "%s-%s-%s"
-;; prefix node-num
-;; (substring str start end))))
-;; (prin1 node-label)
-;; ;; print the first edge
-;; (let* ((prefix-base (concat
-;; prefix
-;; (make-string (length node-label) 32)))
-;; ;; TODO
-;; ))))
-
-