diff options
author | JSDurand <mmemmew@gmail.com> | 2021-01-14 01:14:27 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2021-01-14 01:14:27 +0800 |
commit | e2fe5882ad434b433bfba4529abdd2666d9c3d99 (patch) | |
tree | 815816360fb0bae6ad4c237e56c37360c3334893 /suffix tree | |
parent | 3666deaed5b0baf0a74f14db5872105c9e7865f9 (diff) |
A temporary bug fix
There are more bugs unfortunately.
Diffstat (limited to 'suffix tree')
-rw-r--r-- | suffix tree/LCS.el | 31 | ||||
-rw-r--r-- | suffix tree/generalized-suffix-tree.el | 122 | ||||
-rw-r--r-- | suffix tree/gst test ground.txt | 176 | ||||
-rw-r--r-- | suffix tree/suffix-tree files.zip | bin | 14879 -> 0 bytes |
4 files changed, 281 insertions, 48 deletions
diff --git a/suffix tree/LCS.el b/suffix tree/LCS.el index 989c25e..41bcec0 100644 --- a/suffix tree/LCS.el +++ b/suffix tree/LCS.el @@ -24,9 +24,15 @@ ;;;###autoload (defun lcs-edge-length (node str-lens) "Return the length of the edge going into NODE. -Unlike `gst-edge-length', this does not need to know about POSITION -and NUM, since it is assumed that the tree is already built -before calling this function." + +Unlike `gst-edge-length', this does not need to know about +POSITION and NUM, since it is assumed that the tree is already +built before calling this function. + +If the node is a leaf, its end is denoted by infty, and we +wouldn't know the exact length if we don't know the length of the +string it corresponds to, so we need the argument STR-LENS to +provide that information." (let ((start (car (cdr node))) (end (cond ((eq (car (cdr (cdr node))) 'infty) @@ -47,7 +53,11 @@ before calling this function." ;;;###autoload (defun lcs-get-env (node str-lens) - "Return the information we want to keep on the stack." + "Return the information we want to keep from NODE on the stack. + +STR-LENS is here to provide information about the ending index of +NODE if it is a lead. See the documentation for `lcs-edge-length' +for more." (let ((num (car node)) (start (car (cdr node))) (end (cond @@ -93,7 +103,10 @@ before calling this function." ;;;###autoload (defun lcs-pretty-stack (stack) - "Prettify a stack." + "Prettify STACK. + +Apply `lcs-pretty-stack-element' to each element, and concatenate +them in a clean way." (concat "[ " (mapconcat #'lcs-pretty-stack-element @@ -146,7 +159,7 @@ Of course, without STRS, we don't even know what our strings are." "Return the longest common substring of a list STRS of strings. If JUST-LENGTH is non-nil, then this only returns the length of the -longest common substring. Otherwise, it returns the length as well as the +longest common substring. Otherwise, it returns the length as well as the longest common substring itself." (let* ((strs (mapcar (lambda (str) (vconcat str (list -1))) @@ -223,9 +236,9 @@ longest common substring itself." current-children) temp) ;; An alternative - ;; (apply #'logior - ;; (mapcar (lambda (num) - ;; (gethash num bits-table)) + ;; (apply #'logior + ;; (mapcar (lambda (num) + ;; (gethash num bits-table)) ;; (hash-table-values current-children))) )) (setq temp nil) diff --git a/suffix tree/generalized-suffix-tree.el b/suffix tree/generalized-suffix-tree.el index 227aea9..07d9b18 100644 --- a/suffix tree/generalized-suffix-tree.el +++ b/suffix tree/generalized-suffix-tree.el @@ -45,7 +45,9 @@ every number." (defun gst-edge-length (node position num str-lens) "Return the length of the edge into NODE. See the comment above this function for the reason -POSITION and NUM are here." +POSITION and NUM are here. + +And STR-LENS is here so that we know the lengths of previous strings." (let* ((node-active-num (car node)) (leafp (eq (car (cdr (cdr node))) 'infty)) (leaf-labels (cond (leafp (car (nthcdr 5 node)))))) @@ -83,10 +85,12 @@ LAST-ADDED is the number of elements already in the TREE." ;;;###autoload (defun gst-add-leaf-label (tree leaf number start) - "Add a label to LEAF." + "Add a label to LEAF in TREE. +NUMBER and START represent the label." (let* ((actual-node (gethash leaf tree)) (leaf-labels (cdr (cdr (cdr (cdr (cdr actual-node))))))) - (cond ((consp leaf-labels) + (cond ((and (consp leaf-labels) + (not (eq (caaar leaf-labels) number))) (setcar leaf-labels (cons (list number start) (car leaf-labels))))))) @@ -106,7 +110,16 @@ This always returns NODE." ;;;###autoload (defun gst-canonize (tree node position active-number active-edge-index active-length active-node str-lens) - "Walk down TREE to find the correct active point." + "Walk down TREE to find the correct active point. + +To be precise, we start from NODE, use POSITION, ACTIVE-NUMBER, +and STR-LENS to calculate the length of the edge, and compare +with ACTIVE-LENGTH. If it is less than that, then that means we +shall continue walking down the tree, so we return t in the first +element, while setting other variables to the correct values so +that the caller of this function can use the updated values. + +ACTIVE-EDGE-INDEX is for the purpose of updating solely." (let* ((actual-node (gethash node tree)) (node-edge-length (gst-edge-length actual-node position active-number str-lens))) @@ -128,23 +141,31 @@ This always returns NODE." ;;;###autoload (defsubst gst-aref (strs num index) - "Return the INDEX-th element in NUM-th element of STRS." + "Return the INDEX th element in NUM th element of STRS." (aref (nth num strs) index)) ;;;###autoload (defun gst-extend-tree (tree last-added position remain active-node active-number active-edge-index active-length character strs num str-lens) - "Extend a tree by CHARACTER in NUM-th string of STRS. + "Extend TREE by CHARACTER in NUM-th string of STRS. The return value is -(tree +\(tree last-added remain active-node - active-number active-edge-index active-length)" + active-number active-edge-index active-length) + +Other parameters: LAST-ADDED, POSITION, REMAIN, ACTIVE-NODE, +ACTIVE-NUMBER, ACTIVE-EDGE-INDEX, and ACTIVE-LENGTH have special +meanings in the algorithm. + +STR-LENS can be supplied so that we don't have to calculate the +lengths repeatedly." (let* ((need-sl 0) (remain (1+ remain)) - continue-p breakp) + continue-p breakp terminating-no-split) (while (and (not breakp) (> remain 0)) (setq continue-p nil breakp nil) + (setq terminating-no-split nil) (cond ((= active-length 0) (setq active-edge-index position) @@ -153,7 +174,7 @@ The return value is (nxt (cond (actual-node (gethash (gst-aref strs active-number active-edge-index) - (cadr (cdr (cdr (cdr actual-node))))))))) + (car (nthcdr 4 actual-node))))))) (cond ((null nxt) (let ((leaf (gst-new-node tree last-added num position nil @@ -161,7 +182,7 @@ The return value is (setq last-added leaf) (puthash (gst-aref strs active-number active-edge-index) leaf - (cadr (cdr (cdr (cdr (gethash active-node tree)))))) + (car (nthcdr 4 (gethash active-node tree)))) (setq need-sl (gst-add-suffix-link tree need-sl active-node))) ;; rule 2 ) @@ -188,14 +209,22 @@ The return value is ;; observation 1 (setq active-length (1+ active-length)) (setq need-sl (gst-add-suffix-link tree need-sl active-node)) - (setq breakp t) - ;; add a label + ;; (setq breakp t) + ;; terminating symbol special handling (cond ((eq character -1) ; terminating symbol - (gst-add-leaf-label tree nxt num (- position remain -1)))))) + (gst-add-leaf-label tree nxt num (- position remain -1)) + (setq active-length (1- active-length)) + ;; We don't want to split since this is a match. But + ;; we don't want to break since this is a + ;; terminating symbol. So we invent a new variable + ;; to do this. + (setq terminating-no-split t)) + (t + (setq breakp t))))) (cond - (breakp) - (t ;; splitting + ((or breakp terminating-no-split)) + (t ; splitting (let ((split (gst-new-node tree last-added (car (gethash nxt tree)) (cadr (gethash nxt tree)) (+ (cadr (gethash nxt tree)) active-length)))) @@ -206,11 +235,14 @@ The return value is (let ((leaf (gst-new-node tree last-added num position nil (- position remain -1)))) (setq last-added leaf) - (puthash character leaf + (puthash character + leaf (cadddr (cdr (gethash split tree)))) (setcar (cdr (gethash nxt tree)) - (+ (cadr (gethash nxt tree)) - active-length)) + (min + (+ (cadr (gethash nxt tree)) + active-length) + (1- (nth (car (gethash nxt tree)) str-lens)))) (puthash (gst-aref strs (car (gethash nxt tree)) (cadr (gethash nxt tree))) @@ -258,9 +290,9 @@ lengths again." (active-number index) (active-edge-index 0) (active-length 0) - old-character result) ; temporary holder + result) ; temporary holder (while character - (setq old-character character) + ;; (setq old-character character) (setq result (gst-extend-tree tree last-added position remain active-node active-number active-edge-index active-length character strs index @@ -274,22 +306,24 @@ lengths again." (setq active-length (pop result)) (setq position (1+ position)) (setq character (ignore-errors (gst-aref strs index position))) - (cond - ((characterp old-character) - (insert (format "After adding character %c:\n" old-character)) - (gst-print-tree tree - (append (seq-take strs index) - (list (concat - (seq-take (nth index strs) position) - "$")))) - (insert "\n\n")) - ((= old-character -1) - (insert (format "After adding character -1:\n")) - (gst-print-tree tree - (seq-take strs (1+ index))) - (insert "\n\n"))))) + ;; (cond + ;; ((characterp old-character) + ;; (insert (format "After adding character %c:\n" old-character)) + ;; (gst-print-tree tree + ;; (append (seq-take strs index) + ;; (list (concat + ;; (seq-take (nth index strs) position) + ;; "$")))) + ;; (insert "\n\n")) + ;; ((= old-character -1) + ;; (insert (format "After adding character -1:\n")) + ;; (gst-print-tree tree + ;; (seq-take strs (1+ index))) + ;; (insert "\n\n"))) + )) (setq index (1+ index)) - (insert "\n\n")) + ;; (insert "\n\n") + ) tree)) ;;; Printing @@ -312,7 +346,10 @@ lengths again." ;;;###autoload (defun gst-pretty-hash-table (table) - "Only returns the useful parts from a table." + "Only return the useful parts from TABLE. + +To be precise, this returns the keys and values of the +hash-table, in the form of an alist." (let (keys-values-alist) (maphash (lambda (key value) @@ -353,7 +390,13 @@ lengths again." ;;;###autoload (defun gst-print-tree (tree strs) "Print TREE with the aid of STRS." - (let* ((symbol-tree (make-symbol "new-hierarchy"))) + (let* ((symbol-tree (make-symbol "new-hierarchy")) + (strs (mapcar (lambda (str) + (mapcar (lambda (c) + (cond ((eq c -1) ?$) + (c))) + str)) + strs))) (set symbol-tree (hierarchy-new)) (maphash (lambda (key value) @@ -395,4 +438,5 @@ lengths again." (t actual-str))))))))) (provide 'generalized-suffix-tree) -;;; generalized-suffiex-tree.el ends here + +;;; generalized-suffix-tree.el ends here diff --git a/suffix tree/gst test ground.txt b/suffix tree/gst test ground.txt index 2dad02e..84524cf 100644 --- a/suffix tree/gst test ground.txt +++ b/suffix tree/gst test ground.txt @@ -43,6 +43,8 @@ root +Debugging... + After adding character i: root i$ (0 : 0) @@ -12289,3 +12291,177 @@ root + +Debugging continues. + +Generalized suffix tree for: ido-completions, comint-completion-at-point, previous-completion, completion-flex--make-flex-pattern, eldoc-remove-command-completions: +root + i + o + us-completion$ (2 : 4) + n + - + flex--make-flex-pattern$ (3 : 7) + at-point$ (1 : 14) + s$ (4 : 28)$ (0 : 11) + $ (2 : 16) + do-completions$ (0 : 0) + nt + $ (1 : 23) + -completion-at-point$ (1 : 3) + d + -completions$ (4 : 19) + o + c-remove-command-completions$ (4 : 2) + -completions$ (0 : 1) + o + m + int-completion-at-point$ (1 : 1) + pletion + - + flex--make-flex-pattern$ (3 : 1) + at-point$ (1 : 8) + s$ (4 : 22)$ (0 : 5) + $ (2 : 10) + mand-completions$ (4 : 14) + -completions$ (0 : 2) + n + - + flex--make-flex-pattern$ (3 : 8) + at-point$ (1 : 15) + s$ (4 : 29)$ (0 : 12) + $ (2 : 17) + int$ (1 : 22) + us-completion$ (2 : 5) + c-remove-command-completions$ (4 : 3) + ve-command-completions$ (4 : 9) + - + at-point$ (1 : 17) + com + mand-completions$ (4 : 12) + pletion + -at-point$ (1 : 6) + s$ (4 : 20)$ (0 : 3) + $ (2 : 8) + p + attern$ (3 : 26) + oint$ (1 : 20) + flex- + pattern$ (3 : 21) + -make-flex-pattern$ (3 : 10) + -make-flex-pattern$ (3 : 15) + make-flex-pattern$ (3 : 16) + remove-command-completions$ (4 : 5) + c + -remove-command-completions$ (4 : 4) + om + int-completion-at-point$ (1 : 0) + pletion + - + flex--make-flex-pattern$ (3 : 0) + at-point$ (1 : 7) + s$ (4 : 21)$ (0 : 4) + $ (2 : 9) + mand-completions$ (4 : 13) + m + int-completion-at-point$ (1 : 2) + pletion + - + flex--make-flex-pattern$ (3 : 2) + at-point$ (1 : 9) + s$ (4 : 23)$ (0 : 6) + $ (2 : 11) + a + nd-completions$ (4 : 16) + ke-flex-pattern$ (3 : 17) + ove-command-completions$ (4 : 8) + mand-completions$ (4 : 15) + p + oint$ (1 : 21) + letion + - + flex--make-flex-pattern$ (3 : 3) + at-point$ (1 : 10) + s$ (4 : 24)$ (0 : 7) + $ (2 : 12) + revious-completion$ (2 : 0) + attern$ (3 : 27) + l + doc-remove-command-completions$ (4 : 1) + e + x- + pattern$ (3 : 23) + -make-flex-pattern$ (3 : 12) + tion + - + flex--make-flex-pattern$ (3 : 4) + at-point$ (1 : 11) + s$ (4 : 25)$ (0 : 8) + $ (2 : 13) + e + vious-completion$ (2 : 2) + tion + - + flex--make-flex-pattern$ (3 : 5) + at-point$ (1 : 12) + s$ (4 : 26)$ (0 : 9) + $ (2 : 14) + x- + pattern$ (3 : 24) + -make-flex-pattern$ (3 : 13) + - + command-completions$ (4 : 11) + flex-pattern$ (3 : 20) + rn$ (3 : 31) + ldoc-remove-command-completions$ (4 : 0) + move-command-completions$ (4 : 7) + t + - + point$ (1 : 19) + completion-at-point$ (1 : 5) + ion + - + flex--make-flex-pattern$ (3 : 6) + at-point$ (1 : 13) + s$ (4 : 27)$ (0 : 10) + $ (2 : 15) + $ (1 : 25) + tern$ (3 : 29) + ern$ (3 : 30) + n + t + $ (1 : 24) + -completion-at-point$ (1 : 4) + s$ (4 : 30)$ (0 : 13) + - + flex--make-flex-pattern$ (3 : 9) + at-point$ (1 : 16) + $ (3 : 33)$ (2 : 18) + d-completions$ (4 : 18) + s + -completion$ (2 : 7) + $ (4 : 31)$ (0 : 14) + $ (4 : 32)$ (3 : 34)$ (2 : 19)$ (1 : 26)$ (0 : 15) + a + ke-flex-pattern$ (3 : 18) + t + tern$ (3 : 28) + -point$ (1 : 18) + nd-completions$ (4 : 17) + r + n$ (3 : 32) + e + move-command-completions$ (4 : 6) + vious-completion$ (2 : 1) + v + e-command-completions$ (4 : 10) + ious-completion$ (2 : 3) + us-completion$ (2 : 6) + flex- + pattern$ (3 : 22) + -make-flex-pattern$ (3 : 11) + x- + pattern$ (3 : 25) + -make-flex-pattern$ (3 : 14) + ke-flex-pattern$ (3 : 19) + diff --git a/suffix tree/suffix-tree files.zip b/suffix tree/suffix-tree files.zip Binary files differdeleted file mode 100644 index 946352b..0000000 --- a/suffix tree/suffix-tree files.zip +++ /dev/null |