From e2fe5882ad434b433bfba4529abdd2666d9c3d99 Mon Sep 17 00:00:00 2001
From: JSDurand <mmemmew@gmail.com>
Date: Thu, 14 Jan 2021 01:14:27 +0800
Subject: A temporary bug fix

There are more bugs unfortunately.
---
 suffix tree/LCS.el                     |  31 ++++--
 suffix tree/generalized-suffix-tree.el | 122 +++++++++++++++--------
 suffix tree/gst test ground.txt        | 176 +++++++++++++++++++++++++++++++++
 suffix tree/suffix-tree files.zip      | Bin 14879 -> 0 bytes
 4 files changed, 281 insertions(+), 48 deletions(-)
 delete mode 100644 suffix tree/suffix-tree files.zip

(limited to 'suffix tree')

diff --git a/suffix tree/LCS.el b/suffix tree/LCS.el
index 989c25e..41bcec0 100644
--- a/suffix tree/LCS.el	
+++ b/suffix tree/LCS.el	
@@ -24,9 +24,15 @@
 ;;;###autoload
 (defun lcs-edge-length (node str-lens)
   "Return the length of the edge going into NODE.
-Unlike `gst-edge-length', this does not need to know about POSITION
-and NUM, since it is assumed that the tree is already built
-before calling this function."
+
+Unlike `gst-edge-length', this does not need to know about
+POSITION and NUM, since it is assumed that the tree is already
+built before calling this function.
+
+If the node is a leaf, its end is denoted by infty, and we
+wouldn't know the exact length if we don't know the length of the
+string it corresponds to, so we need the argument STR-LENS to
+provide that information."
   (let ((start (car (cdr node)))
         (end (cond
               ((eq (car (cdr (cdr node))) 'infty)
@@ -47,7 +53,11 @@ before calling this function."
 
 ;;;###autoload
 (defun lcs-get-env (node str-lens)
-  "Return the information we want to keep on the stack."
+  "Return the information we want to keep from NODE on the stack.
+
+STR-LENS is here to provide information about the ending index of
+NODE if it is a lead. See the documentation for `lcs-edge-length'
+for more."
   (let ((num (car node))
         (start (car (cdr node)))
         (end (cond
@@ -93,7 +103,10 @@ before calling this function."
 
 ;;;###autoload
 (defun lcs-pretty-stack (stack)
-  "Prettify a stack."
+  "Prettify STACK.
+
+Apply `lcs-pretty-stack-element' to each element, and concatenate
+them in a clean way."
   (concat
    "[ "
    (mapconcat #'lcs-pretty-stack-element
@@ -146,7 +159,7 @@ Of course, without STRS, we don't even know what our strings are."
   "Return the longest common substring of a list STRS of strings.
 
 If JUST-LENGTH is non-nil, then this only returns the length of the
-longest common substring. Otherwise, it returns the length as well as the 
+longest common substring. Otherwise, it returns the length as well as the
 longest common substring itself."
   (let* ((strs (mapcar (lambda (str)
                          (vconcat str (list -1)))
@@ -223,9 +236,9 @@ longest common substring itself."
                                   current-children)
                          temp)
                        ;; An alternative
-                       ;; (apply #'logior                                      
-                       ;;        (mapcar (lambda (num)                         
-                       ;;                  (gethash num bits-table))           
+                       ;; (apply #'logior
+                       ;;        (mapcar (lambda (num)
+                       ;;                  (gethash num bits-table))
                        ;;                (hash-table-values current-children)))
                        ))
               (setq temp nil)
diff --git a/suffix tree/generalized-suffix-tree.el b/suffix tree/generalized-suffix-tree.el
index 227aea9..07d9b18 100644
--- a/suffix tree/generalized-suffix-tree.el	
+++ b/suffix tree/generalized-suffix-tree.el	
@@ -45,7 +45,9 @@ every number."
 (defun gst-edge-length (node position num str-lens)
   "Return the length of the edge into NODE.
 See the comment above this function for the reason
-POSITION and NUM are here."
+POSITION and NUM are here.
+
+And STR-LENS is here so that we know the lengths of previous strings."
   (let* ((node-active-num (car node))
          (leafp (eq (car (cdr (cdr node))) 'infty))
          (leaf-labels (cond (leafp (car (nthcdr 5 node))))))
@@ -83,10 +85,12 @@ LAST-ADDED is the number of elements already in the TREE."
 
 ;;;###autoload
 (defun gst-add-leaf-label (tree leaf number start)
-  "Add a label to LEAF."
+  "Add a label to LEAF in TREE.
+NUMBER and START represent the label."
   (let* ((actual-node (gethash leaf tree))
          (leaf-labels (cdr (cdr (cdr (cdr (cdr actual-node)))))))
-    (cond ((consp leaf-labels)
+    (cond ((and (consp leaf-labels)
+                (not (eq (caaar leaf-labels) number)))
            (setcar leaf-labels
                    (cons (list number start)
                          (car leaf-labels)))))))
@@ -106,7 +110,16 @@ This always returns NODE."
 ;;;###autoload
 (defun gst-canonize (tree node position active-number active-edge-index active-length active-node
                           str-lens)
-  "Walk down TREE to find the correct active point."
+  "Walk down TREE to find the correct active point.
+
+To be precise, we start from NODE, use POSITION, ACTIVE-NUMBER,
+and STR-LENS to calculate the length of the edge, and compare
+with ACTIVE-LENGTH. If it is less than that, then that means we
+shall continue walking down the tree, so we return t in the first
+element, while setting other variables to the correct values so
+that the caller of this function can use the updated values.
+
+ACTIVE-EDGE-INDEX is for the purpose of updating solely."
   (let* ((actual-node (gethash node tree))
          (node-edge-length (gst-edge-length actual-node position active-number
                                             str-lens)))
@@ -128,23 +141,31 @@ This always returns NODE."
 
 ;;;###autoload
 (defsubst gst-aref (strs num index)
-  "Return the INDEX-th element in NUM-th element of STRS."
+  "Return the INDEX th element in NUM th element of STRS."
   (aref (nth num strs) index))
 
 ;;;###autoload
 (defun gst-extend-tree (tree last-added position remain
                              active-node active-number active-edge-index
                              active-length character strs num str-lens)
-  "Extend a tree by CHARACTER in NUM-th string of STRS.
+  "Extend TREE by CHARACTER in NUM-th string of STRS.
 The return value is
-(tree 
+\(tree
  last-added    remain            active-node
- active-number active-edge-index active-length)"
+ active-number active-edge-index active-length)
+
+Other parameters: LAST-ADDED, POSITION, REMAIN, ACTIVE-NODE,
+ACTIVE-NUMBER, ACTIVE-EDGE-INDEX, and ACTIVE-LENGTH have special
+meanings in the algorithm.
+
+STR-LENS can be supplied so that we don't have to calculate the
+lengths repeatedly."
   (let* ((need-sl 0)
          (remain (1+ remain))
-         continue-p breakp)
+         continue-p breakp terminating-no-split)
     (while (and (not breakp) (> remain 0))
       (setq continue-p nil breakp nil)
+      (setq terminating-no-split nil)
       (cond
        ((= active-length 0)
         (setq active-edge-index position)
@@ -153,7 +174,7 @@ The return value is
              (nxt (cond
                    (actual-node
                     (gethash (gst-aref strs active-number active-edge-index)
-                             (cadr (cdr (cdr (cdr actual-node)))))))))
+                             (car (nthcdr 4 actual-node)))))))
         (cond
          ((null nxt)
           (let ((leaf (gst-new-node tree last-added num position nil
@@ -161,7 +182,7 @@ The return value is
             (setq last-added leaf)
             (puthash (gst-aref strs active-number active-edge-index)
                      leaf
-                     (cadr (cdr (cdr (cdr (gethash active-node tree))))))
+                     (car (nthcdr 4 (gethash active-node tree))))
             (setq need-sl (gst-add-suffix-link tree need-sl active-node)))
           ;; rule 2
           )
@@ -188,14 +209,22 @@ The return value is
                 ;; observation 1
                 (setq active-length (1+ active-length))
                 (setq need-sl (gst-add-suffix-link tree need-sl active-node))
-                (setq breakp t)
-                ;; add a label
+                ;; (setq breakp t)
+                ;; terminating symbol special handling
                 (cond
                  ((eq character -1)     ; terminating symbol
-                  (gst-add-leaf-label tree nxt num (- position remain -1))))))
+                  (gst-add-leaf-label tree nxt num (- position remain -1))
+                  (setq active-length (1- active-length))
+                  ;; We don't want to split since this is a match. But
+                  ;; we don't want to break since this is a
+                  ;; terminating symbol. So we invent a new variable
+                  ;; to do this.
+                  (setq terminating-no-split t))
+                 (t
+                  (setq breakp t)))))
               (cond
-               (breakp)
-               (t ;; splitting
+               ((or breakp terminating-no-split))
+               (t                       ; splitting
                 (let ((split (gst-new-node
                               tree last-added (car (gethash nxt tree)) (cadr (gethash nxt tree))
                               (+ (cadr (gethash nxt tree)) active-length))))
@@ -206,11 +235,14 @@ The return value is
                   (let ((leaf (gst-new-node tree last-added num position
                                             nil (- position remain -1))))
                     (setq last-added leaf)
-                    (puthash character leaf
+                    (puthash character
+                             leaf
                              (cadddr (cdr (gethash split tree))))
                     (setcar (cdr (gethash nxt tree))
-                            (+ (cadr (gethash nxt tree))
-                               active-length))
+                            (min
+                             (+ (cadr (gethash nxt tree))
+                                active-length)
+                             (1- (nth (car (gethash nxt tree)) str-lens))))
                     (puthash (gst-aref strs
                                        (car (gethash nxt tree))
                                        (cadr (gethash nxt tree)))
@@ -258,9 +290,9 @@ lengths again."
              (active-number index)
              (active-edge-index 0)
              (active-length 0)
-             old-character result)                    ; temporary holder
+             result)                    ; temporary holder
         (while character
-          (setq old-character character)
+          ;; (setq old-character character)
           (setq result (gst-extend-tree tree last-added position remain
                                         active-node active-number active-edge-index
                                         active-length character strs index
@@ -274,22 +306,24 @@ lengths again."
           (setq active-length (pop result))
           (setq position (1+ position))
           (setq character (ignore-errors (gst-aref strs index position)))
-          (cond
-           ((characterp old-character)
-            (insert (format "After adding character %c:\n" old-character))
-            (gst-print-tree tree
-                            (append (seq-take strs index)
-                                    (list (concat
-                                           (seq-take (nth index strs) position)
-                                           "$"))))
-            (insert "\n\n"))
-           ((= old-character -1)
-            (insert (format "After adding character -1:\n"))
-            (gst-print-tree tree
-                            (seq-take strs (1+ index)))
-            (insert "\n\n")))))
+          ;; (cond
+          ;;  ((characterp old-character)
+          ;;   (insert (format "After adding character %c:\n" old-character))
+          ;;   (gst-print-tree tree
+          ;;                   (append (seq-take strs index)
+          ;;                           (list (concat
+          ;;                                  (seq-take (nth index strs) position)
+          ;;                                  "$"))))
+          ;;   (insert "\n\n"))
+          ;;  ((= old-character -1)
+          ;;   (insert (format "After adding character -1:\n"))
+          ;;   (gst-print-tree tree
+          ;;                   (seq-take strs (1+ index)))
+          ;;   (insert "\n\n")))
+          ))
       (setq index (1+ index))
-      (insert "\n\n"))
+      ;; (insert "\n\n")
+      )
     tree))
 
 ;;; Printing
@@ -312,7 +346,10 @@ lengths again."
 
 ;;;###autoload
 (defun gst-pretty-hash-table (table)
-  "Only returns the useful parts from a table."
+  "Only return the useful parts from TABLE.
+
+To be precise, this returns the keys and values of the
+hash-table, in the form of an alist."
   (let (keys-values-alist)
     (maphash
      (lambda (key value)
@@ -353,7 +390,13 @@ lengths again."
 ;;;###autoload
 (defun gst-print-tree (tree strs)
   "Print TREE with the aid of STRS."
-  (let* ((symbol-tree (make-symbol "new-hierarchy")))
+  (let* ((symbol-tree (make-symbol "new-hierarchy"))
+         (strs (mapcar (lambda (str)
+                         (mapcar (lambda (c)
+                                   (cond ((eq c -1) ?$)
+                                         (c)))
+                                 str))
+                       strs)))
     (set symbol-tree (hierarchy-new))
     (maphash
      (lambda (key value)
@@ -395,4 +438,5 @@ lengths again."
                         (t actual-str)))))))))
 
 (provide 'generalized-suffix-tree)
-;;; generalized-suffiex-tree.el ends here
+
+;;; generalized-suffix-tree.el ends here
diff --git a/suffix tree/gst test ground.txt b/suffix tree/gst test ground.txt
index 2dad02e..84524cf 100644
--- a/suffix tree/gst test ground.txt	
+++ b/suffix tree/gst test ground.txt	
@@ -43,6 +43,8 @@ root
 
 
 
+Debugging...
+
 After adding character i:
 root
   i$ (0 : 0)
@@ -12289,3 +12291,177 @@ root
 
 
 
+
+Debugging continues.
+
+Generalized suffix tree for: ido-completions, comint-completion-at-point, previous-completion, completion-flex--make-flex-pattern, eldoc-remove-command-completions:
+root
+  i
+    o
+      us-completion$ (2 : 4)
+      n
+        -
+          flex--make-flex-pattern$ (3 : 7)
+          at-point$ (1 : 14)
+        s$ (4 : 28)$ (0 : 11)
+        $ (2 : 16)
+    do-completions$ (0 : 0)
+    nt
+      $ (1 : 23)
+      -completion-at-point$ (1 : 3)
+  d
+    -completions$ (4 : 19)
+    o
+      c-remove-command-completions$ (4 : 2)
+      -completions$ (0 : 1)
+  o
+    m
+      int-completion-at-point$ (1 : 1)
+      pletion
+        -
+          flex--make-flex-pattern$ (3 : 1)
+          at-point$ (1 : 8)
+        s$ (4 : 22)$ (0 : 5)
+        $ (2 : 10)
+      mand-completions$ (4 : 14)
+    -completions$ (0 : 2)
+    n
+      -
+        flex--make-flex-pattern$ (3 : 8)
+        at-point$ (1 : 15)
+      s$ (4 : 29)$ (0 : 12)
+      $ (2 : 17)
+    int$ (1 : 22)
+    us-completion$ (2 : 5)
+    c-remove-command-completions$ (4 : 3)
+    ve-command-completions$ (4 : 9)
+  -
+    at-point$ (1 : 17)
+    com
+      mand-completions$ (4 : 12)
+      pletion
+        -at-point$ (1 : 6)
+        s$ (4 : 20)$ (0 : 3)
+        $ (2 : 8)
+    p
+      attern$ (3 : 26)
+      oint$ (1 : 20)
+    flex-
+      pattern$ (3 : 21)
+      -make-flex-pattern$ (3 : 10)
+    -make-flex-pattern$ (3 : 15)
+    make-flex-pattern$ (3 : 16)
+    remove-command-completions$ (4 : 5)
+  c
+    -remove-command-completions$ (4 : 4)
+    om
+      int-completion-at-point$ (1 : 0)
+      pletion
+        -
+          flex--make-flex-pattern$ (3 : 0)
+          at-point$ (1 : 7)
+        s$ (4 : 21)$ (0 : 4)
+        $ (2 : 9)
+      mand-completions$ (4 : 13)
+  m
+    int-completion-at-point$ (1 : 2)
+    pletion
+      -
+        flex--make-flex-pattern$ (3 : 2)
+        at-point$ (1 : 9)
+      s$ (4 : 23)$ (0 : 6)
+      $ (2 : 11)
+    a
+      nd-completions$ (4 : 16)
+      ke-flex-pattern$ (3 : 17)
+    ove-command-completions$ (4 : 8)
+    mand-completions$ (4 : 15)
+  p
+    oint$ (1 : 21)
+    letion
+      -
+        flex--make-flex-pattern$ (3 : 3)
+        at-point$ (1 : 10)
+      s$ (4 : 24)$ (0 : 7)
+      $ (2 : 12)
+    revious-completion$ (2 : 0)
+    attern$ (3 : 27)
+  l
+    doc-remove-command-completions$ (4 : 1)
+    e
+      x-
+        pattern$ (3 : 23)
+        -make-flex-pattern$ (3 : 12)
+      tion
+        -
+          flex--make-flex-pattern$ (3 : 4)
+          at-point$ (1 : 11)
+        s$ (4 : 25)$ (0 : 8)
+        $ (2 : 13)
+  e
+    vious-completion$ (2 : 2)
+    tion
+      -
+        flex--make-flex-pattern$ (3 : 5)
+        at-point$ (1 : 12)
+      s$ (4 : 26)$ (0 : 9)
+      $ (2 : 14)
+    x-
+      pattern$ (3 : 24)
+      -make-flex-pattern$ (3 : 13)
+    -
+      command-completions$ (4 : 11)
+      flex-pattern$ (3 : 20)
+    rn$ (3 : 31)
+    ldoc-remove-command-completions$ (4 : 0)
+    move-command-completions$ (4 : 7)
+  t
+    -
+      point$ (1 : 19)
+      completion-at-point$ (1 : 5)
+    ion
+      -
+        flex--make-flex-pattern$ (3 : 6)
+        at-point$ (1 : 13)
+      s$ (4 : 27)$ (0 : 10)
+      $ (2 : 15)
+    $ (1 : 25)
+    tern$ (3 : 29)
+    ern$ (3 : 30)
+  n
+    t
+      $ (1 : 24)
+      -completion-at-point$ (1 : 4)
+    s$ (4 : 30)$ (0 : 13)
+    -
+      flex--make-flex-pattern$ (3 : 9)
+      at-point$ (1 : 16)
+    $ (3 : 33)$ (2 : 18)
+    d-completions$ (4 : 18)
+  s
+    -completion$ (2 : 7)
+    $ (4 : 31)$ (0 : 14)
+  $ (4 : 32)$ (3 : 34)$ (2 : 19)$ (1 : 26)$ (0 : 15)
+  a
+    ke-flex-pattern$ (3 : 18)
+    t
+      tern$ (3 : 28)
+      -point$ (1 : 18)
+    nd-completions$ (4 : 17)
+  r
+    n$ (3 : 32)
+    e
+      move-command-completions$ (4 : 6)
+      vious-completion$ (2 : 1)
+  v
+    e-command-completions$ (4 : 10)
+    ious-completion$ (2 : 3)
+  us-completion$ (2 : 6)
+  flex-
+    pattern$ (3 : 22)
+    -make-flex-pattern$ (3 : 11)
+  x-
+    pattern$ (3 : 25)
+    -make-flex-pattern$ (3 : 14)
+  ke-flex-pattern$ (3 : 19)
+
diff --git a/suffix tree/suffix-tree files.zip b/suffix tree/suffix-tree files.zip
deleted file mode 100644
index 946352b..0000000
Binary files a/suffix tree/suffix-tree files.zip and /dev/null differ
-- 
cgit v1.2.3-18-g5258