summaryrefslogtreecommitdiff
path: root/src/crf.h
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-02-05 17:30:11 +0800
committerJSDurand <mmemmew@gmail.com>2022-02-05 17:30:11 +0800
commit510b10b96b546fcc6c6b6be85050305ddd192a41 (patch)
tree997d6c3f2c0a1ad6e27127d54a94655527e57864 /src/crf.h
parent3fb5430080199a6d92a63f8259fe4a88df9b83ba (diff)
replace some hash table usage by tuples
Previously I used hash tables, which consume too much memory. Now the critical parts are replaced by a new hand-written library called "tuple.h". Now we can easily parse larger inputs. I haven't tested its limits, though.
Diffstat (limited to 'src/crf.h')
-rw-r--r--src/crf.h80
1 files changed, 54 insertions, 26 deletions
diff --git a/src/crf.h b/src/crf.h
index 9d1cc78..b9955fe 100644
--- a/src/crf.h
+++ b/src/crf.h
@@ -1,5 +1,7 @@
#ifndef CRF_H
#define CRF_H
+#include "tuple.h"
+#include "grammar.h"
#include "ht.h"
/* This file implements some support functions for the Clustered
@@ -48,22 +50,42 @@
tables whose keys are (X, a, b, i) that are in the edge set of the
first type node, and whose values are meaningless. */
+/* Now, with the newly written library tuple.h, we implement this in a
+ different way. We first observe that we don't actually need those
+ nodes. What matters is the edges, the ability to know if an edge
+ exists between two given nodes, and to loop through the edges
+ starting from a given node. Then we record the existence of the
+ edge between (X, i) and (Y, a, b, j) by storing the tuple
+
+ (X, i, Y, a, b, j).
+
+ The only thing that is troublesome is to loop through the edges
+ from a given node. So we also observe that an edge is possible
+ only if j <= i. This ensures that we don't search impossible
+ edges. Hence the worst-case time complexity of the overall
+ algorithm is preserved (for every possibility, there really might
+ exist an edge indeed). The rest is premature optimization, I have
+ to proceed first. :P */
+
typedef struct crf_s crf;
-crf *new_crf();
+crf *new_crf(Grammar *g, NUM input_len);
/* on error return non-zero */
-BOOL crf_add_node(crf * const restrict crfp, pair2 node);
+BOOL crf_add_node(crf *crfp, pair2 node);
-/* if not found return NULL.
+HP_ATTR BOOL crf_find_node(crf *crfp, pair2 node);
- if found but the node has no edges return an empty hash table. */
-ht *crf_find_node(crf * const restrict crfp, pair2 node);
+HP_ATTR BOOL crf_find_edge(crf *crfp, pair2 source, pair4 label);
BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
pair4 label);
-void destroy_crf(crf * const restrict crfp);
+void destroy_crf(crf *crfp);
+
+void crf_print(crf *crfp);
+
+H_ATTR void crf_map(crf *crfp, pair2 label, map_pair6_t fn);
/* We also need to implement the set of pending actions. These are
triples of the form:
@@ -76,25 +98,22 @@ void destroy_crf(crf * const restrict crfp);
Precisely speaking, the values are hash tables whose keys are those
j and whose values are meaningless non-zero values. */
+/* With the new library, this is now implemented as a "luple3". */
+
/* Set of Pending Actions */
typedef struct spa_s spa;
-spa *new_spa();
-
-void destroy_spa(spa * const restrict spap);
+spa *new_spa(Grammar *g, NUM input_len);
-__attribute__((__pure__, __cold__)) ht *spa_ht(CCR_MOD(spa *) spap);
+void destroy_spa(spa *spap);
/* On error return non-zero */
-BOOL spa_insert(spa * const restrict spap, pair3 label);
+BOOL spa_insert(spa *spap, pair3 label);
/* Find if a triple belongs to the set */
-P_ATTR BOOL spa_belong(CCR_MOD(spa *) spap, pair3 label);
+P_ATTR BOOL spa_belong(spa *spap, pair3 label);
-/* Find if for a pair (X, k) there exist some j such that (X, k, j)
- belong to the set. If so, return the hash table whose keys are
- those j; otherwise return NULL. */
-P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label);
+H_ATTR void spa_map(spa *spap, pair2 label, map_pair3_t fn);
/* We also implement "process descriptors" for the CNP algorithm. Since
a process descriptor is of the form
@@ -130,6 +149,18 @@ P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label);
descriptors in U_k. I think this is a good way to lower the space
requirements of this algorithm. */
+/* With the new library "tuple.h", we represent a process descriptor
+ as a "luple5". To implement grading, we store
+
+ (X, a, b, j, k)
+
+ as
+
+ (k, X, a, b, j).
+
+ Note that the procr set is still an array of lists, since we need
+ to pop elements from procr efficiently. */
+
typedef pair4 prodecor;
/* The set named R in the paper */
@@ -139,14 +170,13 @@ typedef struct procr_s procr;
typedef struct procu_s procu;
/* constructor */
-procr *new_procr(NUM size, BOOL * const restrict errorp);
-procu *new_procu(NUM size, BOOL * const restrict errorp);
+procr *new_procr(Grammar *g, NUM input_len, BOOL *errorp);
+procu *new_procu(Grammar *g, NUM input_len, BOOL *errorp);
/* insert */
/* the function dscAdd in the paper */
-BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
- NUM grade, prodecor dec);
+BOOL desc_add(procr *pr, procu *pu, NUM grade, prodecor dec);
/* pop */
@@ -156,14 +186,12 @@ BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
corresponding slot in pu will be freed automatically. */
/* if no more elements are left, return NULL */
-prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
+prodecor *pop_procr(procr *pr, procu *pu, NUM *gradep);
-void print_procr(CCR_MOD(procr *) pr);
+void print_procr(procr *pr);
/* destructor */
-void destroy_procr(procr * const restrict pr);
-void destroy_procu(procu * const restrict pu);
-
-#define TEST haha
+void destroy_procr(procr *pr);
+void destroy_procu(procu *pu);
#endif