liblevenshtein 4.0.0
A library for generating Finite State Transducers based on Levenshtein Automata.
Loading...
Searching...
No Matches
sorted_dawg.cpp
Go to the documentation of this file.
1#include <algorithm>
2#include <list>
3#include <set>
4#include <vector>
5
7
8
9namespace liblevenshtein {
10
14
15auto SortedDawg::add(const std::string &term) -> bool {
16 if (term < _prev_term) {
17 return false;
18 }
19
20 if (term.empty()) {
21 _root->is_final(true);
22 _prev_term = term;
23 _size += 1;
24 return true;
25 }
26
27 std::size_t upper_bound = std::min(term.length(), _prev_term.length());
28
29 // Find the length of the longest common prefix between term and prev_term
30 std::size_t i = 0;
31 while (i < upper_bound && term[i] == _prev_term[i]) {
32 i += 1;
33 }
34
35 // Check the unchecked nodes for redundancy from the last one down to
36 // the common prefix size. Then, truncate the list at that point.
37 minimize(i);
38
39 // Add the suffix starting from the correct node min-way through the graph
40 DawgNode *source = unchecked_transitions->empty()
41 ? _root
42 : unchecked_transitions->top().target();
43
44 for (std::size_t k = term.length(); i < k; i += 1) {
45 auto *target = new DawgNode();
46 floating_nodes->insert(target);
47 unchecked_transitions->emplace(term[i], source, target);
48 source = target;
49 }
50
51 source->is_final(true);
52 _prev_term = term;
53 _size += 1;
54 return true;
55}
56
58 minimize(0);
59}
60
62 auto iter = minimized_nodes->find(*key);
63 if (iter != minimized_nodes->end()) {
64 return iter->second;
65 }
66 return nullptr;
67}
68
70 // proceed from the leaf up to a certain point
71 for (std::size_t j = unchecked_transitions->size(); j > lower_bound; j -= 1) {
72 Transition transition = unchecked_transitions->top();
74 char label = transition.label();
75 DawgNode* source = transition.source();
76 DawgNode* target = transition.target();
78 if (existing != nullptr) {
79 source->add_edge(label, existing);
80 }
81 else {
82 source->add_edge(label, target);
83 (*minimized_nodes)[*target] = target;
84 }
85 }
86}
87
88auto SortedDawg::remove(const std::string &term) -> bool {
89 return false;
90}
91
93 unchecked_transitions = new std::stack<Transition>();
94 minimized_nodes = new std::unordered_map<DawgNode, DawgNode *>();
95 floating_nodes = new std::unordered_set<DawgNode *>();
96}
97
99 if (unchecked_transitions != nullptr) {
101 unchecked_transitions = nullptr;
102 }
103
104 if (minimized_nodes != nullptr) {
105 delete minimized_nodes;
106 minimized_nodes = nullptr;
107 }
108
109 if (floating_nodes != nullptr) {
110 for (DawgNode *node : all_nodes()) {
111 floating_nodes->erase(node);
112 }
113
114 for (DawgNode *node : *floating_nodes) {
115 delete node;
116 }
117
118 delete floating_nodes;
119 floating_nodes = nullptr;
120 }
121}
122
123template <class IterType>
125 auto *dawg = new SortedDawg();
126 dawg->init();
127
128 while (iter != end) {
129 const std::string &term = *iter;
130 if (!dawg->add(term)) {
131 delete dawg;
132 return nullptr;
133 }
134 ++iter;
135 }
136
137 dawg->finish();
138 dawg->clean_up();
139 return dawg;
140}
141
142template
143auto sorted_dawg(std::list<std::string>::iterator iter,
144 std::list<std::string>::iterator end) -> SortedDawg *;
145
146template
147auto sorted_dawg(std::vector<std::string>::iterator iter,
148 std::vector<std::string>::iterator end) -> SortedDawg *;
149
150template
151auto sorted_dawg(std::set<std::string>::iterator iter,
152 std::set<std::string>::iterator end) -> SortedDawg *;
153
154} // namespace liblevenshtein
Represents a position within one or more terms of a DAWG dictionary.
Definition dawg_node.h:20
void is_final(bool is_final)
Specifies whether this node represents a word boundary, or immediately follows an edge having the fin...
Definition dawg_node.cpp:19
auto add_edge(char label, DawgNode *target) -> DawgNode *
Adds a new outgoing edge to this node.
Definition dawg_node.cpp:41
auto all_nodes() const -> std::unordered_set< DawgNode * >
Returns a set of all nodes reachable from the root, including the root.
Definition dawg.cpp:28
A specific type of Dawg that is constructed over lexicographically sorted terms.
Definition sorted_dawg.h:20
std::unordered_set< DawgNode * > * floating_nodes
Nodes that have not been added to this Dawg.
Definition sorted_dawg.h:95
~SortedDawg() override
Cleans up any residual intermediate members used to construct the Dawg.
auto add(const std::string &term) -> bool override
Adds a new term to this dictionary.
auto minimized_node(DawgNode *key) const -> DawgNode *
Returns a DawgNode whose following suffix is equivalent to the following suffix of the given DawgNode...
std::unordered_map< DawgNode, DawgNode * > * minimized_nodes
Mapping of suffixes to DawgNodes that have been minimized.
Definition sorted_dawg.h:92
auto remove(const std::string &term) -> bool override
Removes a term from this dictionary.
void minimize(std::size_t lower_bound)
Minimizes the suffixes of the unchecked paths until there are no more than lower_bound unchecked tran...
void init()
Prepares this dictionary for initialization.
void finish()
Specifies the dictionary has been fully initialized.
void clean_up()
Cleans up any intermediate members used to initialize this Dawg.
std::stack< Transition > * unchecked_transitions
Transitions whose redundancies have not been checked.
Definition sorted_dawg.h:89
Represents an edge from one DawgNode to another, annotated with a character label from the current po...
Definition transition.h:13
auto source() const -> DawgNode *
Returns the initial DawgNode along the edge of this transition.
auto label() const -> char
Returns the label annotating the edge from the source to the target of this Transition.
auto target() const -> DawgNode *
Returns the destination DawgNode along the edge of this transition.
void query(ll::Dawg *dawg, const std::string &query_term, std::size_t max_distance)
Definition main.cpp:25
Various utilities regarding Levenshtein transducers.
Definition namespaces.dox:9
auto sorted_dawg(IterType iter, IterType end) -> SortedDawg *
Factory function that initializes a new SortedDawg using the terms yielded from an iterator.