liblevenshtein 4.0.0
A library for generating Finite State Transducers based on Levenshtein Automata.
Loading...
Searching...
No Matches
lazy_query.cpp
Go to the documentation of this file.
1#include <cstdint>
2#include <utility>
3
5
6namespace liblevenshtein {
7
8template <class Result>
9LazyQuery<Result>::LazyQuery(const std::string &term, std::size_t max_distance,
11 TransitionFn transition, DistanceFn min_distance)
12 : _term(term), _max_distance(max_distance),
13 transition(std::move(transition)), min_distance(std::move(min_distance)),
14 _a(max_distance < (SIZE_MAX - 1) >> 1 ? (max_distance << 1) + 1
15 : SIZE_MAX) {
16 DawgNode *root = intersection->node();
18 if (root->is_final() && term.length() <= max_distance) { // special case
19 std::string candidate;
21 } else {
22 advance();
23 }
24}
25
26template <class Result> LazyQuery<Result>::~LazyQuery() {
27 for (Intersection *intersection : _intersections) {
28 delete intersection;
29 }
30}
31
32// NOLINTNEXTLINE(readability-function-cognitive-complexity)
33template <class Result> void LazyQuery<Result>::advance() {
34 if (!_edges.empty() || !_pending.empty()) {
35 _is_complete = true;
36 do {
37 if (_edges.empty()) {
38 _intersection = _pending.front();
39 _pending.pop();
40 DawgNode *node = _intersection->node();
41 State *state = _intersection->state();
42 _i = state->head()->term_index();
43 std::size_t b = _term.length() - _i;
44 _k = (_a < b) ? _a : b;
45 node->for_each_edge([&](char label, DawgNode *target) {
46 _edges.emplace(label, target);
47 });
48 } else {
49 DawgNode *node = _intersection->node();
50 State *state = _intersection->state();
51 std::pair<char, DawgNode *> edge = _edges.front();
52 _edges.pop();
53 char next_label = edge.first;
54 DawgNode *next_node = edge.second;
55 std::vector<bool> characteristic_vector =
56 this->characteristic_vector(next_label, _term, _k, _i);
57 State *next_state = transition(state, characteristic_vector);
58 if (next_state != nullptr) {
59 Intersection *next_intersection = build_intersection(
60 next_label, next_node, next_state, _intersection);
61 _pending.push(next_intersection);
62 if (next_node->is_final()) {
63 std::size_t distance = min_distance(next_state, _term.length());
64 if (distance <= _max_distance) {
65 std::string term = next_intersection->str();
66 update_candidate(term, distance);
67 _is_complete = false;
68 break;
69 }
70 }
71 }
72 }
73 } while (!_edges.empty() || !_pending.empty());
74 } else {
75 _is_complete = true;
76 }
77}
78
79template <class Result>
81 State *state, Intersection *parent)
82 -> Intersection * {
83 auto *intersection = new Intersection(label, node, state, parent);
84 _intersections.push_back(intersection);
85 return intersection;
86}
87
88template <class Result>
90 advance();
91 return *this;
92}
93
94template <>
96 std::size_t distance) {
97 _candidate = term;
98}
99
100template <>
102 std::string &term, std::size_t distance) {
103 _candidate.first = term;
104 _candidate.second = distance;
105}
106
107template <class Result>
109 return _candidate;
110}
111
112template <class Result>
114 -> bool {
115 return _is_complete;
116}
117
118template <class Result>
120 std::size_t k, std::size_t i)
121 -> std::vector<bool> {
122 std::vector<bool> characteristic_vector(k);
123 for (int j = 0; j < k; j += 1) {
124 characteristic_vector[j] = (x == term[i + j]);
125 }
126 return characteristic_vector;
127}
128
129template class LazyQuery<std::string>;
131
132template <class Result>
133// NOLINTNEXTLINE(modernize-pass-by-value)
135 std::size_t max_distance,
137 TransitionFn transition,
138 DistanceFn min_distance)
139 : _term(term), _max_distance(max_distance),
140 _intersection(intersection), transition(std::move(transition)),
141 min_distance(std::move(min_distance)) {}
142
143template <class Result>
145 return LazyQuery<Result>(_term, _max_distance, _intersection, transition,
146 min_distance);
147}
148
149template <class Result>
153
154template class LazyIterator<std::string>;
156
157} // namespace liblevenshtein
Represents a position within one or more terms of a DAWG dictionary.
Definition dawg_node.h:20
void is_final(bool is_final)
Specifies whether this node represents a word boundary, or immediately follows an edge having the fin...
Definition dawg_node.cpp:19
void for_each_edge(const std::function< void(char, DawgNode *)> &fn) const
Iterates over each outgoing edge of this node and invokes a callback function with each edge's charac...
Definition dawg_node.cpp:27
Represents an Intersection between a dictionary automaton and Levenshtein automaton,...
Lazily traverses the intersection between the dictionary automaton and Levenshtein automaton,...
Definition lazy_query.h:180
auto end() -> LazyQuery< Result >
Returns a placeholder representing the end of the spelling candidates.
LazyIterator(const std::string &term, std::size_t max_distance, Intersection *intersection, TransitionFn transition, DistanceFn min_distance)
Constructs a new LazyIterator which yields spelling candidates as they are matched while traversing t...
auto begin() -> LazyQuery< Result >
Returns an instance of LazyQuery that yields spelling candidates of the given Type.
Lazily traverses the intersection between the dictionary automaton and Levenshtein automaton,...
Definition lazy_query.h:30
void advance()
Advances to the next spelling candidate.
std::queue< Intersection * > _pending
Intermediate Intersection instances representing paths along the intersection between the dictionary ...
Definition lazy_query.h:89
auto characteristic_vector(char x, std::string &term, std::size_t k, std::size_t i) -> std::vector< bool >
Flags all instances of the given character in the query term within a window of consecutive character...
auto operator==(const LazyQuery &other) const -> bool
Returns whether this LazyQuery has matched all the spelling candidates.
auto operator++() -> LazyQuery &
Advances to the next spelling candidate.
LazyQuery()=default
Constructs a LazyQuery that represents the end of the spelling candidates.
auto build_intersection(char label, DawgNode *node, State *state, Intersection *parent) -> Intersection *
Factory function that constructs and records a new Intersection instance.
auto operator*() const -> const Result &
Returns the current spelling candidate.
void update_candidate(std::string &term, std::size_t distance)
Sets the current spelling candidate according to the desired Result type.
~LazyQuery()
Frees any owned members.
Consists of a closure of Position nodes within the Levenshtein automaton.
Definition state.h:23
void head(Position *head)
Sets the first element in the linked-list of Position nodes.
Definition state.cpp:26
void query(ll::Dawg *dawg, const std::string &query_term, std::size_t max_distance)
Definition main.cpp:25
Various utilities regarding Levenshtein transducers.
Definition namespaces.dox:9
std::function< std::size_t(State *, std::size_t)> DistanceFn
Infers the Levenshtein distance from a final Levenshtein State and the length of the query term.
Definition lazy_query.h:23
auto distance(State *state, std::size_t query_length) -> std::size_t
Infers the Levenshtein distance from the given State and query length.
std::function< State *(State *, std::vector< bool > &)> TransitionFn
Returns the Levenshtein State mapped-to by another State and characteristic vector.
Definition lazy_query.h:19
STL namespace.