liblevenshtein 4.0.0
A library for generating Finite State Transducers based on Levenshtein Automata.
Loading...
Searching...
No Matches
dawg.cpp
Go to the documentation of this file.
1#include <array>
2#include <memory>
3#include <queue>
4
5#include "MurmurHash2.h"
6
8
9
10namespace liblevenshtein {
11
12Dawg::Dawg(DawgNode* root, std::size_t size)
13 : _root(root),
14 _size(size)
15{}
16
18 : _root(new DawgNode()),
19 _size(0)
20{}
21
23 for (DawgNode* node : all_nodes()) {
24 delete node;
25 }
26}
27
28auto Dawg::all_nodes() const -> std::unordered_set<DawgNode *> {
29 std::unordered_set<DawgNode *> nodes;
30 std::queue<DawgNode *> pending;
31 pending.push(_root);
32 while (!pending.empty()) {
33 DawgNode *node = pending.front();
34 pending.pop();
35 nodes.insert(node);
36 node->for_each_edge([&](char label, DawgNode *target) {
37 pending.push(target);
38 });
39 }
40 return nodes;
42
43auto Dawg::contains(const std::string &term) const -> bool {
44 DawgNode* node = _root;
45 for (int i = 0; i < term.length() && node != nullptr; i += 1) {
46 node = node->transition(term[i]);
47 }
48 return node != nullptr && node->is_final();
49}
50
52 return _root;
53}
54
55auto Dawg::size() const -> std::size_t {
56 return _size;
57}
58
60 return {_root};
61}
62
64 return {_size};
65}
66
67auto operator<<(std::ostream &out, const Dawg &dawg) -> std::ostream & {
68 out << "Dawg{size=" << dawg._size << ", root=" << dawg._root << "}";
69 return out;
70}
71
72auto Dawg::operator==(const Dawg &other) const -> bool {
73 return size() == other.size() && *root() == *other.root();
74}
75
76} // namespace liblevenshtein
77
78
79static std::hash<liblevenshtein::DawgNode> node_hash_code;
80
81auto std::hash<liblevenshtein::Dawg>::operator()(
82 const liblevenshtein::Dawg &dawg) const -> std::size_t {
83
84 uint64_t hash_code = 0xDEADBEEF;
85
86 std::array<uint64_t, 1> key = {dawg._size};
88
89 key[0] = node_hash_code(*(dawg._root));
90 return MurmurHash64A(key.data(), 1, hash_code);
91}
Iterates over all the terms in a DAWG dictionary.
Represents a position within one or more terms of a DAWG dictionary.
Definition dawg_node.h:20
void is_final(bool is_final)
Specifies whether this node represents a word boundary, or immediately follows an edge having the fin...
Definition dawg_node.cpp:19
void for_each_edge(const std::function< void(char, DawgNode *)> &fn) const
Iterates over each outgoing edge of this node and invokes a callback function with each edge's charac...
Definition dawg_node.cpp:27
auto transition(char label) const -> DawgNode *
Returns the target node, following this one, along the edge annotated with the given label.
Definition dawg_node.cpp:33
A Directed Acyclic Word Graph (DAWG) maps sequences of characters to form words; the collection of wo...
Definition dawg.h:23
auto operator==(const Dawg &other) const -> bool
Determines whether another DAWG is equivalent to this one.
Definition dawg.cpp:72
auto contains(const std::string &term) const -> bool
Determines whether the given term is contained within this dictionary.
Definition dawg.cpp:43
std::size_t _size
Number of terms reachable from the root node.
Definition dawg.h:129
auto root() const -> DawgNode *
Returns a pointer to the root node of this dictionary from which all its terms may be determined.
Definition dawg.cpp:51
auto all_nodes() const -> std::unordered_set< DawgNode * >
Returns a set of all nodes reachable from the root, including the root.
Definition dawg.cpp:28
DawgNode * _root
Root node of this DAWG from which all its terms may be reached.
Definition dawg.h:126
auto begin() const -> DawgIterator
Returns an iterator pointing to the first term in this dictionary, from which all terms may be iterat...
Definition dawg.cpp:59
virtual ~Dawg()
Deletes all the nodes associated with this dictionary.
Definition dawg.cpp:22
auto end() const -> DawgIterator
Returns an iterator representing the boundary following the final term in this dictionary.
Definition dawg.cpp:63
auto size() const -> size_t
Returns the number of terms in this dictionary.
Definition dawg.cpp:55
Dawg()
Constructs an empty DAWG with a default root node (there are no outgoing edges or reachable terms fro...
Definition dawg.cpp:17
static std::hash< liblevenshtein::DawgNode > node_hash_code
Definition dawg.cpp:79
void query(ll::Dawg *dawg, const std::string &query_term, std::size_t max_distance)
Definition main.cpp:25
Various utilities regarding Levenshtein transducers.
Definition namespaces.dox:9
auto operator<<(std::ostream &out, const Dawg &dawg) -> std::ostream &
Definition dawg.cpp:67
STL namespace.