12#include "liblevenshtein/proto/liblevenshtein.pb.h"
15namespace fs = std::filesystem;
17namespace llp = liblevenshtein::proto;
30 std::fstream
output(
path, std::fstream::out | std::fstream::trunc |
31 std::fstream::binary);
53 std::fstream
input(
path, std::fstream::in | std::fstream::binary);
86 std::pair<uint64_t, char>
key = std::make_pair(
source_id, label);
113 char label =
edge.first.second;
130 for (
int index = 0; index <
dict_proto.final_node_id_size(); index += 1) {
135 std::map<uint64_t, DawgNode *>
nodes;
136 for (
int index = 0; index <
dict_proto.node_id_size(); index += 1) {
140 auto *node =
new DawgNode(is_final);
145 for (
int index = 0; index <
dict_proto.edge_size(); index += 1) {
Represents a position within one or more terms of a DAWG dictionary.
void is_final(bool is_final)
Specifies whether this node represents a word boundary, or immediately follows an edge having the final character of a term.
void for_each_edge(const std::function< void(char, DawgNode *)> &fn) const
Iterates over each outgoing edge of this node and invokes a callback function with each edge's character label and target node.
auto add_edge(char label, DawgNode *target) -> DawgNode *
Adds a new outgoing edge to this node.
A Directed Acyclic Word Graph (DAWG) maps sequences of characters to form words; the collection of words forms the dictionary.
auto contains(const std::string &term) const -> bool
Determines whether the given term is contained within this dictionary.
A specific type of Dawg that is constructed over lexicographically sorted terms.
void query(ll::Dawg *dawg, const std::string &query_term, std::size_t max_distance)
Various utilities regarding Levenshtein transducers.
auto to_protobuf(Dawg *dawg) -> llp::Dictionary *
Serializes a Dawg to its protobuf equivalent.
void collect_nodes(DawgNode *source, std::set< uint64_t > &node_ids, std::set< uint64_t > &final_node_ids)
Collects the DawgNode IDs and final DawgNode IDs of all nodes reachable from the source.
auto from_protobuf(const llp::Dictionary &dict_proto) -> Dawg *
Deserializes a Dawg from its protobuf equivalent.
void collect_edges(DawgNode *source, std::map< std::pair< uint64_t, char >, uint64_t > &edges)
Collects the transitions from each source to its destination, and the respective character labels.
auto deserialize_protobuf(const fs::path &path) -> Dawg *
Deserializes the protobuf containing a Dawg at the given path or returns nullptr if none exists.
auto serialize_protobuf(Dawg *dawg, const fs::path &path) -> bool
Serializes the given Dawg to protobuf at the given path.