aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
DBTranslator4LabelizedVariable.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
47
49
50#ifndef DOXYGEN_SHOULD_SKIP_THIS
51
53# ifdef GUM_NO_INLINE
55# endif /* GUM_NO_INLINE */
56
57namespace gum {
58
59 namespace learning {
60
63 const std::vector< std::string >& missing_symbols,
64 std::size_t max_dico_entries) :
66 true,
67 missing_symbols,
68 true,
69 max_dico_entries),
70 _variable_("var", "", 0) {
71 GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
72 }
73
75 DBTranslator4LabelizedVariable::DBTranslator4LabelizedVariable(std::size_t max_dico_entries) :
76 DBTranslator(DBTranslatedValueType::DISCRETE, true, true, max_dico_entries),
77 _variable_("var", "", 0) {
78 GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
79 }
80
82 DBTranslator4LabelizedVariable::DBTranslator4LabelizedVariable(
83 const LabelizedVariable& var,
84 const std::vector< std::string >& missing_symbols,
85 const bool editable_dictionary,
86 std::size_t max_dico_entries) :
87 DBTranslator(DBTranslatedValueType::DISCRETE,
88 true,
89 missing_symbols,
90 editable_dictionary,
91 max_dico_entries),
92 _variable_(var) {
93 // check that the variable has not too many entries
94 if (var.domainSize() > max_dico_entries) {
95 GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
96 }
97
98 // add the content of the variable into the back dictionary
99 std::size_t size = 0;
100 for (const auto& label: var.labels()) {
101 // if the label corresponds to a missing value, then remove it from
102 // the set of missing symbols.
103 if (this->missing_symbols_.exists(label)) { this->missing_symbols_.erase(label); }
104
105 // insert the label into the back_dictionary
106 this->back_dico_.insert(size, label);
107 ++size;
108 }
109
110 GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
111 }
112
114 DBTranslator4LabelizedVariable::DBTranslator4LabelizedVariable(const LabelizedVariable& var,
115 const bool editable_dictionary,
116 std::size_t max_dico_entries) :
117 DBTranslator(DBTranslatedValueType::DISCRETE, true, editable_dictionary, max_dico_entries),
118 _variable_(var) {
119 // check that the variable has not too many entries
120 if (var.domainSize() > max_dico_entries) {
121 GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
122 }
123
124 // add the content of the variable into the back dictionary
125 std::size_t size = 0;
126 for (const auto& label: var.labels()) {
127 // insert the label into the back_dictionary
128 this->back_dico_.insert(size, label);
129 ++size;
130 }
131
132 GUM_CONSTRUCTOR(DBTranslator4LabelizedVariable);
133 }
134
136 DBTranslator4LabelizedVariable* DBTranslator4LabelizedVariable::clone() const {
137 return new DBTranslator4LabelizedVariable(*this);
138 }
139
141 DBTranslator4LabelizedVariable&
142 DBTranslator4LabelizedVariable::operator=(const DBTranslator4LabelizedVariable& from) {
143 if (this != &from) {
144 DBTranslator::operator=(from);
145 _variable_ = from._variable_;
146 }
147
148 return *this;
149 }
150
152 DBTranslator4LabelizedVariable&
153 DBTranslator4LabelizedVariable::operator=(DBTranslator4LabelizedVariable&& from) {
154 if (this != &from) {
155 DBTranslator::operator=(std::move(from));
156 _variable_ = std::move(from._variable_);
157 }
158
159 return *this;
160 }
161
163 DBTranslatedValue DBTranslator4LabelizedVariable::translate(const std::string& str) {
164 // try to get the index of str within the labelized variable. If this
165 // cannot be found, try to find if this corresponds to a missing value.
166 // Finally, if this is still not a missing value and, if enabled, try
167 // to add str as a new label
168 try {
169 return DBTranslatedValue{std::size_t(_variable_[str])};
170 } catch (gum::Exception&) {
171 // check that this is not a missing value
172 if (this->isMissingSymbol(str)) {
173 return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
174 }
175
176 // try to add str as a new value if possible
177 if (this->hasEditableDictionary()) {
178 const std::size_t size = _variable_.domainSize();
179 if (size >= this->max_dico_entries_)
181 "String \"" << str << "\" cannot be translated "
182 << "because the dictionary is already full");
183 _variable_.addLabel(str);
184 this->back_dico_.insert(size, str);
185 return DBTranslatedValue{size};
186 } else
188 "The translation of \"" << str << "\" could not be found for variable '"
189 << _variable_ << "'.");
190 }
191 }
192
194 bool DBTranslator4LabelizedVariable::needsReordering() const {
195 // if the variable contains only numbers, they should be increasing
196 const auto& labels = _variable_.labels();
197 float last_number = std::numeric_limits< float >::lowest();
198 float number;
199 bool only_numbers = true;
200 for (const auto& label: labels) {
201 if (!DBCell::isReal(label)) {
202 only_numbers = false;
203 break;
204 }
205 number = std::stof(label);
206 if (number < last_number) return true;
207 last_number = number;
208 }
209
210 if (!only_numbers) {
211 // here we shall examine whether the strings are sorted by
212 // lexicographical order
213 const std::size_t size = labels.size();
214 for (std::size_t i = 1; i < size; ++i) {
215 if (labels[i] < labels[i - 1]) return true;
216 }
217 }
218
219 return false;
220 }
221
223 HashTable< std::size_t, std::size_t > DBTranslator4LabelizedVariable::reorder() {
224 // check whether the variable contains only numeric values. In this
225 // case, we have to sort the values by increasing number
226 const auto& labels = _variable_.labels();
227 const std::size_t size = labels.size();
228
229 bool only_numbers = true;
230 for (const auto& label: labels) {
231 if (!DBCell::isReal(label)) {
232 only_numbers = false;
233 break;
234 }
235 }
236
237 // assign to each label its current index
238 std::vector< std::pair< std::size_t, std::string > > xlabels;
239 xlabels.reserve(size);
240 for (std::size_t i = std::size_t(0); i < size; ++i)
241 xlabels.push_back(std::make_pair(i, labels[i]));
242
243 // reorder by increasing order
244 if (only_numbers)
245 std::sort(xlabels.begin(),
246 xlabels.end(),
247 [](const std::pair< std::size_t, std::string >& a,
248 const std::pair< std::size_t, std::string >& b) -> bool {
249 return std::stof(a.second) < std::stof(b.second);
250 });
251 else
252 std::sort(xlabels.begin(),
253 xlabels.end(),
254 [](const std::pair< std::size_t, std::string >& a,
255 const std::pair< std::size_t, std::string >& b) -> bool {
256 return a.second < b.second;
257 });
258
259 // check whether there were any modification
260 bool modifications = false;
261 for (std::size_t i = std::size_t(0); i < size; ++i) {
262 if (xlabels[i].first != i) {
263 modifications = true;
264 break;
265 }
266 }
267
268 // if there were no modification, return an empty update hashtable
269 if (!modifications) { return HashTable< std::size_t, std::size_t >(); }
270
271 // recreate the variable so that the labels correspond to the
272 // new ordering
273 _variable_.eraseLabels();
274 for (auto& label: xlabels)
275 _variable_.addLabel(label.second);
276
277 // create the hashTable corresponding to the mapping from the old
278 // indices to the new one
279 this->back_dico_.clear();
280 HashTable< std::size_t, std::size_t > mapping((Size)size);
281 for (std::size_t i = std::size_t(0); i < size; ++i) {
282 mapping.insert(xlabels[i].first, i);
283 this->back_dico_.insert(i, xlabels[i].second);
284 }
285
286 return mapping;
287 }
288
289
290 } /* namespace learning */
291
292} /* namespace gum */
293
294#endif /* DOXYGEN_SHOULD_SKIP_THIS */
The databases' cell translators for labelized variables.
The databases' cell translators for labelized variables.
Base class for all aGrUM's exceptions.
Definition exceptions.h:118
Exception : problem with size.
Error: An unknown label is found in the database.
DBTranslator4LabelizedVariable(const std::vector< std::string > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max())
default constructor without any initial variable
The base class for all the tabular database cell translators.
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46