aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
DBTranslator4RangeVariable.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
47
49
50#ifndef DOXYGEN_SHOULD_SKIP_THIS
51
53# ifdef GUM_NO_INLINE
55# endif /* GUM_NO_INLINE */
56
57namespace gum {
58
59 namespace learning {
60
// Constructor taking the missing symbols and the maximal dictionary size,
// without any initial variable: the range variable is created with bounds
// [1, 0], i.e., min > max.
// NOTE(review): listing lines 61-62 and 65 are absent from this extract
// (presumably the beginning of the signature and of the base-class
// initializer) -- restore them from the upstream file before compiling.
63 const std::vector< std::string >& missing_symbols,
64 std::size_t max_dico_entries) :
66 true,
67 missing_symbols,
68 true,
69 max_dico_entries),
70 _variable_("var", "", 1, 0) {
71 // assign to each integer missing symbol a Boolean indicating that
72 // we did not translate it yet. If we encounter a non integer missing
73 // symbol, we record it because it cannot be compromised by updating the
74 // domain of the range variable
75 bool non_int_symbol_found = false;
76 for (const auto& symbol: this->missing_symbols_) {
77 if (DBCell::isInteger(symbol)) {
78 _status_int_missing_symbols_.insert(symbol, false);
79 } else if (!non_int_symbol_found) {
// only the first non integer missing symbol encountered is recorded
80 non_int_symbol_found = true;
81 _nonint_missing_symbol_ = symbol;
82 }
83 }
84
85 GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
86 }
87
89 DBTranslator4RangeVariable::DBTranslator4RangeVariable(std::size_t max_dico_entries) :
90 DBTranslator(DBTranslatedValueType::DISCRETE, true, true, max_dico_entries),
91 _variable_("var", "", 1, 0) {
92 GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
93 }
94
96 DBTranslator4RangeVariable::DBTranslator4RangeVariable(
97 const RangeVariable& var,
98 const std::vector< std::string >& missing_symbols,
99 const bool editable_dictionary,
100 std::size_t max_dico_entries) :
101 DBTranslator(DBTranslatedValueType::DISCRETE,
102 true,
103 missing_symbols,
104 editable_dictionary,
105 max_dico_entries),
106 _variable_(var) {
107 // get the bounds of the range variable
108 const long lower_bound = var.minVal();
109 const long upper_bound = var.maxVal();
110
111 // check that the variable has not too many entries for the dictionary
112 if ((upper_bound >= lower_bound)
113 && (std::size_t(upper_bound - lower_bound + 1) > this->max_dico_entries_)) {
114 GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
115 }
116
117 // if the range variable is not empty, i.e., its upper bound is greater
118 // than or equal to its lower bound, remove all the missing symbols
119 // corresponding to a number between lower_bound and upper_bound
120 if (lower_bound <= upper_bound) {
121 for (auto iter = this->missing_symbols_.beginSafe();
122 iter != this->missing_symbols_.endSafe();
123 ++iter) {
124 if (DBCell::isInteger(*iter)) {
125 const long missing_val = std::stol(*iter);
126 if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
127 this->missing_symbols_.erase(iter);
128 }
129 }
130 }
131 }
132
133 // add the content of the variable into the back dictionary
134 std::size_t size = 0;
135 for (const auto& label: var.labels()) {
136 // insert the label into the back_dictionary
137 this->back_dico_.insert(size, label);
138 ++size;
139 }
140
141 // assign to each integer missing symbol a Boolean indicating that
142 // we did not translate it yet. If we encounter a non integer symbol,
143 // we record it because it cannot be compomised by updating the domain
144 // of the range variable. This will be useful for back translations
145 bool non_int_symbol_found = false;
146 for (const auto& symbol: this->missing_symbols_) {
147 if (DBCell::isInteger(symbol)) {
148 _status_int_missing_symbols_.insert(symbol, false);
149 } else if (!non_int_symbol_found) {
150 non_int_symbol_found = true;
151 _nonint_missing_symbol_ = symbol;
152 }
153 }
154
155 GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
156 }
157
159 DBTranslator4RangeVariable::DBTranslator4RangeVariable(const RangeVariable& var,
160 const bool editable_dictionary,
161 std::size_t max_dico_entries) :
162 DBTranslator(DBTranslatedValueType::DISCRETE, true, editable_dictionary, max_dico_entries),
163 _variable_(var) {
164 // get the bounds of the range variable
165 const long lower_bound = var.minVal();
166 const long upper_bound = var.maxVal();
167
168 // check that the variable has not too many entries for the dictionary
169 if ((upper_bound >= lower_bound)
170 && (std::size_t(upper_bound - lower_bound + 1) > this->max_dico_entries_)) {
171 GUM_ERROR(SizeError, "the dictionary induced by the variable is too large")
172 }
173
174 // add the content of the variable into the back dictionary
175 std::size_t size = 0;
176 for (const auto& label: var.labels()) {
177 // insert the label into the back_dictionary
178 this->back_dico_.insert(size, label);
179 ++size;
180 }
181
182 GUM_CONSTRUCTOR(DBTranslator4RangeVariable);
183 }
184
186 DBTranslator4RangeVariable::DBTranslator4RangeVariable(const DBTranslator4RangeVariable& from) :
187 DBTranslator(from), _variable_(from._variable_),
188 _status_int_missing_symbols_(from._status_int_missing_symbols_),
189 _translated_int_missing_symbols_(from._translated_int_missing_symbols_),
190 _nonint_missing_symbol_(from._nonint_missing_symbol_) {
191 GUM_CONS_CPY(DBTranslator4RangeVariable);
192 }
193
195 DBTranslator4RangeVariable::DBTranslator4RangeVariable(DBTranslator4RangeVariable&& from) :
196 DBTranslator(std::move(from)), _variable_(std::move(from._variable_)),
197 _status_int_missing_symbols_(std::move(from._status_int_missing_symbols_)),
198 _translated_int_missing_symbols_(std::move(from._translated_int_missing_symbols_)),
199 _nonint_missing_symbol_(std::move(from._nonint_missing_symbol_)) {
200 GUM_CONS_MOV(DBTranslator4RangeVariable);
201 }
202
204 DBTranslator4RangeVariable* DBTranslator4RangeVariable::clone() const {
205 return new DBTranslator4RangeVariable(*this);
206 }
207
209 DBTranslator4RangeVariable&
210 DBTranslator4RangeVariable::operator=(const DBTranslator4RangeVariable& from) {
211 if (this != &from) {
212 DBTranslator::operator=(from);
213 _variable_ = from._variable_;
214 _status_int_missing_symbols_ = from._status_int_missing_symbols_;
215 _translated_int_missing_symbols_ = from._translated_int_missing_symbols_;
216 _nonint_missing_symbol_ = from._nonint_missing_symbol_;
217 }
218
219 return *this;
220 }
221
223 DBTranslator4RangeVariable&
224 DBTranslator4RangeVariable::operator=(DBTranslator4RangeVariable&& from) {
225 if (this != &from) {
226 DBTranslator::operator=(std::move(from));
227 _variable_ = std::move(from._variable_);
228 _status_int_missing_symbols_ = std::move(from._status_int_missing_symbols_);
229 _translated_int_missing_symbols_ = std::move(from._translated_int_missing_symbols_);
230 _nonint_missing_symbol_ = std::move(from._nonint_missing_symbol_);
231 }
232
233 return *this;
234 }
235
// Returns the translation of a database string, as found in (or added to)
// the current dictionary.
//
// - If str is a key of the back dictionary, its index is returned.
// - Otherwise, if str is a missing symbol, the value
//   std::numeric_limits< std::size_t >::max() is returned; the first time an
//   integer missing symbol is met, its value is recorded as "translated".
// - Otherwise, if the dictionary is editable and str is an integer, the range
//   variable is extended so that it contains this integer, the new labels are
//   appended to the back dictionary and the index assigned to str is returned.
//   Integer missing symbols not translated yet that fall within the new range
//   are removed from the missing symbols.
//
// Errors are raised when the dictionary is not editable, when str is not an
// integer, when the dictionary would exceed max_dico_entries_, or when the
// new range would contain an already translated missing symbol.
//
// NOTE(review): listing lines 259, 266, 275, 289, 310, 318, 350 and 358 are
// absent from this extract; each one presumably opens the error-raising call
// whose message follows it, so the exception types are not visible here.
// Restore them from the upstream file before compiling.
237 DBTranslatedValue DBTranslator4RangeVariable::translate(const std::string& str) {
238 // try to get the index of str within the labelized variable. If this
239 // cannot be found, try to find if this corresponds to a missing value.
240 // Finally, if this is still not a missing value and, if enabled, try
241 // to add str as a new label
242 try {
243 return DBTranslatedValue{this->back_dico_.first(str)};
244 } catch (gum::Exception&) {
245 // check that this is not a missing value
246 if (this->isMissingSymbol(str)) {
247 try {
248 const bool is_str_translated = _status_int_missing_symbols_[str];
249 if (!is_str_translated) {
250 _status_int_missing_symbols_[str] = true;
251 _translated_int_missing_symbols_.insert(std::stol(str));
252 }
// a missing symbol without any status entry is deliberately ignored here
// (the constructors only register integer missing symbols in this table)
253 } catch (gum::NotFound&) {}
254 return DBTranslatedValue{std::numeric_limits< std::size_t >::max()};
255 }
256
257 // check if we are allowed to update the range variable
258 if (!this->hasEditableDictionary()) {
260 "The translation of String \"" << str << "\" could not be found for variable '"
261 << _variable_ << "'.");
262 }
263
264 // check if str could correspond to a bound of the range variable
265 if (!DBCell::isInteger(str)) {
267 "String \"" << str << "\" cannot be translated because "
268 << "it cannot be converted into an integer");
269 }
270 const long new_value = std::stol(str);
271
272 // if str corresponds to a missing symbol that we already
273 // translated, raise an exception
274 if (_translated_int_missing_symbols_.exists(new_value)) {
276 "String \"" << str << "\" cannot be translated because "
277 << "it corresponds to an already translated missing symbol");
278 }
279
280 // now, we can try to add str as a new bound of the range variable
281 // if possible
282
283 // if the range variable is empty, set the min and max ranges. Here,
284 // there is no need to check whether the new range would contain an
285 // already translated missing symbol because this was already tested
286 // in the above test.
287 if (_variable_.minVal() > _variable_.maxVal()) {
288 if (this->max_dico_entries_ == 0) {
290 "String \"" << str << "\" cannot be translated because "
291 << "the dictionary is already full");
292 }
293 _variable_.setMinVal(new_value);
294 _variable_.setMaxVal(new_value);
295 this->back_dico_.insert(std::size_t(0), str);
296 return DBTranslatedValue{std::size_t(0)};
297 }
298
299 // here, the domain is not empty. So we should update either the
300 // lower bound or the upper bound of the range variable, unless
301 // a missing symbol lies within the new bounds and we have already
302 // translated it.
303 const long lower_bound = _variable_.minVal();
304 const long upper_bound = _variable_.maxVal();
305
// current number of values of the range; used below as the index of the next
// entry to append to the back dictionary
306 std::size_t size = upper_bound - lower_bound + 1;
307
308 if (new_value < _variable_.minVal()) {
309 if (std::size_t(upper_bound - new_value + 1) > this->max_dico_entries_)
311 "String \"" << str << "\" cannot be translated because "
312 << "the dictionary is already full");
313
314 // check that there does not already exist a translated missing
315 // value within the new bounds of the range variable
316 for (const auto& missing: _translated_int_missing_symbols_) {
317 if ((missing >= new_value) && (missing <= upper_bound)) {
319 "String \"" << str << "\" cannot be translated "
320 << "because it would induce a new range containing "
321 << "an already translated missing symbol");
322 }
323 }
324
325 // remove all the missing symbols that were not translated yet and
326 // that lie within the new bounds of the range variable
327 for (auto iter = _status_int_missing_symbols_.beginSafe();
328 iter != _status_int_missing_symbols_.endSafe();
329 ++iter) {
330 if (iter.val() == false) {
331 const long missing = std::stol(iter.key());
332 if ((missing >= new_value) && (missing <= upper_bound)) {
333 this->missing_symbols_.erase(iter.key());
334 _status_int_missing_symbols_.erase(iter);
335 }
336 }
337 }
338
339 // update the range and the back dictionary
// new_value is the first label appended, hence its index is the current size
340 const std::size_t index = size;
341 for (long i = new_value; i < _variable_.minVal(); ++i) {
342 this->back_dico_.insert(size, std::to_string(i));
343 ++size;
344 }
345 _variable_.setMinVal(new_value);
346
347 return DBTranslatedValue{index};
348 } else {
// NOTE(review): this branch is taken whenever new_value >= lower_bound. If
// str is an integer lying within the current range but written differently
// from the stored label (e.g., "05" -- assuming DBCell::isInteger accepts
// it), the loop below adds nothing and setMaxVal(new_value) would lower the
// upper bound -- confirm whether this case can occur.
349 if (std::size_t(new_value - lower_bound + 1) > this->max_dico_entries_)
351 "String \"" << str << "\" cannot be translated because "
352 << "the dictionary is already full");
353
354 // check that there does not already exist a translated missing
355 // value within the new bounds of the range variable
356 for (const auto& missing: _translated_int_missing_symbols_) {
357 if ((missing <= new_value) && (missing >= lower_bound)) {
359 "String \"" << str << "\" cannot be translated "
360 << "because it would induce a new range containing "
361 << "an already translated missing symbol");
362 }
363 }
364
365 // remove all the missing symbols that were not translated yet and
366 // that lie within the new bounds of the range variable
367 for (auto iter = _status_int_missing_symbols_.beginSafe();
368 iter != _status_int_missing_symbols_.endSafe();
369 ++iter) {
370 if (iter.val() == false) {
371 const long missing = std::stol(iter.key());
372 if ((missing <= new_value) && (missing >= lower_bound)) {
373 this->missing_symbols_.erase(iter.key());
374 _status_int_missing_symbols_.erase(iter);
375 }
376 }
377 }
378
379 // update the range and the back dictionary
380 for (long i = _variable_.maxVal() + 1; i <= new_value; ++i) {
381 this->back_dico_.insert(size, std::to_string(i));
382 ++size;
383 }
384 _variable_.setMaxVal(new_value);
385
// new_value is the last label appended, hence its index is size - 1
386 return DBTranslatedValue{size - std::size_t(1)};
387 }
388 }
389 }
390
392 bool DBTranslator4RangeVariable::needsReordering() const {
393 // if the variable contains only numbers, they should be increasing
394 const auto& labels = _variable_.labels();
395 std::size_t last_number = std::numeric_limits< std::size_t >::lowest();
396 std::size_t number;
397 for (const auto& label: labels) {
398 number = this->back_dico_.first(label);
399 if (number < last_number) return true;
400 last_number = number;
401 }
402
403 return false;
404 }
405
406
407 } /* namespace learning */
408
409} /* namespace gum */
410
411#endif /* DOXYGEN_SHOULD_SKIP_THIS */
The databases' cell translators for range variables.
The databases' cell translators for range variables.
Base class for all aGrUM's exceptions.
Definition exceptions.h:118
Exception : operation not allowed.
Exception : problem with size.
Exception : wrong type for this operation.
Error: An unknown label is found in the database.
DBTranslator4RangeVariable(const std::vector< std::string > &missing_symbols, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max())
default constructor without any initial variable
The base class for all the tabular database cell translators.
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
STL namespace.