aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
DBTranslator4ContinuousVariable.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
47
49
50#ifndef DOXYGEN_SHOULD_SKIP_THIS
51
53# ifdef GUM_NO_INLINE
55# endif /* GUM_NO_INLINE */
56
57namespace gum {
58
59 namespace learning {
62 const std::vector< std::string >& missing_symbols,
63 const bool fit_range) :
64 DBTranslator(DBTranslatedValueType::CONTINUOUS, true, missing_symbols, fit_range, 1),
65 _variable_("var", ""), _fit_range_(fit_range) {
66 // Here, if fit_range is set to false, the range of the
67 // random variable will remain (-inf,+inf). So all the missing symbols
68 // that are numbers should be discarded since they lie in the domain
69 // of the variable. On the other hand, if fit_range is true, each newly
70 // observed value will update the range of the variable, so that, again,
71 // all the missing symbols that are numbers should be discarded since
72 // they always end up lying in the domain of the variable.
73 for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
74 ++iter) {
75 if (DBCell::isReal(*iter)) { this->missing_symbols_.erase(iter); }
76 }
77
78 // the remaining symbols are not numbers. Take the first one as
79 // the default missing symbols for back translations.
80 if (!this->missing_symbols_.empty()) {
81 _nonfloat_missing_symbol_ = *(this->missing_symbols_.begin());
82 }
83
84 // if fit_range is true, we shall be able to update the ranges of
85 // the continuous variable. To indicate that we did not encounter any
86 // value yet in the database, we fix the lower bound of _variable_ to +infinity
87 if (_fit_range_) _variable_.setLowerBound(std::numeric_limits< float >::infinity());
88
89 // store a copy of the variable, that should be used by method variable ()
90 _real_variable_ = _variable_.clone();
91
92 GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
93 }
94
96 DBTranslator4ContinuousVariable::DBTranslator4ContinuousVariable(const bool fit_range) :
97 DBTranslator(DBTranslatedValueType::CONTINUOUS, true, fit_range, 1), _variable_("var", ""),
98 _fit_range_(fit_range) {
99 // if fit_range is true, we shall be able to update the ranges of
100 // the continuous variable. To indicate that we did not encountered any
101 // value yet in the database, we fix the lower bound of _variable_ to +max
102 if (_fit_range_) _variable_.setLowerBound(std::numeric_limits< float >::infinity());
103
104 // store a copy of the variable, that should be used by method variable ()
105 _real_variable_ = _variable_.clone();
106
107 GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
108 }
109
111 DBTranslator4ContinuousVariable::DBTranslator4ContinuousVariable(
112 const IContinuousVariable& var,
113 const std::vector< std::string >& missing_symbols,
114 const bool fit_range) :
115 DBTranslator(DBTranslatedValueType::CONTINUOUS, true, missing_symbols, fit_range, 1),
116 _variable_(var.name(), var.description()), _fit_range_(fit_range) {
117 // get the bounds of the range variable
118 const auto lower_bound = float(var.lowerBoundAsDouble());
119 const auto upper_bound = float(var.upperBoundAsDouble());
120 _variable_.setLowerBound(lower_bound);
121 _variable_.setUpperBound(upper_bound);
122
123 // remove all the missing symbols corresponding to a number between
124 // lower_bound and upper_bound
125 bool non_float_symbol_found = false;
126 for (auto iter = this->missing_symbols_.beginSafe(); iter != this->missing_symbols_.endSafe();
127 ++iter) {
128 if (DBCell::isReal(*iter)) {
129 const float missing_val = std::stof(*iter);
130 if ((missing_val >= lower_bound) && (missing_val <= upper_bound)) {
131 this->missing_symbols_.erase(iter);
132 } else _status_float_missing_symbols_.insert(*iter, false);
133 } else if (!non_float_symbol_found) {
134 non_float_symbol_found = true;
135 _nonfloat_missing_symbol_ = *iter;
136 }
137 }
138
139 // store a copy of the variable, that should be used by method variable ()
140 _real_variable_ = var.clone();
141
142 GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
143 }
144
146 DBTranslator4ContinuousVariable::DBTranslator4ContinuousVariable(const IContinuousVariable& var,
147 const bool fit_range) :
148 DBTranslator(DBTranslatedValueType::CONTINUOUS, true, fit_range, 1),
149 _variable_(var.name(), var.description()), _fit_range_(fit_range) {
150 // get the bounds of the range variable
151 const auto lower_bound = float(var.lowerBoundAsDouble());
152 const auto upper_bound = float(var.upperBoundAsDouble());
153 _variable_.setLowerBound(lower_bound);
154 _variable_.setUpperBound(upper_bound);
155
156 // store a copy of the variable, that should be used by method variable ()
157 _real_variable_ = var.clone();
158
159 GUM_CONSTRUCTOR(DBTranslator4ContinuousVariable);
160 }
161
163 DBTranslator4ContinuousVariable::DBTranslator4ContinuousVariable(
164 const DBTranslator4ContinuousVariable& from) :
165 DBTranslator(from), _variable_(from._variable_),
166 _status_float_missing_symbols_(from._status_float_missing_symbols_),
167 _nonfloat_missing_symbol_(from._nonfloat_missing_symbol_), _fit_range_(from._fit_range_) {
168 // store a copy of the variable, that should be used by method variable ()
169 _real_variable_ = from._real_variable_->clone();
170
171 GUM_CONS_CPY(DBTranslator4ContinuousVariable);
172 }
173
175 DBTranslator4ContinuousVariable::DBTranslator4ContinuousVariable(
176 DBTranslator4ContinuousVariable&& from) :
177 DBTranslator(std::move(from)), _variable_(std::move(from._variable_)),
178 _status_float_missing_symbols_(std::move(from._status_float_missing_symbols_)),
179 _nonfloat_missing_symbol_(std::move(from._nonfloat_missing_symbol_)),
180 _fit_range_(from._fit_range_) {
181 // store a copy of the variable, that should be used by method variable ()
182 _real_variable_ = from._real_variable_;
183 from._real_variable_ = nullptr;
184
185 GUM_CONS_MOV(DBTranslator4ContinuousVariable);
186 }
187
189 DBTranslator4ContinuousVariable* DBTranslator4ContinuousVariable::clone() const {
190 return new DBTranslator4ContinuousVariable(*this);
191 }
192
194 DBTranslator4ContinuousVariable&
195 DBTranslator4ContinuousVariable::operator=(const DBTranslator4ContinuousVariable& from) {
196 if (this != &from) {
197 DBTranslator::operator=(from);
198 _variable_ = from._variable_;
199 _status_float_missing_symbols_ = from._status_float_missing_symbols_;
200 _nonfloat_missing_symbol_ = from._nonfloat_missing_symbol_;
201 _fit_range_ = from._fit_range_;
202
203 if (_real_variable_ != nullptr) delete _real_variable_;
204 _real_variable_ = from._real_variable_->clone();
205 }
206
207 return *this;
208 }
209
211 DBTranslator4ContinuousVariable&
212 DBTranslator4ContinuousVariable::operator=(DBTranslator4ContinuousVariable&& from) {
213 if (this != &from) {
214 DBTranslator::operator=(std::move(from));
215 _variable_ = std::move(from._variable_);
216 _status_float_missing_symbols_ = std::move(from._status_float_missing_symbols_);
217 _nonfloat_missing_symbol_ = std::move(from._nonfloat_missing_symbol_);
218 _fit_range_ = from._fit_range_;
219
220 if (_real_variable_ != nullptr) delete _real_variable_;
221 _real_variable_ = from._real_variable_;
222 from._real_variable_ = nullptr;
223 }
224
225 return *this;
226 }
227
229 DBTranslatedValue DBTranslator4ContinuousVariable::translate(const std::string& str) {
230 // check if the string is actually a number
231 if (!DBCell::isReal(str)) {
232 if (this->isMissingSymbol(str)) {
233 return DBTranslatedValue{std::numeric_limits< float >::max()};
234 } else
236 "String \"" << str << "\" cannot be translated because it is not a value for "
237 << _variable_);
238 }
239
240 // here we know that the string is a number
241 const float number = std::stof(str);
242
243 // if we are in the range of the variable, return the number
244 if (_variable_.belongs(number)) return DBTranslatedValue{number};
245
246 // check that this is not a missing value
247 if (this->isMissingSymbol(str)) {
248 if (!_status_float_missing_symbols_[str]) { _status_float_missing_symbols_[str] = true; }
249 return DBTranslatedValue{std::numeric_limits< float >::max()};
250 }
251
252 // check if we are allowed to update the domain of the variable
253 if (!_fit_range_) {
255 "String \"" << str
256 << "\" cannot be translated because it is "
257 "out of the domain of the continuous variable");
258 }
259
260 // now, we can try to add str as a new bound of the range variable
261 // if possible
262
263 // if the variable is empty, set the min and max ranges. Here,
264 // there is no need to check whether the new range would contain an
265 // already translated missing symbol because this was already tested
266 // in the above test.
267 if (_variable_.lowerBound() == std::numeric_limits< float >::infinity()) {
268 _variable_.setLowerBound(number);
269 _variable_.setUpperBound(number);
270 return DBTranslatedValue{number};
271 }
272
273 // here, the domain is not empty. So we should update either the
274 // lower bound or the upper bound of the variable, unless
275 // a missing symbol lies within the new bounds and we have already
276 // translated it.
277 const float lower_bound = _variable_.lowerBound();
278 const float upper_bound = _variable_.upperBound();
279 if (number < lower_bound) {
280 // check that there does not already exist a translated missing
281 // value within the new bounds of the variable
282 for (const auto& missing: _status_float_missing_symbols_) {
283 if (missing.second) {
284 const float miss_val = std::stof(missing.first);
285 if ((miss_val >= number) && (miss_val <= upper_bound)) {
287 "String \"" << str << "\" cannot be translated because "
288 << "it would induce a new domain containing an already "
289 << "translated missing symbol");
290 }
291 }
292 }
293
294 // remove all the missing symbols that were not translated yet and
295 // that lie within the new bounds of the variable
296 for (auto iter = _status_float_missing_symbols_.beginSafe();
297 iter != _status_float_missing_symbols_.endSafe();
298 ++iter) {
299 if (iter.val() == false) {
300 const float miss_val = std::stof(iter.key());
301 if ((miss_val >= number) && (miss_val <= upper_bound)) {
302 this->missing_symbols_.erase(iter.key());
303 _status_float_missing_symbols_.erase(iter);
304 }
305 }
306 }
307
308 // update the domain of the continuous variable
309 _variable_.setLowerBound(number);
310
311 return DBTranslatedValue{number};
312 } else {
313 // check that there does not already exist a translated missing
314 // value within the new bounds of the variable
315 for (const auto& missing: _status_float_missing_symbols_) {
316 if (missing.second) {
317 const float miss_val = std::stof(missing.first);
318 if ((miss_val >= lower_bound) && (miss_val <= number)) {
320 "String \"" << str << "\" cannot be translated because "
321 << "it would induce a new domain containing an already "
322 << "translated missing symbol");
323 }
324 }
325 }
326
327 // remove all the missing symbols that were not translated yet and
328 // that lie within the new bounds of the variable
329 for (auto iter = _status_float_missing_symbols_.beginSafe();
330 iter != _status_float_missing_symbols_.endSafe();
331 ++iter) {
332 if (iter.val() == false) {
333 const float miss_val = std::stof(iter.key());
334 if ((miss_val >= lower_bound) && (miss_val <= number)) {
335 this->missing_symbols_.erase(iter.key());
336 _status_float_missing_symbols_.erase(iter);
337 }
338 }
339 }
340
341 // update the domain of the continuous variable
342 _variable_.setUpperBound(number);
343
344 return DBTranslatedValue{number};
345 }
346 }
347
348 } /* namespace learning */
349
350} /* namespace gum */
351
352#endif /* DOXYGEN_SHOULD_SKIP_THIS */
The databases' cell translators for continuous variables.
Exception : operation not allowed.
Exception : wrong type for this operation.
Error: An unknown label is found in the database.
DBTranslator4ContinuousVariable(const std::vector< std::string > &missing_symbols, const bool fit_range=false)
default constructor without any initial variable
The base class for all the tabular database cell translators.
virtual DBTranslator * clone() const =0
virtual copy constructor
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
DBTranslatedValueType
The nature of the elements handled by translators (discrete, continuous).
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
STL namespace.