aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
DBCell.cpp
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
#include <cstdio>
#include <limits>
#include <stdexcept>
48
50
51
52#ifndef DOXYGEN_SHOULD_SKIP_THIS
53
55# ifdef GUM_NO_INLINE
57# endif /* GUM_NO_INLINE */
58
59namespace gum {
60
61 namespace learning {
62
63 // create the static members
64 int DBCell::_string_max_index_ = 0;
65
66 Bijection< std::string, int >& DBCell::_strings_() {
67# ifdef GUM_DEBUG_MODE
68 static bool first_time = true;
69 if (first_time) {
70 first_time = false;
71 __debug__::_dec_creation_("Bijection", " __strings", 0, "BCell string bijection", 0);
72 __debug__::_dec_creation_("BijectionImplementation",
73 " __strings",
74 0,
75 "BCell string bijection",
76 0);
77 __debug__::_dec_creation_("HashTable", " __strings", 0, "BCell string bijection", 0);
78 __debug__::_dec_creation_("HashTable", " __strings", 0, "BCell string bijection", 0);
79 }
80# endif
81 static Bijection< std::string, int > strings;
82 return strings;
83 }
84
85 // determines whether a string corresponds to an integer
86 bool DBCell::isInteger(const std::string& str) {
87 if (str.empty()) return false;
88
89 // trim the string
90 auto start_iter = str.begin() + str.find_first_not_of(" \t");
91 auto end_iter = str.begin() + str.find_last_not_of(" \t\r\n") + 1;
92
93 if (start_iter == end_iter) return false;
94
95 // if the number is negative, pass the '-' sign
96 if (*start_iter == '-') ++start_iter;
97
98 // check wether we have a number
99 for (; start_iter != end_iter; ++start_iter) {
100 if ((*start_iter < '0') || (*start_iter > '9')) return false;
101 }
102
103 return true;
104 }
105
106 // determines whether a string corresponds to an integer
107 bool DBCell::isReal(const std::string& str) {
108 if (str.empty()) return false;
109
110 // trim the string
111 auto start_iter = str.begin() + str.find_first_not_of(" \t");
112 auto end_iter = str.begin() + str.find_last_not_of(" \t\r\n") + 1;
113
114 if (start_iter == end_iter) return false;
115
116 // check wether we have a number
117 bool has_dot = false;
118 bool has_exponent = false;
119 bool has_digit = false;
120 bool has_negation = false;
121 for (; start_iter != end_iter; ++start_iter) {
122 if (*start_iter == '-') {
123 if (has_negation) return false;
124 } else if (*start_iter == '.') {
125 if (has_dot || has_exponent) return false;
126 has_dot = true;
127 } else if ((*start_iter == 'e') || (*start_iter == 'E')) {
128 if (has_exponent || !has_digit) return false;
129 has_exponent = true;
130 has_negation = false;
131 } else if ((*start_iter < '0') || (*start_iter > '9')) return false;
132 else has_digit = true;
133 }
134
135 return true;
136 }
137
138 // try to convert the content of the DBCell into another type
139 bool DBCell::convertType(const EltType new_type) {
140 if (new_type == _type_) return true;
141 switch (new_type) {
142 // ===================================
143 case EltType::REAL :
144 switch (_type_) {
145 case EltType::INTEGER :
146 _val_real_ = float(_val_integer_);
147 _type_ = EltType::REAL;
148 return true;
149
150 case EltType::STRING :
151 try {
152 const std::string& str = _strings_().first(_val_index_);
153 if (!isReal(str)) return false;
154 _val_real_ = std::stof(str);
155 _type_ = EltType::REAL;
156 return true;
157 } catch (std::invalid_argument&) { return false; }
158
159 case EltType::MISSING : return false;
160
161 default : GUM_ERROR(NotImplementedYet, "type not supported by DBCell convertType")
162 }
163
164 // ===================================
165 case EltType::INTEGER :
166 switch (_type_) {
167 case EltType::REAL : {
168 const int nb = int(_val_real_);
169 if (nb == _val_real_) {
170 _val_integer_ = nb;
171 _type_ = EltType::INTEGER;
172 return true;
173 } else return false;
174 }
175
176 case EltType::STRING :
177 try {
178 const std::string& str = _strings_().first(_val_index_);
179 if (!isInteger(str)) return false;
180 _val_integer_ = std::stoi(str);
181 _type_ = EltType::INTEGER;
182 return true;
183 } catch (std::invalid_argument&) { return false; }
184
185 case EltType::MISSING : return false;
186
187 default : GUM_ERROR(NotImplementedYet, "type not supported by DBCell convertType")
188 }
189
190 // ===================================
191 case EltType::STRING :
192 switch (_type_) {
193 case EltType::REAL : {
194 char buffer[100];
195 snprintf(buffer, 100, "%g", _val_real_);
196 const std::string str(buffer);
197 if (!_strings_().existsFirst(str)) {
198 _strings_().insert(str, _string_max_index_);
199 _val_index_ = _string_max_index_;
200 ++_string_max_index_;
201 } else {
202 _val_index_ = _strings_().second(str);
203 }
204 }
205 _type_ = EltType::STRING;
206 return true;
207
208 case EltType::INTEGER : {
209 const std::string str = std::to_string(_val_integer_);
210 if (!_strings_().existsFirst(str)) {
211 _strings_().insert(str, _string_max_index_);
212 _val_index_ = _string_max_index_;
213 ++_string_max_index_;
214 } else {
215 _val_index_ = _strings_().second(str);
216 }
217 }
218 _type_ = EltType::STRING;
219 return true;
220
221 case EltType::MISSING : return false;
222
223 default : GUM_ERROR(NotImplementedYet, "type not supported by DBCell convertType")
224 }
225
226 // ===================================
227 case EltType::MISSING : _type_ = EltType::MISSING; return true;
228
229 default : GUM_ERROR(NotImplementedYet, "type not supported by DBCell convertType")
230 }
231
232 return false;
233 }
234
235 // raises an appropriate exception when encountering a type error
236 std::string DBCell::_typeErrorMsg_(const std::string& true_type) const {
237 std::stringstream str;
238 switch (_type_) {
239 case EltType::REAL :
240 str << "The DBCell contains a real number instead of " << true_type;
241 break;
242
243 case EltType::INTEGER :
244 str << "The DBCell contains an integer instead of " << true_type;
245 break;
246
247 case EltType::STRING :
248 str << "The DBCell contains a string instead of " << true_type;
249 break;
250
251 case EltType::MISSING :
252 str << "The DBCell contains a missing value instead of " << true_type;
253 break;
254
255 default : GUM_ERROR(NotImplementedYet, "DBCell type not implemented yet")
256 }
257
258 return str.str();
259 }
260
261 // returns the content of the DBCell as a string, whatever its type
262 std::string DBCell::toString(const std::vector< std::string >& missingVals) const {
263 switch (_type_) {
264 case EltType::STRING : return _strings_().first(_val_index_);
265
266 case EltType::REAL : {
267 char buffer[100];
268 snprintf(buffer, 100, "%g", _val_real_);
269 return std::string(buffer);
270 }
271
272 case EltType::INTEGER : return std::to_string(_val_integer_);
273
274 case EltType::MISSING :
275 if (missingVals.size()) return missingVals[0];
276 else GUM_ERROR(UndefinedElement, "no missing value symbol found")
277 }
278
279 GUM_ERROR(NotImplementedYet, "type not supported by DBCell toString")
280 }
281
282
283 } /* namespace learning */
284
285} /* namespace gum */
286
287#endif /* DOXYGEN_SHOULD_SKIP_THIS */
The class representing the original values of the cells of databases.
The inlined implementation of DBCells.
static bool isInteger(const std::string &str)
determines whether a string corresponds precisely to an integer
std::string toString(const std::vector< std::string > &missingVals) const
returns the content of the DBCell as a string, whatever its type
static bool isReal(const std::string &str)
determine whether a string corresponds precisely to a real number
bool convertType(const EltType newtype)
try to convert the content of the DBCell into another type
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46