aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
IDatabaseTable_tpl.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40#pragma once
41
42
49
50#ifndef DOXYGEN_SHOULD_SKIP_THIS
51
52namespace gum {
53
54 namespace learning {
55
56 // ===========================================================================
57 // Unsafe handlers
58 // ===========================================================================
59
60 // default constructor
61 template < typename T_DATA >
63 DBHandler< T_DATA >(), _db_(&db), _row_(&(db.content())),
64 _end_index_(std::size_t(_row_->size())) {
65 GUM_CONSTRUCTOR(IDatabaseTable::Handler);
66 }
67
68 // copy constructor
69 template < typename T_DATA >
70 INLINE IDatabaseTable< T_DATA >::Handler::Handler(
71 const typename IDatabaseTable< T_DATA >::Handler& h) :
72 DBHandler< T_DATA >(), _db_(h._db_), _row_(h._row_), _index_(h._index_),
73 _begin_index_(h._begin_index_), _end_index_(h._end_index_) {
74 GUM_CONS_CPY(IDatabaseTable::Handler);
75 }
76
77 // move constructor
78 template < typename T_DATA >
79 INLINE
80 IDatabaseTable< T_DATA >::Handler::Handler(typename IDatabaseTable< T_DATA >::Handler&& h) :
81 DBHandler< T_DATA >(), _db_(h._db_), _row_(h._row_), _index_(h._index_),
82 _begin_index_(h._begin_index_), _end_index_(h._end_index_) {
83 GUM_CONS_MOV(IDatabaseTable::Handler);
84 }
85
86 // destructor
87 template < typename T_DATA >
88 INLINE IDatabaseTable< T_DATA >::Handler::~Handler() {
89 GUM_DESTRUCTOR(IDatabaseTable::Handler);
90 }
91
92 // copy operator
93 template < typename T_DATA >
94 INLINE typename IDatabaseTable< T_DATA >::Handler& IDatabaseTable< T_DATA >::Handler::operator=(
95 const typename IDatabaseTable< T_DATA >::Handler& h) {
96 _db_ = h._db_;
97 _row_ = h._row_;
98 _index_ = h._index_;
99 _begin_index_ = h._begin_index_;
100 _end_index_ = h._end_index_;
101 return *this;
102 }
103
104 // move operator
105 template < typename T_DATA >
106 INLINE typename IDatabaseTable< T_DATA >::Handler& IDatabaseTable< T_DATA >::Handler::operator=(
107 typename IDatabaseTable< T_DATA >::Handler&& h) {
108 _db_ = h._db_;
109 _row_ = h._row_;
110 _index_ = h._index_;
111 _begin_index_ = h._begin_index_;
112 _end_index_ = h._end_index_;
113 return *this;
114 }
115
116 // returns the current row pointed to by the handler
117 template < typename T_DATA >
118 INLINE typename IDatabaseTable< T_DATA >::Handler::const_reference
119 IDatabaseTable< T_DATA >::Handler::operator*() const {
120 return _row_->operator[](_index_);
121 }
122
123 // Dereferences the value pointed to by the handler (unsafe version)
124 template < typename T_DATA >
125 INLINE typename IDatabaseTable< T_DATA >::Handler::const_pointer
126 IDatabaseTable< T_DATA >::Handler::operator->() const {
127 return &(_row_->operator[](_index_));
128 }
129
130 // makes the handler point to the next row
131 template < typename T_DATA >
132 INLINE typename IDatabaseTable< T_DATA >::Handler&
133 IDatabaseTable< T_DATA >::Handler::operator++() {
134 ++_index_;
135 return *this;
136 }
137
138 // makes the handler point to the previous row
139 template < typename T_DATA >
140 INLINE typename IDatabaseTable< T_DATA >::Handler&
141 IDatabaseTable< T_DATA >::Handler::operator--() {
142 if (_index_ > _begin_index_) --_index_;
143 return *this;
144 }
145
146 // moves the handler by i rows
147 template < typename T_DATA >
148 INLINE typename IDatabaseTable< T_DATA >::Handler&
149 IDatabaseTable< T_DATA >::Handler::operator+=(const std::size_t i) {
150 _index_ += i;
151 return *this;
152 }
153
154 // moves back the handler by i rows
155 template < typename T_DATA >
156 INLINE typename IDatabaseTable< T_DATA >::Handler&
157 IDatabaseTable< T_DATA >::Handler::operator-=(const std::size_t i) {
158 if (_index_ >= _begin_index_ + i) _index_ -= i;
159 else _index_ = _begin_index_;
160 return *this;
161 }
162
163 // checks whether two handlers point on the same row
164 template < typename T_DATA >
165 INLINE bool IDatabaseTable< T_DATA >::Handler::operator==(const Handler& handler) const {
166 return _index_ == handler._index_;
167 }
168
169 // checks whether two handlers point to different rows
170 template < typename T_DATA >
171 INLINE bool IDatabaseTable< T_DATA >::Handler::operator!=(const Handler& handler) const {
172 return _index_ != handler._index_;
173 }
174
175 // returns the number of rows managed by the handler
176 template < typename T_DATA >
177 INLINE std::size_t IDatabaseTable< T_DATA >::Handler::size() const {
178 return _end_index_ - _begin_index_;
179 }
180
181 // return the number of rows of the whole database
182 template < typename T_DATA >
183 INLINE std::size_t IDatabaseTable< T_DATA >::Handler::DBSize() const {
184 if (_row_ != nullptr) return _row_->size();
185 else return std::size_t(0);
186 }
187
188 // returns the current row pointed to by the handler
189 template < typename T_DATA >
190 INLINE typename IDatabaseTable< T_DATA >::Handler::const_reference
191 IDatabaseTable< T_DATA >::Handler::rowSafe() const {
192 if (_index_ >= _end_index_) { GUM_ERROR(OutOfBounds, "the handler has reached its end") }
193
194 return _row_->operator[](_index_);
195 }
196
197 // returns the current row pointed to by the handler
198 template < typename T_DATA >
199 INLINE typename IDatabaseTable< T_DATA >::Handler::reference
200 IDatabaseTable< T_DATA >::Handler::rowSafe() {
201 if (_index_ >= _end_index_) { GUM_ERROR(OutOfBounds, "the handler has reached its end") }
202
203 return const_cast< Matrix< T_DATA >* >(_row_)->operator[](_index_);
204 }
205
206 // returns the current row pointed to by the handler (unsafe version)
207 template < typename T_DATA >
208 INLINE typename IDatabaseTable< T_DATA >::Handler::const_reference
209 IDatabaseTable< T_DATA >::Handler::row() const {
210 return _row_->operator[](_index_);
211 }
212
213 // returns the current row pointed to by the handler (unsafe version)
214 template < typename T_DATA >
215 INLINE typename IDatabaseTable< T_DATA >::Handler::reference
216 IDatabaseTable< T_DATA >::Handler::row() {
217 return const_cast< Matrix< T_DATA >* >(_row_)->operator[](_index_);
218 }
219
220 // makes the handler point to the next row
221 template < typename T_DATA >
222 INLINE void IDatabaseTable< T_DATA >::Handler::nextRow() {
223 ++_index_;
224 }
225
226 // returns the number of the current row
227 template < typename T_DATA >
228 INLINE std::size_t IDatabaseTable< T_DATA >::Handler::numRow() const {
229 return (_index_ >= _begin_index_) ? _index_ - _begin_index_ : 0;
230 }
231
232 // indicates whether the handler has reached its end or not
233 template < typename T_DATA >
234 INLINE bool IDatabaseTable< T_DATA >::Handler::hasRows() const {
235 return (_index_ < _end_index_);
236 }
237
238 // puts the handler to the beginning of the database area it handles
239 template < typename T_DATA >
240 INLINE void IDatabaseTable< T_DATA >::Handler::reset() {
241 _index_ = _begin_index_;
242 }
243
244 // returns a new handler that points to the beginning of the
245 // database area of the current handler */
246 template < typename T_DATA >
247 INLINE typename IDatabaseTable< T_DATA >::Handler
248 IDatabaseTable< T_DATA >::Handler::begin() const {
249 Handler handler(*this);
250 handler.reset();
251 return handler;
252 }
253
254 // returns a new handler that points to the end of the
255 // database area of the current handler */
256 template < typename T_DATA >
257 INLINE typename IDatabaseTable< T_DATA >::Handler
258 IDatabaseTable< T_DATA >::Handler::end() const {
259 Handler handler(*this);
260 handler._index_ = _end_index_;
261 return handler;
262 }
263
264 // sets the area in the database the handler will handle
265 template < typename T_DATA >
266 INLINE void IDatabaseTable< T_DATA >::Handler::setRange(std::size_t first, std::size_t last) {
267 if (first > last) std::swap(first, last);
268
269 // check that the end belongs to the database, else raise an exception
270 if (_row_ == nullptr) GUM_ERROR(NullElement, "the handler does not point to any database")
271
272 if (last > _row_->size())
274 "the database has fewer rows (" << _row_->size() << ") than the upper range ("
275 << last << ") specified to the handler")
276
277 _begin_index_ = first;
278 _end_index_ = last;
279 _index_ = first;
280 }
281
282 // returns the current range of the handler
283 template < typename T_DATA >
284 INLINE std::pair< std::size_t, std::size_t > IDatabaseTable< T_DATA >::Handler::range() const {
285 return std::pair< std::size_t, std::size_t >(_begin_index_, _end_index_);
286 }
287
288 // returns the names of the variables
289 template < typename T_DATA >
290 INLINE const typename IDatabaseTable< T_DATA >::Handler::template DBVector< std::string >&
291 IDatabaseTable< T_DATA >::Handler::variableNames() const {
292 return _db_->variableNames();
293 }
294
295 // returns the number of variables (columns) of the database
296 template < typename T_DATA >
297 INLINE std::size_t IDatabaseTable< T_DATA >::Handler::nbVariables() const {
298 if (_db_ != nullptr) return _db_->variableNames().size();
299 else return 0;
300 }
301
302 // returns a pointer on the database
303 template < typename T_DATA >
304 INLINE const IDatabaseTable< T_DATA >& IDatabaseTable< T_DATA >::Handler::database() const {
305 if (_db_ == nullptr) {
306 GUM_ERROR(NullElement, "The database handler does not point toward a database")
307 }
308 return *_db_;
309 }
310
311 // ===========================================================================
312 // Safe handlers
313 // ===========================================================================
314
315 // attach a new handler to the database
316 template < typename T_DATA >
317 INLINE void IDatabaseTable< T_DATA >::HandlerSafe::_attachHandler_() {
318 if (this->_db_ != nullptr) { this->_db_->_attachHandler_(this); }
319 }
320
321 // detach a handler
322 template < typename T_DATA >
323 INLINE void IDatabaseTable< T_DATA >::HandlerSafe::_detachHandler_() {
324 if (this->_db_ != nullptr) { this->_db_->_detachHandler_(this); }
325 }
326
327 // default constructor
328 template < typename T_DATA >
329 INLINE IDatabaseTable< T_DATA >::HandlerSafe::HandlerSafe(const IDatabaseTable< T_DATA >& db) :
330 IDatabaseTable< T_DATA >::Handler(db) {
331 _attachHandler_();
332 GUM_CONSTRUCTOR(IDatabaseTable::HandlerSafe);
333 }
334
335 // copy constructor
336 template < typename T_DATA >
337 INLINE IDatabaseTable< T_DATA >::HandlerSafe::HandlerSafe(
338 const typename IDatabaseTable< T_DATA >::HandlerSafe& h) :
339 IDatabaseTable< T_DATA >::Handler(h) {
340 _attachHandler_();
341 GUM_CONS_CPY(IDatabaseTable::HandlerSafe);
342 }
343
344 // move constructor
345 template < typename T_DATA >
346 INLINE IDatabaseTable< T_DATA >::HandlerSafe::HandlerSafe(
347 typename IDatabaseTable< T_DATA >::HandlerSafe&& h) :
348 IDatabaseTable< T_DATA >::Handler(std::move(h)) {
349 _attachHandler_();
350 GUM_CONS_MOV(IDatabaseTable::HandlerSafe);
351 }
352
353 // destructor
354 template < typename T_DATA >
355 INLINE IDatabaseTable< T_DATA >::HandlerSafe::~HandlerSafe() {
356 _detachHandler_();
357 GUM_DESTRUCTOR(IDatabaseTable::HandlerSafe);
358 }
359
360 // copy operator
361 template < typename T_DATA >
362 INLINE typename IDatabaseTable< T_DATA >::HandlerSafe&
363 IDatabaseTable< T_DATA >::HandlerSafe::operator=(
364 const typename IDatabaseTable< T_DATA >::HandlerSafe& h) {
365 if (this->_db_ != h._db_) {
366 _detachHandler_();
367 this->_db_ = h._db_;
368 _attachHandler_();
369 }
370
371 IDatabaseTable< T_DATA >::Handler::operator=(h);
372 return *this;
373 }
374
375 // copy operator
376 template < typename T_DATA >
377 INLINE typename IDatabaseTable< T_DATA >::HandlerSafe&
378 IDatabaseTable< T_DATA >::HandlerSafe::operator=(
379 const typename IDatabaseTable< T_DATA >::Handler& h) {
380 return this->operator=(dynamic_cast< const IDatabaseTable< T_DATA >::HandlerSafe& >(h));
381 }
382
383 // move operator
384 template < typename T_DATA >
385 INLINE typename IDatabaseTable< T_DATA >::HandlerSafe&
386 IDatabaseTable< T_DATA >::HandlerSafe::operator=(
387 typename IDatabaseTable< T_DATA >::HandlerSafe&& h) {
388 if (this->_db_ != h._db_) {
389 _detachHandler_();
390 this->_db_ = h._db_;
391 _attachHandler_();
392 }
393
394 IDatabaseTable< T_DATA >::Handler::operator=(std::move(h));
395 return *this;
396 }
397
398 // move operator
399 template < typename T_DATA >
400 INLINE typename IDatabaseTable< T_DATA >::HandlerSafe&
401 IDatabaseTable< T_DATA >::HandlerSafe::operator=(
402 typename IDatabaseTable< T_DATA >::Handler&& h) {
403 return this->operator=(std::move(dynamic_cast< IDatabaseTable< T_DATA >::HandlerSafe& >(h)));
404 }
405
406 // ===========================================================================
407 // Database Tables
408 // ===========================================================================
409
410 // create the end iterators
411 template < typename T_DATA >
412 void IDatabaseTable< T_DATA >::_createEndIterators_() {
413 _end_ = new iterator(*this);
414 try {
415 _end_safe_ = new iterator_safe(*this);
416 } catch (...) {
417 delete _end_;
418 throw;
419 }
420 }
421
422 // default constructor
423 template < typename T_DATA >
424 IDatabaseTable< T_DATA >::IDatabaseTable(
425 const typename IDatabaseTable< T_DATA >::MissingValType& missing_symbols,
426 const std::vector< std::string >& var_names) :
427 variable_names_(var_names), missing_symbols_(missing_symbols) {
428 // create the end iterators
429 _createEndIterators_();
430
431 GUM_CONSTRUCTOR(IDatabaseTable);
432 }
433
434 // copy constructor
435 template < typename T_DATA >
436 IDatabaseTable< T_DATA >::IDatabaseTable(const IDatabaseTable< T_DATA >& from) :
437 variable_names_(from.variable_names_), rows_(from.rows_),
438 missing_symbols_(from.missing_symbols_), has_row_missing_val_(from.has_row_missing_val_),
439 max_nb_threads_(from.max_nb_threads_),
440 min_nb_rows_per_thread_(from.min_nb_rows_per_thread_) {
441 // create the end iterators
442 _createEndIterators_();
443
444 GUM_CONS_CPY(IDatabaseTable);
445 }
446
447 // move constructor
448 template < typename T_DATA >
449 IDatabaseTable< T_DATA >::IDatabaseTable(IDatabaseTable< T_DATA >&& from) :
450 variable_names_(std::move(from.variable_names_)), rows_(std::move(from.rows_)),
451 missing_symbols_(std::move(from.missing_symbols_)),
452 has_row_missing_val_(std::move(from.has_row_missing_val_)),
453 max_nb_threads_(from.max_nb_threads_),
454 min_nb_rows_per_thread_(from.min_nb_rows_per_thread_) {
455 // create the end iterators
456 _createEndIterators_();
457
458 GUM_CONS_MOV(IDatabaseTable);
459 }
460
461 // destructor
462 template < typename T_DATA >
463 IDatabaseTable< T_DATA >::~IDatabaseTable() {
464 // indicate to all the handlers that we are destructing the database
465 _safe_handlers_mutex_.lock();
466 for (auto handler: _list_of_safe_handlers_) {
467 handler->_db_ = nullptr;
468 handler->_row_ = nullptr;
469 handler->_end_index_ = 0;
470 handler->_index_ = 0;
471 }
472 _safe_handlers_mutex_.unlock();
473
474 delete _end_;
475 delete _end_safe_;
476
477 GUM_DESTRUCTOR(IDatabaseTable);
478 }
479
480 // copy operator
481 template < typename T_DATA >
482 IDatabaseTable< T_DATA >&
483 IDatabaseTable< T_DATA >::operator=(const IDatabaseTable< T_DATA >& from) {
484 if (this != &from) {
485 // invalidate the current handlers
486 _safe_handlers_mutex_.lock();
487 for (auto handler: _list_of_safe_handlers_) {
488 handler->_db_ = nullptr;
489 handler->_row_ = nullptr;
490 handler->_end_index_ = 0;
491 handler->_index_ = 0;
492 }
493 _list_of_safe_handlers_.clear();
494 _safe_handlers_mutex_.unlock();
495
496 rows_ = from.rows_;
497 variable_names_ = from.variable_names_;
498 missing_symbols_ = from.missing_symbols_;
499 has_row_missing_val_ = from.has_row_missing_val_;
500 max_nb_threads_ = from.max_nb_threads_;
501 min_nb_rows_per_thread_ = from.min_nb_rows_per_thread_;
502
503 // update the end iterators
504 const std::size_t db_size = rows_.size();
505 _end_->_index_ = db_size;
506 _end_->_end_index_ = db_size;
507 _end_safe_->_index_ = db_size;
508 _end_safe_->_end_index_ = db_size;
509 }
510
511 return *this;
512 }
513
514 // move operator
515 template < typename T_DATA >
516 IDatabaseTable< T_DATA >& IDatabaseTable< T_DATA >::operator=(IDatabaseTable< T_DATA >&& from) {
517 if (this != &from) {
518 // invalidate the current handlers
519 _safe_handlers_mutex_.lock();
520 for (auto handler: _list_of_safe_handlers_) {
521 handler->_db_ = nullptr;
522 handler->_row_ = nullptr;
523 handler->_end_index_ = 0;
524 handler->_index_ = 0;
525 }
526 _safe_handlers_mutex_.unlock();
527
528 rows_ = std::move(from.rows_);
529 variable_names_ = std::move(from.variable_names_);
530 missing_symbols_ = std::move(from.missing_symbols_);
531 has_row_missing_val_ = std::move(from.has_row_missing_val_);
532 max_nb_threads_ = from.max_nb_threads_;
533 min_nb_rows_per_thread_ = from.min_nb_rows_per_thread_;
534
535 // update the end iterators
536 const std::size_t db_size = rows_.size();
537 _end_->_index_ = db_size;
538 _end_->_end_index_ = db_size;
539 _end_safe_->_index_ = db_size;
540 _end_safe_->_end_index_ = db_size;
541 }
542
543 return *this;
544 }
545
546 // returns a new unsafe handler pointing to the beginning of the database
547 template < typename T_DATA >
548 INLINE typename IDatabaseTable< T_DATA >::Handler IDatabaseTable< T_DATA >::begin() const {
549 return Handler(*this);
550 }
551
552 // returns a new safe handler pointing to the beginning of the database
553 template < typename T_DATA >
554 INLINE typename IDatabaseTable< T_DATA >::HandlerSafe
555 IDatabaseTable< T_DATA >::beginSafe() const {
556 return HandlerSafe(*this);
557 }
558
559 // returns a new unsafe handler pointing to the end of the database
560 template < typename T_DATA >
561 INLINE const typename IDatabaseTable< T_DATA >::Handler&
562 IDatabaseTable< T_DATA >::end() const noexcept {
563 return *_end_;
564 }
565
567 template < typename T_DATA >
568 INLINE const typename IDatabaseTable< T_DATA >::HandlerSafe&
569 IDatabaseTable< T_DATA >::endSafe() const noexcept {
570 return *_end_safe_;
571 }
572
573 // returns a new unsafe handler on the database
574 template < typename T_DATA >
575 INLINE typename IDatabaseTable< T_DATA >::Handler IDatabaseTable< T_DATA >::handler() const {
576 return Handler(*this);
577 }
578
579 // returns a new safe handler on the database
580 template < typename T_DATA >
581 INLINE typename IDatabaseTable< T_DATA >::HandlerSafe
582 IDatabaseTable< T_DATA >::handlerSafe() const {
583 return HandlerSafe(*this);
584 }
585
586 // returns the content of the database
587 template < typename T_DATA >
588 INLINE const typename IDatabaseTable< T_DATA >::template Matrix< T_DATA >&
589 IDatabaseTable< T_DATA >::content() const noexcept {
590 return rows_;
591 }
592
594 template < typename T_DATA >
595 bool IDatabaseTable< T_DATA >::hasMissingValues() const {
596 for (const auto& status: has_row_missing_val_)
597 if (status == IsMissing::True) return true;
598 return false;
599 }
600
602 template < typename T_DATA >
603 INLINE bool IDatabaseTable< T_DATA >::hasMissingValues(const std::size_t k) const {
604 return has_row_missing_val_[k] == IsMissing::True;
605 }
606
607 // returns the variable names for all the columns
608 template < typename T_DATA >
609 INLINE const std::vector< std::string >&
610 IDatabaseTable< T_DATA >::variableNames() const noexcept {
611 return variable_names_;
612 }
613
615 template < typename T_DATA >
616 INLINE const std::string& IDatabaseTable< T_DATA >::variableName(const std::size_t k) const {
617 if (variable_names_.size() <= k)
618 GUM_ERROR(OutOfBounds, "the database does not contain Column #" << k)
619 return variable_names_[k];
620 }
621
623 template < typename T_DATA >
624 INLINE std::size_t
625 IDatabaseTable< T_DATA >::columnFromVariableName(const std::string& name) const {
626 const std::size_t size = variable_names_.size();
627 for (std::size_t i = 0; i < size; ++i)
628 if (variable_names_[i] == name) return i;
629
630 GUM_ERROR(UndefinedElement, "the database contains no column whose name is " << name)
631 }
632
634 template < typename T_DATA >
635 INLINE typename IDatabaseTable< T_DATA >::template DBVector< std::size_t >
636 IDatabaseTable< T_DATA >::columnsFromVariableName(const std::string& name) const {
637 const std::size_t size = variable_names_.size();
638 DBVector< std::size_t > cols;
639 for (std::size_t i = 0; i < size; ++i)
640 if (variable_names_[i] == name) cols.push_back(i);
641
642 if (cols.empty())
643 GUM_ERROR(UndefinedElement, "the database contains no column whose name is " << name)
644
645 return cols;
646 }
647
648 // returns the number of variables (columns) of the database
649 template < typename T_DATA >
650 INLINE std::size_t IDatabaseTable< T_DATA >::nbVariables() const noexcept {
651 return variable_names_.size();
652 }
653
654 // returns the number of records in the database
655 template < typename T_DATA >
656 INLINE std::size_t IDatabaseTable< T_DATA >::size() const noexcept {
657 return rows_.size();
658 }
659
660 // returns the number of records in the database
661 template < typename T_DATA >
662 INLINE std::size_t IDatabaseTable< T_DATA >::nbRows() const noexcept {
663 return rows_.size();
664 }
665
666 // indicates whether the database contains some records or not
667 template < typename T_DATA >
668 INLINE bool IDatabaseTable< T_DATA >::empty() const noexcept {
669 return rows_.empty();
670 }
671
672 // update the handlers when the size of the database changes
673 template < typename T_DATA >
674 void IDatabaseTable< T_DATA >::_updateHandlers_(std::size_t new_size) const {
675 const std::size_t db_size = rows_.size();
676
677 _safe_handlers_mutex_.lock();
678 for (auto handler: _list_of_safe_handlers_) {
679 if ((handler->_end_index_ == db_size) || (handler->_end_index_ > new_size)) {
680 handler->_end_index_ = new_size;
681 // there is no need to update the index because, in safe handlers,
682 // we always check that the index is less than end_index when trying
683 // to access the rows
684 }
685 }
686 _safe_handlers_mutex_.unlock();
687
688 // update the end iterators
689 _end_->_index_ = new_size;
690 _end_->_end_index_ = new_size;
691 _end_safe_->_index_ = new_size;
692 _end_safe_->_end_index_ = new_size;
693 }
694
695 // attach a new handler to the database
696 template < typename T_DATA >
697 INLINE void IDatabaseTable< T_DATA >::_attachHandler_(HandlerSafe* handler) const {
698 _safe_handlers_mutex_.lock();
699 _list_of_safe_handlers_.push_back(handler);
700 _safe_handlers_mutex_.unlock();
701 }
702
703 // detach a handler
704 template < typename T_DATA >
705 void IDatabaseTable< T_DATA >::_detachHandler_(HandlerSafe* handler) const {
706 _safe_handlers_mutex_.lock();
707
708 for (auto iter = _list_of_safe_handlers_.rbegin(); iter != _list_of_safe_handlers_.rend();
709 ++iter) {
710 if (*iter == handler) {
711 *iter = _list_of_safe_handlers_.back();
712 _list_of_safe_handlers_.pop_back();
713 break;
714 }
715 }
716
717 _safe_handlers_mutex_.unlock();
718 }
719
720 // checks whether a new row has the same size as the rest of the database
721 template < typename T_DATA >
722 INLINE bool IDatabaseTable< T_DATA >::isRowSizeOK_(const std::size_t size) const {
723 return (size == variable_names_.size());
724 }
725
726 // insert a new DBRow at the end of the database
727 template < typename T_DATA >
728 INLINE void IDatabaseTable< T_DATA >::insertRow(
729 const typename IDatabaseTable< T_DATA >::template Row< T_DATA >& row,
730 const typename IDatabaseTable< T_DATA >::IsMissing contains_missing) {
731 // this will call the insertRow with a Row< T_DATA >&&
732 this->insertRow(typename IDatabaseTable< T_DATA >::template Row< T_DATA >(row),
733 contains_missing);
734 }
735
736 // insert a new DBRow at the end of the database
737 template < typename T_DATA >
738 void IDatabaseTable< T_DATA >::insertRow(
739 typename IDatabaseTable< T_DATA >::template Row< T_DATA >&& new_row,
740 const typename IDatabaseTable< T_DATA >::IsMissing contains_missing) {
741 // check that the size of the row is the same as the rest of the database
742 if (!isRowSizeOK_(new_row.size()))
744 "the new row is of size " << new_row.size()
745 << ", which is different from the number of columns "
746 << "of the database, i.e., " << variable_names_.size());
747
748 _updateHandlers_(rows_.size() + 1);
749 rows_.push_back(std::move(new_row));
750 try {
751 has_row_missing_val_.push_back(contains_missing);
752 } catch (...) {
753 rows_.pop_back();
754 throw;
755 }
756 }
757
758 // insert a set of new DBRow at the end of the database
759 template < typename T_DATA >
760 void IDatabaseTable< T_DATA >::insertRows(
761 typename IDatabaseTable< T_DATA >::template Matrix< T_DATA >&& new_rows,
762 const typename IDatabaseTable< T_DATA >::template DBVector<
763 typename IDatabaseTable< T_DATA >::IsMissing >& rows_have_missing_vals) {
764 if (new_rows.empty()) return;
765
766 // check that the missing values indicators vector has the same size
767 // as the new rows
768 if (rows_have_missing_vals.size() != new_rows.size())
770 "the number of new rows (i.e., "
771 << new_rows.size()
772 << ") is different from the number of missing values indicators ("
773 << rows_have_missing_vals.size());
774
775 // check that all the rows have the same size
776 const std::size_t new_size = new_rows[0].size();
777
778 for (const auto& row: new_rows) {
779 if (row.size() != new_size) {
780 GUM_ERROR(SizeError, "all the new rows do not have the same number of columns")
781 }
782 }
783
784 // check that the sizes of the new rows are the same as the rest of
785 // the database
786 if (!isRowSizeOK_(new_size)) {
788 "the new rows have " << new_size
789 << " columns, which is different from the number of columns "
790 << "of the database, i.e., " << variable_names_.size());
791 }
792
793 const std::size_t nb_new_rows = new_rows.size();
794 const std::size_t new_db_size = rows_.size() + nb_new_rows;
795
796 rows_.reserve(new_db_size);
797 has_row_missing_val_.reserve(new_db_size);
798
799 for (std::size_t i = std::size_t(0); i < nb_new_rows; ++i) {
800 rows_.push_back(std::move(new_rows[i]));
801 has_row_missing_val_.push_back(rows_have_missing_vals[i]);
802 }
803
804 _updateHandlers_(new_db_size);
805 }
806
807 // insert a set of new DBRow at the end of the database
808 template < typename T_DATA >
809 void IDatabaseTable< T_DATA >::insertRows(
810 const typename IDatabaseTable< T_DATA >::template Matrix< T_DATA >& new_rows,
811 const typename IDatabaseTable< T_DATA >::template DBVector<
812 typename IDatabaseTable< T_DATA >::IsMissing >& rows_have_missing_vals) {
813 if (new_rows.empty()) return;
814
815 // check that the missing values indicators vector has the same size
816 // as the new rows
817 if (rows_have_missing_vals.size() != new_rows.size())
819 "the number of new rows (i.e., "
820 << new_rows.size()
821 << ") is different from the number of missing values indicators ("
822 << rows_have_missing_vals.size());
823
824 // check that all the rows have the same size
825 const std::size_t new_size = new_rows[0].size();
826
827 for (const auto& row: new_rows) {
828 if (row.size() != new_size) {
829 GUM_ERROR(SizeError, "all the new rows do not have the same number of columns")
830 }
831 }
832
833 // check that the sizes of the new rows are the same as the rest of
834 // the database
835 std::size_t db_size = rows_.size();
836
837 if (!isRowSizeOK_(new_size)) {
839 "the new rows have " << new_size
840 << " columns, which is different from the number of columns "
841 << "of the database, i.e., " << variable_names_.size());
842 }
843
844 const std::size_t nb_new_rows = new_rows.size();
845 const std::size_t new_db_size = rows_.size() + nb_new_rows;
846
847 rows_.reserve(new_db_size);
848 has_row_missing_val_.reserve(new_db_size);
849
850 for (std::size_t i = std::size_t(0); i < nb_new_rows; ++i) {
851 rows_.push_back(new_rows[i]);
852 has_row_missing_val_.push_back(rows_have_missing_vals[i]);
853 }
854
855 _updateHandlers_(db_size);
856 }
857
858 // erase a given row
859 template < typename T_DATA >
860 INLINE void IDatabaseTable< T_DATA >::eraseRow(std::size_t index) {
861 const std::size_t db_size = rows_.size();
862
863 if (index < db_size) {
864 _updateHandlers_(db_size - 1);
865 rows_.erase(rows_.begin() + index);
866 has_row_missing_val_.erase(has_row_missing_val_.begin() + index);
867 }
868 }
869
870 // erase the last row
871 template < typename T_DATA >
872 INLINE void IDatabaseTable< T_DATA >::eraseLastRow() {
873 const std::size_t db_size = rows_.size();
874
875 if (db_size) {
876 _updateHandlers_(db_size - 1);
877 rows_.pop_back();
878 has_row_missing_val_.pop_back();
879 }
880 }
881
882 // erase the first row
883 template < typename T_DATA >
884 INLINE void IDatabaseTable< T_DATA >::eraseFirstRow() {
885 const std::size_t db_size = rows_.size();
886
887 if (db_size) {
888 _updateHandlers_(db_size - 1);
889 rows_.erase(rows_.begin());
890 has_row_missing_val_.erase(has_row_missing_val_.begin());
891 }
892 }
893
894 // erase all the rows
895 template < typename T_DATA >
896 INLINE void IDatabaseTable< T_DATA >::eraseAllRows() {
897 _updateHandlers_(0);
898 rows_.clear();
899 has_row_missing_val_.clear();
900 }
901
902 // erase the k first rows
903 template < typename T_DATA >
904 INLINE void IDatabaseTable< T_DATA >::eraseFirstRows(const std::size_t nb_rows) {
905 const std::size_t db_size = rows_.size();
906
907 if (nb_rows >= db_size) {
908 eraseAllRows();
909 } else {
910 _updateHandlers_(db_size - nb_rows);
911 rows_.erase(rows_.begin(), rows_.begin() + nb_rows);
912 has_row_missing_val_.erase(has_row_missing_val_.begin(),
913 has_row_missing_val_.begin() + nb_rows);
914 }
915 }
916
917 // erase the k last rows
918 template < typename T_DATA >
919 INLINE void IDatabaseTable< T_DATA >::eraseLastRows(const std::size_t nb_rows) {
920 const std::size_t db_size = rows_.size();
921
922 if (nb_rows >= db_size) {
923 eraseAllRows();
924 } else {
925 _updateHandlers_(db_size - nb_rows);
926 rows_.erase(rows_.begin() + (db_size - nb_rows), rows_.begin() + db_size);
927 has_row_missing_val_.erase(has_row_missing_val_.begin() + (db_size - nb_rows),
928 has_row_missing_val_.begin() + db_size);
929 }
930 }
931
932 // erase the rows from the debth to the endth (not included)
933 template < typename T_DATA >
934 INLINE void IDatabaseTable< T_DATA >::eraseRows(std::size_t deb, std::size_t end) {
935 if (deb > end) std::swap(deb, end);
936
937 const std::size_t db_size = rows_.size();
938
939 if (end >= db_size) {
940 if (deb >= db_size) {
941 return;
942 } else {
943 eraseLastRows(db_size - deb);
944 }
945 } else {
946 _updateHandlers_(db_size - (end - deb));
947 rows_.erase(rows_.begin() + deb, rows_.begin() + end);
948 has_row_missing_val_.erase(has_row_missing_val_.begin() + deb,
949 has_row_missing_val_.begin() + end);
950 }
951 }
952
953 // erase the content of the database, including the names of the variables
954 template < typename T_DATA >
955 INLINE void IDatabaseTable< T_DATA >::clear() {
956 _updateHandlers_(0);
957 rows_.clear();
958 has_row_missing_val_.clear();
959 variable_names_.clear();
960 }
961
962 // returns the set of symbols for the missing values
963 template < typename T_DATA >
964 INLINE const std::vector< std::string >& IDatabaseTable< T_DATA >::missingSymbols() const {
965 return missing_symbols_;
966 }
967
969 template < typename T_DATA >
970 void IDatabaseTable< T_DATA >::setMaxNbThreads(const std::size_t nb) const {
971 if (nb == std::size_t(0)) max_nb_threads_ = std::size_t(1);
972 else max_nb_threads_ = nb;
973 }
974
976 template < typename T_DATA >
977 INLINE std::size_t IDatabaseTable< T_DATA >::nbThreads() const {
978 return max_nb_threads_;
979 }
980
983 template < typename T_DATA >
984 void IDatabaseTable< T_DATA >::setMinNbRowsPerThread(const std::size_t nb) const {
985 if (nb == std::size_t(0)) min_nb_rows_per_thread_ = std::size_t(1);
986 else min_nb_rows_per_thread_ = nb;
987 }
988
990 template < typename T_DATA >
991 INLINE std::size_t IDatabaseTable< T_DATA >::minNbRowsPerThread() const {
992 return min_nb_rows_per_thread_;
993 }
994
996 template < typename T_DATA >
997 std::size_t IDatabaseTable< T_DATA >::nbProcessingThreads_() const {
998 const std::size_t db_size = nbRows();
999 std::size_t nb_threads = db_size / min_nb_rows_per_thread_;
1000 if (nb_threads < 1) nb_threads = 1;
1001 else if (nb_threads > max_nb_threads_) nb_threads = max_nb_threads_;
1002
1003 return nb_threads;
1004 }
1005
1007 template < typename T_DATA >
1008 std::vector< std::pair< std::size_t, std::size_t > >
1009 IDatabaseTable< T_DATA >::rangesProcessingThreads_(const std::size_t nb_threads) const {
1010 std::vector< std::pair< std::size_t, std::size_t > > ranges;
1011 const std::size_t db_size = nbRows();
1012 std::size_t nb_rows_per_thread = db_size / nb_threads;
1013 std::size_t rest_rows = db_size - nb_rows_per_thread * nb_threads;
1014
1015 // assign to threads the ranges over which they should change the
1016 // rows weights
1017 std::size_t begin_index = std::size_t(0);
1018 for (std::size_t i = std::size_t(0); i < nb_threads; ++i) {
1019 std::size_t end_index = begin_index + nb_rows_per_thread;
1020 if (rest_rows != std::size_t(0)) {
1021 ++end_index;
1022 --rest_rows;
1023 }
1024 ranges.push_back(std::pair< std::size_t, std::size_t >(begin_index, end_index));
1025 begin_index = end_index;
1026 }
1027
1028 return ranges;
1029 }
1030
1032 template < typename T_DATA >
1033 void IDatabaseTable< T_DATA >::setAllRowsWeight(const double new_weight) {
1034 // determine the number of threads to use and the rows they should process
1035 const std::size_t nb_threads = nbProcessingThreads_();
1036 const std::vector< std::pair< std::size_t, std::size_t > > ranges
1037 = rangesProcessingThreads_(nb_threads);
1038
1039 // perform the assignment:
1040 // we create the lambda that will be executed by all the threads
1041 auto threadedAssign = [this, &ranges, new_weight](const std::size_t this_thread,
1042 const std::size_t nb_threads) -> void {
1043 const std::size_t begin_index = ranges[this_thread].first;
1044 const std::size_t end_index = ranges[this_thread].second;
1045
1046 for (std::size_t i = begin_index; i < end_index; ++i) {
1047 this->rows_[i].setWeight(new_weight);
1048 }
1049 };
1050
1051 // launch the threads
1052 ThreadExecutor::execute(nb_threads, threadedAssign);
1053 }
1054
1056 template < typename T_DATA >
1057 void IDatabaseTable< T_DATA >::setWeight(const std::size_t i, const double weight) {
1058 // check that i is less than the number of rows
1059 const std::size_t dbsize = nbRows();
1060 if (i >= dbsize) {
1062 "it is impossible to set the weight of record #"
1063 << i << " because the database contains only " << nbRows() << " records");
1064 }
1065
1066 // check that the weight is positive
1067 if (weight < 0) {
1069 "it is impossible to set " << weight << " as a weight of record #" << i
1070 << " because this weight is negative");
1071 }
1072
1073 rows_[i].setWeight(weight);
1074 }
1075
1077 template < typename T_DATA >
1078 double IDatabaseTable< T_DATA >::weight(const std::size_t i) const {
1079 // check that i is less than the number of rows
1080 const std::size_t dbsize = nbRows();
1081 if (i >= dbsize) {
1083 "it is impossible to get the weight of record #"
1084 << i << " because the database contains only " << nbRows() << " records");
1085 }
1086
1087 return rows_[i].weight();
1088 }
1089
1091 template < typename T_DATA >
1092 double IDatabaseTable< T_DATA >::weight() const {
1093 double w = 0.0;
1094 for (const auto& row: rows_)
1095 w += row.weight();
1096 return w;
1097 }
1098
1099
1100 } /* namespace learning */
1101
1102} /* namespace gum */
1103
1104#endif /* DOXYGEN_SHOULD_SKIP_THIS */
The common class for the tabular database tables.
Exception : a pointer or a reference on a nullptr (0) object.
Exception : out of bound.
Exception : problem with size.
Exception : a looked-for element could not be found.
The base class for all database handlers.
Definition DBHandler.h:140
Handler(const IDatabaseTable< T_DATA > &db)
default constructor
The common class for the tabular database tables.
#define GUM_ERROR(type, msg)
Definition exceptions.h:72
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
STL namespace.