aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
databaseTable.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
51#ifndef GUM_DATABASE_TABLE_H
52#define GUM_DATABASE_TABLE_H
53
54#include <algorithm>
55#include <exception>
56#include <functional>
57#include <memory>
58#include <numeric>
59#include <vector>
60
61#include <agrum/agrum.h>
62
65
67
68namespace gum::learning {
69
// Tabular database table specialised for DBTranslatedValue cells: it derives
// from IDatabaseTable< DBTranslatedValue > and keeps the set of translators
// (DBTranslatorSet) associated with its columns.
// NOTE(review): this listing is an extract in which several declaration lines
// are missing (only their trailing parameters remain); confirm against the
// real header before relying on any signature.
200 class DatabaseTable final: public IDatabaseTable< DBTranslatedValue > {
201 public:
// the type for the vectors used in the DatabaseTable
203 template < typename TX_DATA >
204 using DBVector = std::vector< TX_DATA >;
205
207 template < typename TX_DATA >
209
// the type for the matrices stored into the database
211 template < typename TX_DATA >
212 using Matrix = std::vector< DBRow< TX_DATA > >;
213
// the list of strings used as missing-value symbols
// (presumably; inferred from the alias name and its use in the constructor)
214 using MissingValType = std::vector< std::string >;
215
218
221
223
// Types for STL compliance.
230 using const_pointer = const value_type*;
231 using size_type = std::size_t;
232 using difference_type = std::ptrdiff_t;
236
237
238 // ##########################################################################
240 // ##########################################################################
242
// default constructor, taking the missing-value symbols and an initial set of
// translators (an empty DBTranslatorSet by default)
244 explicit DatabaseTable(const MissingValType& missing_symbols,
245 const DBTranslatorSet& translators = DBTranslatorSet());
246
// default constructor, taking only an initial set of translators
248 explicit DatabaseTable(const DBTranslatorSet& translators = DBTranslatorSet());
249
252
// move constructor
254 DatabaseTable(DatabaseTable&& from) noexcept;
255
// virtual copy constructor
257 DatabaseTable* clone() const override;
258
// destructor
260 ~DatabaseTable() override;
261
263
264 // ##########################################################################
266 // ##########################################################################
268
271
274
276
277
278 // ##########################################################################
280 // ##########################################################################
282
284
// NOTE(review): the two lines below are the tail of a declaration whose first
// line is missing from this listing (presumably the insertTranslator overload
// taking a DBTranslator) -- confirm against the real header.
299 const std::size_t input_column,
300 const bool unique_column = true);
301
303
// insert a new translator into the database table, built from a Variable
323 std::size_t insertTranslator(const Variable& var,
324 const std::size_t input_column,
325 const bool unique_column = true);
326
328
// insert a new translator into the database table, built from a Variable and
// an explicit list of missing-value symbols
344 std::size_t insertTranslator(const Variable& var,
345 std::size_t input_column,
346 const std::vector< std::string >& missing_symbols,
347 bool unique_column = true);
348
// erases either the kth translator or all those parsing the kth column of the
// input dataset
363 void eraseTranslators(std::size_t k, bool k_is_input_col = false);
364
366
// change the translator of a database column
376 void changeTranslator(DBTranslator& new_translator, std::size_t k, bool k_is_input_col = false);
377
379
// change the translator of a database column; the new translator is described
// by a Variable plus optional missing symbols and dictionary settings
400 auto changeTranslator(const Variable& var,
401 std::size_t k,
402 bool k_is_input_col = false,
403 const std::vector< std::string >& missing_symbols
404 = std::vector< std::string >(),
405 bool editable_dictionary = false,
406 std::size_t max_dico_entries = std::numeric_limits< std::size_t >::max())
407 -> void;
408
411
// returns either the kth translator of the database table or the first one
// reading the kth input column (presumably selected by k_is_input_col)
425 const DBTranslator& translator(const std::size_t k, const bool k_is_input_col = false) const;
426
428
// propose a set with translators better suited for the content of the database
433 std::vector< std::pair< Idx, std::shared_ptr< DBTranslator > > > betterTranslators() const;
434
// returns either the kth variable of the database table or the first one
// corresponding to the kth input column (presumably selected by k_is_input_col)
448 const Variable& variable(const std::size_t k, const bool k_is_input_col = false) const;
449
452
454
// sets the names of the variables
479 void setVariableNames(const std::vector< std::string >& names,
480 const bool from_external_object = true) override;
481
// makes the database table ignore from now on the kth column of the input
// dataset (or, presumably depending on from_external_object, the kth column
// of the table itself -- TODO confirm)
514 void ignoreColumn(const std::size_t k, const bool from_external_object = true) override;
515
517
520
// returns the set of columns of the original dataset that are present in the
// DatabaseTable
523 const DBVector< std::size_t > inputColumns() const override;
524
// returns the domain size of the kth variable of the database table or of the
// first one corresponding to the kth input column
539 std::size_t domainSize(const std::size_t k, const bool k_is_input_col = false) const;
540
543
// indicates whether a reordering is needed to sort the translations of the
// kth translator (or of the translators of the kth input column)
576 bool needsReordering(const std::size_t k, const bool k_is_input_col = false) const;
577
// performs a reordering of the kth translator or of the first translator
// parsing the kth input column
597 void reorder(const std::size_t k, const bool k_is_input_col = false);
598
600
// performs a reordering of all the columns
605 void reorder();
606
609
611
// insert a new row, given as strings, at the end of the database
633 void insertRow(const std::vector< std::string >& new_row) override;
634
636
// NOTE(review): tail of an insertRow overload whose first line is missing from
// this listing (presumably the one taking a Row< DBTranslatedValue >).
646 const IsMissing contains_missing_data) override;
647
649
// NOTE(review): tail of another insertRow overload whose first line is missing
// from this listing.
659 const IsMissing contains_missing_data) override;
660
662
// insert a new DBRow of DBCells at the end of the database (copy version)
668 void insertRow(const Row< DBCell >& new_row) override;
669
671
// insert a new DBRow of DBCells at the end of the database (move version)
677 void insertRow(Row< DBCell >&& new_row) override;
678
680
// NOTE(review): tail of an insertRows overload whose first line is missing
// from this listing (presumably taking a Matrix< DBTranslatedValue >).
690 const DBVector< IsMissing >& rows_have_missing_vals) override;
691
693
// NOTE(review): tail of another insertRows overload whose first line is
// missing from this listing.
702 const DBVector< IsMissing >& rows_have_missing_vals) override;
703
705
// insert a set of new DBRows at the end of the database (move version)
711 void insertRows(Matrix< DBCell >&& new_rows) override;
712
714
// insert a set of new DBRows at the end of the database (copy version)
720 void insertRows(const Matrix< DBCell >& new_rows) override;
721
// erase the content of the database, including the names of the variables
723 void clear() override;
724
725 // substitutes the kth translator by another one
726 /* The method checks that:
727 * 1/ it is possible to get back the original values of the database
728 * for the rows already translated.
729 * 2/ that the new translator is capable of translating these values.
730 *
731 * If both checks passed, then it replaces the kth translator
732 * by the one passed in arguments and retranslates with it the kth
733 * cell of all the rows already contained in the database */
734
736
737
738#ifndef DOXYGEN_SHOULD_SKIP_THIS
739
740 private:
// the translators of the table, packed together in a DBTranslatorSet
742 DBTranslatorSet _translators_;
743
// presumably the indices of the input columns that are ignored -- TODO confirm
745 Set< std::size_t > _ignored_cols_;
746
// presumably checks that a translated row fits the current translators --
// TODO confirm against the implementation
749 bool _isRowCompatible_(const Row< DBTranslatedValue >& row) const;
750
// presumably maps k (a translator index or, if k_is_input_col, an input
// column) to a single translator index -- TODO confirm
756 std::size_t _getKthIndex_(const std::size_t k, const bool k_is_input_col) const;
757
// same as above but returning several indices -- TODO confirm
762 DBVector< std::size_t > _getKthIndices_(const std::size_t k, const bool k_is_input_col) const;
763
765
// NOTE(review): judging by its name and parameters, runs exec_func over the
// database using threads, with undo_func as a rollback -- TODO confirm
785 template < typename Functor1, typename Functor2 >
786 void _threadProcessDatabase_(Functor1& exec_func, Functor2& undo_func);
787
788#endif /* DOXYGEN_SHOULD_SKIP_THIS */
789 };
790
791} // namespace gum::learning
792
795
797#ifndef GUM_NO_INLINE
799#endif /* GUM_NO_INLINE */
800
801
802#endif /* GUM_DATABASE_TABLE_H */
A class for storing several translators.
The common class for the tabular database tables.
Base class for every random variable.
Definition variable.h:79
the class for packing together the translators used to preprocess the datasets
The base class for all the tabular database cell translators.
bool needsReordering(const std::size_t k, const bool k_is_input_col=false) const
indicates whether a reordering is needed to sort the translations of the kth translator or those of t...
void changeTranslator(DBTranslator &new_translator, std::size_t k, bool k_is_input_col=false)
change the translator of a database column
void insertRow(Row< DBTranslatedValue > &&new_row, const IsMissing contains_missing_data) override
insert a new DBRow at the end of the database
DBVector< std::size_t > domainSizes() const
returns the domain sizes of all the variables in the database table
DatabaseTable(const MissingValType &missing_symbols, const DBTranslatorSet &translators=DBTranslatorSet())
default constructor
typename IDatabaseTable< DBTranslatedValue >::Handler Handler
the unsafe handler type
typename IDatabaseTable< DBTranslatedValue >::IsMissing IsMissing
void setVariableNames(const std::vector< std::string > &names, const bool from_external_object=true) override
sets the names of the variables
void reorder()
performs a reordering of all the columns
std::size_t size_type
Types for STL compliance.
const value_type * const_pointer
Types for STL compliance.
DatabaseTable(const DatabaseTable &from)
copy constructor
std::ptrdiff_t difference_type
Types for STL compliance.
Handler iterator
Types for STL compliance.
const value_type & const_reference
Types for STL compliance.
void insertRow(const Row< DBCell > &new_row) override
insert a new DBRow of DBCells at the end of the database
void insertRows(const Matrix< DBCell > &new_rows) override
insert a set of new DBRows at the end of the database
DatabaseTable(DatabaseTable &&from) noexcept
move constructor
DatabaseTable & operator=(const DatabaseTable &from)
copy operator
DatabaseTable(const DBTranslatorSet &translators=DBTranslatorSet())
default constructor
std::size_t insertTranslator(const DBTranslator &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
void ignoreColumn(const std::size_t k, const bool from_external_object=true) override
makes the database table ignore from now on the kth column of the input dataset or the column parsed ...
auto changeTranslator(const Variable &var, std::size_t k, bool k_is_input_col=false, const std::vector< std::string > &missing_symbols=std::vector< std::string >(), bool editable_dictionary=false, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max()) -> void
change the translator of a database column
~DatabaseTable() override
destructor
void eraseTranslators(std::size_t k, bool k_is_input_col=false)
erases either the kth translator or all those parsing the kth column of the input dataset
HandlerSafe iterator_safe
Types for STL compliance.
const DBVector< std::size_t > inputColumns() const override
returns the set of columns of the original dataset that are present in the DatabaseTable
std::vector< TX_DATA > DBVector
the type for the vectors used in the DatabaseTable
const DBVector< std::size_t > ignoredColumns() const override
returns the set of columns of the original dataset that are ignored
void insertRow(const Row< DBTranslatedValue > &new_row, const IsMissing contains_missing_data) override
insert a new row at the end of the database
value_type * pointer
Types for STL compliance.
DBRow< TX_DATA > Row
a row of the database
void insertRows(const Matrix< DBTranslatedValue > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals) override
insert a set of new DBRows at the end of the database
std::vector< DBRow< TX_DATA > > Matrix
the type for the matrices stored into the database
void insertRow(Row< DBCell > &&new_row) override
insert a new DBRow of DBCells at the end of the database
void insertRows(Matrix< DBCell > &&new_rows) override
insert a set of new DBRows at the end of the database
void insertRows(Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) override
insert a set of new DBRows at the end of the database
void reorder(const std::size_t k, const bool k_is_input_col=false)
performs a reordering of the kth translator or of the first translator parsing the kth column of the ...
const DBTranslator & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
DatabaseTable & operator=(DatabaseTable &&from) noexcept
move operator
std::size_t domainSize(const std::size_t k, const bool k_is_input_col=false) const
returns the domain size of the kth variable of the database table or of that of the first one corresp...
DatabaseTable * clone() const override
virtual copy constructor
const Variable & variable(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth variable of the database table or the first one corresponding to the kth colum...
Row< DBTranslatedValue > value_type
Types for STL compliance.
typename IDatabaseTable< DBTranslatedValue >::HandlerSafe HandlerSafe
the safe handler type
std::size_t insertTranslator(const Variable &var, std::size_t input_column, const std::vector< std::string > &missing_symbols, bool unique_column=true)
insert a new translator into the database table
std::size_t insertTranslator(const Variable &var, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
void clear() override
erase the content of the database, including the names of the variables
value_type & reference
Types for STL compliance.
std::vector< std::string > MissingValType
void insertRow(const std::vector< std::string > &new_row) override
insert a new row at the end of the database
std::vector< std::pair< Idx, std::shared_ptr< DBTranslator > > > betterTranslators() const
propose a set with translators better suited for the content of the database
const DBTranslatorSet & translatorSet() const
returns the set of translators
the safe handler of the tabular databases
the (unsafe) handler for the tabular databases
IDatabaseTable(const MissingValType &missing_symbols, const std::vector< std::string > &var_names)
The implementation of tabular databases stored in memory (RAM).
Useful macros for maths.
include the inlined functions if necessary
Definition CSVParser.h:54
The union class for storing the translated values in learning databases.