aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
gum::learning::DatabaseTable Class Reference (final)

The class representing a tabular database as used by learning tasks. More...

#include <agrum/base/database/databaseTable.h>

Inheritance diagram for gum::learning::DatabaseTable:
Collaboration diagram for gum::learning::DatabaseTable:

Public Types

template<typename TX_DATA>
using DBVector = std::vector< TX_DATA >
 the type for the vectors used in the DatabaseTable
template<typename TX_DATA>
using Row = DBRow< TX_DATA >
 a row of the database
template<typename TX_DATA>
using Matrix = std::vector< DBRow< TX_DATA > >
 the type for the matrices stored into the database
using MissingValType = std::vector< std::string >
using Handler = typename IDatabaseTable< DBTranslatedValue >::Handler
 the unsafe handler type
using HandlerSafe = typename IDatabaseTable< DBTranslatedValue >::HandlerSafe
 the safe handler type
using IsMissing = typename IDatabaseTable< DBTranslatedValue >::IsMissing
using value_type = Row< DBTranslatedValue >
 Types for STL compliance.
using reference = value_type&
 Types for STL compliance.
using const_reference = const value_type&
 Types for STL compliance.
using pointer = value_type*
 Types for STL compliance.
using const_pointer = const value_type*
 Types for STL compliance.
using size_type = std::size_t
 Types for STL compliance.
using difference_type = std::ptrdiff_t
 Types for STL compliance.
using iterator = Handler
 Types for STL compliance.
using iterator_safe = HandlerSafe
 Types for STL compliance.
using const_iterator
 Types for STL compliance.
using const_iterator_safe
 Types for STL compliance.

Public Member Functions

Constructors / Destructors
 DatabaseTable (const MissingValType &missing_symbols, const DBTranslatorSet &translators=DBTranslatorSet())
 default constructor
 DatabaseTable (const DBTranslatorSet &translators=DBTranslatorSet())
 default constructor
 DatabaseTable (const DatabaseTable &from)
 copy constructor
 DatabaseTable (DatabaseTable &&from) noexcept
 move constructor
DatabaseTable * clone () const override
 virtual copy constructor
 ~DatabaseTable () override
 destructor
Operators
DatabaseTable & operator= (const DatabaseTable &from)
 copy operator
DatabaseTable & operator= (DatabaseTable &&from) noexcept
 move operator
Accessors / Modifiers
std::size_t insertTranslator (const DBTranslator &translator, const std::size_t input_column, const bool unique_column=true)
 insert a new translator into the database table
std::size_t insertTranslator (const Variable &var, const std::size_t input_column, const bool unique_column=true)
 insert a new translator into the database table
std::size_t insertTranslator (const Variable &var, std::size_t input_column, const std::vector< std::string > &missing_symbols, bool unique_column=true)
 insert a new translator into the database table
void eraseTranslators (std::size_t k, bool k_is_input_col=false)
 erases either the kth translator or all those parsing the kth column of the input dataset
void changeTranslator (DBTranslator &new_translator, std::size_t k, bool k_is_input_col=false)
 change the translator of a database column
auto changeTranslator (const Variable &var, std::size_t k, bool k_is_input_col=false, const std::vector< std::string > &missing_symbols=std::vector< std::string >(), bool editable_dictionary=false, std::size_t max_dico_entries=std::numeric_limits< std::size_t >::max()) -> void
 change the translator of a database column
const DBTranslatorSet & translatorSet () const
 returns the set of translators
const DBTranslator & translator (const std::size_t k, const bool k_is_input_col=false) const
 returns either the kth translator of the database table or the first one reading the kth column of the input database
std::vector< std::pair< Idx, std::shared_ptr< DBTranslator > > > betterTranslators () const
 propose a set with translators better suited for the content of the database
const Variable & variable (const std::size_t k, const bool k_is_input_col=false) const
 returns either the kth variable of the database table or the first one corresponding to the kth column of the input database
void setVariableNames (const std::vector< std::string > &names, const bool from_external_object=true) override
 sets the names of the variables
void ignoreColumn (const std::size_t k, const bool from_external_object=true) override
 makes the database table ignore from now on the kth column of the input dataset or the column parsed by the kth translator
const DBVector< std::size_t > ignoredColumns () const override
 returns the set of columns of the original dataset that are ignored
const DBVector< std::size_t > inputColumns () const override
 returns the set of columns of the original dataset that are present in the DatabaseTable
std::size_t domainSize (const std::size_t k, const bool k_is_input_col=false) const
 returns the domain size of the kth variable of the database table or of that of the first one corresponding to the kth column of the input database
DBVector< std::size_t > domainSizes () const
 returns the domain sizes of all the variables in the database table
bool needsReordering (const std::size_t k, const bool k_is_input_col=false) const
 indicates whether a reordering is needed to sort the translations of the kth translator or those of the first translator parsing the kth column
void reorder (const std::size_t k, const bool k_is_input_col=false)
 performs a reordering of the kth translator or of the first translator parsing the kth column of the input database
void reorder ()
 performs a reordering of all the columns
void insertRow (const std::vector< std::string > &new_row) override
 insert a new row at the end of the database
void insertRow (Row< DBTranslatedValue > &&new_row, const IsMissing contains_missing_data) override
 insert a new DBRow at the end of the database
void insertRow (const Row< DBTranslatedValue > &new_row, const IsMissing contains_missing_data) override
 insert a new row at the end of the database
void insertRow (const Row< DBCell > &new_row) override
 insert a new DBRow of DBCells at the end of the database
void insertRow (Row< DBCell > &&new_row) override
 insert a new DBRow of DBCells at the end of the database
void insertRows (Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) override
 insert a set of new DBRows at the end of the database
void insertRows (const Matrix< DBTranslatedValue > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals) override
 insert a set of new DBRows at the end of the database
void insertRows (Matrix< DBCell > &&new_rows) override
 insert a set of new DBRows at the end of the database
void insertRows (const Matrix< DBCell > &new_rows) override
 insert a set of new DBRows at the end of the database
void clear () override
 erase the content of the database, including the names of the variables
Iterators
iterator begin () const
 returns a new unsafe handler pointing to the beginning of the database
iterator_safe beginSafe () const
 returns a new safe handler pointing to the beginning of the database
const iterator & end () const noexcept
 returns a new unsafe handler pointing to the end of the database
const iterator_safe & endSafe () const noexcept
 returns a new safe handler pointing to the end of the database
Accessors / Modifiers
const Matrix< DBTranslatedValue > & content () const noexcept
 returns the content (the records) of the database
iterator handler () const
 returns a new unsafe handler pointing to the 1st record of the database
iterator_safe handlerSafe () const
 returns a new safe handler pointing to the 1st record of the database
const DBVector< std::string > & variableNames () const noexcept
 returns the variable names for all the columns of the database
const std::string & variableName (const std::size_t k) const
 returns the name of the kth column of the IDatabaseTable
std::size_t columnFromVariableName (const std::string &name) const
 returns the index of the column whose name is passed in argument
DBVector< std::size_t > columnsFromVariableName (const std::string &name) const
 returns the indices of all the columns whose name is passed in argument
std::size_t nbVariables () const noexcept
 returns the number of variables (columns) of the database
std::size_t nbRows () const noexcept
 returns the number of records (rows) in the database
std::size_t size () const noexcept
 returns the number of records (rows) in the database
bool empty () const noexcept
 indicates whether the database contains some records or not
virtual void insertRows (Matrix< DBTranslatedValue > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
 insert a set of new DBRows at the end of the database
void eraseRow (std::size_t index)
 erase a given row specified by its index in the table
void eraseFirstRow ()
 erase the first row
void eraseLastRow ()
 erase the last row
void eraseFirstRows (const std::size_t k)
 erase the k first rows
void eraseLastRows (const std::size_t k)
 erase the k last rows
void eraseRows (std::size_t deb, std::size_t end)
 erase the rows from index deb to index end (end not included)
void eraseAllRows ()
 erase all the rows
const DBVector< std::string > & missingSymbols () const
 returns the set of missing symbols
bool hasMissingValues () const
 indicates whether the database contains some missing values
void setMaxNbThreads (const std::size_t nb) const
 changes the max number of threads that a database can use
std::size_t nbThreads () const
 returns the number of threads used to parse the database
void setMinNbRowsPerThread (const std::size_t nb) const
 changes the minimum number of rows a thread should process in a multithreading context
std::size_t minNbRowsPerThread () const
 returns the minimum of rows that each thread should process
void setAllRowsWeight (const double new_weight)
 assign a given weight to all the rows of the database
void setWeight (const std::size_t i, const double weight)
 assigns a given weight to the ith row of the database
double weight (const std::size_t i) const
 returns the weight of the ith record

Protected Member Functions

bool isRowSizeOK_ (const std::size_t size) const
 checks whether a size corresponds to the number of columns of the database
std::size_t nbProcessingThreads_ () const
 returns the number of threads used to process the current database content
std::vector< std::pair< std::size_t, std::size_t > > rangesProcessingThreads_ (const std::size_t nb_threads) const
 returns the ranges that threads should process

Protected Attributes

DBVector< std::string > variable_names_
 the names of the variables for each column
Matrix< DBTranslatedValue > rows_
DBVector< std::string > missing_symbols_
DBVector< IsMissing > has_row_missing_val_
std::size_t max_nb_threads_
std::size_t min_nb_rows_per_thread_

Detailed Description

The class representing a tabular database as used by learning tasks.

Class DatabaseTable represents a tabular database that stores in the computer's random access memory (RAM) its content as a vector of DBRows of DBTranslatedValue instances. This class is very well suited for fast learning algorithms.

Usage example:
// create the database from a CSV. This is not compulsory for
// DatabaseTable instances, but this is how we usually create
// DatabaseTable instances
gum::learning::DBInitializerFromCSV<> initializer ( "asia.csv" );
const auto& var_names = initializer.variableNames ();
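gum::learning::DBTranslatorSet translator_set;
gum::learning::DBTranslator4LabelizedVariable translator;
for ( std::size_t i = 0; i < var_names.size(); ++i )
translator_set.insertTranslator ( translator, i );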
gum::learning::DatabaseTable<> database ( translator_set );
database.setVariableNames( initializer.variableNames () );
// here, database contains the content of the asia.csv file.
// determine how many columns and rows the database contains
std::size_t nb_rows = database.nbRows();
std::size_t nb_cols = database.nbVariables ();
// manually add a new row into the database
std::vector<std::string> row( 8, "toto" ); // asia has 8 columns
database.insertRow ( row );
gum::learning::DBRow<gum::learning::DBTranslatedValue>
dbrow ( 8, gum::learning::DBTranslatedValue { std::size_t(0) } );
database.insertRow ( dbrow );
// insert 4 rows in a single call:
database.insertRows(
std::vector<gum::learning::DBRow<gum::learning::DBTranslatedValue>>
( 4, dbrow ) );
// erase some rows
database.eraseRow ( 12 ); // erase the 13th row of the database
database.eraseFirstRow (); // erase the first row of the database
database.eraseLastRow (); // erase the last row of the database
database.eraseFirstRows ( 2 ); // erase the first two rows
database.eraseLastRows ( 3 ); // erase the last three rows
database.eraseRows ( 2,4 ); // erase rows indexed from 2 to 4 (excluded)
// parse the content of the database, the usual way
for ( const auto& dbrow : database )
std::cout << dbrow.row() << " weight: " << dbrow.weight() << std::endl;
// ignore some columns of the database, i.e., remove them
database.ignoreColumn ( 3 ); // remove the column X3 of the CSV file
// now, the database contains columns 0, 1, 2, 4, 5, 6, 7 of the
// CSV file. If we wish to remove Column X5 of the CSV file:
database.ignoreColumn ( 5 ); // remove the column X5 of the CSV file
// now, the database contains columns 0, 1, 2, 4, 6, 7 of the CSV file.
// if we wish to remove the 5th column of the DatabaseTable, i.e.,
// column #4 of the CSV, either we determine that this actually correspond
// to column X6 of the CSV and we use database.ignoreColumn ( 6 ) or
// we call:
database.ignoreColumn ( 4, false ); // false => 4 = the 5th column of
// the DatabaseTable, not the 5th column/variable of the CSV file
// (remember that all column numbers start from 0).
// display the columns of the CSV that were ignored and those that
// were kept:
std::vector<std::size_t> ignored_cols = database.ignoredColumns ();
std::vector<std::size_t> kept_cols = database.inputColumns ();
// parse the content of the database using handlers
typename gum::learning::DatabaseTable<>::HandlerSafe handler( database );
typename gum::learning::DatabaseTable<>::Handler uhandler( database );
// by default, the handlers range over the whole database
// change the range of rows handled by the DBHandler
std::cout << handler.setRange ( 1, 40 ); // now parses rows [1,40)
std::cout << handler.size (); // displays 39: rows 1,...,39
std::cout << handler.DBSize (); // shows the number of rows in the database
std::cout << handler.numRow (); // displays 0: the handler currently
// points on the first row of its managed area [1,40)
// move the handler to the next row
handler.nextRow();
std::cout << handler.numRow (); // displays 1: the handler points now
// on the second row of its managed area. This corresponds to the third
// DBRow of the database since the range of handler is [1,40)
++handler; // move again to the next row
std::cout << handler.numRow (); // displays 2
handler += 4; // advances the pointer by 4 rows
std::cout << handler.numRow (); // displays 6
// get the DBRow pointed to by the handler: this is the 7th DBRow
// of the database
const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
const double xweight = xrow7.weight ();
// another way to access the row
const auto& zrow7 = *handler; // get the DBRow, unsafe version
// check whether there exist other rows managed by the handler after
// the current row
bool has_rows = handler.hasRows (); // true: there remains 33 rows
// makes the handler point again on the 2nd row of the database
handler.reset (); // the handler points to the beginning of its area
std::cout << handler.numRow (); // displays 0: the handler currently
// points on the first row of its managed area [1,40)
// see the variables' names, i.e., the names of the database's columns
const auto& vars = handler.variableNames();
// parse all the rows managed
handler.reset ();
for ( auto end = handler.end (); handler != end; ++handler )
std::cout << handler.row ().weight () << std::endl;
// another possibility:
for ( const auto& row : handler )
std::cout << row.weight () << std::endl;
// clear the content of the database and update the database's
// handlers
database.clear ();
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
The class for storing a record in a database.
Definition DBRow.h:75
const double & weight() const noexcept
returns the weight assigned to the DBRow
const std::vector< T_DATA > & row() const noexcept
returns the current row (without the weight)
The databases' cell translators for labelized variables.
the class for packing together the translators used to preprocess the datasets
std::size_t insertTranslator(const DBTranslator &translator, const std::size_t column, const bool unique_column=true)
inserts a new translator at the end of the translator set
The class representing a tabular database as used by learning tasks.
typename IDatabaseTable< DBTranslatedValue >::Handler Handler
the unsafe handler type
const DBTranslator & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
typename IDatabaseTable< DBTranslatedValue >::HandlerSafe HandlerSafe
the safe handler type
const iterator & end() const noexcept
The union class for storing the translated values in learning databases.

Definition at line 200 of file databaseTable.h.

Member Typedef Documentation

◆ const_iterator

using gum::learning::IDatabaseTable< DBTranslatedValue >::const_iterator
inherited

Types for STL compliance.

Definition at line 772 of file IDatabaseTable.h.

◆ const_iterator_safe

using gum::learning::IDatabaseTable< DBTranslatedValue >::const_iterator_safe
inherited

Types for STL compliance.

Definition at line 773 of file IDatabaseTable.h.

◆ const_pointer

Types for STL compliance.

Definition at line 230 of file databaseTable.h.

◆ const_reference

Types for STL compliance.

Definition at line 228 of file databaseTable.h.

◆ DBVector

template<typename TX_DATA>
using gum::learning::DatabaseTable::DBVector = std::vector< TX_DATA >

the type for the vectors used in the DatabaseTable

Definition at line 204 of file databaseTable.h.

◆ difference_type

Types for STL compliance.

Definition at line 232 of file databaseTable.h.

◆ Handler

the unsafe handler type

Definition at line 217 of file databaseTable.h.

◆ HandlerSafe

the safe handler type

Definition at line 220 of file databaseTable.h.

◆ IsMissing

Definition at line 222 of file databaseTable.h.

◆ iterator

Types for STL compliance.

Definition at line 233 of file databaseTable.h.

◆ iterator_safe

Types for STL compliance.

Definition at line 234 of file databaseTable.h.

◆ Matrix

template<typename TX_DATA>
using gum::learning::DatabaseTable::Matrix = std::vector< DBRow< TX_DATA > >

the type for the matrices stored into the database

Definition at line 212 of file databaseTable.h.

◆ MissingValType

using gum::learning::DatabaseTable::MissingValType = std::vector< std::string >

Definition at line 214 of file databaseTable.h.

◆ pointer

Types for STL compliance.

Definition at line 229 of file databaseTable.h.

◆ reference

Types for STL compliance.

Definition at line 227 of file databaseTable.h.

◆ Row

template<typename TX_DATA>
using gum::learning::DatabaseTable::Row = DBRow< TX_DATA >

a row of the database

Definition at line 208 of file databaseTable.h.

◆ size_type

Types for STL compliance.

Definition at line 231 of file databaseTable.h.

◆ value_type

Types for STL compliance.

Definition at line 226 of file databaseTable.h.

Constructor & Destructor Documentation

◆ DatabaseTable() [1/4]

gum::learning::DatabaseTable::DatabaseTable ( const MissingValType & missing_symbols,
const DBTranslatorSet & translators = DBTranslatorSet() )
explicit

default constructor

Referenced by DatabaseTable(), DatabaseTable(), clone(), operator=(), and operator=().

Here is the caller graph for this function:

◆ DatabaseTable() [2/4]

gum::learning::DatabaseTable::DatabaseTable ( const DBTranslatorSet & translators = DBTranslatorSet())
explicit

default constructor

◆ DatabaseTable() [3/4]

gum::learning::DatabaseTable::DatabaseTable ( const DatabaseTable & from)

copy constructor

References DatabaseTable().

Here is the call graph for this function:

◆ DatabaseTable() [4/4]

gum::learning::DatabaseTable::DatabaseTable ( DatabaseTable && from)
noexcept

move constructor

References DatabaseTable().

Here is the call graph for this function:

◆ ~DatabaseTable()

gum::learning::DatabaseTable::~DatabaseTable ( )
override

destructor

Member Function Documentation

◆ begin()

returns a new unsafe handler pointing to the beginning of the database

◆ beginSafe()

returns a new safe handler pointing to the beginning of the database

◆ betterTranslators()

std::vector< std::pair< Idx, std::shared_ptr< DBTranslator > > > gum::learning::DatabaseTable::betterTranslators ( ) const

propose a set with translators better suited for the content of the database

Returns
A vector of (column index, translator) pairs: for each column of the database for which a translator better suited than the current one can be used, the pair contains the index of this column and this better translator.

◆ changeTranslator() [1/2]

auto gum::learning::DatabaseTable::changeTranslator ( const Variable & var,
std::size_t k,
bool k_is_input_col = false,
const std::vector< std::string > & missing_symbols = std::vector< std::string >(),
bool editable_dictionary = false,
std::size_t max_dico_entries = std::numeric_limits< std::size_t >::max() ) -> void

change the translator of a database column

When changing the translator for a column, we update the content of the database.

Parameters
var: the variable corresponding to the new translator
k: either the column in the DatabaseTable (if k_is_input_col = false, the default) or the first column in the DatabaseTable which corresponds to the kth column of the input CSV (if k_is_input_col = true)
k_is_input_col: see Parameter k
missing_symbols: if set, this corresponds to the set of missing symbols used by the translator; otherwise (or if it is empty), this is the set of missing symbols of the translator currently translating the kth column
editable_dictionary: For those translators that can enable/disable the update of their dictionary during the reading of databases (e.g., DBTranslator4LabelizedVariable), this indicates whether we allow or not such updates. For DBTranslator4ContinuousVariable, this corresponds to the fit_range constructor's parameter.
max_dico_entries: For translators that store explicitly their dictionary in memory, this parameter specifies the max number of entries in this dictionary
Warning
if the translator does not exist, nothing is done. In particular, no exception is raised.

◆ changeTranslator() [2/2]

void gum::learning::DatabaseTable::changeTranslator ( DBTranslator & new_translator,
std::size_t k,
bool k_is_input_col = false )

change the translator of a database column

When changing the translator for a column, we update the content of the database.

Parameters
new_translator: the new translator to use
k: either the column in the DatabaseTable (if k_is_input_col = false, the default) or the first column in the DatabaseTable which corresponds to the kth column of the input CSV (if k_is_input_col = true)
k_is_input_col: see Parameter k
Warning
if the translator does not exist, nothing is done. In particular, no exception is raised.

◆ clear()

void gum::learning::DatabaseTable::clear ( )
override virtual

erase the content of the database, including the names of the variables

Reimplemented from gum::learning::IDatabaseTable< DBTranslatedValue >.

◆ clone()

DatabaseTable * gum::learning::DatabaseTable::clone ( ) const
override virtual

virtual copy constructor

Implements gum::learning::IDatabaseTable< DBTranslatedValue >.

References DatabaseTable().

Here is the call graph for this function:

◆ columnFromVariableName()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::columnFromVariableName ( const std::string & name) const
inherited

returns the index of the column whose name is passed in argument

Warning
If several columns correspond to the name, only the column with the lowest index is returned. If you wish to retrieve all the columns, use method columnsFromVariableName
Exceptions
UndefinedElement is raised if there exists no column with the given name

◆ columnsFromVariableName()

DBVector< std::size_t > gum::learning::IDatabaseTable< DBTranslatedValue >::columnsFromVariableName ( const std::string & name) const
inherited

returns the indices of all the columns whose name is passed in argument

It may happen that several columns correspond to a given variable name. In this case, the function returns the indices of all the columns of the IDatabase that match the name.

◆ content()

const Matrix< DBTranslatedValue > & gum::learning::IDatabaseTable< DBTranslatedValue >::content ( ) const
noexcept inherited

returns the content (the records) of the database

◆ domainSize()

std::size_t gum::learning::DatabaseTable::domainSize ( const std::size_t k,
const bool k_is_input_col = false ) const

returns the domain size of the kth variable of the database table or of that of the first one corresponding to the kth column of the input database

Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the variable is that of the translator that parses the kth column of the input database.

Exceptions
UndefinedElement is raised if there is no translator corresponding to k.

◆ domainSizes()

DBVector< std::size_t > gum::learning::DatabaseTable::domainSizes ( ) const

returns the domain sizes of all the variables in the database table

◆ empty()

bool gum::learning::IDatabaseTable< DBTranslatedValue >::empty ( ) const
noexcept inherited

indicates whether the database contains some records or not

◆ end()

const iterator & gum::learning::IDatabaseTable< DBTranslatedValue >::end ( ) const
noexcept inherited

returns a new unsafe handler pointing to the end of the database

◆ endSafe()

const iterator_safe & gum::learning::IDatabaseTable< DBTranslatedValue >::endSafe ( ) const
noexcept inherited

returns a new safe handler pointing to the end of the database

◆ eraseAllRows()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseAllRows ( )
inherited

erase all the rows

◆ eraseFirstRow()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseFirstRow ( )
inherited

erase the first row

Warning
if the row does not exist, nothing is done. In particular, no exception is raised.

◆ eraseFirstRows()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseFirstRows ( const std::size_t k)
inherited

erase the k first rows

Warning
if there are fewer than k rows in the database, the database is completely emptied

◆ eraseLastRow()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseLastRow ( )
inherited

erase the last row

Warning
if the row does not exist, nothing is done. In particular, no exception is raised.

◆ eraseLastRows()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseLastRows ( const std::size_t k)
inherited

erase the k last rows

Warning
if there are fewer than k rows in the database, the database is completely emptied

◆ eraseRow()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseRow ( std::size_t index)
inherited

erase a given row specified by its index in the table

In the database, rows are indexed, starting from 0.

Warning
If the row does not exist, nothing is done. In particular, no exception is raised.

◆ eraseRows()

void gum::learning::IDatabaseTable< DBTranslatedValue >::eraseRows ( std::size_t deb,
std::size_t end )
inherited

erase the rows from the debth to the endth (not included)

In the database, rows are indexed, starting from 0.

◆ eraseTranslators()

void gum::learning::DatabaseTable::eraseTranslators ( std::size_t k,
bool k_is_input_col = false )

erases either the kth translator or all those parsing the kth column of the input dataset

Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., to the index of one of these two output columns. When k_is_input_col is set to true, the translators to be erased are all those that parse the kth column of the input database.

Warning
if the translator does not exist, nothing is done. In particular, no exception is raised.

◆ handler()

iterator gum::learning::IDatabaseTable< DBTranslatedValue >::handler ( ) const
inherited

returns a new unsafe handler pointing to the 1st record of the database

◆ handlerSafe()

iterator_safe gum::learning::IDatabaseTable< DBTranslatedValue >::handlerSafe ( ) const
inherited

returns a new safe handler pointing to the 1st record of the database

◆ hasMissingValues()

bool gum::learning::IDatabaseTable< DBTranslatedValue >::hasMissingValues ( ) const
inherited

indicates whether the database contains some missing values

◆ ignoreColumn()

void gum::learning::DatabaseTable::ignoreColumn ( const std::size_t k,
const bool from_external_object = true )
override virtual

makes the database table ignore from now on the kth column of the input dataset or the column parsed by the kth translator

This method can be called in two different ways: either k refers to the current kth column of the database table (in this case parameter from_external_object is set to false), or k corresponds to the kth column of an original database used to fill the database table (in this case from_external_object is set to true). Depending on from_external_object's value, the ignored columns may differ. As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to ignoreColumn ( 1, true ) will exclude column X1 from the database table. As a result, the database table columns are X0, X2, X3 and X4. Therefore, subsequently calling ignoreColumn ( 1, false ) will result in excluding X2 since X2 is the 2nd column (columns are indexed starting from 0). So, now the database table's columns are X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will remove column X3 because, in the original database, X3 was the 4th column.

The method also erases all the translators corresponding to column k, if any. If the DatabaseTable contains some rows, then their column corresponding to k is removed. If the resulting DatabaseTable contains only empty rows, then those are removed.

Parameters
k: the column to remove. See Method setVariableNames for a detailed description on how k is computed.
from_external_object: indicates whether k refers to the kth column of an original external database (true) or to the current kth column of the DatabaseTable (false).
Exceptions
UndefinedElement is raised if k refers to the position of a translator that does not exist (k >= number of translators).

Implements gum::learning::IDatabaseTable< DBTranslatedValue >.

◆ ignoredColumns()

const DBVector< std::size_t > gum::learning::DatabaseTable::ignoredColumns ( ) const
override virtual

returns the set of columns of the original dataset that are ignored

In this vector, all the column indices greater than or equal to its last element are also ignored.

Implements gum::learning::IDatabaseTable< DBTranslatedValue >.

◆ inputColumns()

const DBVector< std::size_t > gum::learning::DatabaseTable::inputColumns ( ) const
overridevirtual

returns the set of columns of the original dataset that are present in the DatabaseTable

Implements gum::learning::IDatabaseTable< DBTranslatedValue >.

◆ insertRow() [1/5]

void gum::learning::DatabaseTable::insertRow ( const Row< DBCell > & new_row)
override

insert a new DBRow of DBCells at the end of the database

The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.

Exceptions
SizeError is raised if the new row cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns)

◆ insertRow() [2/5]

void gum::learning::DatabaseTable::insertRow ( const Row< DBTranslatedValue > & new_row,
const IsMissing contains_missing_data )
override

insert a new row at the end of the database

Unlike methods insertRow for data whose type is different from DBTranslatedValue, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.

Exceptions
SizeError is raised if the size of the new_row is not equal to the number of translators of the DatabaseTable. InvalidArgument is raised if at least one element of new_row does not belong to the domain of its corresponding translator.

◆ insertRow() [3/5]

void gum::learning::DatabaseTable::insertRow ( const std::vector< std::string > & new_row)
overridevirtual

insert a new row at the end of the database

The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.

Exceptions
SizeError is raised if the vector of strings cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns)
UnknownLabelInDatabase is raised if the translation of an element in the new row cannot be found and the corresponding translator is not in an editable dictionary mode.
SizeError is raised if the number of entries in the dictionary of a translator has already reached its maximum.
OperationNotAllowed is raised if the translation of an element in new_row cannot be found and the insertion of the string into the corresponding translator's dictionary fails because it would induce incoherent behavior (e.g., a DBTranslator4ContinuousVariable that contains a variable whose domain is [x,y] as well as a missing value symbol z \(\in\) [x,y]).
TypeError is raised if the translation of an element in new_row cannot be found and the insertion of the string into the translator's dictionary fails because the string cannot be converted into an appropriate type.

Implements gum::learning::IDatabaseTable< DBTranslatedValue >.

Referenced by reorder(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toDatabaseTable().

Here is the caller graph for this function:

◆ insertRow() [4/5]

void gum::learning::DatabaseTable::insertRow ( Row< DBCell > && new_row)
override

insert a new DBRow of DBCells at the end of the database

The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.

Exceptions
SizeError is raised if the new row cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns)

◆ insertRow() [5/5]

void gum::learning::DatabaseTable::insertRow ( Row< DBTranslatedValue > && new_row,
const IsMissing contains_missing_data )
override

insert a new DBRow at the end of the database

Unlike methods insertRow for data whose type is different from DBTranslatedValue, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.

Exceptions
SizeError is raised if the size of the new_row is not equal to the number of translators of the DatabaseTable. InvalidArgument is raised if at least one element of new_row does not belong to the domain of its corresponding translator.

◆ insertRows() [1/5]

void gum::learning::DatabaseTable::insertRows ( const Matrix< DBCell > & new_rows)
override

insert a set of new DBRows at the end of the database

The new rows passed in argument are supposed to come from an external database. So they must contain data for the ignored columns.

Exceptions
SizeError is raised if at least one of the new rows cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns)

◆ insertRows() [2/5]

void gum::learning::DatabaseTable::insertRows ( const Matrix< DBTranslatedValue > & new_rows,
const DBVector< IsMissing > & rows_have_missing_vals )
override

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from DBTranslatedValue, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Exceptions
SizeError is raised if the size of at least one row in new_rows is not equal to the number of translators in the DatabaseTable. InvalidArgument is raised if at least one element of new_rows does not belong to the domain of its corresponding translator.

◆ insertRows() [3/5]

void gum::learning::DatabaseTable::insertRows ( Matrix< DBCell > && new_rows)
override

insert a set of new DBRows at the end of the database

The new rows passed in argument are supposed to come from an external database. So they must contain data for the ignored columns.

Exceptions
SizeError is raised if at least one of the new rows cannot be inserted in the DatabaseTable because its size does not allow a matching with the columns of the DatabaseTable (taking into account the ignored columns)

◆ insertRows() [4/5]

void gum::learning::DatabaseTable::insertRows ( Matrix< DBTranslatedValue > && new_rows,
const DBVector< IsMissing > & rows_have_missing_vals )
override

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from DBTranslatedValue, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Exceptions
SizeError is raised if the size of at least one row in new_rows is not equal to the number of translators in the DatabaseTable. InvalidArgument is raised if at least one element of new_rows does not belong to the domain of its corresponding translator.

◆ insertRows() [5/5]

virtual void gum::learning::IDatabaseTable< DBTranslatedValue >::insertRows ( Matrix< DBTranslatedValue > && new_rows,
const DBVector< IsMissing > & rows_have_missing_vals )
virtualinherited

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Parameters
new_rowsthe new set of rows to be copied as is
rows_have_missing_valsa vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not
Exceptions
SizeErroris raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals.

◆ insertTranslator() [1/3]

std::size_t gum::learning::DatabaseTable::insertTranslator ( const DBTranslator & translator,
const std::size_t input_column,
const bool unique_column = true )

insert a new translator into the database table

Parameters
translatorThis translator is copied into the DatabaseTable
input_columnindicates which column in the original dataset (usually a CSV file) the translator will read
unique_columnindicates whether the input column can be read by several translators.
Returns
the index of the translator within the set of translators
Exceptions
OperationNotAllowedif the input column is marked as ignored
DuplicateElementif there already exists a translator reading the input column passed in argument, and if the unique_column is set to true
Warning
if the database is not empty, i.e., it contains some records, the whole column of the database corresponding to the new translator is filled with missing values.

References translator().

Referenced by gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toDatabaseTable().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertTranslator() [2/3]

std::size_t gum::learning::DatabaseTable::insertTranslator ( const Variable & var,
const std::size_t input_column,
const bool unique_column = true )

insert a new translator into the database table

Parameters
varthe variable that will be contained into the translator
input_columnindicates which column in the original dataset (usually a CSV file) the translator will read
unique_columnindicates whether the input column can be read by several translators
missing_symbolsthe set of symbols in the database representing missing values
Returns
the index of the translator within the set of translators
Exceptions
OperationNotAllowedif the input column is marked as ignored
DuplicateElementif there already exists a translator reading the input column passed in argument, and if the unique_column is set to true
MissingValueInDatabase is raised if the database is not empty, i.e., it contains some records. In that case, the whole column of the database corresponding to the new translator should be filled with missing values, which is impossible since we do not know which symbols correspond to missing values. If you do not want such a behavior, use method insertTranslator in which you specify the set of missing symbols.

◆ insertTranslator() [3/3]

std::size_t gum::learning::DatabaseTable::insertTranslator ( const Variable & var,
std::size_t input_column,
const std::vector< std::string > & missing_symbols,
bool unique_column = true )

insert a new translator into the database table

Parameters
varthe variable that will be contained into the translator
input_columnindicates which column in the original dataset (usually a CSV file) the translator will read
unique_columnindicates whether the input column can be read by several translators
missing_symbolsthe set of symbols in the database representing missing values
Returns
the index of the translator within the set of translators
Exceptions
OperationNotAllowedif the input column is marked as ignored
DuplicateElementif there already exists a translator reading the input column passed in argument, and if the unique_column is set to true
Warning
if the database is not empty, i.e., it contains some records, the whole column of the database corresponding to the new translator is filled with missing values.

◆ isRowSizeOK_()

bool gum::learning::IDatabaseTable< DBTranslatedValue >::isRowSizeOK_ ( const std::size_t size) const
protectedinherited

checks whether a size corresponds to the number of columns of the database

◆ minNbRowsPerThread()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::minNbRowsPerThread ( ) const
inherited

returns the minimum number of rows that each thread should process

◆ missingSymbols()

const DBVector< std::string > & gum::learning::IDatabaseTable< DBTranslatedValue >::missingSymbols ( ) const
inherited

returns the set of missing symbols

◆ nbProcessingThreads_()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::nbProcessingThreads_ ( ) const
protectedinherited

returns the number of threads used to process the current database content

◆ nbRows()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::nbRows ( ) const
noexceptinherited

returns the number of records (rows) in the database

◆ nbThreads()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::nbThreads ( ) const
inherited

returns the number of threads used to parse the database

◆ nbVariables()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::nbVariables ( ) const
noexceptinherited

returns the number of variables (columns) of the database

◆ needsReordering()

bool gum::learning::DatabaseTable::needsReordering ( const std::size_t k,
const bool k_is_input_col = false ) const

indicates whether a reordering is needed to sort the translations of the kth translator or those of the first translator parsing the kth column

For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.

When constructing dynamically its dictionary, the translator may assign wrong DBTranslatedValue values to strings. For instance, a translator reading sequentially integer strings 4, 1, 3, may map 4 into DBTranslatedValue{std::size_t(0)}, 1 into DBTranslatedValue{std::size_t(1)} and 3 into DBTranslatedValue{std::size_t(2)}, resulting in random variables having domain {4,1,3}. The user may prefer having domain {1,3,4}, i.e., a domain specified with increasing values. This requires a reordering. Method needsReordering() returns a Boolean indicating whether such a reordering should be performed or whether the current order is OK.

Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the translator to be reordered is that which parses the kth column of the input database.

Exceptions
UndefinedElementis raised if there is no translator corresponding to k.

◆ operator=() [1/2]

DatabaseTable & gum::learning::DatabaseTable::operator= ( const DatabaseTable & from)

copy operator

References DatabaseTable().

Here is the call graph for this function:

◆ operator=() [2/2]

DatabaseTable & gum::learning::DatabaseTable::operator= ( DatabaseTable && from)
noexcept

move operator

References DatabaseTable().

Here is the call graph for this function:

◆ rangesProcessingThreads_()

std::vector< std::pair< std::size_t, std::size_t > > gum::learning::IDatabaseTable< DBTranslatedValue >::rangesProcessingThreads_ ( const std::size_t nb_threads) const
protectedinherited

returns the ranges that threads should process

◆ reorder() [1/2]

void gum::learning::DatabaseTable::reorder ( )

performs a reordering of all the columns

For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.

References gum::learning::IDatabaseTable< DBTranslatedValue >::IDatabaseTable(), and insertRow().

Here is the call graph for this function:

◆ reorder() [2/2]

void gum::learning::DatabaseTable::reorder ( const std::size_t k,
const bool k_is_input_col = false )

performs a reordering of the kth translator or of the first translator parsing the kth column of the input database

For a given translator, if the strings represented by the translations are only numbers, the translations are considered to be sorted if and only if they are sorted by increasing number. If the strings do not only represent numbers, then translations are considered to be sorted if and only if they are sorted lexicographically.

Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the translator to be reordered is that which parses the kth column of the input database.

Exceptions
UndefinedElementis raised if there is no translator corresponding to k.

◆ setAllRowsWeight()

void gum::learning::IDatabaseTable< DBTranslatedValue >::setAllRowsWeight ( const double new_weight)
inherited

assign a given weight to all the rows of the database

◆ setMaxNbThreads()

void gum::learning::IDatabaseTable< DBTranslatedValue >::setMaxNbThreads ( const std::size_t nb) const
inherited

changes the max number of threads that a database can use

Within databases, some methods can be processed in a parallel fashion. This method indicates the maximum number of threads that can be run in parallel at the same time.

◆ setMinNbRowsPerThread()

void gum::learning::IDatabaseTable< DBTranslatedValue >::setMinNbRowsPerThread ( const std::size_t nb) const
inherited

changes the minimum number of rows a thread should process in a multithreading context

When a method executes several threads to perform actions on the rows of the database, the MinNbRowsPerThread indicates how many rows each thread should at least process. This is used to compute the number of threads actually run. This number is equal to the min between the max number of threads allowed and the number of records in the database divided by nb.

◆ setVariableNames()

void gum::learning::DatabaseTable::setVariableNames ( const std::vector< std::string > & names,
const bool from_external_object = true )
overridevirtual

sets the names of the variables

This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, the latter should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.

Parameters
namesthe names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table).
from_external_objecta Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false).
Exceptions
SizeErroris raised if the names passed in arguments cannot be assigned to the columns of the DatabaseTable because the size of their vector is inadequate.

Implements gum::learning::IDatabaseTable< DBTranslatedValue >.

Referenced by gum::learning::readFile(), and variable().

Here is the caller graph for this function:

◆ setWeight()

void gum::learning::IDatabaseTable< DBTranslatedValue >::setWeight ( const std::size_t i,
const double weight )
inherited

assigns a given weight to the ith row of the database

Exceptions
OutOfBoundsif i is outside the set of indices of the records or if the weight is negative

◆ size()

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::size ( ) const
noexceptinherited

returns the number of records (rows) in the database

◆ translator()

const DBTranslator & gum::learning::DatabaseTable::translator ( const std::size_t k,
const bool k_is_input_col = false ) const

returns either the kth translator of the database table or the first one reading the kth column of the input database

Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the translator returned is the first one that parses the kth column of the input database.

Exceptions
UndefinedElementis raised if there is no translator corresponding to k.

Referenced by insertTranslator(), and gum::learning::BNDatabaseGenerator< GUM_SCALAR >::toDatabaseTable().

Here is the caller graph for this function:

◆ translatorSet()

const DBTranslatorSet & gum::learning::DatabaseTable::translatorSet ( ) const

returns the set of translators

◆ variable()

const Variable & gum::learning::DatabaseTable::variable ( const std::size_t k,
const bool k_is_input_col = false ) const

returns either the kth variable of the database table or the first one corresponding to the kth column of the input database

Translators read an input dataset that is not necessarily the same as the content of the DatabaseTable. For instance, a CSV may contain 10 columns, but if a DatabaseTable only contains two translators reading columns 3 and 5 respectively, then the DatabaseTable only contains 2 columns. When k_is_input_col is set to false, Parameter k passed in argument corresponds to either 0 or 1, i.e., the index of one of these two columns. When k_is_input_col is set to true, the variable is that of the translator that parses the kth column of the input database.

Exceptions
UndefinedElementis raised if there is no translator corresponding to k.

References gum::learning::IDatabaseTable< DBTranslatedValue >::IDatabaseTable(), and setVariableNames().

Referenced by gum::learning::IBNLearner::Database::Database().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ variableName()

const std::string & gum::learning::IDatabaseTable< DBTranslatedValue >::variableName ( const std::size_t k) const
inherited

returns the name of the kth column of the IDatabaseTable

Exceptions
OutOfBoundsis raised if the IDatabaseTable contains fewer than k columns.

◆ variableNames()

const DBVector< std::string > & gum::learning::IDatabaseTable< DBTranslatedValue >::variableNames ( ) const
noexceptinherited

returns the variable names for all the columns of the database

The names do not include the ignored columns.

◆ weight()

double gum::learning::IDatabaseTable< DBTranslatedValue >::weight ( const std::size_t i) const
inherited

returns the weight of the ith record

Exceptions
OutOfBoundsif i is outside the set of indices of the records

Member Data Documentation

◆ has_row_missing_val_

DBVector< IsMissing > gum::learning::IDatabaseTable< DBTranslatedValue >::has_row_missing_val_
protectedinherited

Definition at line 1099 of file IDatabaseTable.h.

◆ max_nb_threads_

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::max_nb_threads_
mutableprotectedinherited

Definition at line 1102 of file IDatabaseTable.h.

The common class for the tabular database tables.

◆ min_nb_rows_per_thread_

std::size_t gum::learning::IDatabaseTable< DBTranslatedValue >::min_nb_rows_per_thread_
mutableprotectedinherited

Definition at line 1106 of file IDatabaseTable.h.

1106{100};

◆ missing_symbols_

DBVector< std::string > gum::learning::IDatabaseTable< DBTranslatedValue >::missing_symbols_
protectedinherited

Definition at line 1096 of file IDatabaseTable.h.

◆ rows_

Definition at line 1093 of file IDatabaseTable.h.

◆ variable_names_

DBVector< std::string > gum::learning::IDatabaseTable< DBTranslatedValue >::variable_names_
protectedinherited

the names of the variables for each column

Definition at line 1090 of file IDatabaseTable.h.


The documentation for this class was generated from the following file: