aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
gum::learning::IDatabaseTable< T_DATA > Class Template Reference (abstract)

The common class for the tabular database tables. More...

#include <agrum/BN/learning/IDatabaseTable.h>

Inheritance diagram for gum::learning::IDatabaseTable< T_DATA >:
Collaboration diagram for gum::learning::IDatabaseTable< T_DATA >:

Classes

class  Handler
 the (unsafe) handler for the tabular databases More...
class  HandlerSafe
 the safe handler of the tabular databases More...

Public Types

enum  IsMissing : char { False , True }
template<typename TX_DATA>
using DBVector = std::vector< TX_DATA >
 the type for the vectors used in the IDatabaseTable
template<typename TX_DATA>
using Row = DBRow< TX_DATA >
 a row of the database
template<typename TX_DATA>
using Matrix = std::vector< DBRow< TX_DATA > >
 the type for the matrices stored into the database
using MissingValType = std::vector< std::string >
using value_type = Row< T_DATA >
 Types for STL compliance.
using reference = value_type&
 Types for STL compliance.
using const_reference = const value_type&
 Types for STL compliance.
using pointer = value_type*
 Types for STL compliance.
using const_pointer = const value_type*
 Types for STL compliance.
using size_type = std::size_t
 Types for STL compliance.
using difference_type = std::ptrdiff_t
 Types for STL compliance.
using iterator = Handler
 Types for STL compliance.
using iterator_safe = HandlerSafe
 Types for STL compliance.
using const_iterator = const Handler
 Types for STL compliance.
using const_iterator_safe = const HandlerSafe
 Types for STL compliance.

Public Member Functions

Constructors / Destructors
 IDatabaseTable (const MissingValType &missing_symbols, const std::vector< std::string > &var_names)
 default constructor
 IDatabaseTable (const IDatabaseTable< T_DATA > &from)
 copy constructor
 IDatabaseTable (IDatabaseTable< T_DATA > &&from)
 move constructor
virtual IDatabaseTable< T_DATA > * clone () const =0
 virtual copy constructor
virtual ~IDatabaseTable ()
 destructor
Iterators
iterator begin () const
 returns a new unsafe handler pointing to the beginning of the database
iterator_safe beginSafe () const
 returns a new safe handler pointing to the beginning of the database
const iterator & end () const noexcept
 returns a new unsafe handler pointing to the end of the database
const iterator_safe & endSafe () const noexcept
 returns a new safe handler pointing to the end of the database
Accessors / Modifiers
const Matrix< T_DATA > & content () const noexcept
 returns the content (the records) of the database
iterator handler () const
 returns a new unsafe handler pointing to the 1st record of the database
iterator_safe handlerSafe () const
 returns a new safe handler pointing to the 1st record of the database
const DBVector< std::string > & variableNames () const noexcept
 returns the variable names for all the columns of the database
virtual void setVariableNames (const std::vector< std::string > &names, const bool from_external_object=true)=0
 sets the names of the variables
const std::string & variableName (const std::size_t k) const
 returns the name of the kth column of the IDatabaseTable
std::size_t columnFromVariableName (const std::string &name) const
 returns the index of the column whose name is passed in argument
DBVector< std::size_t > columnsFromVariableName (const std::string &name) const
 returns the indices of all the columns whose name is passed in argument
std::size_t nbVariables () const noexcept
 returns the number of variables (columns) of the database
std::size_t nbRows () const noexcept
 returns the number of records (rows) in the database
std::size_t size () const noexcept
 returns the number of records (rows) in the database
bool empty () const noexcept
 indicates whether the database contains some records or not
virtual void ignoreColumn (const std::size_t k, const bool from_external_object=true)=0
 makes the database table ignore from now on the kth column
virtual const DBVector< std::size_t > ignoredColumns () const =0
 returns the set of columns of the original dataset that are ignored
virtual const DBVector< std::size_t > inputColumns () const =0
 returns the set of columns of the original dataset that are present in the IDatabaseTable
virtual void insertRow (const std::vector< std::string > &new_row)=0
 insert a new row at the end of the database
virtual void insertRow (Row< T_DATA > &&new_row, const IsMissing contains_missing_data)
 insert a new DBRow at the end of the database
virtual void insertRow (const Row< T_DATA > &new_row, const IsMissing contains_missing_data)
 insert a new row at the end of the database
virtual void insertRows (Matrix< T_DATA > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
 insert a set of new DBRows at the end of the database
virtual void insertRows (const Matrix< T_DATA > &new_rows, const DBVector< IsMissing > &rows_have_missing_vals)
 insert a set of new DBRows at the end of the database
void eraseRow (std::size_t index)
 erase a given row specified by its index in the table
void eraseFirstRow ()
 erase the first row
void eraseLastRow ()
 erase the last row
void eraseFirstRows (const std::size_t k)
 erase the k first rows
void eraseLastRows (const std::size_t k)
 erase the k last rows
void eraseRows (std::size_t deb, std::size_t end)
 erase the rows whose indices lie in [deb, end), i.e., from row deb (included) to row end (not included)
void eraseAllRows ()
 erase all the rows
virtual void clear ()
 erase the content of the database, including the names of the variables
const DBVector< std::string > & missingSymbols () const
 returns the set of missing symbols
bool hasMissingValues () const
 indicates whether the database contains some missing values
bool hasMissingValues (const std::size_t k) const
 indicates whether the kth row contains some missing values
void setMaxNbThreads (const std::size_t nb) const
 changes the max number of threads that a database can use
std::size_t nbThreads () const
 returns the number of threads used to parse the database
void setMinNbRowsPerThread (const std::size_t nb) const
 changes the minimum number of rows a thread should process in a multithreading context
std::size_t minNbRowsPerThread () const
 returns the minimum of rows that each thread should process
void setAllRowsWeight (const double new_weight)
 assign a given weight to all the rows of the database
void setWeight (const std::size_t i, const double weight)
 assigns a given weight to the ith row of the database
double weight (const std::size_t i) const
 returns the weight of the ith record
double weight () const
 returns the weight of the whole database

Protected Member Functions

bool isRowSizeOK_ (const std::size_t size) const
 checks whether a size corresponds to the number of columns of the database
std::size_t nbProcessingThreads_ () const
 returns the number of threads used to process the current database content
std::vector< std::pair< std::size_t, std::size_t > > rangesProcessingThreads_ (const std::size_t nb_threads) const
 returns the ranges that threads should process
IDatabaseTable< T_DATA > & operator= (const IDatabaseTable< T_DATA > &from)
 copy operator
IDatabaseTable< T_DATA > & operator= (IDatabaseTable< T_DATA > &&from)
 move operator

Protected Attributes

DBVector< std::string > variable_names_
 the names of the variables for each column
Matrix< T_DATA > rows_
DBVector< std::string > missing_symbols_
DBVector< IsMissing > has_row_missing_val_
std::size_t max_nb_threads_ {std::size_t(gum::getNumberOfThreads())}
std::size_t min_nb_rows_per_thread_ {100}

Friends

class Handler
 allow the handlers to access the database directly
class HandlerSafe

Detailed Description

template<typename T_DATA>
class gum::learning::IDatabaseTable< T_DATA >

The common class for the tabular database tables.

IDatabaseTables are not intended to be instantiated directly: they should be created through the RawDatabaseTable and DatabaseTable classes. They represent the structures shared by these two classes.

Here is an example of how to use the class, illustrated with the DatabaseTable class (in this case, the T_DATA type is just equal to DBTranslatedValue):

// create the database from a CSV. This is not compulsory for
// IDatabaseTable instances, but this is how we usually create
// DatabaseTable instances
gum::learning::DBInitializerFromCSV<> initializer ( "asia.csv" );
const auto& var_names = initializer.variableNames ();
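gum::learning::DBTranslatorSet<> translator_set;
gum::learning::DBTranslator4LabelizedVariable<> translator;
for ( std::size_t i = 0; i < var_names.size(); ++i )
translator_set.insertTranslator ( translator, i );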
gum::learning::DatabaseTable<> database ( translator_set );
database.setVariableNames( initializer.variableNames () );
// here, database contains the content of the asia.csv file.
// determine how many columns and rows the database contains
std::size_t nb_rows = database.nbRows();
std::size_t nb_cols = database.nbVariables ();
// manually add a new row into the database
std::vector<std::string> row( 8, "toto" ); // asia has 8 columns
database.insertRow ( row );
gum::learning::DBRow<gum::learning::DBTranslatedValue>
dbrow ( 8, gum::learning::DBTranslatedValue { std::size_t(0) } );
database.insertRow ( dbrow );
// insert 4 rows in a single call
database.insertRows(
std::vector<gum::learning::DBRow<gum::learning::DBTranslatedValue>>
( 4, dbrow ) );
// erase some rows
database.eraseRow ( 12 ); // erase the 13th row of the database
database.eraseFirstRow (); // erase the first row of the database
database.eraseLastRow (); // erase the last row of the database
database.eraseFirstRows ( 2 ); // erase the first two rows
database.eraseLastRows ( 3 ); // erase the last three rows
database.eraseRows ( 2,4 ); // erase rows indexed from 2 to 4 (excluded)
// parse the content of the database, the usual way
for ( const auto& dbrow : database )
std::cout << dbrow.row() << " weight: " << dbrow.weight() << std::endl;
// ignore some columns of the database, i.e., remove them
database.ignoreColumn ( 3 ); // remove the column X3 of the CSV file
// now, the database contains columns 0, 1, 2, 4, 5, 6, 7 of the
// CSV file. If we wish to remove Column X5 of the CSV file:
database.ignoreColumn ( 5 ); // remove the column X5 of the CSV file
// now, the database contains columns 0, 1, 2, 4, 6, 7 of the CSV file.
// if we wish to remove the 5th column of the IDatabaseTable, i.e.,
// column #4 of the IDatabaseTable, either we determine that this actually
// corresponds to column X6 of the CSV and we use database.ignoreColumn ( 6 ) or
// we call:
database.ignoreColumn ( 4, false ); // false => 4 = the 5th column of
// the IDatabaseTable, not the 5th column/variable of the CSV file
// (remember that all column numbers start from 0).
// display the columns of the CSV that were ignored and those that
// were kept:
std::vector<std::size_t> ignored_cols = database.ignoredColumns ();
std::vector<std::size_t> kept_cols = database.inputColumns ();
// parse the content of the database using handlers
typename gum::learning::DatabaseTable<>::HandlerSafe handler( database );
typename gum::learning::DatabaseTable<>::Handler uhandler( database );
// by default, the handlers range over the whole database
// change the range of rows handled by the DBHandler
handler.setRange ( 1, 40 ); // now parses rows [1,40)
std::cout << handler.size (); // displays 39: rows 1,...,39
std::cout << handler.DBSize (); // shows the number of rows in the database
std::cout << handler.numRow (); // displays 0: the handler currently
// points on the first row of its managed area [1,40)
// move the handler to the next row
handler.nextRow ();
std::cout << handler.numRow (); // displays 1: the handler points now
// on the second row of its managed area. This corresponds to the third
// DBRow of the database since the range of handler is [1,40)
++handler; // move again to the next row
std::cout << handler.numRow (); // displays 2
handler += 4; // advances the pointer by 4 rows
std::cout << handler.numRow (); // displays 6
// get the DBRow pointed to by the handler: this is the 7th DBRow
// of the database
const auto& xrow7 = handler.row (); // get the DBRow, unsafe version
const auto& yrow7 = handler.rowSafe (); // get the DBRow, safe version
const std::vector<gum::learning::DBCell>& xrow = xrow7.row ();
const double xweight = xrow7.weight ();
// another way to access the row
const auto& zrow7 = *handler; // get the DBRow, unsafe version
// check whether there exist other rows managed by the handler after
// the current row
bool has_rows = handler.hasRows (); // true: there remain 33 rows
// makes the handler point again on the 2nd row of the database
handler.reset (); // the handler points to the beginning of its area
std::cout << handler.numRow (); // displays 0: the handler currently
// points on the first row of its managed area [1,40)
// see the variables' names, i.e., the names of the database's columns
const auto& vars = handler.variableNames();
// parse all the rows managed
for ( auto end = handler.end (); handler != end; ++handler )
std::cout << handler.row ().weight () << std::endl;
// another possibility:
for ( const auto& row : handler )
std::cout << row.weight () << std::endl;
The class for initializing DatabaseTable and RawDatabaseTable instances from CSV files.
The class for storing a record in a database.
Definition DBRow.h:75
const double & weight() const noexcept
returns the weight assigned to the DBRow
const std::vector< T_DATA > & row() const noexcept
returns the current row (without the weight)
The databases' cell translators for labelized variables.
the class for packing together the translators used to preprocess the datasets
std::size_t insertTranslator(const DBTranslator &translator, const std::size_t column, const bool unique_column=true)
inserts a new translator at the end of the translator set
The class representing a tabular database as used by learning tasks.
typename IDatabaseTable< DBTranslatedValue >::Handler Handler
the unsafe handler type
typename IDatabaseTable< DBTranslatedValue >::HandlerSafe HandlerSafe
the safe handler type
virtual const DBVector< std::string > & variableNames() const final
returns the names of the variables
virtual void setRange(std::size_t first, std::size_t last) final
sets the area in the database the handler will handle
virtual bool hasRows() const final
indicates whether the handler has reached its end or not
virtual Handler end() const
returns a new handler that points to the end of the database's area of the current handler
virtual std::size_t size() const final
returns the number of rows managed by the handler
virtual const_reference row() const final
returns the current row pointed to by the handler (unsafe version)
virtual void reset() final
puts the handler to the beginning of the database's area it handles
virtual std::size_t numRow() const final
the number of the current row (0 = the 1st row managed by the handler)
virtual const_reference rowSafe() const final
returns the current row pointed to by the handler (safe version)
virtual void nextRow() final
makes the handler point to the next row, equivalent to operator++
virtual std::size_t DBSize() const final
returns the number of rows of the whole database
const iterator & end() const noexcept
returns a new unsafe handler pointing to the end of the database
iterator handler() const
returns a new unsafe handler pointing to the 1st record of the database
The union class for storing the translated values in learning databases.

Definition at line 268 of file IDatabaseTable.h.

Member Typedef Documentation

◆ const_iterator

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::const_iterator = const Handler

Types for STL compliance.

Definition at line 772 of file IDatabaseTable.h.

◆ const_iterator_safe

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::const_iterator_safe = const HandlerSafe

Types for STL compliance.

Definition at line 773 of file IDatabaseTable.h.

◆ const_pointer

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::const_pointer = const value_type*

Types for STL compliance.

Definition at line 767 of file IDatabaseTable.h.

◆ const_reference

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::const_reference = const value_type&

Types for STL compliance.

Definition at line 765 of file IDatabaseTable.h.

◆ DBVector

template<typename T_DATA>
template<typename TX_DATA>
using gum::learning::IDatabaseTable< T_DATA >::DBVector = std::vector< TX_DATA >

the type for the vectors used in the IDatabaseTable

Definition at line 273 of file IDatabaseTable.h.

◆ difference_type

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::difference_type = std::ptrdiff_t

Types for STL compliance.

Definition at line 769 of file IDatabaseTable.h.

◆ iterator

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::iterator = Handler

Types for STL compliance.

Definition at line 770 of file IDatabaseTable.h.

◆ iterator_safe

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::iterator_safe = HandlerSafe

Types for STL compliance.

Definition at line 771 of file IDatabaseTable.h.

◆ Matrix

template<typename T_DATA>
template<typename TX_DATA>
using gum::learning::IDatabaseTable< T_DATA >::Matrix = std::vector< DBRow< TX_DATA > >

the type for the matrices stored into the database

Definition at line 281 of file IDatabaseTable.h.

◆ MissingValType

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::MissingValType = std::vector< std::string >

Definition at line 283 of file IDatabaseTable.h.

◆ pointer

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::pointer = value_type*

Types for STL compliance.

Definition at line 766 of file IDatabaseTable.h.

◆ reference

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::reference = value_type&

Types for STL compliance.

Definition at line 764 of file IDatabaseTable.h.

◆ Row

template<typename T_DATA>
template<typename TX_DATA>
using gum::learning::IDatabaseTable< T_DATA >::Row = DBRow< TX_DATA >

a row of the database

Definition at line 277 of file IDatabaseTable.h.

◆ size_type

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::size_type = std::size_t

Types for STL compliance.

Definition at line 768 of file IDatabaseTable.h.

◆ value_type

template<typename T_DATA>
using gum::learning::IDatabaseTable< T_DATA >::value_type = Row< T_DATA >

Types for STL compliance.

Definition at line 763 of file IDatabaseTable.h.

Member Enumeration Documentation

◆ IsMissing

template<typename T_DATA>
enum gum::learning::IDatabaseTable::IsMissing : char
Enumerator
False 
True 

Definition at line 285 of file IDatabaseTable.h.

Constructor & Destructor Documentation

◆ IDatabaseTable() [1/3]

template<typename T_DATA>
gum::learning::IDatabaseTable< T_DATA >::IDatabaseTable ( const MissingValType & missing_symbols,
const std::vector< std::string > & var_names )

◆ IDatabaseTable() [2/3]

template<typename T_DATA>
gum::learning::IDatabaseTable< T_DATA >::IDatabaseTable ( const IDatabaseTable< T_DATA > & from)

copy constructor

References IDatabaseTable().

Here is the call graph for this function:

◆ IDatabaseTable() [3/3]

template<typename T_DATA>
gum::learning::IDatabaseTable< T_DATA >::IDatabaseTable ( IDatabaseTable< T_DATA > && from)

move constructor

References IDatabaseTable().

Here is the call graph for this function:

◆ ~IDatabaseTable()

template<typename T_DATA>
virtual gum::learning::IDatabaseTable< T_DATA >::~IDatabaseTable ( )
virtual

destructor

Member Function Documentation

◆ begin()

template<typename T_DATA>
iterator gum::learning::IDatabaseTable< T_DATA >::begin ( ) const

returns a new unsafe handler pointing to the beginning of the database

◆ beginSafe()

template<typename T_DATA>
iterator_safe gum::learning::IDatabaseTable< T_DATA >::beginSafe ( ) const

returns a new safe handler pointing to the beginning of the database

◆ clear()

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::clear ( )
virtual

erase the content of the database, including the names of the variables

Reimplemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References clear().

Referenced by clear().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ clone()

template<typename T_DATA>
virtual IDatabaseTable< T_DATA > * gum::learning::IDatabaseTable< T_DATA >::clone ( ) const
pure virtual

virtual copy constructor

Implemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References IDatabaseTable().

Here is the call graph for this function:

◆ columnFromVariableName()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::columnFromVariableName ( const std::string & name) const

returns the index of the column whose name is passed in argument

Warning
If several columns correspond to the name, only the column with the lowest index is returned. If you wish to retrieve all the columns, use method columnsFromVariableName
Exceptions
UndefinedElement is raised if there exists no column with the given name

References columnFromVariableName().

Referenced by columnFromVariableName().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ columnsFromVariableName()

template<typename T_DATA>
DBVector< std::size_t > gum::learning::IDatabaseTable< T_DATA >::columnsFromVariableName ( const std::string & name) const

returns the indices of all the columns whose name is passed in argument

It may happen that several columns correspond to a given variable name. In this case, the function returns the indices of all the columns of the IDatabaseTable that match the name.

References columnsFromVariableName().

Referenced by columnsFromVariableName().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ content()

template<typename T_DATA>
const Matrix< T_DATA > & gum::learning::IDatabaseTable< T_DATA >::content ( ) const
noexcept

returns the content (the records) of the database

References content().

Referenced by content().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ empty()

template<typename T_DATA>
bool gum::learning::IDatabaseTable< T_DATA >::empty ( ) const
noexcept

indicates whether the database contains some records or not

References empty().

Referenced by empty().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ end()

template<typename T_DATA>
const iterator & gum::learning::IDatabaseTable< T_DATA >::end ( ) const
noexcept

returns a new unsafe handler pointing to the end of the database

Referenced by eraseRows().

Here is the caller graph for this function:

◆ endSafe()

template<typename T_DATA>
const iterator_safe & gum::learning::IDatabaseTable< T_DATA >::endSafe ( ) const
noexcept

returns a new safe handler pointing to the end of the database

References endSafe().

Referenced by endSafe().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseAllRows()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseAllRows ( )

erase all the rows

References eraseAllRows().

Referenced by eraseAllRows().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseFirstRow()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseFirstRow ( )

erase the first row

Warning
if the row does not exist, nothing is done. In particular, no exception is raised.

References eraseFirstRow().

Referenced by eraseFirstRow().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseFirstRows()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseFirstRows ( const std::size_t k)

erase the k first rows

Warning
if there are fewer than k rows in the database, the database is completely emptied

References eraseFirstRows().

Referenced by eraseFirstRows().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseLastRow()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseLastRow ( )

erase the last row

Warning
if the row does not exist, nothing is done. In particular, no exception is raised.

References eraseLastRow().

Referenced by eraseLastRow().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseLastRows()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseLastRows ( const std::size_t k)

erase the k last rows

Warning
if there are fewer than k rows in the database, the database is completely emptied

References eraseLastRows().

Referenced by eraseLastRows().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseRow()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseRow ( std::size_t index)

erase a given row specified by its index in the table

In the database, rows are indexed, starting from 0.

Warning
If the row does not exist, nothing is done. In particular, no exception is raised.

References eraseRow().

Referenced by eraseRow().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ eraseRows()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::eraseRows ( std::size_t deb,
std::size_t end )

erase the rows whose indices lie in [deb, end), i.e., from row deb (included) to row end (not included)

In the database, rows are indexed, starting from 0.

References end(), and eraseRows().

Referenced by eraseRows().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ handler()

template<typename T_DATA>
iterator gum::learning::IDatabaseTable< T_DATA >::handler ( ) const

returns a new unsafe handler pointing to the 1st record of the database

References handler().

Referenced by handler(), gum::learning::IDatabaseTable< T_DATA >::Handler::operator!=(), and gum::learning::IDatabaseTable< T_DATA >::Handler::operator==().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ handlerSafe()

template<typename T_DATA>
iterator_safe gum::learning::IDatabaseTable< T_DATA >::handlerSafe ( ) const

returns a new safe handler pointing to the 1st record of the database

References handlerSafe().

Referenced by handlerSafe().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ hasMissingValues() [1/2]

template<typename T_DATA>
bool gum::learning::IDatabaseTable< T_DATA >::hasMissingValues ( ) const

indicates whether the database contains some missing values

References hasMissingValues().

Referenced by hasMissingValues(), and hasMissingValues().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ hasMissingValues() [2/2]

template<typename T_DATA>
bool gum::learning::IDatabaseTable< T_DATA >::hasMissingValues ( const std::size_t k) const

indicates whether the kth row contains some missing values

References hasMissingValues().

Here is the call graph for this function:

◆ ignoreColumn()

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::ignoreColumn ( const std::size_t k,
const bool from_external_object = true )
pure virtual

makes the database table ignore from now on the kth column

This method can be called in two different ways: either k refers to the current kth column of the database table (in this case, parameter from_external_object is set to false), or k corresponds to the kth column of an original dataset used to fill the database table (in this case from_external_object is set to true). Depending on from_external_object's value, the ignored columns may differ. As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to ignoreColumn ( 1, true ) will exclude column X1 from the database table. As a result, the database table columns are X0, X2, X3 and X4. Therefore, subsequently calling ignoreColumn ( 1, false ) will result in excluding X2 since X2 is the 2nd column (columns are indexed starting from 0). So, now the database table's columns are X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will remove column X3 because, in the original database, X3 was the 4th column.

Warning
If the database table was not empty, then the kth column is removed from all the rows currently stored.
If the kth column does not exist (i.e., the original dataset does not contain the kth column when from_external_object is set to true, or the IDatabaseTable has no kth column when from_external_object is set to false), column k is marked as to be ignored and nothing is done on the content of the IDatabaseTable. No exception is raised.
Parameters
k: the column to remove. See the above detailed description on how k is computed.
from_external_object: indicates whether k refers to the kth column of an original external database or to the current kth column of the database table.

Implemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References ignoreColumn().

Referenced by ignoreColumn().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ ignoredColumns()

template<typename T_DATA>
virtual const DBVector< std::size_t > gum::learning::IDatabaseTable< T_DATA >::ignoredColumns ( ) const
pure virtual

returns the set of columns of the original dataset that are ignored

Implemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References ignoredColumns().

Referenced by ignoredColumns().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ inputColumns()

template<typename T_DATA>
virtual const DBVector< std::size_t > gum::learning::IDatabaseTable< T_DATA >::inputColumns ( ) const
pure virtual

returns the set of columns of the original dataset that are present in the IDatabaseTable

Implemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References inputColumns(), and insertRow().

Referenced by inputColumns().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertRow() [1/3]

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::insertRow ( const Row< T_DATA > & new_row,
const IsMissing contains_missing_data )
virtual

insert a new row at the end of the database

Unlike methods insertRow for data whose type is different from T_DATA, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.

Exceptions
SizeError is raised if the size of the new_row is not equal to the number of columns retained in the IDatabaseTable

References insertRow(), and insertRows().

Here is the call graph for this function:

◆ insertRow() [2/3]

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::insertRow ( const std::vector< std::string > & new_row)
pure virtual

insert a new row at the end of the database

The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.

Exceptions
SizeError is raised if the vector of strings cannot be inserted in the IDatabaseTable because its size does not allow a matching with the columns of the IDatabaseTable (taking into account the ignored columns)

Implemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References insertRow().

Referenced by inputColumns(), insertRow(), insertRow(), and insertRow().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ insertRow() [3/3]

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::insertRow ( Row< T_DATA > && new_row,
const IsMissing contains_missing_data )
virtual

insert a new DBRow at the end of the database

Unlike methods insertRow for data whose type is different from T_DATA, this method assumes that the new row passed in argument does not contain any data of the ignored columns. So, basically, it could be copied as is into the database table.

Exceptions
SizeError is raised if the size of the new_row is not equal to the number of columns retained in the IDatabaseTable

References insertRow().

Here is the call graph for this function:

◆ insertRows() [1/2]

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::insertRows ( const Matrix< T_DATA > & new_rows,
const DBVector< IsMissing > & rows_have_missing_vals )
virtual

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Parameters
new_rows the new set of rows to be copied as is
rows_have_missing_vals a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not
Exceptions
SizeError is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals.

References insertRows().

Here is the call graph for this function:

◆ insertRows() [2/2]

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::insertRows ( Matrix< T_DATA > && new_rows,
const DBVector< IsMissing > & rows_have_missing_vals )
virtual

insert a set of new DBRows at the end of the database

Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.

Parameters
new_rows the new set of rows to be copied as is
rows_have_missing_vals a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not
Exceptions
SizeError is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals.

References insertRows().

Referenced by insertRow(), insertRows(), and insertRows().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ isRowSizeOK_()

template<typename T_DATA>
bool gum::learning::IDatabaseTable< T_DATA >::isRowSizeOK_ ( const std::size_t size) const
protected

checks whether a size corresponds to the number of columns of the database

References size().

Here is the call graph for this function:

◆ minNbRowsPerThread()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::minNbRowsPerThread ( ) const

returns the minimum number of rows that each thread should process

References minNbRowsPerThread().

Referenced by minNbRowsPerThread().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ missingSymbols()

template<typename T_DATA>
const DBVector< std::string > & gum::learning::IDatabaseTable< T_DATA >::missingSymbols ( ) const

returns the set of missing symbols

References missingSymbols().

Referenced by missingSymbols().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nbProcessingThreads_()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::nbProcessingThreads_ ( ) const
protected

returns the number of threads used to process the current database content

◆ nbRows()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::nbRows ( ) const
noexcept

returns the number of records (rows) in the database

References nbRows().

Referenced by nbRows().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nbThreads()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::nbThreads ( ) const

returns the number of threads used to parse the database

References nbThreads().

Referenced by nbThreads().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ nbVariables()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::nbVariables ( ) const
noexcept

returns the number of variables (columns) of the database

References nbVariables().

Referenced by gum::learning::IBNLearner::Database::Database(), and nbVariables().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ operator=() [1/2]

template<typename T_DATA>
IDatabaseTable< T_DATA > & gum::learning::IDatabaseTable< T_DATA >::operator= ( const IDatabaseTable< T_DATA > & from)
protected

copy operator

References IDatabaseTable().

Here is the call graph for this function:

◆ operator=() [2/2]

template<typename T_DATA>
IDatabaseTable< T_DATA > & gum::learning::IDatabaseTable< T_DATA >::operator= ( IDatabaseTable< T_DATA > && from)
protected

move operator

References IDatabaseTable().

Here is the call graph for this function:

◆ rangesProcessingThreads_()

template<typename T_DATA>
std::vector< std::pair< std::size_t, std::size_t > > gum::learning::IDatabaseTable< T_DATA >::rangesProcessingThreads_ ( const std::size_t nb_threads) const
protected

returns the ranges that threads should process

◆ setAllRowsWeight()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::setAllRowsWeight ( const double new_weight)

assign a given weight to all the rows of the database

References setAllRowsWeight().

Referenced by setAllRowsWeight().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setMaxNbThreads()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::setMaxNbThreads ( const std::size_t nb) const

changes the max number of threads that a database can use

Within databases, some methods can be processed in a parallel fashion. This method indicates the maximum number of threads that can be run in parallel at the same time.

References setMaxNbThreads().

Referenced by setMaxNbThreads().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setMinNbRowsPerThread()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::setMinNbRowsPerThread ( const std::size_t nb) const

changes the minimum number of rows a thread should process in a multithreading context

When a method executes several threads to perform actions on the rows of the database, the MinNbRowsPerThread indicates how many rows each thread should at least process. This is used to compute the number of threads actually run. This number is equal to the min between the max number of threads allowed and the number of records in the database divided by nb.

References setMinNbRowsPerThread().

Referenced by setMinNbRowsPerThread().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setVariableNames()

template<typename T_DATA>
virtual void gum::learning::IDatabaseTable< T_DATA >::setVariableNames ( const std::vector< std::string > & names,
const bool from_external_object = true )
pure virtual

sets the names of the variables

This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, the latter should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.

Parameters
names the names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table).
from_external_object a Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false).
Exceptions
SizeError is raised if the names passed in arguments cannot be assigned to the columns of the IDatabaseTable because the size of their vector is inadequate.

Implemented in gum::learning::DatabaseTable, and gum::learning::RawDatabaseTable.

References setVariableNames().

Referenced by setVariableNames().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ setWeight()

template<typename T_DATA>
void gum::learning::IDatabaseTable< T_DATA >::setWeight ( const std::size_t i,
const double weight )

assigns a given weight to the ith row of the database

Exceptions
OutOfBounds is raised if i is outside the set of indices of the records or if the weight is negative

References setWeight(), and weight().

Referenced by setWeight().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ size()

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::size ( ) const
noexcept

returns the number of records (rows) in the database

References size().

Referenced by isRowSizeOK_(), and size().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ variableName()

template<typename T_DATA>
const std::string & gum::learning::IDatabaseTable< T_DATA >::variableName ( const std::size_t k) const

returns the name of the kth column of the IDatabaseTable

Exceptions
OutOfBounds is raised if the IDatabaseTable contains fewer than k columns.

References variableName().

Referenced by variableName().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ variableNames()

template<typename T_DATA>
const DBVector< std::string > & gum::learning::IDatabaseTable< T_DATA >::variableNames ( ) const
noexcept

returns the variable names for all the columns of the database

The names do not include the ignored columns.

References variableNames().

Referenced by gum::learning::IBNLearner::Database::Database(), and variableNames().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ weight() [1/2]

template<typename T_DATA>
double gum::learning::IDatabaseTable< T_DATA >::weight ( ) const

returns the weight of the whole database

References weight().

Here is the call graph for this function:

◆ weight() [2/2]

template<typename T_DATA>
double gum::learning::IDatabaseTable< T_DATA >::weight ( const std::size_t i) const

returns the weight of the ith record

Exceptions
OutOfBounds is raised if i is outside the set of indices of the records

References weight().

Referenced by setWeight(), weight(), and weight().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ Handler

template<typename T_DATA>
friend class Handler
friend

allow the handlers to access the database directly

Definition at line 1159 of file IDatabaseTable.h.

◆ HandlerSafe

template<typename T_DATA>
friend class HandlerSafe
friend

Definition at line 1160 of file IDatabaseTable.h.

Member Data Documentation

◆ has_row_missing_val_

template<typename T_DATA>
DBVector< IsMissing > gum::learning::IDatabaseTable< T_DATA >::has_row_missing_val_
protected

Definition at line 1099 of file IDatabaseTable.h.

◆ max_nb_threads_

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::max_nb_threads_ {std::size_t(gum::getNumberOfThreads())}
mutable protected

Definition at line 1102 of file IDatabaseTable.h.

The common class for the tabular database tables.

◆ min_nb_rows_per_thread_

template<typename T_DATA>
std::size_t gum::learning::IDatabaseTable< T_DATA >::min_nb_rows_per_thread_ {100}
mutable protected

Definition at line 1106 of file IDatabaseTable.h.

1106{100};

◆ missing_symbols_

template<typename T_DATA>
DBVector< std::string > gum::learning::IDatabaseTable< T_DATA >::missing_symbols_
protected

Definition at line 1096 of file IDatabaseTable.h.

◆ rows_

template<typename T_DATA>
Matrix< T_DATA > gum::learning::IDatabaseTable< T_DATA >::rows_
protected

Definition at line 1093 of file IDatabaseTable.h.

◆ variable_names_

template<typename T_DATA>
DBVector< std::string > gum::learning::IDatabaseTable< T_DATA >::variable_names_
protected

the names of the variables for each column

Definition at line 1090 of file IDatabaseTable.h.


The documentation for this class was generated from the following file: