![]() |
aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
|
The table containing the raw/original data of a database. More...
#include <agrum/base/databaseTable.h>
Public Types | |
| template<typename TX_DATA> | |
| using | DBVector = std::vector< TX_DATA > |
| the type for the vectors used in the RawDatabaseTable | |
| template<typename TX_DATA> | |
| using | Row = DBRow< TX_DATA > |
| a row of the database | |
| template<typename TX_DATA> | |
| using | Matrix = DBVector< Row< TX_DATA > > |
| the type for the matrices stored into the database | |
| using | MissingValType = std::vector< std::string > |
| using | Handler = typename IDatabaseTable< DBCell >::Handler |
| the unsafe handler type | |
| using | HandlerSafe = typename IDatabaseTable< DBCell >::HandlerSafe |
| the safe handler type | |
| using | IsMissing = typename IDatabaseTable< DBCell >::IsMissing |
| using | value_type = Row< DBCell > |
| Types for STL compliance. | |
| using | reference = value_type& |
| Types for STL compliance. | |
| using | const_reference = const value_type& |
| Types for STL compliance. | |
| using | pointer = value_type* |
| Types for STL compliance. | |
| using | const_pointer = const value_type* |
| Types for STL compliance. | |
| using | size_type = std::size_t |
| Types for STL compliance. | |
| using | difference_type = std::ptrdiff_t |
| Types for STL compliance. | |
| using | iterator = Handler |
| Types for STL compliance. | |
| using | iterator_safe = HandlerSafe |
| Types for STL compliance. | |
| using | const_iterator |
| Types for STL compliance. | |
| using | const_iterator_safe |
| Types for STL compliance. | |
Public Member Functions | |
Constructors / Destructors | |
| RawDatabaseTable (const MissingValType &missing_symbols, const std::vector< std::string > &var_names) | |
| default constructor | |
| RawDatabaseTable (const MissingValType &missing_symbols) | |
| default constructor | |
| RawDatabaseTable () | |
| default constructor | |
| RawDatabaseTable (const RawDatabaseTable &from) | |
| copy constructor | |
| RawDatabaseTable (RawDatabaseTable &&from) | |
| move constructor | |
| virtual RawDatabaseTable * | clone () const final |
| virtual copy constructor | |
| virtual | ~RawDatabaseTable () |
| destructor | |
Operators | |
| RawDatabaseTable & | operator= (const RawDatabaseTable &from) |
| copy operator | |
| RawDatabaseTable & | operator= (RawDatabaseTable &&from) |
| move operator | |
Accessors / Modifiers | |
| void | setVariableNames (const std::vector< std::string > &names, const bool from_external_object=true) final |
| sets the names of the variables | |
| void | ignoreColumn (const std::size_t k, const bool from_external_object=true) final |
| makes the database table ignore from now on the kth column | |
| const DBVector< std::size_t > | ignoredColumns () const final |
| returns the set of columns of the original dataset that are ignored | |
| const DBVector< std::size_t > | inputColumns () const final |
| returns the set of columns of the original dataset that are present in the RawDatabaseTable | |
| void | insertRow (const std::vector< std::string > &new_row) final |
| insert a new row at the end of the database | |
| virtual void | clear () final |
| erase the content of the database, including the names of the variables | |
Iterators | |
| iterator | begin () const |
| returns a new unsafe handler pointing to the beginning of the database | |
| iterator_safe | beginSafe () const |
| returns a new safe handler pointing to the beginning of the database | |
| const iterator & | end () const noexcept |
| returns a new unsafe handler pointing to the end of the database | |
| const iterator_safe & | endSafe () const noexcept |
| returns a new safe handler pointing to the end of the database | |
Accessors / Modifiers | |
| const Matrix< DBCell > & | content () const noexcept |
| returns the content (the records) of the database | |
| iterator | handler () const |
| returns a new unsafe handler pointing to the 1st record of the database | |
| iterator_safe | handlerSafe () const |
| returns a new safe handler pointing to the 1st record of the database | |
| const DBVector< std::string > & | variableNames () const noexcept |
| returns the variable names for all the columns of the database | |
| const std::string & | variableName (const std::size_t k) const |
| returns the name of the kth column of the IDatabaseTable | |
| std::size_t | columnFromVariableName (const std::string &name) const |
| returns the index of the column whose name is passed in argument | |
| DBVector< std::size_t > | columnsFromVariableName (const std::string &name) const |
| returns the indices of all the columns whose name is passed in argument | |
| std::size_t | nbVariables () const noexcept |
| returns the number of variables (columns) of the database | |
| std::size_t | nbRows () const noexcept |
| returns the number of records (rows) in the database | |
| std::size_t | size () const noexcept |
| returns the number of records (rows) in the database | |
| bool | empty () const noexcept |
| indicates whether the database contains some records or not | |
| virtual void | insertRows (Matrix< DBCell > &&new_rows, const DBVector< IsMissing > &rows_have_missing_vals) |
| insert a set of new DBRows at the end of the database | |
| void | eraseRow (std::size_t index) |
| erase a given row specified by its index in the table | |
| void | eraseFirstRow () |
| erase the first row | |
| void | eraseLastRow () |
| erase the last row | |
| void | eraseFirstRows (const std::size_t k) |
| erase the k first rows | |
| void | eraseLastRows (const std::size_t k) |
| erase the k last rows | |
| void | eraseRows (std::size_t deb, std::size_t end) |
| erase the rows from index deb up to, but not including, index end | |
| void | eraseAllRows () |
| erase all the rows | |
| const DBVector< std::string > & | missingSymbols () const |
| returns the set of missing symbols | |
| bool | hasMissingValues () const |
| indicates whether the database contains some missing values | |
| void | setMaxNbThreads (const std::size_t nb) const |
| changes the max number of threads that a database can use | |
| std::size_t | nbThreads () const |
| returns the number of threads used to parse the database | |
| void | setMinNbRowsPerThread (const std::size_t nb) const |
| changes the minimum number of rows a thread should process in a multithreading context | |
| std::size_t | minNbRowsPerThread () const |
| returns the minimum number of rows that each thread should process | |
| void | setAllRowsWeight (const double new_weight) |
| assign a given weight to all the rows of the database | |
| void | setWeight (const std::size_t i, const double weight) |
| assigns a given weight to the ith row of the database | |
| double | weight (const std::size_t i) const |
| returns the weight of the ith record | |
Protected Member Functions | |
| bool | isRowSizeOK_ (const std::size_t size) const |
| checks whether a size corresponds to the number of columns of the database | |
| std::size_t | nbProcessingThreads_ () const |
| returns the number of threads used to process the current database content | |
| std::vector< std::pair< std::size_t, std::size_t > > | rangesProcessingThreads_ (const std::size_t nb_threads) const |
| returns the ranges that threads should process | |
Protected Attributes | |
| DBVector< std::string > | variable_names_ |
| the names of the variables for each column | |
| Matrix< DBCell > | rows_ |
| DBVector< std::string > | missing_symbols_ |
| DBVector< IsMissing > | has_row_missing_val_ |
| std::size_t | max_nb_threads_ |
| std::size_t | min_nb_rows_per_thread_ |
The table containing the raw/original data of a database.
Class RawDatabaseTable is intended to store in RAM the raw/original data of a database. Such raw data are not well suited for learning tasks because they need to be interpreted by the learning algorithm, which would incur a strong overhead. However, reading a CSV file and interpreting its data in order to reshape them in a way that will allow fast parsing by learning algorithms is also very time consuming. So, if you are unsure about the correct interpretation and need to change it several times either before processing the learning or during several learning phases, it is efficient to first read the CSV file and store its useful data (removing comments, for instance) into a first database table and, then, use this preprocessed table to quickly produce the interpreted database table that will subsequently be used by the learning. The purpose of the RawDatabaseTable class is precisely to implement this preprocessed table.
Definition at line 134 of file rawDatabaseTable.h.
|
inherited |
Types for STL compliance.
Definition at line 772 of file IDatabaseTable.h.
|
inherited |
Types for STL compliance.
Definition at line 773 of file IDatabaseTable.h.
| using gum::learning::RawDatabaseTable::const_pointer = const value_type* |
Types for STL compliance.
Definition at line 164 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::const_reference = const value_type& |
Types for STL compliance.
Definition at line 162 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::DBVector = std::vector< TX_DATA > |
the type for the vectors used in the RawDatabaseTable
Definition at line 138 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::difference_type = std::ptrdiff_t |
Types for STL compliance.
Definition at line 166 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::Handler = typename IDatabaseTable< DBCell >::Handler |
the unsafe handler type
Definition at line 151 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::HandlerSafe = typename IDatabaseTable< DBCell >::HandlerSafe |
the safe handler type
Definition at line 154 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::IsMissing = typename IDatabaseTable< DBCell >::IsMissing |
Definition at line 156 of file rawDatabaseTable.h.
Types for STL compliance.
Definition at line 167 of file rawDatabaseTable.h.
Types for STL compliance.
Definition at line 168 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::Matrix = DBVector< Row< TX_DATA > > |
the type for the matrices stored into the database
Definition at line 146 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::MissingValType = std::vector< std::string > |
Definition at line 148 of file rawDatabaseTable.h.
Types for STL compliance.
Definition at line 163 of file rawDatabaseTable.h.
Types for STL compliance.
Definition at line 161 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::Row = DBRow< TX_DATA > |
a row of the database
Definition at line 142 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::size_type = std::size_t |
Types for STL compliance.
Definition at line 165 of file rawDatabaseTable.h.
| using gum::learning::RawDatabaseTable::value_type = Row< DBCell > |
Types for STL compliance.
Definition at line 160 of file rawDatabaseTable.h.
| gum::learning::RawDatabaseTable::RawDatabaseTable | ( | const MissingValType & | missing_symbols, |
| const std::vector< std::string > & | var_names ) |
default constructor
Referenced by RawDatabaseTable(), RawDatabaseTable(), ~RawDatabaseTable(), clone(), operator=(), and operator=().
| gum::learning::RawDatabaseTable::RawDatabaseTable | ( | const MissingValType & | missing_symbols | ) |
default constructor
| gum::learning::RawDatabaseTable::RawDatabaseTable | ( | ) |
default constructor
| gum::learning::RawDatabaseTable::RawDatabaseTable | ( | const RawDatabaseTable & | from | ) |
| gum::learning::RawDatabaseTable::RawDatabaseTable | ( | RawDatabaseTable && | from | ) |
|
virtual |
|
inherited |
returns a new unsafe handler pointing to the beginning of the database
|
inherited |
returns a new safe handler pointing to the beginning of the database
|
finalvirtual |
erase the content of the database, including the names of the variables
Reimplemented from gum::learning::IDatabaseTable< DBCell >.
References clear().
Referenced by clear().
|
finalvirtual |
virtual copy constructor
Implements gum::learning::IDatabaseTable< DBCell >.
References RawDatabaseTable().
|
inherited |
returns the index of the column whose name is passed in argument
| UndefinedElement | is raised if there exists no column with the given name |
|
inherited |
returns the indices of all the columns whose name is passed in argument
It may happen that several columns correspond to a given variable name. In this case, the function returns the indices of all the columns of the IDatabase that match the name.
|
noexceptinherited |
returns the content (the records) of the database
|
noexceptinherited |
indicates whether the database contains some records or not
|
noexceptinherited |
returns a new unsafe handler pointing to the end of the database
|
noexceptinherited |
returns a new safe handler pointing to the end of the database
|
inherited |
erase all the rows
|
inherited |
erase the first row
|
inherited |
erase the k first rows
|
inherited |
erase the last row
|
inherited |
erase the k last rows
|
inherited |
erase a given row specified by its index in the table
In the database, rows are indexed, starting from 0.
|
inherited |
erase the rows from index deb up to, but not including, index end
In the database, rows are indexed, starting from 0.
|
inherited |
returns a new unsafe handler pointing to the 1st record of the database
|
inherited |
returns a new safe handler pointing to the 1st record of the database
|
inherited |
indicates whether the database contains some missing values
|
finalvirtual |
makes the database table ignore from now on the kth column
This method can be called in two different ways: either k refers to the current kth column of the database table (in this case parameter from_external_object is set to false), or k corresponds to the kth column of an original database used to fill the database table (in this case from_external_object is set to true). Depending on from_external_object's value, the ignored columns may differ. As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Then a call to ignoreColumn ( 1, true ) will exclude column X1 from the database table. As a result, the database table columns are X0, X2, X3 and X4. Therefore, subsequently calling ignoreColumn ( 1, false ) will result in excluding X2 since X2 is the 2nd column (columns are indexed starting from 0). So, now the database table's columns are X0, X3 and X4. If, now, we call ignoreColumn ( 3, true ), this will remove column X3 because, in the original database, X3 was the 4th column.
| k | the column to remove. See Method setVariableNames for a detailed description on how k is computed. |
| from_external_object | indicates whether k refers to the kth column of an original external database (true) or to the current kth column of the RawDatabaseTable. |
Implements gum::learning::IDatabaseTable< DBCell >.
References ignoreColumn().
Referenced by ignoreColumn().
|
finalvirtual |
returns the set of columns of the original dataset that are ignored
Implements gum::learning::IDatabaseTable< DBCell >.
References ignoredColumns().
Referenced by ignoredColumns().
|
finalvirtual |
returns the set of columns of the original dataset that are present in the RawDatabaseTable
Implements gum::learning::IDatabaseTable< DBCell >.
References gum::learning::IDatabaseTable< DBCell >::IDatabaseTable(), inputColumns(), and insertRow().
Referenced by inputColumns().
|
finalvirtual |
insert a new row at the end of the database
The new_row passed in argument is supposed to come from an external database. So it must contain data for the ignored columns.
| SizeError | is raised if the vector of string cannot be inserted in the RawDatabaseTable because its size does not allow a matching with the columns of the RawDatabaseTable (taking into account the ignored columns) |
Implements gum::learning::IDatabaseTable< DBCell >.
References insertRow().
Referenced by inputColumns(), and insertRow().
|
virtualinherited |
insert a set of new DBRows at the end of the database
Unlike methods insertRows for data whose type is different from T_DATA, this method assumes that the new rows passed in argument do not contain any data of the ignored columns. So, basically, these rows could be copied as is into the database table.
| new_rows | the new set of rows to be copied as is |
| rows_have_missing_vals | a vector of the same size as new_rows that indicates, for each new row, whether it contains some missing value or not |
| SizeError | is raised if the size of at least one row in new_rows is not equal to the number of columns retained in the IDatabaseTable. A SizeError exception will also be raised if the number of new rows is not identical to the size of vector rows_have_missing_vals. |
|
protectedinherited |
checks whether a size corresponds to the number of columns of the database
|
inherited |
returns the minimum number of rows that each thread should process
|
inherited |
returns the set of missing symbols
|
protectedinherited |
returns the number of threads used to process the current database content
|
noexceptinherited |
returns the number of records (rows) in the database
|
inherited |
returns the number of threads used to parse the database
|
noexceptinherited |
returns the number of variables (columns) of the database
| RawDatabaseTable & gum::learning::RawDatabaseTable::operator= | ( | const RawDatabaseTable & | from | ) |
| RawDatabaseTable & gum::learning::RawDatabaseTable::operator= | ( | RawDatabaseTable && | from | ) |
move operator
References gum::learning::IDatabaseTable< DBCell >::IDatabaseTable(), RawDatabaseTable(), and setVariableNames().
|
protectedinherited |
returns the ranges that threads should process
|
inherited |
assign a given weight to all the rows of the database
|
inherited |
changes the max number of threads that a database can use
Within databases, some methods can be processed in a parallel fashion. This method indicates the maximum number of threads that can be run in parallel at the same time.
|
inherited |
changes the minimum number of rows a thread should process in a multithreading context
When a method executes several threads to perform actions on the rows of the database, the MinNbRowsPerThread indicates how many rows each thread should at least process. This is used to compute the number of threads actually run. This number is equal to the min between the max number of threads allowed and the number of records in the database divided by nb.
|
finalvirtual |
sets the names of the variables
This method can be called in two different ways: either the names correspond precisely to the columns stored into the database table (in this case, parameter from_external_object is equal to false), or they correspond to the columns of an external database (e.g., a CSV file) from which we potentially excluded some columns and, consequently, these columns should not be taken into account (in this case, parameter from_external_object is equal to true). As an example, imagine that the database table is created from a CSV file with 5 columns named X0, X1, X2, X3 and X4 respectively. Suppose that we asked the database table to ignore columns X1 and X3. Then setVariableNames( { "X0", "X1", "X2", "X3", "X4" }, true ) will set the columns of the database table as { "X0", "X2", "X4" }. The same result could be obtained by executing setVariableNames( { "X0", "X2", "X4" }, false ), which specifies directly the set of names to retain in the database table.
| names | the names of all the columns, including the ignored columns if from_external_object is set to true, else excluding them (i.e., this should precisely correspond to the columns stored into the database table). |
| from_external_object | a Boolean indicating whether parameter names includes the columns ignored by the database table (true) or not (false). |
| SizeError | is raised if the names passed in arguments cannot be assigned to the columns of the RawDatabaseTable because the size of their vector is inadequate. |
Implements gum::learning::IDatabaseTable< DBCell >.
References setVariableNames().
Referenced by operator=(), and setVariableNames().
|
inherited |
assigns a given weight to the ith row of the database
| OutOfBounds | if i is outside the set of indices of the records or if the weight is negative |
|
noexceptinherited |
returns the number of records (rows) in the database
|
inherited |
returns the name of the kth column of the IDatabaseTable
| OutOfBounds | is raised if the IDatabaseTable contains fewer than k columns. |
|
noexceptinherited |
returns the variable names for all the columns of the database
The names do not include the ignored columns.
|
inherited |
returns the weight of the ith record
| OutOfBounds | if i is outside the set of indices of the records |
|
protectedinherited |
Definition at line 1099 of file IDatabaseTable.h.
|
mutableprotectedinherited |
Definition at line 1102 of file IDatabaseTable.h.
|
mutableprotectedinherited |
Definition at line 1106 of file IDatabaseTable.h.
|
protectedinherited |
Definition at line 1096 of file IDatabaseTable.h.
|
protectedinherited |
Definition at line 1093 of file IDatabaseTable.h.
|
protectedinherited |
the names of the variables for each column
Definition at line 1090 of file IDatabaseTable.h.