56 template <
typename GUM_SCALAR >
63 for (
const auto& var:
_bn_.dag()) {
64 auto name =
_bn_.variable(var).name();
75 template <
typename GUM_SCALAR >
81 template <
typename GUM_SCALAR >
88 template <
typename GUM_SCALAR >
102 const auto topOrder =
_bn_.topologicalOrder();
107 particule.
add(
_bn_.variable(node));
115 while (idSample < nbSamples) {
117 auto p = int((idSample * 100) / nbSamples);
123 std::vector< Idx >& sample =
_database_.at(idSample);
126 const NodeId node = topOrder[rank];
127 const auto& var =
_bn_.variable(node);
128 const auto& cpt =
_bn_.cpt(node);
133 cumul += cpt[particule];
134 if (cumul >= nb)
break;
143 sample.at(node) = particule.
val(var);
146 if (timeout > 0 && timer.
step() > timeout) {
break; }
147 if (reject) {
continue; }
152 if (idSample < nbSamples)
_database_.resize(idSample);
159 std::stringstream ss;
160 ss <<
"Database of size " << idSample <<
"(" << nbSamples <<
") generated in " << timer.
step()
168 template <
typename GUM_SCALAR >
175 template <
typename GUM_SCALAR >
182 template <
typename GUM_SCALAR >
188 template <
typename GUM_SCALAR >
195 template <
typename GUM_SCALAR >
200 template <
typename GUM_SCALAR >
205 template <
typename GUM_SCALAR >
211 template <
typename GUM_SCALAR >
215 std::string csvSeparator,
216 bool checkOnAppend)
const {
219 if (csvSeparator.find(
'\n') != std::string::npos) {
223 bool includeHeader =
true;
225 std::ifstream csvFile(csvFileURL);
230 "Inconsistent variable order in csvFile when appending. You "
231 "can use setVarOrderFromCSV(url) function to get the right "
232 "order. You could also set parameter checkOnAppend=false if you "
233 "know what you are doing.")
234 includeHeader =
false;
240 auto ofstreamFlag = append ? std::ofstream::app : std::ofstream::out;
242 std::ofstream os(csvFileURL, ofstreamFlag);
243 bool firstCol =
true;
251 os <<
_bn_.variable(i).name();
256 bool firstRow =
true;
271 const auto& v =
_bn_.variable(i);
276 os << static_cast< const IDiscretizedVariable& >(v).draw(row.at(i));
281 os << v.label(row.at(i));
292 template <
typename GUM_SCALAR >
305 return v.
label(row.at(i));
309 template <
typename GUM_SCALAR >
314 std::vector< std::string > varNames;
321 for (std::size_t i = 0; i <
_nbVars_; ++i) {
327 std::vector< std::string > xrow(
_nbVars_);
336 std::vector< DBTranslatedValueType > translatorType(
_nbVars_);
337 for (std::size_t i = 0; i <
_nbVars_; ++i) {
341 const auto xmiss = gum::learning::DatabaseTable::IsMissing::False;
346 xrow[i].discr_val = std::size_t(row.at(j));
347 else xrow[i].cont_val = float(row.at(j));
357 template <
typename GUM_SCALAR >
371 template <
typename GUM_SCALAR >
376 std::vector< bool > usedVars(
_nbVars_,
false);
380 usedVars.at(i) =
true;
383 if (std::find(usedVars.begin(), usedVars.end(),
false) != usedVars.end()) {
391 template <
typename GUM_SCALAR >
393 std::vector< Idx > varOrderIdx;
394 varOrderIdx.reserve(
varOrder.size());
402 template <
typename GUM_SCALAR >
404 const std::string& csvSeparator) {
409 template <
typename GUM_SCALAR >
413 for (
const auto& v:
_bn_.topologicalOrder()) {
420 template <
typename GUM_SCALAR >
424 for (
const auto& v:
_bn_.topologicalOrder()) {
432 template <
typename GUM_SCALAR >
434 std::vector< std::string >
varOrder;
436 for (
const auto& var:
_bn_.dag()) {
444 template <
typename GUM_SCALAR >
450 template <
typename GUM_SCALAR >
452 std::vector< std::string > varNames;
462 template <
typename GUM_SCALAR >
469 template <
typename GUM_SCALAR >
472 const std::string& csvSeparator)
const {
473 std::ifstream csvFile(csvFileURL);
486 template <
typename GUM_SCALAR >
489 const std::string& csvSeparator)
const {
491 std::vector< std::string > header_found;
493 while (std::getline(csvFile, line)) {
495 auto pos = line.find(csvSeparator);
496 while (pos != std::string::npos) {
497 header_found.push_back(line.substr(i, pos - i));
498 pos += csvSeparator.length();
500 pos = line.find(csvSeparator, pos);
502 if (pos == std::string::npos) header_found.push_back(line.substr(i, line.length()));
510 for (
const auto& hf: header_found) {
Base class for discrete random variable.
virtual double numerical(Idx indice) const =0
get a numerical representation of the indice-th value.
VarType varType() const override=0
returns the varType of variable
virtual std::string label(Idx i) const =0
get the indice-th label. This method is pure virtual.
Exception : fatal (unknown ?) error.
A base class for discretized variables, independent of the ticks type.
Class for assigning/browsing values to tuples of discrete variables.
bool end() const
Returns true if the Instantiation reached the end.
void incVar(const DiscreteVariable &v)
Operator increment for variable v only.
void add(const DiscreteVariable &v) final
Adds a new variable in the Instantiation.
virtual bool empty() const final
Returns true if the instantiation is empty.
bool contains(const DiscreteVariable &v) const final
Indicates whether a given variable belongs to the Instantiation.
void setFirstVar(const DiscreteVariable &v)
Assign the first value in the Instantiation for var v.
Idx val(Idx i) const
Returns the current value of the variable at position i.
void setLastVar(const DiscreteVariable &v)
Assign the last value in the Instantiation for var v.
Exception: at least one argument passed to a function is not what was expected.
Exception : the element we looked for cannot be found.
Exception : operation not allowed.
Signaler2< Size, double > onProgress
Progression (percent) and time.
Signaler1< const std::string & > onStop
with a possible explanation for stopping
Class used to compute response times for benchmark purposes.
void reset()
Reset the timer.
double step() const
Returns the delta time between now and the last reset() call (or the constructor).
Base class for every random variable.
bool _drawnSamples_
whether drawSamples has been already called.
std::vector< Idx > varOrder() const
returns variable order indexes
std::string samplesLabelAt(Idx row, Idx col) const
generate and stock database, returns log2likelihood using ProgressNotifier as notification
DatabaseTable toDatabaseTable(bool useLabels=true) const
generates a DatabaseVectInRAM
void setVarOrderFromCSV(const std::string &csvFileURL, const std::string &csvSeparator=",")
change columns order according to a csv file
Size samplesNbRows() const
generate and stock database, returns log2likelihood using ProgressNotifier as notification
std::string _label_(const std::vector< Idx > &row, const DiscreteVariable &v, Idx i) const
return the final string for a label (taking into account the behavior for DiscretizedVariable) from a...
std::vector< std::vector< Idx > > database() const
generates database according to bn into a std::vector
void setDiscretizedLabelModeRandom()
set the sampling behaviour for discretized variables to uniformly draw a double value
Idx samplesAt(Idx row, Idx col) const
generate and stock database, returns log2likelihood using ProgressNotifier as notification
double _log2likelihood_
log2Likelihood of generated samples
const BayesNet< GUM_SCALAR > & bn(void)
return const ref to the Bayes Net
BNDatabaseGenerator(const BayesNet< GUM_SCALAR > &bn)
default constructor
void setDiscretizedLabelModeInterval()
set the behaviour of sampling for discretized variable to select the label : "[min,...
Bijection< std::string, NodeId > _names2ids_
bijection between node names and node ids
std::vector< std::vector< Idx > > _database_
generated database
DiscretizedLabelMode _discretizedLabelMode_
Size samplesNbCols() const
generate and stock database, returns log2likelihood using ProgressNotifier as notification
const BayesNet< GUM_SCALAR > & _bn_
Bayesian network.
void setAntiTopologicalVarOrder()
set columns in anti-topological order
double log2likelihood() const
returns log2Likelihood of generated samples
void setTopologicalVarOrder()
set columns in topological order
void setDiscretizedLabelModeMedian()
set the behaviour of sampling for discretized variable to deterministic select double median of inter...
double drawSamples(Size nbSamples)
generate and store the database; returns the log2-likelihood, using ProgressNotifier for notification
std::vector< Idx > _varOrder_
variable order in generated database
std::vector< std::string > varOrderNames() const
returns variable order.
std::vector< Idx > _varOrderFromCSV_(const std::string &csvFileURL, const std::string &csvSeparator=",") const
returns varOrder from a csv file
Size _nbVars_
number of variables
void setVarOrder(const std::vector< Idx > &varOrder)
change columns order
void toCSV(const std::string &csvFileURL, bool useLabels=true, bool append=false, std::string csvSeparator=",", bool checkOnAppend=false) const
generates csv representing the generated database
void setRandomVarOrder()
set columns in random order
~BNDatabaseGenerator()
destructor
The class for storing a record in a database.
DBTranslatedValueType getValType() const
returns the type of values handled by the translator
The class representing a tabular database as used by learning tasks.
std::size_t insertTranslator(const DBTranslator &translator, const std::size_t input_column, const bool unique_column=true)
insert a new translator into the database table
const DBTranslator & translator(const std::size_t k, const bool k_is_input_col=false) const
returns either the kth translator of the database table or the first one reading the kth column of th...
void insertRow(const std::vector< std::string > &new_row) override
insert a new row at the end of the database
#define GUM_ERROR(type, msg)
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Size Idx
Type for indexes.
Size NodeId
Type for node ids.
std::mt19937 & randomGenerator()
define a random_engine with correct seed
double randomProba()
Returns a random double between 0 and 1 included (i.e.
include the inlined functions if necessary
#define GUM_EMIT1(signal, arg1)
#define GUM_EMIT2(signal, arg1, arg2)
Class used to compute response times for benchmark purposes.