aGrUM 2.3.2
a C++ library for (probabilistic) graphical models
recordCounter.h
Go to the documentation of this file.
1/****************************************************************************
2 * This file is part of the aGrUM/pyAgrum library. *
3 * *
4 * Copyright (c) 2005-2025 by *
5 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
6 * - Christophe GONZALES(_at_AMU) *
7 * *
8 * The aGrUM/pyAgrum library is free software; you can redistribute it *
9 * and/or modify it under the terms of either : *
10 * *
11 * - the GNU Lesser General Public License as published by *
12 * the Free Software Foundation, either version 3 of the License, *
13 * or (at your option) any later version, *
14 * - the MIT license (MIT), *
15 * - or both in dual license, as here. *
16 * *
17 * (see https://agrum.gitlab.io/articles/dual-licenses-lgplv3mit.html) *
18 * *
19 * This aGrUM/pyAgrum library is distributed in the hope that it will be *
20 * useful, but WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, *
21 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES MERCHANTABILITY or FITNESS *
22 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE *
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER *
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
25 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
26 * OTHER DEALINGS IN THE SOFTWARE. *
27 * *
28 * See LICENCES for more details. *
29 * *
30 * SPDX-FileCopyrightText: Copyright 2005-2025 *
31 * - Pierre-Henri WUILLEMIN(_at_LIP6) *
32 * - Christophe GONZALES(_at_AMU) *
33 * SPDX-License-Identifier: LGPL-3.0-or-later OR MIT *
34 * *
35 * Contact : info_at_agrum_dot_org *
36 * homepage : http://agrum.gitlab.io *
37 * gitlab : https://gitlab.com/agrumery/agrum *
38 * *
39 ****************************************************************************/
40
41
50#ifndef GUM_LEARNING_RECORD_COUNTER_H
51#define GUM_LEARNING_RECORD_COUNTER_H
52
53#include <sstream>
54#include <string>
55#include <utility>
56#include <vector>
57
58#include <agrum/agrum.h>
59
63
64namespace gum {
65
66 namespace learning {
67
128 public:
129 // ##########################################################################
131 // ##########################################################################
133
135
153 const std::vector< std::pair< std::size_t, std::size_t > >& ranges,
154 const Bijection< NodeId, std::size_t >& nodeId2columns
156
158
170 const Bijection< NodeId, std::size_t >& nodeId2columns
172
175
178
180 virtual RecordCounter* clone() const;
181
183 virtual ~RecordCounter();
184
186
187
188 // ##########################################################################
190 // ##########################################################################
191
193
196
199
201
202
203 // ##########################################################################
205 // ##########################################################################
206
208
210 void clear();
211
213
217 virtual void setNumberOfThreads(Size nb);
218
228 void setMinNbRowsPerThread(const std::size_t nb) const;
229
231 std::size_t minNbRowsPerThread() const;
232
234
271 const std::vector< double >& counts(const IdCondSet& ids,
272 const bool check_discrete_vars = false);
273
275
281 void setRanges(const std::vector< std::pair< std::size_t, std::size_t > >& new_ranges);
282
285
287 const std::vector< std::pair< std::size_t, std::size_t > >& ranges() const;
288
290
293 template < typename GUM_SCALAR >
294 void setBayesNet(const BayesNet< GUM_SCALAR >& new_bn);
295
297
301
303 const DatabaseTable& database() const;
304
306
307#ifndef DOXYGEN_SHOULD_SKIP_THIS
308
309 private:
310 // the parsers used by the threads
311 std::vector< ThreadData< DBRowGeneratorParser > > _parsers_;
312
313 // the set of ranges of the database's rows indices over which the user
314 // wishes to perform the counting
315 std::vector< std::pair< std::size_t, std::size_t > > _ranges_;
316
317 // the ranges actually used by the threads: there is a hopefully clever
318 // algorithm that splits the row ranges into another set of ranges that
319 // are assigned to the threads. For instance, if the database has 1000
320 // rows and there are 10 threads, each one will be passed a set of 100
321 // rows. These sets are precisely what are stored in the field below
322 mutable std::vector< std::pair< std::size_t, std::size_t > > _thread_ranges_;
323
324 // the mapping from the NodeIds of the variables to the indices of the
325 // columns in the database
326 Bijection< NodeId, std::size_t > _nodeId2columns_;
327
328 // the last database-parsed counting
329 std::vector< double > _last_DB_counting_;
330
331 // the ids of the nodes for the last database-parsed counting
332 IdCondSet _last_DB_ids_;
333
334 // the last counting deduced from _last_DB_counting_
335 std::vector< double > _last_nonDB_counting_;
336
337 // the ids of the nodes of the last counting deduced from _last_DB_counting_
338 IdCondSet _last_nonDB_ids_;
339
340 // the min number of rows that a thread should process in a
341 // multithreading context
342 mutable std::size_t _min_nb_rows_per_thread_{512};
343
344 // returns a mapping from the nodes ids to the columns of the database
345 // for a given sequence of ids. This is especially convenient when
346 // _nodeId2columns_ is empty (which means that there is an identity mapping)
347 HashTable< NodeId, std::size_t > _getNodeIds2Columns_(const IdCondSet& ids) const;
348
350 std::vector< double >& _extractFromCountings_(const IdCondSet& subset_ids,
351 const IdCondSet& superset_ids,
352 const std::vector< double >& superset_vect);
353
355 std::vector< double >& _countFromDatabase_(const IdCondSet& ids);
356
358
361 void _checkRanges_(
362 const std::vector< std::pair< std::size_t, std::size_t > >& new_ranges) const;
363
365
367 void _checkDiscreteVariables_(const IdCondSet& ids) const;
368
370
373 void _raiseCheckException_(const std::vector< std::string >& bad_vars) const;
374
376 void _dispatchRangesToThreads_();
377
378#endif /* DOXYGEN_SHOULD_SKIP_THIS */
379 };
380
381 } /* namespace learning */
382
383} /* namespace gum */
384
387
389#ifndef GUM_NO_INLINE
391#endif /* GUM_NO_INLINE */
392
393#endif /* GUM_LEARNING_RECORD_COUNTER_H */
The class for parsing DatabaseTable rows and generating output rows.
The class for generic Hash Tables.
Definition hashTable.h:637
ThreadNumberManager(Size nb_threads=0)
default constructor
the class used to read a row in the database and to transform it into a set of DBRow instances that c...
The class representing a tabular database as used by learning tasks.
A class for storing a pair of sets of NodeIds, the second one corresponding to a conditional set.
Definition idCondSet.h:214
RecordCounter & operator=(RecordCounter &&from)
move operator
RecordCounter(const RecordCounter &from)
copy constructor
std::size_t minNbRowsPerThread() const
returns the minimum number of rows that each thread should process
RecordCounter(const DBRowGeneratorParser &parser, const std::vector< std::pair< std::size_t, std::size_t > > &ranges, const Bijection< NodeId, std::size_t > &nodeId2columns=Bijection< NodeId, std::size_t >())
default constructor
RecordCounter & operator=(const RecordCounter &from)
copy operator
const std::vector< std::pair< std::size_t, std::size_t > > & ranges() const
returns the current ranges
void setRanges(const std::vector< std::pair< std::size_t, std::size_t > > &new_ranges)
sets new ranges to perform the counting
virtual void setNumberOfThreads(Size nb)
sets the maximum number of threads that can be used
void setBayesNet(const BayesNet< GUM_SCALAR > &new_bn)
assign a new Bayes net to all the counter's generators depending on a BN
const DatabaseTable & database() const
returns the database on which we perform the counts
void clear()
clears all the last database-parsed counting from memory
virtual RecordCounter * clone() const
virtual copy constructor
RecordCounter(RecordCounter &&from)
move constructor
void clearRanges()
reset the ranges to the one range corresponding to the whole database
const Bijection< NodeId, std::size_t > & nodeId2Columns() const
returns the mapping from ids to column positions in the database
RecordCounter(const DBRowGeneratorParser &parser, const Bijection< NodeId, std::size_t > &nodeId2columns=Bijection< NodeId, std::size_t >())
default constructor
const std::vector< double > & counts(const IdCondSet &ids, const bool check_discrete_vars=false)
returns the counts over all the variables in an IdCondSet
void setMinNbRowsPerThread(const std::size_t nb) const
changes the minimum number of rows a thread should process in a multithreading context
virtual ~RecordCounter()
destructor
std::size_t Size
In aGrUM, hashed values are unsigned long int.
Definition types.h:74
A class used by learning caches to represent uniquely sets of variables.
include the inlined functions if necessary
Definition CSVParser.h:54
gum is the global namespace for all aGrUM entities
Definition agrum.h:46
The class that computes counts of observations from the database.
A wrapper that makes it possible to store data in a way that prevents false cache-line sharing.