30#ifndef DOXYGEN_SHOULD_SKIP_THIS
43 const std::string& filename,
44 const std::string& delimiter,
45 const char commentmarker,
46 const char quoteMarker) :
48 _delimiter_(delimiter), _spaces_(
" \t\r"), _delimiterPlusSpaces_(_delimiter_ + _spaces_),
49 _nbLine_(std::size_t(0)), _commentMarker_(commentmarker), _quoteMarker_(quoteMarker),
50 _emptyData_(true), _instream_(&instream), _filename_(filename) {
51 GUM_CONSTRUCTOR(CSVParser);
56 CSVParser::~CSVParser() { GUM_DESTRUCTOR(CSVParser); }
59 void CSVParser::_getNextTriplet_(
const std::string& str,
60 std::size_t& first_letter_token,
61 std::size_t& next_token,
62 std::size_t& last_letter_token,
63 std::size_t from)
const {
64 first_letter_token = str.find_first_not_of(_spaces_, from);
66 if (first_letter_token == std::string::npos) {
67 next_token = last_letter_token = first_letter_token;
71 if (str.at(first_letter_token) == _quoteMarker_) {
72 last_letter_token = _correspondingQuoteMarker_(str, first_letter_token);
74 if (last_letter_token == std::string::npos)
75 GUM_SYNTAX_ERROR(
"String quote missing", _filename_, (Size)nbLine(), first_letter_token);
77 next_token = str.find_first_of(_delimiter_, last_letter_token + 1);
78 std::size_t next_char = str.find_first_not_of(_spaces_, last_letter_token + 1);
80 if (next_char < next_token) {
81 GUM_SYNTAX_ERROR(
"Delimiter missing", _filename_, (Size)nbLine(), next_char);
85 first_letter_token += 1;
86 last_letter_token -= 1;
88 next_token = str.find_first_of(_delimiter_, first_letter_token);
90 if (next_token == std::string::npos) {
91 last_letter_token = str.find_last_not_of(_spaces_, next_token);
92 }
else if (next_token == first_letter_token) {
93 last_letter_token = first_letter_token;
95 last_letter_token = str.find_last_not_of(_delimiterPlusSpaces_, next_token - 1);
101 void CSVParser::_tokenize_(
const std::string& s) {
103 std::size_t commentMarker = s.find_first_of(_commentMarker_, 0);
104 std::size_t quoteMarker = s.find_first_of(_quoteMarker_, 0);
105 std::size_t quoteMarkerEnd;
107 while (quoteMarker < commentMarker) {
108 quoteMarkerEnd = _correspondingQuoteMarker_(s, quoteMarker);
110 if (quoteMarkerEnd == std::string::npos)
111 GUM_SYNTAX_ERROR(
"String quote missing", _filename_, (Size)nbLine(), quoteMarker);
113 while (commentMarker < quoteMarkerEnd) {
114 commentMarker = s.find_first_of(_commentMarker_, commentMarker + 1);
117 quoteMarker = s.find_first_of(_quoteMarker_, quoteMarkerEnd + 1);
120 std::string str = s.substr(0, commentMarker);
122 std::size_t counter = 0, first_letter_token, next_token, last_letter_token;
124 _getNextTriplet_(str, first_letter_token, next_token, last_letter_token, 0);
126 while ((std::string::npos != first_letter_token)
127 && (std::string::npos != last_letter_token)) {
128 if (_data_.size() <= counter) _data_.resize(counter + 1);
130 if (first_letter_token == next_token) {
131 _data_[counter] =
"";
132 }
else if (last_letter_token >= first_letter_token) {
133 const std::size_t fieldlength = last_letter_token + 1 - first_letter_token;
134 _data_[counter].resize(fieldlength);
135 _data_[counter].assign(str, first_letter_token, fieldlength);
137 _data_[counter] =
"";
142 if (next_token == std::string::npos)
break;
144 _getNextTriplet_(str, first_letter_token, next_token, last_letter_token, next_token + 1);
148 if ((first_letter_token == std::string::npos) && (last_letter_token == first_letter_token)
149 && (next_token == first_letter_token)) {
151 _data_.resize(counter);
152 _data_[counter - 1] =
"";
154 _data_.resize(counter);
162 void CSVParser::useNewStream(std::istream& instream,
163 const std::string& delimiter,
164 const char commentmarker,
165 const char quoteMarker) {
167 _delimiter_ = delimiter;
169 _delimiterPlusSpaces_ = _delimiter_ + _spaces_;
170 _nbLine_ = std::size_t(0);
171 _commentMarker_ = commentmarker;
172 _quoteMarker_ = quoteMarker;
174 _instream_ = &instream;
Class for fast parsing of CSV files (never holds more than one line in application memory).
CSVParser(std::istream &in, const std::string &filename, const std::string &delimiter=",", const char commentmarker='#', const char quoteMarker='"')
default constructor
#define GUM_SYNTAX_ERROR(msg, filename, line, column)
include the inlined functions if necessary
gum is the global namespace for all aGrUM entities