xtensor
 
Loading...
Searching...
No Matches
xcsv.hpp
1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
3 * Copyright (c) QuantStack *
4 * *
5 * Distributed under the terms of the BSD 3-Clause License. *
6 * *
7 * The full license is in the file LICENSE, distributed with this software. *
8 ****************************************************************************/
9
10#ifndef XTENSOR_CSV_HPP
11#define XTENSOR_CSV_HPP
12
13#include <istream>
14#include <iterator>
15#include <sstream>
16#include <string>
17#include <utility>
18
19#include "../containers/xtensor.hpp"
20#include "../core/xtensor_config.hpp"
21
22namespace xt
23{
24
25 /**************************************
26 * load_csv and dump_csv declarations *
27 **************************************/
28
29 template <class T, class A = std::allocator<T>>
31
/**
 * Forward declaration of load_csv; the definition appears later in this header.
 *
 * Reads delimiter-separated text from a stream, line by line, and returns the
 * parsed cells as an xcsv_tensor<T, A>.
 *
 * @tparam T element type of the resulting tensor
 * @tparam A allocator type (defaults to std::allocator<T>)
 * @param stream input stream the lines are read from
 * @param delimiter character separating the cells of a row (default ',')
 * @param skip_rows number of leading lines that are discarded (default 0)
 * @param max_rows upper bound on the number of rows loaded; the limit is only
 *                 enforced for positive values (default -1)
 * @param comments lines starting with this text are skipped (default "#")
 */
32 template <class T, class A = std::allocator<T>>
33 xcsv_tensor<T, A> load_csv(
34 std::istream& stream,
35 const char delimiter = ',',
36 const std::size_t skip_rows = 0,
37 const std::ptrdiff_t max_rows = -1,
38 const std::string comments = "#"
39 );
40
/**
 * Forward declaration of the two-argument dump_csv; the definition appears
 * later in this header.
 *
 * Writes a 1-D or 2-D expression to a stream as comma-separated text, one
 * line per row. Expressions of any other dimension are rejected with a
 * std::runtime_error raised through XTENSOR_THROW.
 *
 * @param stream output stream receiving the text
 * @param e expression to serialize
 */
41 template <class E>
42 void dump_csv(std::ostream& stream, const xexpression<E>& e);
43
44 /*****************************************
45 * load_csv and dump_csv implementations *
46 *****************************************/
47
namespace detail
{
    /**
     * Converts the text of a single CSV cell into a value of type T.
     *
     * This primary template uses formatted stream extraction and serves every
     * T that has no explicit specialization below.
     *
     * @param cell raw text of the cell
     * @return the extracted value; if the extraction does not write to the
     *         result (for example because the cell is empty or blank), the
     *         value-initialized T is returned
     */
    template <class T>
    inline T lexical_cast(const std::string& cell)
    {
        // Value-initialize: for an empty or blank cell the extraction below
        // never touches res, and returning a default-initialized arithmetic T
        // would read an indeterminate value.
        T res{};
        std::istringstream iss(cell);
        iss >> res;
        return res;
    }

    /**
     * std::string cells: strips leading and trailing space characters
     * (only ' ', no other whitespace). An empty cell, or a cell consisting
     * solely of spaces, is returned unchanged.
     */
    template <>
    inline std::string lexical_cast<std::string>(const std::string& cell)
    {
        const std::string::size_type first = cell.find_first_not_of(' ');
        if (first == std::string::npos)
        {
            return cell;
        }

        // At least one non-space character exists, so this search cannot fail.
        const std::string::size_type last = cell.find_last_not_of(' ');
        return cell.substr(first, last - first + 1);
    }

    // The arithmetic specializations below delegate to the std::sto* family,
    // which throws std::invalid_argument when no conversion can be performed
    // and std::out_of_range when the value does not fit the parsed type.

    /** float cells, parsed with std::stof. */
    template <>
    inline float lexical_cast<float>(const std::string& cell)
    {
        return std::stof(cell);
    }

    /** double cells, parsed with std::stod. */
    template <>
    inline double lexical_cast<double>(const std::string& cell)
    {
        return std::stod(cell);
    }

    /** long double cells, parsed with std::stold. */
    template <>
    inline long double lexical_cast<long double>(const std::string& cell)
    {
        return std::stold(cell);
    }

    /** int cells, parsed with std::stoi. */
    template <>
    inline int lexical_cast<int>(const std::string& cell)
    {
        return std::stoi(cell);
    }

    /**
     * signed char cells, parsed as a number (not as a character) with std::stoi.
     * NOTE(review): the cast narrows without a range check, so values outside
     * the signed char range are not reported.
     */
    template <>
    inline signed char lexical_cast<signed char>(const std::string& cell)
    {
        return static_cast<signed char>(std::stoi(cell));
    }

    /**
     * unsigned char cells, parsed as a number (not as a character) with std::stoul.
     * NOTE(review): the cast narrows without a range check, so values above
     * the unsigned char maximum are silently reduced.
     */
    template <>
    inline unsigned char lexical_cast<unsigned char>(const std::string& cell)
    {
        return static_cast<unsigned char>(std::stoul(cell));
    }

    /** long cells, parsed with std::stol. */
    template <>
    inline long lexical_cast<long>(const std::string& cell)
    {
        return std::stol(cell);
    }

    /** long long cells, parsed with std::stoll. */
    template <>
    inline long long lexical_cast<long long>(const std::string& cell)
    {
        return std::stoll(cell);
    }

    /**
     * unsigned int cells, parsed with std::stoul.
     * NOTE(review): where unsigned long is wider than unsigned int the cast
     * narrows without a range check.
     */
    template <>
    inline unsigned int lexical_cast<unsigned int>(const std::string& cell)
    {
        return static_cast<unsigned int>(std::stoul(cell));
    }

    /** unsigned long cells, parsed with std::stoul. */
    template <>
    inline unsigned long lexical_cast<unsigned long>(const std::string& cell)
    {
        return std::stoul(cell);
    }

    /** unsigned long long cells, parsed with std::stoull. */
    template <>
    inline unsigned long long lexical_cast<unsigned long long>(const std::string& cell)
    {
        return std::stoull(cell);
    }

    /**
     * Splits one row into cells and writes the converted values to an output iterator.
     *
     * @tparam ST integer type used to count the cells
     * @tparam T element type each cell is converted to via lexical_cast<T>
     * @tparam OI output iterator type
     * @param row_stream stream holding the text of a single row
     * @param output destination of the converted cells
     * @param cell scratch string used as the per-cell read buffer
     * @param delimiter character separating the cells
     * @return number of cells written
     *
     * Cells are obtained with std::getline, so a delimiter that is the very
     * last character of the row does not produce a trailing empty cell.
     */
    template <class ST, class T, class OI>
    ST load_csv_row(std::istream& row_stream, OI output, std::string cell, const char delimiter = ',')
    {
        ST length = 0;
        while (std::getline(row_stream, cell, delimiter))
        {
            *output++ = lexical_cast<T>(cell);
            ++length;
        }
        return length;
    }
}
150
162 template <class T, class A>
163 xcsv_tensor<T, A> load_csv(
164 std::istream& stream,
165 const char delimiter,
166 const std::size_t skip_rows,
167 const std::ptrdiff_t max_rows,
168 const std::string comments
169 )
170 {
171 using tensor_type = xcsv_tensor<T, A>;
172 using storage_type = typename tensor_type::storage_type;
173 using size_type = typename tensor_type::size_type;
174 using inner_shape_type = typename tensor_type::inner_shape_type;
175 using inner_strides_type = typename tensor_type::inner_strides_type;
176 using output_iterator = std::back_insert_iterator<storage_type>;
177
178 storage_type data;
179 size_type nbrow = 0, nbcol = 0, nhead = 0;
180 {
181 output_iterator output(data);
182 std::string row, cell;
183 while (std::getline(stream, row))
184 {
185 if (nhead < skip_rows)
186 {
187 ++nhead;
188 continue;
189 }
190 if (std::equal(comments.begin(), comments.end(), row.begin()))
191 {
192 continue;
193 }
194 if (0 < max_rows && max_rows <= static_cast<const long long>(nbrow))
195 {
196 break;
197 }
198 std::stringstream row_stream(row);
199 nbcol = detail::load_csv_row<size_type, T, output_iterator>(row_stream, output, cell, delimiter);
200 ++nbrow;
201 }
202 }
203 inner_shape_type shape = {nbrow, nbcol};
204 inner_strides_type strides; // no need for initializer list for stack-allocated strides_type
205 size_type data_size = compute_strides(shape, layout_type::row_major, strides);
206 // Sanity check for data size.
207 if (data.size() != data_size)
208 {
209 XTENSOR_THROW(std::runtime_error, "Inconsistent row lengths in CSV");
210 }
211 return tensor_type(std::move(data), std::move(shape), std::move(strides));
212 }
213
220 template <class E>
221 void dump_csv(std::ostream& stream, const xexpression<E>& e)
222 {
223 using size_type = typename E::size_type;
224 const E& ex = e.derived_cast();
225 if (ex.dimension() == 1)
226 {
227 const size_type n = ex.shape()[0];
228 for (size_type i = 0; i != n; ++i)
229 {
230 stream << ex(i);
231 if (i != n - 1)
232 {
233 stream << ',';
234 }
235 }
236 stream << std::endl;
237 }
238 else if (ex.dimension() == 2)
239 {
240 const size_type nbrows = ex.shape()[0];
241 const size_type nbcols = ex.shape()[1];
242 for (size_type r = 0; r != nbrows; ++r)
243 {
244 for (size_type c = 0; c != nbcols; ++c)
245 {
246 stream << ex(r, c);
247 if (c != nbcols - 1)
248 {
249 stream << ',';
250 }
251 }
252 stream << std::endl;
253 }
254 }
255 else
256 {
257 XTENSOR_THROW(std::runtime_error, "Only 1-D and 2-D expressions can be serialized to CSV");
258 }
259 }
260
/**
 * Bundle of CSV options consumed by load_file and by the three-argument
 * dump_csv / dump_file.
 */
struct xcsv_config
{
    // Character separating the cells of a row.
    char delimiter = ',';
    // Forwarded to load_csv as skip_rows.
    std::size_t skip_rows = 0;
    // Forwarded to load_csv as max_rows.
    std::ptrdiff_t max_rows = -1;
    // Forwarded to load_csv as comments.
    std::string comments = "#";

    // Kept user-provided so the type remains a non-aggregate; the defaults
    // come from the member initializers above.
    xcsv_config()
    {
    }
};
276
277 template <class E>
278 void dump_csv(std::ostream& stream, const xexpression<E>& e, const xcsv_config& config)
279 {
280 using size_type = typename E::size_type;
281 const E& ex = e.derived_cast();
282 if (ex.dimension() == 1)
283 {
284 const size_type n = ex.shape()[0];
285 for (size_type i = 0; i != n; ++i)
286 {
287 stream << ex(i);
288 if (i != n - 1)
289 {
290 stream << config.delimiter;
291 }
292 }
293 stream << std::endl;
294 }
295 else if (ex.dimension() == 2)
296 {
297 const size_type nbrows = ex.shape()[0];
298 const size_type nbcols = ex.shape()[1];
299 for (size_type r = 0; r != nbrows; ++r)
300 {
301 for (size_type c = 0; c != nbcols; ++c)
302 {
303 stream << ex(r, c);
304 if (c != nbcols - 1)
305 {
306 stream << config.delimiter;
307 }
308 }
309 stream << std::endl;
310 }
311 }
312 else
313 {
314 XTENSOR_THROW(std::runtime_error, "Only 1-D and 2-D expressions can be serialized to CSV");
315 }
316 }
317
318 template <class E>
319 void load_file(std::istream& stream, xexpression<E>& e, const xcsv_config& config)
320 {
321 e.derived_cast() = load_csv<typename E::value_type>(
322 stream,
323 config.delimiter,
324 config.skip_rows,
325 config.max_rows,
326 config.comments
327 );
328 }
329
330 template <class E>
331 void dump_file(std::ostream& stream, const xexpression<E>& e, const xcsv_config& config)
332 {
333 dump_csv(stream, e, config);
334 }
335}
336
337#endif
Base class for xexpressions.
derived_type & derived_cast() &noexcept
Returns a reference to the actual derived type of the xexpression.
Dense multidimensional container with tensor semantic and fixed dimension.
Definition xtensor.hpp:86
std::size_t compute_strides(const shape_type &shape, layout_type l, strides_type &strides)
Compute the strides given the shape and the layout of an array.
Definition xstrides.hpp:570
auto strides(const E &e, stride_type type=stride_type::normal) noexcept
Get strides of an object.
Definition xstrides.hpp:250
standard mathematical functions for xexpressions
xcsv_tensor< T, A > load_csv(std::istream &stream, const char delimiter=',', const std::size_t skip_rows=0, const std::ptrdiff_t max_rows=-1, const std::string comments="#")
Load tensor from CSV.
Definition xcsv.hpp:163
auto row(E &&e, std::ptrdiff_t index)
Constructs and returns a row (sliced view) on the specified expression.
Definition xview.hpp:1912
void dump_csv(std::ostream &stream, const xexpression< E > &e)
Dump tensor to CSV.
Definition xcsv.hpp:221