xtensor
 
Loading...
Searching...
No Matches
xcsv.hpp
1/***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
3 * Copyright (c) QuantStack *
4 * *
5 * Distributed under the terms of the BSD 3-Clause License. *
6 * *
7 * The full license is in the file LICENSE, distributed with this software. *
8 ****************************************************************************/
9
10#ifndef XTENSOR_CSV_HPP
11#define XTENSOR_CSV_HPP
12
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "../containers/xtensor.hpp"
#include "../core/xtensor_config.hpp"
21
22namespace xt
23{
24
25 /**************************************
26 * load_csv and dump_csv declarations *
27 **************************************/
28
29 template <class T, class A = std::allocator<T>>
31
32 template <class T, class A = std::allocator<T>>
33 xcsv_tensor<T, A> load_csv(
34 std::istream& stream,
35 const char delimiter = ',',
36 const std::size_t skip_rows = 0,
37 const std::ptrdiff_t max_rows = -1,
38 const std::string comments = "#"
39 );
40
41 template <class E>
42 void dump_csv(std::ostream& stream, const xexpression<E>& e);
43
44 /*****************************************
45 * load_csv and dump_csv implementations *
46 *****************************************/
47
namespace detail
{
    /**
     * Convert the text of one CSV cell to a value of type T.
     *
     * Generic fallback used for every type without a dedicated
     * specialization below: the cell is parsed with operator>> on a
     * std::istringstream.
     *
     * @param cell the raw text of the cell
     * @return the parsed value, or a value-initialized T when extraction fails
     */
    template <class T>
    inline T lexical_cast(const std::string& cell)
    {
        // Value-initialize: a failed extraction may leave the target untouched,
        // and returning an indeterminate value would be undefined behaviour.
        T res{};
        std::istringstream iss(cell);
        iss >> res;
        return res;
    }

    /**
     * String cells: strip leading and trailing spaces (' ' only).
     *
     * A cell that is empty or made only of spaces is returned unchanged.
     */
    template <>
    inline std::string lexical_cast<std::string>(const std::string& cell)
    {
        const std::size_t first = cell.find_first_not_of(' ');
        if (first == std::string::npos)
        {
            return cell;
        }

        // A non-space character exists, so `last` is valid and last >= first.
        const std::size_t last = cell.find_last_not_of(' ');
        // substr takes (position, count), not (begin, end).
        return cell.substr(first, last - first + 1);
    }

    /** float cells are parsed with std::stof. */
    template <>
    inline float lexical_cast<float>(const std::string& cell)
    {
        return std::stof(cell);
    }

    /** double cells are parsed with std::stod. */
    template <>
    inline double lexical_cast<double>(const std::string& cell)
    {
        return std::stod(cell);
    }

    /** long double cells are parsed with std::stold. */
    template <>
    inline long double lexical_cast<long double>(const std::string& cell)
    {
        return std::stold(cell);
    }

    /** int cells are parsed with std::stoi. */
    template <>
    inline int lexical_cast<int>(const std::string& cell)
    {
        return std::stoi(cell);
    }

    /** long cells are parsed with std::stol. */
    template <>
    inline long lexical_cast<long>(const std::string& cell)
    {
        return std::stol(cell);
    }

    /** long long cells are parsed with std::stoll. */
    template <>
    inline long long lexical_cast<long long>(const std::string& cell)
    {
        return std::stoll(cell);
    }

    /**
     * unsigned int cells are parsed with std::stoul and narrowed.
     *
     * NOTE(review): where unsigned long is wider than unsigned int, a value
     * above UINT_MAX is silently truncated by the cast.
     */
    template <>
    inline unsigned int lexical_cast<unsigned int>(const std::string& cell)
    {
        return static_cast<unsigned int>(std::stoul(cell));
    }

    /** unsigned long cells are parsed with std::stoul. */
    template <>
    inline unsigned long lexical_cast<unsigned long>(const std::string& cell)
    {
        return std::stoul(cell);
    }

    /** unsigned long long cells are parsed with std::stoull. */
    template <>
    inline unsigned long long lexical_cast<unsigned long long>(const std::string& cell)
    {
        return std::stoull(cell);
    }

    /**
     * Parse one CSV row and append its cells to an output iterator.
     *
     * @tparam ST integral type used to count the cells
     * @tparam T value type each cell is converted to
     * @tparam OI output iterator type
     * @param row_stream stream holding the text of a single row
     * @param output iterator receiving the converted cells
     * @param cell scratch buffer used to hold the text of the current cell
     * @param delimiter the character separating two cells
     * @return the number of cells written to output
     */
    template <class ST, class T, class OI>
    ST load_csv_row(std::istream& row_stream, OI output, std::string cell, const char delimiter = ',')
    {
        ST length = 0;
        while (std::getline(row_stream, cell, delimiter))
        {
            *output++ = lexical_cast<T>(cell);
            ++length;
        }
        return length;
    }
}
138
150 template <class T, class A>
151 xcsv_tensor<T, A> load_csv(
152 std::istream& stream,
153 const char delimiter,
154 const std::size_t skip_rows,
155 const std::ptrdiff_t max_rows,
156 const std::string comments
157 )
158 {
159 using tensor_type = xcsv_tensor<T, A>;
160 using storage_type = typename tensor_type::storage_type;
161 using size_type = typename tensor_type::size_type;
162 using inner_shape_type = typename tensor_type::inner_shape_type;
163 using inner_strides_type = typename tensor_type::inner_strides_type;
164 using output_iterator = std::back_insert_iterator<storage_type>;
165
166 storage_type data;
167 size_type nbrow = 0, nbcol = 0, nhead = 0;
168 {
169 output_iterator output(data);
170 std::string row, cell;
171 while (std::getline(stream, row))
172 {
173 if (nhead < skip_rows)
174 {
175 ++nhead;
176 continue;
177 }
178 if (std::equal(comments.begin(), comments.end(), row.begin()))
179 {
180 continue;
181 }
182 if (0 < max_rows && max_rows <= static_cast<const long long>(nbrow))
183 {
184 break;
185 }
186 std::stringstream row_stream(row);
187 nbcol = detail::load_csv_row<size_type, T, output_iterator>(row_stream, output, cell, delimiter);
188 ++nbrow;
189 }
190 }
191 inner_shape_type shape = {nbrow, nbcol};
192 inner_strides_type strides; // no need for initializer list for stack-allocated strides_type
193 size_type data_size = compute_strides(shape, layout_type::row_major, strides);
194 // Sanity check for data size.
195 if (data.size() != data_size)
196 {
197 XTENSOR_THROW(std::runtime_error, "Inconsistent row lengths in CSV");
198 }
199 return tensor_type(std::move(data), std::move(shape), std::move(strides));
200 }
201
208 template <class E>
209 void dump_csv(std::ostream& stream, const xexpression<E>& e)
210 {
211 using size_type = typename E::size_type;
212 const E& ex = e.derived_cast();
213 if (ex.dimension() == 1)
214 {
215 const size_type n = ex.shape()[0];
216 for (size_type i = 0; i != n; ++i)
217 {
218 stream << ex(i);
219 if (i != n - 1)
220 {
221 stream << ',';
222 }
223 }
224 stream << std::endl;
225 }
226 else if (ex.dimension() == 2)
227 {
228 const size_type nbrows = ex.shape()[0];
229 const size_type nbcols = ex.shape()[1];
230 for (size_type r = 0; r != nbrows; ++r)
231 {
232 for (size_type c = 0; c != nbcols; ++c)
233 {
234 stream << ex(r, c);
235 if (c != nbcols - 1)
236 {
237 stream << ',';
238 }
239 }
240 stream << std::endl;
241 }
242 }
243 else
244 {
245 XTENSOR_THROW(std::runtime_error, "Only 1-D and 2-D expressions can be serialized to CSV");
246 }
247 }
248
/**
 * Options controlling how CSV data is read through load_file.
 */
struct xcsv_config
{
    // Character separating two cells of a row.
    char delimiter = ',';
    // Number of leading lines discarded before parsing starts.
    std::size_t skip_rows = 0;
    // Maximum number of rows to load; the default of -1 means no limit.
    std::ptrdiff_t max_rows = -1;
    // Prefix that marks a line as a comment.
    std::string comments = "#";

    // Kept user-provided so the type remains a non-aggregate, as before.
    xcsv_config()
    {
    }
};
264
265 template <class E>
266 void load_file(std::istream& stream, xexpression<E>& e, const xcsv_config& config)
267 {
269 stream,
270 config.delimiter,
271 config.skip_rows,
272 config.max_rows,
273 config.comments
274 );
275 }
276
277 template <class E>
278 void dump_file(std::ostream& stream, const xexpression<E>& e, const xcsv_config&)
279 {
280 dump_csv(stream, e);
281 }
282}
283
284#endif
Base class for xexpressions.
derived_type & derived_cast() &noexcept
Returns a reference to the actual derived type of the xexpression.
Dense multidimensional container with tensor semantic and fixed dimension.
Definition xtensor.hpp:86
std::size_t compute_strides(const shape_type &shape, layout_type l, strides_type &strides)
Compute the strides given the shape and the layout of an array.
Definition xstrides.hpp:566
auto strides(const E &e, stride_type type=stride_type::normal) noexcept
Get strides of an object.
Definition xstrides.hpp:248
standard mathematical functions for xexpressions
xcsv_tensor< T, A > load_csv(std::istream &stream, const char delimiter=',', const std::size_t skip_rows=0, const std::ptrdiff_t max_rows=-1, const std::string comments="#")
Load tensor from CSV.
Definition xcsv.hpp:151
auto row(E &&e, std::ptrdiff_t index)
Constructs and returns a row (sliced view) on the specified expression.
Definition xview.hpp:1911
void dump_csv(std::ostream &stream, const xexpression< E > &e)
Dump tensor to CSV.
Definition xcsv.hpp:209