Coverage for src/thunderlab/tabledata.py: 85%
2113 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-18 22:10 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-18 22:10 +0000
1"""Tables with hierarchical headers and units
3## Classes
5- `class TableData`: tables with hierarchical header including units
6 and column-specific formats. Kind of similar to a pandas data frame,
7 but without index column and with intuitive numpy-style indexing and
8 nicely formatted output to csv, markdown, html, and latex.
11## Helper functions
13- `write()`: shortcut for constructing and writing a TableData.
14- `latex_unit()`: translate unit string into SIunit LaTeX code.
15- `index2aa()`: convert an integer into an alphabetical representation.
16- `aa2index()`: convert an alphabetical representation to an index.
19## Configuration
21- `add_write_table_config()`: add parameter specifying how to write a table to a file as a new section to a configuration.
22- `write_table_args()`: translates a configuration to the respective parameter names for writing a table to a file.
24"""
26import sys
27import os
28import re
29import math as m
30import numpy as np
31from pathlib import Path
32from itertools import product
33from io import StringIO
34try:
35 import pandas as pd
36 has_pandas = True
37except ImportError:
38 has_pandas = False
# Special methods of TableData that are explicitly flagged in `__pdoc__`.
__pdoc__ = {
    f'TableData.{method}': True
    for method in ('__contains__', '__len__', '__iter__', '__next__',
                   '__setupkey__', '__call__', '__getitem__',
                   '__setitem__', '__delitem__', '__str__')
}


default_missing_str = '-'
"""Default string indicating nan data elements when outputting data."""

default_missing_inputs = ['na', 'NA', 'nan', 'NAN', '-']
"""Default strings that are translated to nan when loading table data."""
61class TableData(object):
62 """Table with numpy-style indexing and hierarchical header including units and formats.
64 Parameters
65 ----------
66 data: str, stream, ndarray
67 - a filename: load table from file with name `data`.
68 - a stream/file handle: load table from that stream.
69 - 1-D or 2-D ndarray of data: the data of the table.
70 Requires also a specified `header`.
71 - pandas data frame.
72 header: TableData, dict, list of str, list of list of str
73 Header labels for each column.
74 See `set_labels()` for details.
75 units: None, TableData, dict, list of str, str
76 Optional unit strings for each column.
77 See `set_units()` for details.
78 formats: None, TableData, dict, list of str, str
79 Optional format strings for each column.
80 See `set_formats()` for details.
81 descriptions: None, TableData, dict, list of str, str
82 Optional description strings for each column.
83 See `set_descriptions()` for details.
84 missing: list of str
85 Missing data are indicated by one of these strings.
86 sep: str or None
87 If `data` is a file, force `sep` as column separator.
88 stop: str or None
89 If a line matches `stop`, stop reading the file. `stop`
90 can be an empty string to stop reading at the first empty
91 line.
93 Manipulate table header
94 -----------------------
96 Each column of the table has a label (the name of the column), a
97 unit, and a format specifier. Sections group columns into a hierarchy.
99 - `__init__()`: initialize a TableData from data or a file.
100 - `append()`: append column to the table.
101 - `insert()`: insert a table column at a given position.
102 - `remove()`: remove columns from the table.
103 - `section()`: the section name of a specified column.
104 - `set_section()`: set a section name.
105 - `append_section()`: add sections to the table header.
106 - `insert_section()`: insert a section at a given position of the table header.
107 - `label()`: the name of a column.
108 - `set_label()`: set the name of a column.
109 - `set_labels()`: set the labels of some columns.
110 - `unit()`: the unit of a column.
111 - `set_unit()`: set the unit of a column.
112 - `set_units()`: set the units of some columns.
113 - `format()`: the format string of the column.
114 - `set_format()`: set the format string of a column.
115 - `set_formats()`: set the format strings of some columns.
116 - `description()`: the description of a column.
117 - `set_description()`: set the description of a column.
118 - `set_descriptions()`: set the descriptions of some columns.
120 For example:
121 ```
122 tf = TableData('data.csv')
123 ```
124 loads a table directly from a file. See `load()` for details.
125 ```
126 tf = TableData(np.random.randn(4,3), header=['aaa', 'bbb', 'ccc'], units=['m', 's', 'g'], formats='%.2f')
127 ```
128 results in
129 ``` plain
130 aaa bbb ccc
131 m s g
132 1.45 0.01 0.16
133 -0.74 -0.58 -1.34
134 -2.06 0.08 1.47
135 -0.43 0.60 1.38
136 ```
138 A more elaborate way to construct a table is:
139 ```
140 df = TableData()
141 # first column with section names and 3 data values:
142 df.append(["data", "specimen", "size"], "m", "%6.2f",
143 [2.34, 56.7, 8.9])
144 # next columns with single data values:
145 df.append("full weight", "kg", "%.0f", 122.8)
146 df.append_section("all measures")
147 df.append("speed", "m/s", "%.3g", 98.7)
148 df.append("median jitter", "mm", "%.1f", 23)
149 df.append("size", "g", "%.2e", 1.234)
150 # add a missing value to the second column:
151 df.add(np.nan, 1)
152 # fill up the remaining columns of the row:
153 df.add((0.543, 45, 1.235e2))
154 # add data to the next row starting at the second column:
155 df.add([43.21, 6789.1, 3405, 1.235e-4], 1) # next row
156 ```
157 results in
158 ``` plain
159 data
160 specimen all measures
161 size full weight speed median jitter size
162 m kg m/s mm g
163 2.34 123 98.7 23.0 1.23e+00
164 56.70 - 0.543 45.0 1.24e+02
165 8.90 43 6.79e+03 3405.0 1.23e-04
166 ```
168 Table columns
169 -------------
171 Columns can be specified by an index or by the name of a column. In
172 table headers with sections the column can be specified by the
173 section names and the column name separated by '>'.
175 - `index()`: the column index of a column specifier.
176 - `__contains__()`: check for existence of a column.
177 - `find_col()`: find the start and end index of a column specification.
178 - `column_spec()`: full specification of a column with all its section names.
179 - `column_head()`: the name, unit, and format of a column.
180 - `table_header()`: the header of the table without content.
182 For example:
183 ```
184 df.index('all measures>size') # returns 4
185 'speed' in df # is True
186 ```
188 Iterating over columns
189 ----------------------
191 A table behaves like an ordered dictionary with column names as
192 keys and the data of each column as values.
193 Iterating over a table goes over columns.
194 Note, however, that the len() of a table is the number of rows,
195 not the number of columns!
197 - `keys()`: list of unique column keys for all available columns.
198 - `values()`: list of column data corresponding to keys().
199 - `items()`: generator over column names and the corresponding data.
200 - `__iter__()`: initialize iteration over data columns.
201 - `__next__()`: return unique column key of next column.
202 - `data`: the table data as a list over columns each containing a list of data elements.
204 For example:
205 ```
206 print('column specifications:')
207 for c in range(df.columns()):
208 print(df.column_spec(c))
209 print('iterating over column specifications:')
210 for c, k in enumerate(df):
211 print(f'{c}: {k}')
212 print('keys():')
213 for c, k in enumerate(df.keys()):
214 print(f'{c}: {k}')
215 print('values():')
216 for c, v in enumerate(df.values()):
217 print(v)
218 print('iterating over the table:')
219 for v in df:
220 print(v)
221 ```
222 results in
223 ``` plain
224 column specifications:
225 data>specimen>size
226 data>specimen>full weight
227 data>all measures>speed
228 data>all measures>median jitter
229 data>all measures>size
230 iterating over column specifications:
231 0: data>specimen>size
232 1: data>specimen>full weight
233 2: data>all measures>speed
234 3: data>all measures>median jitter
235 4: data>all measures>size
236 keys():
237 0: data>specimen>size
238 1: data>specimen>full weight
239 2: data>all measures>speed
240 3: data>all measures>median jitter
241 4: data>all measures>size
242 values():
243 [2.34, 56.7, 8.9]
244 [122.8, nan, 43.21]
245 [98.7, 0.543, 6789.1]
246 [23, 45, 3405]
247 [1.234, 123.5, 0.0001235]
248 iterating over the table:
249 [2.34, 56.7, 8.9]
250 [122.8, nan, 43.21]
251 [98.7, 0.543, 6789.1]
252 [23, 45, 3405]
253 [1.234, 123.5, 0.0001235]
254 ```
256 Accessing data
257 --------------
259 In contrast to the iterator functions the [] operator treats the
260 table as a 2D-array where the first index indicates the row and
261 the second index the column.
263 Rows are indexed by integer row numbers or boolean arrays.
264 Columns are also indexed by integer column numbers, but in
265 addition can be indexed by their names.
267 A single index selects rows, unless it is specified by
268 strings. Since strings can only specify column names, this selects
269 whole columns.
271 Like a numpy array the table can be sliced, and logical indexing can
272 be used to select specific parts of the table.
274 As for any function, columns can be specified as indices or strings.
276 - `rows()`: the number of rows.
277 - `columns()`: the number of columns.
278 - `__len__()`: the number of rows.
279 - `ndim`: always 2.
280 - `size`: number of elements (sum of length of all data columns), can be smaller than `columns()*rows()`.
281 - `shape`: number of rows and columns.
282 - `row()`: a single row of the table as TableData.
283 - `row_list()`: a single row of the table as list.
284 - `row_data()`: a generator for iterating over rows of the table.
285 - `row_dict()`: a single row of the table as dictionary.
286 - `col()`: a single column of the table as TableData.
287 - `__call__()`: a single column of the table as ndarray.
288 - `__getitem__()`: data elements specified by slice.
289 - `__setitem__()`: assign values to data elements specified by slice.
290 - `__delitem__()`: delete data elements or whole columns or rows.
291 - `array()`: the table data as a ndarray.
292 - `data_frame()`: the table data as a pandas DataFrame.
293 - `dicts()`: the table as a list of dictionaries.
294 - `dict()`: the table as a dictionary.
295 - `add()`: add data elements row-wise.
296 - `append_data_column()`: append data elements to a column.
297 - `set_column()`: set the column where to add data.
298 - `fill_data()`: fill up all columns with missing data.
299 - `clear_data()`: clear content of the table but keep header.
300 - `clear()`: clear the table of any content and header information.
301 - `key_value()`: a data element returned as a key-value pair.
302 - `aggregate()`: apply functions to columns.
303 - `groupby()`: iterate through unique values of columns.
305 - `sort()`: sort the table rows in place.
306 - `statistics()`: descriptive statistics of each column.
308 For example:
309 ```
310 # single column:
311 df('size') # data of 'size' column as ndarray
312 df['size'] # data of 'size' column as ndarray
313 df[:, 'size'] # data of 'size' column as ndarray
314 df.col('size') # table with the single column 'size'
316 # single row:
317 df[2, :] # table with data of only the third row
318 df.row(2) # table with data of only the third row
320 # slices:
321 df[2:5,['size','jitter']] # sub-table
322 df[2:5,['size','jitter']].array() # ndarray with data only
324 # logical indexing:
325 df[df['speed'] > 100.0, 'size'] = 0.0 # set size to 0 if speed is > 100
327 # delete:
328 del df[3:6, 'weight'] # delete rows 3-6 from column 'weight'
329 del df[3:5, :] # delete rows 3-5 completely
330 del df[:, 'speed'] # remove column 'speed' from table
331 del df['speed'] # remove column 'speed' from table
332 df.remove('weight') # remove column 'weight' from table
334 # sort and statistics:
335 df.sort(['weight', 'jitter'])
336 df.statistics()
337 ```
338 statistics() returns a table with standard descriptive statistics:
339 ``` plain
340 statistics data
341 - specimen all measures
342 - size full weight speed median jitter size
343 - m kg m/s mm g
344 mean 22.65 83 2.3e+03 1157.7 4.16e+01
345 std 24.23 40 3.18e+03 1589.1 5.79e+01
346 min 2.34 43 0.543 23.0 1.23e-04
347 quartile1 5.62 83 49.6 34.0 6.17e-01
348 median 8.90 123 98.7 45.0 1.23e+00
349 quartile3 32.80 - 3.44e+03 1725.0 6.24e+01
350 max 56.70 123 6.79e+03 3405.0 1.24e+02
351 count 3.00 2 3 3.0 3.00e+00
352 ```
354 Write and load tables
355 ---------------------
357 Table data can be written to a variety of text-based formats
358 including comma separated values, latex and html files. Which
359 columns are written can be controlled by the hide() and show()
360 functions. TableData can be loaded from all the written file formats
361 (except html), also directly via the constructor.
363 - `hide()`: hide a column or a range of columns.
364 - `hide_all()`: hide all columns.
365 - `hide_empty_columns()`: hide all columns that do not contain data.
366 - `show()`: show a column or a range of columns.
367 - `write()`: write table to a file or stream.
368 - `write_file_stream()`: write table to file or stream and return appropriate file name.
369 - `__str__()`: write table to a string.
370 - `write_descriptions()`: write column descriptions of the table to a file or stream.
371 - `load()`: load table from file or stream.
372 - `formats`: list of supported file formats for writing.
373 - `descriptions`: dictionary with descriptions of the supported file formats.
374 - `extensions`: dictionary with default filename extensions for each of the file formats.
375 - `ext_formats`: dictionary mapping filename extensions to file formats.
377 See documentation of the `write()` function for examples of the supported file formats.
379 """
formats = ['dat', 'ascii', 'csv', 'rtai', 'md', 'tex', 'html']
"""list of strings: Supported output formats."""

descriptions = dict(
    dat='data text file',
    ascii='ascii-art table',
    csv='comma separated values',
    rtai='rtai-style table',
    md='markdown',
    tex='latex tabular',
    html='html markup',
)
"""dict: Description of output formats corresponding to `formats`."""

extensions = dict(zip(formats,
                      ('dat', 'txt', 'csv', 'dat', 'md', 'tex', 'html')))
"""dict: Default file extensions for the output `formats`. """

# every extension is registered in lower case and in upper case:
ext_formats = {
    ext: fmt
    for lower, fmt in (('dat', 'dat'), ('txt', 'dat'), ('csv', 'csv'),
                       ('md', 'md'), ('tex', 'tex'), ('html', 'html'))
    for ext in (lower, lower.upper())
}
"""dict: Mapping of file extensions to the output formats."""

stdev_labels = ['sd', 'std', 's.d.', 'stdev', 'error']
"""list: column labels recognized as standard deviations."""
def __init__(self, data=None, header=None, units=None, formats=None,
             descriptions=None, missing=default_missing_inputs,
             sep=None, stop=None):
    """Initialize a TableData from data or a file.

    Parameters
    ----------
    data: None, TableData, pandas DataFrame, list, tuple, ndarray, dict, str, or stream
        - TableData: copy header, units, formats, descriptions,
          hidden flags and data of that table.
        - pandas DataFrame: one column per key. A key of the form
          'name/unit' is split at the first '/' into label and unit.
        - sequence of sequences: rows of data that are added to the
          columns created from `header`.
        - sequence of dict or dict: passed on to `_add_dict()`.
        - flat sequence: a single row of data that is added to the
          columns created from `header`.
        - anything else is passed on to `load()`.
    header: TableData, dict, list of str, list of list of str
        Each element is appended as a column. Except for plain
        sequences of data it is also passed on to `set_labels()`.
    units: None, TableData, dict, list of str, str
        Passed on to `set_units()`.
    formats: None, TableData, dict, list of str, str
        Passed on to `set_formats()`.
    descriptions: None, TableData, dict, list of str, str
        Passed on to `set_descriptions()`.
    missing: list of str
        Passed on to `load()`.
    sep: str or None
        Passed on to `load()`.
    stop: str or None
        Passed on to `load()`.
    """
    self.clear()
    if header is not None and len(header) > 0:
        for h in header:
            self.append(h)
    # For every kind of input except plain 1-D/2-D sequences the
    # header properties are applied after the data have been taken over:
    apply_properties = data is not None
    if data is not None:
        if isinstance(data, TableData):
            self.ndim = data.ndim
            self.size = data.size
            self.shape = data.shape
            self.nsecs = data.nsecs
            self.setcol = data.setcol
            self.addcol = data.addcol
            for c in range(data.columns()):
                self.header.append(list(data.header[c]))
                self.units.append(data.units[c])
                self.formats.append(data.formats[c])
                self.descriptions.append(data.descriptions[c])
                self.hidden.append(data.hidden[c])
                self.data.append(list(data.data[c]))
        elif has_pandas and isinstance(data, pd.DataFrame):
            for key in data.keys():
                new_key = key
                new_unit = ''
                # only string keys can carry a unit:
                if isinstance(key, str) and '/' in key:
                    p = key.split('/')
                    new_key = p[0].strip()
                    new_unit = '/'.join(p[1:])
                # The values have to be retrieved before their type
                # can be inspected. A separate name is used for the
                # column format, so that the `formats` argument is
                # still intact when it is applied below.
                values = data[key].tolist()
                col_format = '%g'
                if len(values) > 0 and isinstance(values[0], str):
                    col_format = '%s'
                self.append(new_key, new_unit, col_format, value=values)
        elif isinstance(data, (list, tuple, np.ndarray)) and not \
             (isinstance(data, np.ndarray) and len(data.shape) == 0):
            if len(data) > 0 and \
               isinstance(data[0], (list, tuple, np.ndarray)) and not \
               (isinstance(data[0], np.ndarray) and
                len(data[0].shape) == 0):
                # 2D list, rows first:
                for row in data:
                    for c, val in enumerate(row):
                        self.data[c].append(val)
                apply_properties = False
                # data were added behind the back of append(),
                # so size and shape need to be updated:
                if self.data:
                    self.__recompute_shape()
            elif len(data) > 0 and isinstance(data[0], dict):
                # list of dictionaries:
                for d in data:
                    self._add_dict(d, True)
                self.fill_data()
            else:
                # 1D list:
                for c, val in enumerate(data):
                    self.data[c].append(val)
                apply_properties = False
                if self.data:
                    self.__recompute_shape()
        elif isinstance(data, dict):
            self._add_dict(data, True)
            self.fill_data()
        else:
            self.load(data, missing, sep, stop)
    if apply_properties:
        self.set_labels(header)
        self.set_units(units)
        self.set_formats(formats)
        self.set_descriptions(descriptions)
    # fill in missing units and formats:
    for k in range(len(self.header)):
        if self.units[k] is None:
            self.units[k] = ''
        if self.formats[k] is None:
            self.formats[k] = '%g'
        if self.descriptions[k] is None:
            self.descriptions[k] = ''
def __recompute_shape(self):
    """Update the `size` and `shape` attributes from the current data.

    `size` is the total number of data elements summed over all
    columns, `shape` is the tuple `(rows(), columns())`.
    """
    self.size = sum(len(column) for column in self.data)
    n_rows = self.rows()
    n_cols = self.columns()
    self.shape = (n_rows, n_cols)
def append(self, label, unit=None, formats=None, description=None,
           value=None, fac=None, key=None):
    """Append column to the table.

    Parameters
    ----------
    label: str or list of str
        The name of the column, optionally preceded by section titles.
    unit: str or None
        The unit of the column contents.
    formats: str or None
        The C-style format string used for printing out the column
        content, e.g. '%g', '%.2f', '%s', etc.
        If None, the format is set to '%g'.
    description: str or None
        The description of the column contents.
    value: None, float, int, str, etc. or list thereof, or list of dict
        If not None, data for the column.
        If list of dictionaries, extract from each dictionary in the list
        the value specified by `key`. If `key` is `None` use `label` as
        the key. Dictionaries without that key contribute nan.
    fac: float
        If not None, multiply the data values by this number.
    key: None or key of a dictionary
        If not None and `value` is a list of dictionaries,
        extract from each dictionary in the list the value specified
        by `key` and assign the resulting list as data to the column.

    Returns
    -------
    self: TableData
        This TableData
    """
    sequence_types = (list, tuple, np.ndarray)
    if self.addcol < len(self.data):
        # A column is already pending (sections have been appended
        # before): put label and sections in front of its header.
        col = self.addcol
        if isinstance(label, sequence_types):
            self.header[col] = list(reversed(label)) + self.header[col]
            label = label[-1]
        else:
            self.header[col] = [label] + self.header[col]
        self.units[col] = unit or ''
        self.formats[col] = formats or '%g'
        self.descriptions[col] = description or ''
        self.nsecs = max(self.nsecs, len(self.header[col]) - 1)
    else:
        if not isinstance(label, sequence_types):
            self.header.append([label])
        else:
            # column name first, outermost section last:
            label = list(reversed(label))
            # number of sections larger than what we have so far:
            n = max(0, len(label) - 1 - self.nsecs)
            # find matching sections:
            found = False
            for s in range(1, len(label)):
                for c in reversed(range(len(self.header))):
                    if len(self.header[c]) > s - n:
                        if s - n >= 0 and \
                           self.header[c][s - n] == label[s]:
                            # remove matching sections:
                            label = label[:s]
                            found = True
                        # NOTE(review): the nesting level of this
                        # `break` was reconstructed from a listing
                        # without indentation - confirm that the
                        # search stops at the last column providing
                        # this section level.
                        break
                if found:
                    break
            # add label and unique sections:
            self.header.append(label)
            label = label[0]
            if n > 0:
                # lift previous header label and sections:
                for c in range(len(self.header) - 1):
                    self.header[c] = ['-']*n + self.header[c]
        self.units.append(unit or '')
        self.formats.append(formats or '%g')
        self.descriptions.append(description or '')
        self.hidden.append(False)
        self.data.append([])
        self.nsecs = max(len(h) for h in self.header) - 1
    if not key:
        key = label
    # the data always go into the last column of the table:
    column = self.data[-1]
    if value is not None:
        if not isinstance(value, sequence_types):
            column.append(value)
        else:
            if key and len(value) > 0 and isinstance(value[0], dict):
                value = [d[key] if key in d else float('nan')
                         for d in value]
            column.extend(value)
    if fac:
        for k in range(len(column)):
            column[k] *= fac
    self.addcol = len(self.data)
    self.__recompute_shape()
    return self
def insert(self, column, label, unit=None, formats=None, description=None,
           value=None, fac=None, key=None):
    """Insert a table column at a given position.

    .. WARNING::
       If no `value` is given, the inserted column is an empty list.

    Parameters
    ----------
    column: int or str
        Column before which to insert the new column.
        Column can be specified by index or name,
        see `index()` for details.
    label: str or list of str
        The name of the column, optionally preceded by section titles.
    unit: str or None
        The unit of the column contents.
    formats: str or None
        The C-style format string used for printing out the column
        content, e.g. '%g', '%.2f', '%s', etc.
        If None, the format is set to '%g'.
    description: str or None
        The description of the column contents.
    value: None, float, int, str, etc. or list thereof, or list of dict
        If not None, data for the column.
        If list of dictionaries, extract from each dictionary in the list
        the value specified by `key`. If `key` is `None` use the
        column name given by `label` as the key. Dictionaries
        without that key contribute nan.
    fac: float
        If not None, multiply the data values by this number.
    key: None or key of a dictionary
        If not None and `value` is a list of dictionaries,
        extract from each dictionary in the list the value specified
        by `key` and assign the resulting list as data to the column.

    Returns
    -------
    self: TableData
        This TableData

    Raises
    ------
    IndexError:
        If `column` does not exist.
    """
    col = self.index(column)
    if col is None:
        raise IndexError(f'Cannot insert before non-existing column "{column}"')
    # header entries store the column name first and the outermost
    # section last:
    if isinstance(label, (list, tuple, np.ndarray)):
        labels = list(reversed(label))
    else:
        labels = [label]
    self.header.insert(col, labels)
    self.units.insert(col, unit or '')
    self.formats.insert(col, formats or '%g')
    self.descriptions.insert(col, description or '')
    self.hidden.insert(col, False)
    self.data.insert(col, [])
    if self.nsecs < len(labels) - 1:
        self.nsecs = len(labels) - 1
    if not key:
        # Use the column name, not the full list of sections and
        # name, as key into the dictionaries. A list is not
        # hashable and would fail in the lookup below.
        key = labels[0] if labels else None
    if value is not None:
        if isinstance(value, (list, tuple, np.ndarray)):
            if key and len(value) > 0 and isinstance(value[0], dict):
                value = [d[key] if key in d else float('nan')
                         for d in value]
            self.data[col].extend(value)
        else:
            self.data[col].append(value)
    if fac:
        for k in range(len(self.data[col])):
            self.data[col][k] *= fac
    self.addcol = len(self.data)
    self.__recompute_shape()
    return self
def remove(self, columns):
    """Remove columns from the table.

    All column specifications are resolved against the table as it
    is when calling this function, before any column is removed.
    Integer indices therefore refer to the original column
    positions, and nothing is removed if one of the columns does
    not exist. Columns specified more than once are removed once.

    Section names stored in the header of a removed column are
    passed on to the following column.

    Parameters
    -----------
    columns: int or str or list of int or str
        Columns can be specified by index or name,
        see `index()` for details.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    # fix columns:
    if not isinstance(columns, (list, tuple, np.ndarray)):
        columns = [columns]
    # len() instead of truth value, because the latter is ambiguous
    # for arrays with more than one element:
    if len(columns) == 0:
        return
    # resolve all columns first:
    indices = set()
    for col in columns:
        c = self.index(col)
        if c is None:
            raise IndexError(f'Cannot remove non-existing column {col}')
        indices.add(c)
    # remove from the back, so that remaining indices stay valid:
    for c in sorted(indices, reverse=True):
        if c + 1 < len(self.header):
            # hand sections starting at this column to the next one:
            self.header[c + 1].extend(self.header[c][len(self.header[c + 1]):])
        del self.header[c]
        del self.units[c]
        del self.formats[c]
        del self.descriptions[c]
        del self.hidden[c]
        del self.data[c]
    if self.setcol >= len(self.data):
        self.setcol = 0
    # do not let the column for appending point beyond the end:
    if self.addcol > len(self.data):
        self.addcol = len(self.data)
    self.__recompute_shape()
def section(self, column, level):
    """The section name of a specified column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    level: int
        The level of the section to be returned.
        The column label itself is level=0.

    Returns
    -------
    name: str
        The name of the section at the specified level containing
        the column.
    index: int
        The column index that contains this section
        (equal or smaller than `column`).

    Raises
    ------
    IndexError:
        If `level` is negative or exceeds the maximum possible level.
    """
    if not 0 <= level <= self.nsecs:
        raise IndexError('Invalid section level')
    col = self.index(column)
    # A section is stored in the header of the column where it
    # starts: walk to the left until a header provides this level.
    while len(self.header[col]) <= level:
        col -= 1
    name = self.header[col][level]
    return name, col
def set_section(self, column, label, level):
    """Set a section name.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    label: str
        The new name to be used for the section.
    level: int
        The level of the section to be set.
        The column label itself is level=0.

    Returns
    -------
    index: int
        The index of the column whose header was modified.
    """
    col = self.index(column)
    entry = self.header[col]
    entry[level] = label
    return col
def append_section(self, label):
    """Add sections to the table header.

    Each column of the table has a header label. Columns can be
    grouped into sections. Sections can be nested arbitrarily.

    The sections are stored in a new, still unnamed column at the
    end of the table, or are added to such a column if it already
    exists. The next call of `append()` supplies the label of that
    column.

    Parameters
    ----------
    label: str or list of str
        The name(s) of the section(s).

    Returns
    -------
    index: int
        The column index where the section was appended.
    """
    # header entries store the outermost section last:
    if isinstance(label, (list, tuple, np.ndarray)):
        sections = list(reversed(label))
    else:
        sections = [label]
    if self.addcol >= len(self.data):
        self.header.append(sections)
        self.units.append('')
        self.formats.append('')
        self.descriptions.append('')
        self.hidden.append(False)
        self.data.append([])
        # Index of the column just created. `addcol` cannot be
        # used for this, because it might point beyond the end of
        # the table, e.g. after columns have been removed.
        col = len(self.data) - 1
    else:
        col = self.addcol
        self.header[col] = sections + self.header[col]
    if self.nsecs < len(self.header[col]):
        self.nsecs = len(self.header[col])
    self.addcol = len(self.data) - 1
    self.__recompute_shape()
    return self.addcol
def insert_section(self, column, section):
    """Insert a section at a given position of the table header.

    The section name is added as the last element of the header
    entry of the specified column.

    Parameters
    ----------
    column: int or str
        Column at which to insert the new section.
        Column can be specified by index or name,
        see `index()` for details.
    section: str
        The name of the section.

    Returns
    -------
    index: int
        The index of the column where the section was inserted.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    col = self.index(column)
    if col is None:
        if isinstance(column, (np.integer, int)):
            column = '%d' % column
        raise IndexError('Cannot insert at non-existing column ' + column)
    entry = self.header[col]
    entry.append(section)
    # the first element of a header entry is the column label:
    self.nsecs = max(self.nsecs, len(entry) - 1)
    return col
def label(self, column):
    """The name of a column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    label: str
        The first element of the header entry of the column.
    """
    return self.header[self.index(column)][0]
def set_label(self, column, label):
    """Set the name of a column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    label: str
        The new name to be used for the column.

    Returns
    -------
    self: TableData
        This TableData
    """
    col = self.index(column)
    entry = self.header[col]
    # the label is the first element of a header entry:
    entry[0] = label
    return self
def set_labels(self, labels):
    """Set the labels of some columns.

    The header entry of each affected column is replaced as a
    whole. A list given as a new label is stored as is, i.e. with
    the column name as its first element.

    Parameters
    ----------
    labels: TableData, dict, list of str, list of list of str, None
        The new labels to be used.
        If TableData, take the labels of the respective column indices.
        If dict, keys are column labels (see self.index() for more
        information on how to specify a column), and values are
        the new labels for the respective columns as str or list of str.
        Keys that do not match a column are ignored.
        If list of str or list of list of str,
        set labels of the first successive columns to the list elements.
        If `None`, do nothing.

    Returns
    -------
    self: TableData
        This TableData

    Raises
    ------
    IndexError:
        If a list with more elements than columns was specified.
    """
    def entry(new_label):
        # always store a fresh list, so that header entries are
        # neither tuples nor shared with the caller:
        if isinstance(new_label, (list, tuple)):
            return list(new_label)
        return [new_label]

    if isinstance(labels, TableData):
        for c in range(min(self.columns(), labels.columns())):
            self.header[c] = list(labels.header[c])
    elif isinstance(labels, dict):
        for c in labels:
            i = self.index(c)
            if i is None:
                continue
            self.header[i] = entry(labels[c])
    elif isinstance(labels, (list, tuple, np.ndarray)) and not \
         (isinstance(labels, np.ndarray) and len(labels.shape) == 0):
        # labels are stored in `self.header`, as in the other branches:
        for c, l in enumerate(labels):
            self.header[c] = entry(l)
    return self
def unit(self, column):
    """The unit of a column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    unit: str
        The unit.
    """
    return self.units[self.index(column)]
def set_unit(self, column, unit):
    """Set the unit of a column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    unit: str
        The new unit to be used for the column.

    Returns
    -------
    self: TableData
        This TableData
    """
    col = self.index(column)
    self.units[col] = unit
    return self
def set_units(self, units):
    """Set the units of some columns.

    Parameters
    ----------
    units: TableData, dict, list of str, str, None
        The new units to be used.
        If TableData, take the units of matching column labels.
        If dict, keys are column labels (see self.index() for more
        information on how to specify a column), and values are
        units for the respective columns as str.
        Columns that are not found in this table are skipped.
        If list of str, set units of the first successive columns to
        the list elements.
        If `None`, do nothing.
        Otherwise, set units of all columns to `units`.

    Returns
    -------
    self: TableData
        This TableData
    """
    if isinstance(units, TableData):
        for key in units:
            col = self.index(key)
            if col is not None:
                self.units[col] = units.unit(key)
    elif isinstance(units, dict):
        for key in units:
            col = self.index(key)
            if col is not None:
                self.units[col] = units[key]
    elif isinstance(units, (list, tuple, np.ndarray)) and not \
         (isinstance(units, np.ndarray) and len(units.shape) == 0):
        for col, new_unit in enumerate(units):
            self.units[col] = new_unit
    elif units is not None:
        # the same unit for each of the columns:
        self.units[:] = [units]*len(self.units)
    return self
def format(self, column):
    """Look up the format string of a single column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved through `self.index()`.

    Returns
    -------
    format: str
        The format string stored for the resolved column.
    """
    return self.formats[self.index(column)]
def set_format(self, column, format):
    """Replace the format string of a single column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved through `self.index()`.
    format: str
        Format string to store for that column.

    Returns
    -------
    self: TableData
        This TableData, to allow chaining.
    """
    target = self.index(column)
    self.formats[target] = format
    return self
def set_formats(self, formats):
    """Assign format strings to several columns at once.

    Parameters
    ----------
    formats: TableData, dict, list of str, str, None
        Source of the new format strings.
        - TableData: for each of its columns that also resolves in this
          table via `self.index()`, copy that column's format string.
        - dict: keys are column specifiers (see `self.index()`), values
          are format strings; keys that do not resolve are skipped.
        - list, tuple or non-scalar ndarray: element `k` becomes the
          format string of column `k`.
        - `None`: nothing is changed.
        - anything else: used as the format string of every column.
        Except when copying from a TableData, a falsy format
        (e.g. an empty string) is replaced by '%g'.

    Returns
    -------
    self: TableData
        This TableData
    """
    if formats is None:
        return self
    if isinstance(formats, TableData):
        for spec in formats:
            col = self.index(spec)
            if col is not None:
                self.formats[col] = formats.format(spec)
    elif isinstance(formats, dict):
        for spec in formats:
            col = self.index(spec)
            if col is not None:
                self.formats[col] = formats[spec] or '%g'
    elif isinstance(formats, (list, tuple)) or \
            (isinstance(formats, np.ndarray) and len(formats.shape) > 0):
        for col, fmt in enumerate(formats):
            self.formats[col] = fmt or '%g'
    else:
        # a single value is broadcast to all columns (in place):
        self.formats[:] = [formats or '%g' for _ in self.formats]
    return self
def description(self, column):
    """Look up the description of a single column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved through `self.index()`.

    Returns
    -------
    description: str
        The description stored for the resolved column.
    """
    return self.descriptions[self.index(column)]
def set_description(self, column, description):
    """Replace the description of a single column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved through `self.index()`.
    description: str
        Description to store for that column.

    Returns
    -------
    self: TableData
        This TableData, to allow chaining.
    """
    target = self.index(column)
    self.descriptions[target] = description
    return self
def set_descriptions(self, descriptions):
    """Assign descriptions to several columns at once.

    Parameters
    ----------
    descriptions: TableData, dict, list of str, str, None
        Source of the new descriptions.
        - TableData: for each of its columns that also resolves in this
          table via `self.index()`, copy that column's description.
        - dict: keys are column specifiers (see `self.index()`), values
          are descriptions; keys that do not resolve are skipped.
        - list, tuple or non-scalar ndarray: element `k` becomes the
          description of column `k`.
        - anything else (including `None` and a single str):
          nothing is changed.

    Returns
    -------
    self: TableData
        This TableData
    """
    if isinstance(descriptions, TableData):
        for spec in descriptions:
            col = self.index(spec)
            if col is not None:
                self.descriptions[col] = descriptions.description(spec)
    elif isinstance(descriptions, dict):
        for spec in descriptions:
            col = self.index(spec)
            if col is not None:
                self.descriptions[col] = descriptions[spec]
    elif isinstance(descriptions, (list, tuple)) or \
            (isinstance(descriptions, np.ndarray) and
             len(descriptions.shape) > 0):
        for col, text in enumerate(descriptions):
            self.descriptions[col] = text
    return self
def table_header(self):
    """Copy of the table's header without any data.

    Returns
    -------
    data: TableData
        A new TableData with the same columns (label, unit, format,
        description) and the same section names, but without the
        data of this table.
    """
    empty = TableData()
    # start column of the section last emitted on each level:
    last_start = [-1] * self.nsecs
    for col in range(self.columns()):
        empty.append(*self.column_head(col))
        for level in range(self.nsecs):
            name, start = self.section(col, level + 1)
            if start != last_start[level]:
                # a new section begins at this column:
                empty.header[-1].append(name)
                last_start[level] = start
    empty.nsecs = self.nsecs
    return empty
def column_head(self, column, secs=False):
    """The name, unit, format, and description of a column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    secs: bool
        If True, return all section names in addition to the column label.

    Returns
    -------
    name: str or list of str
        The column label (`secs=False`) or a list with the section
        names followed by the column label (`secs=True`).
    unit: str
        The unit.
    format: str
        The format string.
    description: str
        The description of the data column.
    """
    col = self.index(column)
    unit = self.units[col]
    fmt = self.formats[col]
    desc = self.descriptions[col]
    if not secs:
        return self.header[col][0], unit, fmt, desc
    # Work on a copy: extending self.header[col] itself would add
    # section entries to this column and thereby change the section
    # structure of the table just by querying it.
    names = list(self.header[col])
    prev = col - 1
    # collect missing section names from the columns to the left:
    while len(names) < self.nsecs + 1 and prev >= 0:
        if len(self.header[prev]) > len(names):
            names.extend(self.header[prev][len(names):])
        prev -= 1
    # header entries are stored label first, top-level section last:
    return names[::-1], unit, fmt, desc
def column_spec(self, column):
    """Fully qualified name of a column including its sections.

    Parameters
    ----------
    column: int or str
        Column specifier, resolved through `self.index()`.

    Returns
    -------
    s: str
        The section names of the column from the highest level down,
        followed by the column label, all joined by '>'.
    """
    col = self.index(column)
    # label first, then section names of increasing level:
    names = [self.header[col][0]]
    names += [self.section(col, level)[0]
              for level in range(1, self.nsecs + 1)]
    return '>'.join(names[::-1])
def find_col(self, column):
    """Find the start and end index of a column specification.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
        A string consisting of digits only is treated as an integer index.
        A trailing '>' is ignored; each leading '>' (empty leading
        component) lowers the highest section level that is searched.

    Returns
    -------
    c0: int or None
        A valid column index or None that is specified by `column`.
    c1: int or None
        A valid column index or None of the column following the range specified
        by `column`.
        For an integer index this is `c0 + 1`.
    """

    def find_column_indices(ss, si, minns, maxns, c0, strict=True):
        # ss: header names of the specification (split at '>').
        # si: index of the name in ss to be matched next.
        # minns, maxns: range of search steps; step ns looks at header
        #     entry maxns - ns of each column.
        # c0: first column to be searched.
        # strict: if True names must be equal, otherwise it is
        #     sufficient that the name is contained in the header entry.
        # Returns (c0, c1, ns0, None) if all names were matched,
        # (None, c0, ns0, si) with the state reached so far otherwise,
        # and four times None if there is nothing left to match.
        if si >= len(ss):
            return None, None, None, None
        ns0 = 0
        for ns in range(minns, maxns+1):
            nsec = maxns - ns
            if ss[si] == '':
                # empty name: skip this level
                si += 1
                continue
            for c in range(c0, len(self.header)):
                if nsec < len(self.header[c]) and \
                   ((strict and self.header[c][nsec] == ss[si]) or
                    (not strict and ss[si] in self.header[c][nsec])):
                    # matched: continue with the next name from here
                    ns0 = ns
                    c0 = c
                    si += 1
                    if si >= len(ss):
                        # all names matched; the range ends at the next
                        # column that has an entry at this level:
                        c1 = len(self.header)
                        for c in range(c0+1, len(self.header)):
                            if nsec < len(self.header[c]):
                                c1 = c
                                break
                        return c0, c1, ns0, None
                    elif nsec > 0:
                        # descend to the next lower level
                        break
        return None, c0, ns0, si

    if column is None:
        return None, None
    # NOTE(review): anything that is neither an integer nor has an
    # isdigit() method raises AttributeError here.
    if not isinstance(column, (np.integer, int)) and column.isdigit():
        column = int(column)
    if isinstance(column, (np.integer, int)):
        # negative or too large indices are not valid:
        if column >= 0 and column < len(self.header):
            return column, column + 1
        else:
            return None, None
    # find column by header:
    ss = column.rstrip('>').split('>')
    maxns = self.nsecs
    si0 = 0
    while si0 < len(ss) and ss[si0] == '':
        maxns -= 1
        si0 += 1
    if maxns < 0:
        maxns = 0
    # first try exact matches of the names:
    c0, c1, ns, si = find_column_indices(ss, si0, 0, maxns, 0, True)
    if c0 is None and c1 is not None:
        # no complete exact match: continue from the state reached
        # so far, accepting names contained in header entries.
        c0, c1, ns, si = find_column_indices(ss, si, ns, maxns, c1, False)
    return c0, c1
def index(self, column):
    """Resolve a column specifier to a column index.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        - None: no column is specified
        - int: the index of the column (first column is zero),
          e.g. `index(2)`.
        - a string representing an integer is converted into the
          column index, e.g. `index('2')`
        - a string specifying a column by its header.
          Header names of descending hierarchy are separated by '>'.

    Returns
    -------
    index: int or None
        The first value returned by `self.find_col()`:
        a valid column index or None.
    """
    first, _ = self.find_col(column)
    return first
def __contains__(self, column):
    """Test whether a column specifier resolves to a column.

    Parameters
    ----------
    column: None, int, or str
        The column to be checked, resolved through `self.index()`.

    Returns
    -------
    contains: bool
        True if `self.index(column)` is not None.
    """
    found = self.index(column)
    return found is not None
def keys(self):
    """Column keys of all columns of the table.

    Returns
    -------
    keys: list of str
        For each column the result of `self.column_spec()`,
        in column order.
    """
    return list(map(self.column_spec, range(self.columns())))
def values(self):
    """Column data in the order of `keys()`.

    Returns
    -------
    data: list of list of values
        `self.data` itself (not a copy). First index is columns!
    """
    column_data = self.data
    return column_data
def items(self):
    """Iterate over pairs of column key and column data.

    Yields
    ------
    item: tuple
        The column specification as returned by `self.column_spec()`
        and the data list of that column.
    """
    yield from ((self.column_spec(col), self.data[col])
                for col in range(self.columns()))
def __len__(self):
    """Number of rows of the table.

    Returns
    -------
    rows: int
        The value returned by `self.rows()`.
    """
    nrows = self.rows()
    return nrows
def __iter__(self):
    """Start an iteration over the column keys.

    The position of the iteration is stored in `self.iter_counter`
    and the table itself is returned as the iterator.
    """
    # one before the first column; __next__() increments first.
    setattr(self, 'iter_counter', -1)
    return self
def __next__(self):
    """Advance the iteration and return the next column key.

    Returns
    -------
    s: str
        Specification of the next column as returned by
        `self.column_spec()`.

    Raises
    ------
    StopIteration:
        When all columns have been visited.
    """
    self.iter_counter += 1
    if self.iter_counter < self.columns():
        return self.column_spec(self.iter_counter)
    raise StopIteration
def rows(self):
    """Number of rows of the table.

    Returns
    -------
    rows: int
        Length of the longest data column, zero if there are no columns.
    """
    if not self.data:
        return 0
    return max(len(col) for col in self.data)
def columns(self):
    """Number of columns of the table.

    Returns
    -------
    columns: int
        The number of entries in `self.header`.
    """
    ncols = len(self.header)
    return ncols
def row(self, index):
    """Extract a single row as a new table.

    Parameters
    ----------
    index: int
        The index of the row to be returned.

    Returns
    -------
    data: TableData
        A new TableData with the same columns and section names as
        this table, holding only the values of row `index`.
    """
    single = TableData()
    # start column of the section last emitted on each level:
    last_start = [-1] * self.nsecs
    for col in range(self.columns()):
        single.append(*self.column_head(col))
        for level in range(self.nsecs):
            name, start = self.section(col, level + 1)
            if start != last_start[level]:
                # a new section begins at this column:
                single.header[-1].append(name)
                last_start[level] = start
        single.data[-1] = [self.data[col][index]]
    single.nsecs = self.nsecs
    return single
def row_list(self, index):
    """Extract a single row as a plain list.

    Parameters
    ----------
    index: int
        The index of the row to be returned.

    Returns
    -------
    data: list
        The value of row `index` of each column, in column order.
    """
    return [self.data[col][index] for col in range(self.columns())]
def row_data(self):
    """Iterate over the rows of the table.

    Yields
    ------
    data: list
        For each row the list returned by `self.row_list()`.
    """
    yield from (self.row_list(index) for index in range(self.rows()))
def row_dict(self, index):
    """Extract a single row as a dictionary.

    Parameters
    ----------
    index: int
        The index of the row to be returned.

    Returns
    -------
    data: dict
        Maps the column specification of each column (see
        `self.column_spec()`) to the value of that column in row `index`.
    """
    return {self.column_spec(col): self.data[col][index]
            for col in range(self.columns())}
def column(self, col):
    """Extract a single column as a new table.

    Parameters
    ----------
    col: None, int, or str
        The column to be returned, resolved through `self.index()`.

    Returns
    -------
    table: TableData
        A TableData with a single column and without sections.
        Its data list is the very same list object as in this table.
    """
    single = TableData()
    idx = self.index(col)
    single.append(*self.column_head(idx))
    # the data list is shared, not copied:
    single.data = [self.data[idx]]
    single.nsecs = 0
    return single
def __call__(self, column):
    """Data of a single column as a numpy array.

    Parameters
    ----------
    column: None, int, or str
        The column to be returned, resolved through `self.index()`.

    Returns
    -------
    data: 1-D ndarray
        The data of the column passed through `np.asarray()`.
    """
    return np.asarray(self.data[self.index(column)])
def __setupkey(self, key):
    """Helper function that turns a key into row and column indices.

    Parameters
    ----------
    key:
        The key passed to `__getitem__()`, `__setitem__()`, or
        `__delitem__()`. Either a tuple `(rows, columns)` or a single
        key. A single string, or a slice whose start and stop are both
        strings, selects columns (with all rows); any other single key
        selects rows (with all columns).

    Returns
    -------
    rows: list of int, slice, None
        Indices of selected rows.
        None if a boolean ndarray was given that is nowhere True.
    cols: list of int
        Indices of selected columns.
        A range object if the columns were specified by a slice.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    if type(key) is not tuple:
        if isinstance(key, str):
            # a single column name, all rows:
            cols = key
            rows = slice(0, self.rows(), 1)
        elif isinstance(key, slice) and isinstance(key.start, str) and isinstance(key.stop, str):
            # a slice of column names, all rows:
            cols = key
            rows = slice(0, self.rows(), 1)
        else:
            # a row selection, all columns:
            rows = key
            cols = range(self.columns())
    else:
        rows = key[0]
        cols = key[1]
    if isinstance(cols, slice):
        # translate start and stop of the slice into column indices:
        start = cols.start
        if start is not None:
            start = self.index(start)
            if start is None:
                raise IndexError('"%s" is not a valid column index' % cols.start)
        stop = cols.stop
        if stop is not None:
            stop_str = isinstance(stop, str)
            stop = self.index(stop)
            if stop is None:
                raise IndexError('"%s" is not a valid column index' % cols.stop)
            if stop_str:
                # a stop column given by name is inclusive:
                stop += 1
        cols = slice(start, stop, cols.step)
        cols = range(self.columns())[cols]
    else:
        if not isinstance(cols, (list, tuple, np.ndarray, range)):
            cols = [cols]
        c = [self.index(inx) for inx in cols]
        if None in c:
            # report the first specifier that could not be resolved:
            raise IndexError('"%s" is not a valid column index' % cols[c.index(None)])
        cols = c
    if isinstance(rows, np.ndarray) and rows.dtype == np.dtype(bool):
        # logical indexing: convert to indices of the True elements
        rows = np.where(rows)[0]
        if len(rows) == 0:
            rows = None
    return rows, cols
def __getitem__(self, key):
    """Data elements specified by slice.

    Parameters
    -----------
    key:
        First key specifies row, (optional) second key the column.
        Columns can be specified by index or name,
        see `index()` for details.
        A single key of strings selects columns by their names: `td[:, 'col'] == td['col']`
        If a stop column is specified by name,
        it is inclusively!

    Returns
    -------
    data:
        - A single data value if a single row and a single column is specified.
        - A ndarray of data elements if a single column is specified.
        - A TableData object for multiple columns.
        - None if no row is selected (e.g. by a logical index that nowhere is True)
          and a single column is specified, or if a single row index is
          beyond the end of the selected single column.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    rows, cols = self.__setupkey(key)
    if len(cols) == 1:
        if cols[0] >= self.columns():
            return None
        if rows is None:
            return None
        elif isinstance(rows, slice):
            return np.asarray(self.data[cols[0]][rows])
        elif isinstance(rows, (list, tuple, np.ndarray)):
            # row indices beyond the end of the column are dropped:
            return np.asarray([self.data[cols[0]][r] for r in rows if r < len(self.data[cols[0]])])
        elif rows < len(self.data[cols[0]]):
            return self.data[cols[0]][rows]
        else:
            return None
    else:
        data = TableData()
        # NOTE(review): sec_indices is not used in this function.
        sec_indices = [-1] * self.nsecs
        for c in cols:
            data.append(*self.column_head(c, secs=True))
            # NOTE(review): with no rows selected a table with empty
            # columns is returned here, not None as for a single column.
            if rows is None:
                continue
            if isinstance(rows, (list, tuple, np.ndarray)):
                for r in rows:
                    data.data[-1].append(self.data[c][r])
            else:
                try:
                    # a slice yields a list of values,
                    # an integer index a single value:
                    if isinstance(self.data[c][rows],
                                  (list, tuple, np.ndarray)):
                        data.data[-1].extend(self.data[c][rows])
                    else:
                        data.data[-1].append(self.data[c][rows])
                except IndexError:
                    # column is too short for this row:
                    data.data[-1].append(np.nan)
        data.nsecs = self.nsecs
        return data
def __setitem__(self, key, value):
    """Assign values to data elements specified by slice.

    Parameters
    -----------
    key:
        First key specifies row, (optional) second one the column.
        Columns can be specified by index or name,
        see `index()` for details.
        A single key of strings selects columns by their names: `td[:, 'col'] == td['col']`
        If a stop column is specified by name,
        it is inclusively!
    value: TableData, list, ndarray, float, ...
        Value(s) used to assign to the table elements as specified by `key`.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    rows, cols = self.__setupkey(key)
    if rows is None:
        # nothing selected (boolean index that is nowhere True)
        return
    if isinstance(value, TableData):
        if isinstance(self.data[cols[0]][rows], (list, tuple, np.ndarray)):
            # several rows: take whole columns of the other table
            for k, c in enumerate(cols):
                self.data[c][rows] = value.data[k]
        else:
            # a single row: take the first element of each column
            for k, c in enumerate(cols):
                self.data[c][rows] = value.data[k][0]
    else:
        if len(cols) == 1:
            if isinstance(rows, (list, tuple, np.ndarray)):
                if len(rows) == 1:
                    self.data[cols[0]][rows[0]] = value
                elif isinstance(value, (list, tuple, np.ndarray)):
                    for k, r in enumerate(rows):
                        self.data[cols[0]][r] = value[k]
                else:
                    for r in rows:
                        self.data[cols[0]][r] = value
            elif isinstance(value, (list, tuple, np.ndarray)):
                self.data[cols[0]][rows] = value
            elif isinstance(rows, (np.integer, int)):
                self.data[cols[0]][rows] = value
            else:
                # A single value assigned to a slice of rows.
                # The value needs to be replicated for every selected
                # row, also for slices of length one or zero: assigning
                # a bare scalar to a list slice raises a TypeError and
                # a bare string would be spliced in character by
                # character.
                n = len(self.data[cols[0]][rows])
                self.data[cols[0]][rows] = [value]*n
        else:
            # NOTE(review): the first column of the table is inspected
            # here, not the first selected column - confirm that this
            # is intended.
            if isinstance(self.data[0][rows], (list, tuple, np.ndarray)):
                # several rows: value is indexed as 2D array, rows first
                for k, c in enumerate(cols):
                    self.data[c][rows] = value[:,k]
            elif isinstance(value, (list, tuple, np.ndarray)):
                for k, c in enumerate(cols):
                    self.data[c][rows] = value[k]
            else:
                for k, c in enumerate(cols):
                    self.data[c][rows] = value
def __delitem__(self, key):
    """Delete data elements or whole columns or rows.

    Parameters
    -----------
    key:
        First key specifies row, (optional) second one the column.
        Columns can be specified by index or name,
        see `index()` for details.
        A single key of strings selects columns by their names: `td[:, 'col'] == td['col']`
        If a stop column is specified by name,
        it is inclusively!
        If all rows are selected, then the specified columns are removed from the table.
        Otherwise only data values are removed.
        If all columns are selected than entire rows of data values are removed.
        Otherwise only data values in the specified rows are removed.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    rows, cols = self.__setupkey(key)
    if rows is None:
        # nothing selected (boolean index that is nowhere True)
        return
    # translate the row selection into explicit row indices:
    row_indices = np.arange(self.rows(), dtype=int)[rows]
    if isinstance(row_indices, np.ndarray):
        if len(row_indices) == self.rows():
            # delete whole columns:
            self.remove(cols)
        elif len(row_indices) > 0:
            # delete from the end so that remaining indices stay valid:
            for r in reversed(sorted(row_indices)):
                for c in cols:
                    if r < len(self.data[c]):
                        del self.data[c][r]
            self.__recompute_shape()
    else:
        # a single row index:
        for c in cols:
            del self.data[c][row_indices]
        self.__recompute_shape()
def array(self, row=None):
    """Table data converted to a numpy array.

    Parameters
    ----------
    row: int or None
        If specified, only that row is returned.

    Returns
    -------
    data: 2D or 1D ndarray
        Without `row` the transpose of `np.array(self.data)`,
        i.e. rows are the first index.
        With `row` a 1D ndarray holding element `row` of each column.
    """
    if row is not None:
        return np.array([col[row] for col in self.data])
    return np.array(self.data).T
def data_frame(self):
    """The table data as a pandas DataFrame.

    Returns
    -------
    data: pandas.DataFrame
        A pandas DataFrame of the whole table, constructed from
        the dictionary returned by `self.dict()`.

    Raises
    ------
    ImportError:
        If the pandas package is not installed.
    """
    # pandas is an optional dependency of this module, so import it
    # here and report its absence explicitly.
    try:
        import pandas as pd
    except ImportError as err:
        raise ImportError('TableData.data_frame() requires the pandas package') from err
    return pd.DataFrame(self.dict())
def dicts(self, raw_values=True, missing=default_missing_str):
    """Table converted to a list of dictionaries, one per row.

    Parameters
    ----------
    raw_values: bool
        If True, use raw table values as values,
        else format each value with the format string of its column
        and append the unit of the column.
    missing: str
        String used for nan values when `raw_values` is False.

    Returns
    -------
    table: list of dict
        For each row of the table a dictionary with the column label
        (without sections) as key.
    """
    records = []
    ncols = len(self.header)
    for r in range(self.rows()):
        record = {}
        for c in range(ncols):
            value = self.data[c][r]
            if not raw_values:
                if isinstance(value, (float, np.floating)) and m.isnan(value):
                    value = missing
                else:
                    unit = self.units[c]
                    # units that are part of '1-' (this includes the
                    # empty string) and 'a.u.' are not appended:
                    suffix = '' if unit in '1-' or unit == 'a.u.' else unit
                    value = (self.formats[c] % value) + suffix
            record[self.header[c][0]] = value
        records.append(record)
    return records
def dict(self):
    """Table converted to a dictionary of columns.

    Returns
    -------
    table: dict
        The pairs produced by `self.items()`: keys are the column
        specifications and values the data lists of the columns.
    """
    table = {}
    for key, column_data in self.items():
        table[key] = column_data
    return table
def _add_table_data(self, data, add_all):
    """Append the data of another table to this table.

    Parameters
    ----------
    data: TableData
        Table with the data to be added.
    add_all: bool
        If False, then only data of columns that already exist in
        the table are added to the table. If the table is empty or
        `add_all` is set to `True` then all data is added and if
        necessary new columns are appended to the table.
    """
    was_empty = self.shape[1] == 0
    if was_empty:
        add_all = True
    nrows = self.rows()
    for key in data.keys():
        target = self.index(key)
        if was_empty or target is None:
            if not add_all:
                continue
            # new column, padded with nan up to the current row count:
            self.append(*data.column_head(key, secs=True),
                        value=[np.nan]*nrows)
            target = len(self.data) - 1
        source = data.index(key)
        self.data[target].extend(data.data[source])
    self.__recompute_shape()
def _add_dict(self, data, add_all):
    """Add data of a dictionary.

    Parameters
    ----------
    data: dict
        Keys are column labels and values are single values or
        lists of values to be added to the corresponding table columns.
        A string key of the form 'label/unit' is split at the first
        slash into the column label and a unit. The unit is only used
        when a new column is appended.
    add_all: bool
        If False, then only data of columns that already exist in
        the table are added to the table. If the table is empty or
        `add_all` is set to `True` then all data is added and if
        necessary new columns are appended to the table.

    """
    empty = self.shape[1] == 0
    if empty:
        add_all = True
    maxr = self.rows()
    for key, values in data.items():
        new_key = key
        new_unit = ''
        # only strings can carry a unit; other keys (e.g. integer
        # column indices) are passed on to self.index() as they are.
        if isinstance(key, str) and '/' in key:
            new_key, _, new_unit = key.partition('/')
            # strip both parts, so that 'speed / m/s' results in
            # label 'speed' and unit 'm/s':
            new_key = new_key.strip()
            new_unit = new_unit.strip()
        col = self.index(new_key)
        if empty or col is None:
            if not add_all:
                continue
            # new column, padded with nan up to the current row count:
            self.append(new_key, new_unit,
                        value=[np.nan]*maxr)
            col = len(self.data) - 1
        if isinstance(values, (list, tuple, np.ndarray)):
            self.data[col].extend(values)
        else:
            self.data[col].append(values)
    self.__recompute_shape()
def add(self, data, column=None, add_all=False):
    """Add data elements to successive columns.

    The current column is set behind the added columns.

    Parameters
    ----------
    data: float, int, str, etc. or list thereof or list of list thereof or dict or list of dict or TableData
        Data values to be appended to successive columns:
        - A single value is simply appended to the specified
          column of the table.
        - A 1D-list of values is appended to successive columns of the table
          starting with the specified column.
        - The columns (second index) of a 2D-list of values are
          appended to successive columns of the table starting
          with the specified column.
        - Values of a dictionary or of a list of dictionaries are
          added to the columns specified by the keys. Dictionary
          values can also be lists of values. Their values are
          added to successive rows of the columns specified by the
          dictionary keys. Does not affect the current column.
        - All elements of a TableData are added to matching columns.
          Does not affect the current column.
    column: None, int, or str
        The first column to which the data should be appended,
        if `data` does not specify columns.
        If None, append to the current column.
        See self.index() for more information on how to specify a column.
    add_all: bool
        If the data are given as dictionaries or TableData, then
        only data of columns that already exist in the table are
        added to the table. If the table is empty or `add_all` is
        set to `True` then all data is added and if necessary new
        columns are appended to the table.
    """
    if self.shape[1] == 0:
        # empty table: new columns need to be created
        add_all = True
    column = self.index(column)
    if column is None:
        # continue at the current column:
        column = self.setcol
    if isinstance(data, TableData):
        self._add_table_data(data, add_all)
    elif isinstance(data, (list, tuple, np.ndarray)) and not \
         (isinstance(data, np.ndarray) and len(data.shape) == 0):
        # the type of the first element decides how the
        # sequence is interpreted:
        if len(data) > 0 and \
           isinstance(data[0], (list, tuple, np.ndarray)) and not \
           (isinstance(data[0], np.ndarray) and len(data[0].shape) == 0):
            # 2D list, rows first:
            for row in data:
                for i, val in enumerate(row):
                    self.data[column + i].append(val)
            self.setcol = column + len(data[0])
        elif len(data) > 0 and isinstance(data[0], dict):
            # list of dictionaries:
            for d in data:
                self._add_dict(d, add_all)
        else:
            # 1D list:
            for val in data:
                self.data[column].append(val)
                column += 1
            self.setcol = column
    elif isinstance(data, dict):
        # dictionary with values:
        self._add_dict(data, add_all)
    else:
        # single value:
        self.data[column].append(data)
        self.setcol = column + 1
    # wrap around to the first column after the last one:
    if self.setcol >= len(self.data):
        self.setcol = 0
    self.__recompute_shape()
def append_data_column(self, data, column=None):
    """Append one or several values to a single column.

    Afterwards the current column is the one following the column
    that received the data, or the first column if that was the
    last one.

    Parameters
    ----------
    data: float, int, str, etc. or list thereof
        A list, tuple or ndarray is appended element-wise to the
        column, anything else is appended as a single value.
    column: None, int, or str
        The column to which the data should be appended.
        If None, append to the current column.
        See self.index() for more information on how to specify a column.
    """
    col = self.index(column)
    if col is None:
        col = self.setcol
    target = self.data[col]
    if isinstance(data, (list, tuple, np.ndarray)):
        target.extend(data)
    else:
        target.append(data)
    following = col + 1
    # wrap around to the first column after the last one:
    self.setcol = 0 if following >= len(self.data) else following
    self.__recompute_shape()
def set_column(self, column):
    """Set the column where to add data.

    Parameters
    ----------
    column: int or str
        The column to which data elements should be appended.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    col: int
        The index of the column that is now the current column.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    col = self.index(column)
    if col is None:
        if isinstance(column, (np.integer, int)):
            column = '%d' % column
        # str() ensures that specifiers that are neither int nor str
        # (e.g. None) are also reported as the documented IndexError
        # and do not fail on building the message.
        raise IndexError('column ' + str(column) + ' not found or invalid')
    self.setcol = col
    return col
def fill_data(self):
    """Pad all columns with nan to the length of the longest column.

    The current column is reset to the first column.
    """
    nrows = self.rows()
    for column_data in self.data:
        lacking = nrows - len(column_data)
        if lacking > 0:
            column_data.extend([np.nan] * lacking)
    self.setcol = 0
    self.__recompute_shape()
def clear_data(self):
    """Remove all data values but keep the columns and their header.

    Each column gets a new empty list and the current column
    is reset to the first column.
    """
    self.data[:] = [[] for _ in self.data]
    self.setcol = 0
    self.__recompute_shape()
def clear(self):
    """Reset the table to an empty table without columns.

    Header, units, formats, descriptions, data and hidden flags
    are replaced by new empty lists; shape, size, number of
    sections and the current columns are reset.
    """
    # dimensions of an empty table:
    self.ndim = 2
    self.size = 0
    self.shape = (0, 0)
    self.nsecs = 0
    # each per-column list gets its own new empty list:
    for name in ('header', 'units', 'formats',
                 'descriptions', 'data', 'hidden'):
        setattr(self, name, [])
    # columns for setting data and adding columns:
    self.setcol = 0
    self.addcol = 0
def sort(self, columns, reverse=False):
    """Sort the table rows in place.

    Rows are ordered by the values of the given columns, the first
    column being the most significant. nan values are treated as
    minus infinity.

    Parameters
    ----------
    columns: int or str or list of int or str
        A column specifier or a list of column specifiers of the columns
        to be sorted.
        If empty, the table is left unchanged.
    reverse: boolean
        If `True` sort in descending order.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    # fix columns:
    if isinstance(columns, np.ndarray):
        # convert to a list: the truth value of an array is either
        # ambiguous or reflects its content (`not np.array([0])`),
        # but not whether it is empty.
        columns = columns.tolist() if columns.ndim > 0 else [columns.item()]
    elif not isinstance(columns, (list, tuple)):
        columns = [columns]
    if not columns:
        return
    cols = []
    for spec in columns:
        col = self.index(spec)
        if col is None:
            if isinstance(spec, (np.integer, int)):
                spec = '%d' % spec
            # str() keeps this an IndexError for any kind of specifier:
            raise IndexError('sort column ' + str(spec) + ' not found')
        cols.append(col)

    def sort_key(row):
        # values of the sort columns; nan (v != v) sorts as -inf
        key = []
        for c in cols:
            v = self.data[c][row]
            key.append(float('-inf') if v is np.nan or v != v else v)
        return key

    # get sorted row indices:
    row_inx = sorted(range(self.rows()), key=sort_key, reverse=reverse)
    # sort table according to indices:
    for c in range(self.columns()):
        self.data[c] = [self.data[c][r] for r in row_inx]
def key_value(self, row, col, missing=default_missing_str):
    """Label and formatted value of a single data element.

    Parameters
    ----------
    row: int
        Specifies the row from which the data element should be retrieved.
    col: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    missing: str
        String used in place of a nan value.

    Returns
    -------
    key: str
        Header label of the column
    value: str
        A textual representation of the data element according to the format
        of the column, followed by the unit of the column.

    If the column cannot be resolved, a single empty string
    is returned instead of the pair.
    """
    idx = self.index(col)
    if idx is None:
        return ''
    value = self.data[idx][row]
    if isinstance(value, (float, np.floating)) and m.isnan(value):
        text = missing
    else:
        unit = self.units[idx]
        # units that are part of '1-' (this includes the empty
        # string) and 'a.u.' are not appended:
        suffix = '' if unit in '1-' or unit == 'a.u.' else unit
        text = (self.formats[idx] % value) + suffix
    return self.header[idx][0], text
def _aggregate(self, funcs, columns=None, label=None,
               numbers_only=False, remove_nans=False, single_row=False,
               keep_columns=None):
    """Apply functions to columns.

    Parameters
    ----------
    funcs: function, list of function, dict
        Functions that are applied to columns of the table.
        - a single function that is applied to the `columns`.
          The results are named according to the function's `__name__`.
        - a list or tuple of functions.
          The results are named according to the functions' `__name__`.
        - a dictionary. The results are named after the provided keys,
          the functions are given by the values.
          If the function returns more than one value, then the
          corresponding key in the dictionary needs to be a tuple
          (not a list!) of names for each of the returned values.
        Functions in lists or dictionaries can be just a plain
        function, like `max` or `np.mean`. In case a function
        needs further arguments, then you need to supply a tuple
        with the first elements being the function, the second
        element being another tuple holding positional arguments,
        and an optional third argument holding a dictionary for
        key-word arguments.
    columns: None, int or str or list of int or str
        Columns of the table on which functions are applied.
        If None apply functions on all columns.
    label: str or list of str
        Column label and optional section names of the first
        column with the function labels (if `single_row` is `False`).
        Defaults to 'property'.
    numbers_only: bool
        If True, skip columns whose first element is not a number.
    remove_nans: bool
        If True, remove non-finite values before passing column
        values to function.
    single_row: bool
        If False, add for each function a row to the table.
        If True, add function values in a single row.
    keep_columns: None, int or str or list of int or str
        Columns of the table from which to simply keep the first value.
        Only if single_row is True. Useful for grouped tables.
        Order of columns and keep_columns are kept from the original table.

    Returns
    -------
    dest: TableData
        A new table with the column headers specified by `columns`.
        If not `single_row`, a first column is inserted with the
        function labels. The functions are applied to all columns
        specified by `columns` and their return values are written
        into the new table.
    """
    # standardize functions dictionary to
    # {tuple of names: (function, args, kwargs)}:
    if not isinstance(funcs, (list, tuple, dict)):
        funcs = [funcs]
    if isinstance(funcs, (list, tuple)):
        named = {}
        for f in funcs:
            # list entries may be (function, args[, kwargs]) tuples,
            # then the name is taken from the function itself:
            func = f[0] if isinstance(f, tuple) else f
            named[func.__name__] = f
        funcs = named
    fs = {}
    for k, v in funcs.items():
        names = k if isinstance(k, tuple) else (k,)
        if not isinstance(v, tuple):
            v = (v, (), {})
        elif len(v) == 1:
            # function only, no arguments given:
            v = (v[0], (), {})
        elif len(v) == 2:
            # positional arguments only:
            v = (v[0], v[1], {})
        fs[names] = v
    funcs = fs
    # standardize columns:
    if columns is None:
        columns = list(range(self.shape[1]))
    if not isinstance(columns, (list, tuple, np.ndarray)):
        columns = [columns]
    if numbers_only:
        cols = []
        for c in columns:
            c = self.index(c)
            if len(self.data[c]) > 0 and \
               isinstance(self.data[c][0],
                          (float, int, np.floating, np.integer)):
                cols.append(c)
        columns = cols
    if label is None:
        label = 'property'
    dest = TableData()
    if single_row:
        if keep_columns is None:
            keep_columns = []
        elif not isinstance(keep_columns, (list, tuple)):
            keep_columns = [keep_columns]
        keep_columns = [self.index(c) for c in keep_columns]
        columns = [self.index(c) for c in columns]
        columns = [c for c in columns if c not in keep_columns]
        # flag which entries of the merged column list are kept as is:
        keep = np.zeros(len(keep_columns) + len(columns), dtype=bool)
        keep[:len(keep_columns)] = True
        columns = keep_columns + columns
        # process columns in the order of the original table:
        idx = np.argsort(columns)
        for i in idx:
            c = columns[i]
            name, unit, fmt, descr = self.column_head(c, secs=True)
            if keep[i]:
                dest.append(name + ['-'], unit, fmt, descr,
                            value=self.data[c][0])
            else:
                values = self[:, c]
                if remove_nans:
                    values = values[np.isfinite(values)]
                for k in funcs:
                    v = funcs[k][0](values, *funcs[k][1], **funcs[k][2])
                    if len(k) == 1:
                        dest.append(name + [k[0]], unit, fmt, descr,
                                    value=v)
                    else:
                        # one column for each returned value:
                        for j in range(len(k)):
                            dest.append(name + [k[j]], unit, fmt, descr,
                                        value=v[j])
        dest.fill_data()
    else:
        # first column holds the function labels:
        dest.append(label, '', '%-s')
        for c in columns:
            dest.append(*self.column_head(c, secs=True))
        for k in funcs:
            for j in range(len(k)):
                dest.add(k[j], 0)
            for i, c in enumerate(columns):
                values = self[:, c]
                if remove_nans:
                    values = values[np.isfinite(values)]
                v = funcs[k][0](values, *funcs[k][1], **funcs[k][2])
                if len(k) == 1:
                    dest.add(v, i + 1)
                else:
                    # one row for each returned value:
                    for j in range(len(k)):
                        dest.add(v[j], i + 1)
        dest.fill_data()
    return dest
def aggregate(self, funcs, columns=None, label=None,
              numbers_only=False, remove_nans=False,
              single_row=False, by=None):
    """Apply functions to columns, optionally for each group of rows.

    Parameters
    ----------
    funcs: function, list of function, dict
        Functions that are applied to columns of the table.
        - a single function that is applied to the `columns`.
          The results are named according to the function's `__name__`.
        - a list or tuple of functions.
          The results are named according to the functions' `__name__`.
        - a dictionary. The results are named after the provided keys,
          the functions are given by the values.
          If the function returns more than one value, then the
          corresponding key in the dictionary needs to be a tuple
          (not a list!) of names for each of the returned values.
        Functions in lists or dictionaries can be just a plain
        function, like `max` or `np.mean`. In case a function
        needs further arguments, then you need to supply a tuple
        with the first elements being the function, the second
        element being another tuple holding positional arguments,
        and an optional third argument holding a dictionary for
        key-word arguments.
    columns: None, int or str or list of int or str
        Columns of the table on which functions are applied.
        If None apply functions on all columns.
    label: str or list of str
        Column label and optional section names of the first
        column with the function labels (if `single_row` is `False`).
    numbers_only: bool
        If True, skip columns that do not contain numbers.
    remove_nans: bool
        If True, remove nans before passing column values to function.
    single_row: bool
        If False, add for each function a row to the table.
        If True, add function values in a single row.
        Ignored when grouping via `by`: each group then always
        contributes a single row.
    by: None, int or str or list of int or str
        Group the table by the specified columns and apply the functions
        to each resulting sub-table separately.

    Returns
    -------
    dest: TableData
        A new table with the column headers specified by `columns`.
        A first column is inserted with the function labels
        (if not `single_row`).
        The functions are applied to all columns specified by `columns`
        and their return values are written into the new table.
    """
    group_cols = by
    if group_cols is not None and not isinstance(group_cols, (list, tuple)):
        group_cols = [group_cols]
    if not group_cols:
        # no grouping requested: aggregate on whole table
        return self._aggregate(funcs, columns, label,
                               numbers_only=numbers_only,
                               remove_nans=remove_nans,
                               single_row=single_row,
                               keep_columns=None)
    # aggregate each group into a single row and stack the rows:
    grouped = TableData()
    for _, sub_table in self.groupby(*group_cols):
        row = sub_table._aggregate(funcs, columns, label,
                                   numbers_only=numbers_only,
                                   remove_nans=remove_nans,
                                   single_row=True,
                                   keep_columns=group_cols)
        grouped.add(row)
    return grouped
def statistics(self, columns=None, label=None,
               remove_nans=False, single_row=False, by=None):
    """Compute descriptive statistics for the columns of the table.

    For each selected column holding numbers the mean, standard
    deviation, minimum, the three quartiles, maximum, and the number
    of elements are computed by means of `aggregate()`.

    Parameters
    ----------
    columns: None, int or str or list of int or str
        Columns of the table on which statistics should be computed.
        If None apply functions on all columns.
    label: str or list of str
        Column label and optional section names of the first
        column with the function labels (if `single_row` is `False`).
        Defaults to 'statistics'.
    remove_nans: bool
        If True, remove nans before passing column values to function.
    single_row: bool
        If False, add for each function a row to the table.
        If True, add function values in a single row.
    by: None, int or str or list of int or str
        Group the table by the specified columns and compute statistics
        to each resulting sub-table separately.

    Returns
    -------
    dest: TableData
        A new table with the column headers specified by `columns`.
        For each column that contains numbers some basic
        descriptive statistics is computed. Floating point formats
        get one more digit of precision than the original column.
    """
    stats_label = 'statistics' if label is None else label
    funcs = {'mean': np.mean,
             'std': np.std,
             'min': np.min,
             ('quartile1', 'median', 'quartile2'):
             (np.quantile, ([0.25, 0.5, 0.75],)),
             'max': np.max,
             'count': len}
    ds = self.aggregate(funcs, columns, stats_label,
                        numbers_only=True,
                        remove_nans=remove_nans,
                        single_row=single_row, by=by)
    # grouped statistics always come as a single row per group:
    if by is not None:
        groups = by if isinstance(by, (list, tuple)) else [by]
        if len(groups) > 0:
            single_row = True
    first = 0
    if not single_row:
        # first column holds the labels of the statistics:
        ds.set_format(0, '%-10s')
        first = 1
    for c in range(first, ds.shape[1]):
        fmt = ds.formats[c]
        if single_row and ds.label(c) == 'count':
            # counts are plain integers without unit:
            ds.set_unit(c, '')
            ds.set_format(c, '%d')
            continue
        if fmt[-1] not in 'fge':
            ds.set_format(c, '%.1f')
            continue
        dot = fmt.find('.')
        if dot > 0:
            # increase precision by one digit:
            precision = int(fmt[dot + 1:-1])
            fmt = f'{fmt[:dot + 1]}{precision + 1}{fmt[-1]}'
        ds.set_format(c, fmt)
    return ds
def groupby(self, *columns):
    """Iterate through the groups of rows sharing the same column values.

    Parameters
    ----------
    columns: int or str
        One or several columns used to group the data.
        See self.index() for more information on how to specify a column.
        If any of the columns does not exist, nothing is yielded.

    Yields
    ------
    values: float or str or tuple of float or str
        The values of the specified columns. A single value if only
        one column was specified, a tuple otherwise.
    data: TableData
        The sub table where the specified columns equals `values`.
        Combinations of values that do not occur in the table
        are skipped.
    """
    # check column indices and values:
    cols = []
    vals = []
    for col in columns:
        c = self.index(col)
        if c is None:
            # A plain return ends the generator. Raising StopIteration
            # here would be turned into a RuntimeError (PEP 479).
            return
        cols.append(c)
        vals.append(np.unique(self.data[c]))
    for values in product(*vals):
        mask = np.ones(len(self), dtype=bool)
        for c, v in zip(cols, values):
            mask &= self[:, c] == v
        # The product of unique values contains combinations that do
        # not occur in the table, and nan never equals itself. Empty
        # sub-tables are no groups and break per-group aggregation.
        if not mask.any():
            continue
        if len(cols) == 1:
            yield values[0], self[mask]
        else:
            yield values, self[mask]
def hide(self, column):
    """Mark a column or a range of columns as hidden.

    Hidden columns will not be printed out by the write() function.

    Parameters
    ----------
    column: int or str
        The column to be hidden.
        See self.index() for more information on how to specify a column.
        Nothing happens if the column cannot be found.
    """
    first, stop = self.find_col(column)
    if first is None:
        return
    for k in range(first, stop):
        self.hidden[k] = True
def hide_all(self):
    """Mark every column as hidden.

    Hidden columns will not be printed out by the write() function.
    """
    # assign in place, so that the container object stays the same:
    self.hidden[:] = [True] * len(self.hidden)
def hide_empty_columns(self, missing=default_missing_inputs):
    """Hide every column that holds no data at all.

    A column counts as empty if each of its elements is either
    a floating point nan or, for non-float elements, contained
    in `missing`. A column without any elements is empty as well.

    Hidden columns will not be printed out by the write() function.

    Parameters
    ----------
    missing: list of str
        Strings indicating missing data.
    """
    for c, column in enumerate(self.data):
        for v in column:
            if isinstance(v, (float, np.floating)):
                if not m.isnan(v):
                    break
            elif v not in missing:
                break
        else:
            # no element with actual content was found:
            self.hidden[c] = True
def show(self, column):
    """Make a column or a range of columns visible again.

    Undoes hiding of a column.

    Parameters
    ----------
    column: int or str
        The column to be shown.
        See self.index() for more information on how to specify a column.
        Nothing happens if the column cannot be found.
    """
    first, stop = self.find_col(column)
    if first is None:
        return
    for k in range(first, stop):
        self.hidden[k] = False
2478 def write(self, fh=sys.stdout, table_format=None, delimiter=None,
2479 unit_style=None, column_numbers=None, sections=None,
2480 align_columns=None, shrink_width=True,
2481 missing=default_missing_str, center_columns=False,
2482 latex_unit_package=None, latex_label_command='',
2483 latex_merge_std=False, descriptions_name='-description',
2484 section_headings=None, maxc=80):
2485 """Write the table to a file or stream.
2487 Parameters
2488 ----------
2489 fh: filename or stream
2490 If not a stream, the file with path `fh` is opened.
2491 If `fh` does not have an extension,
2492 the `table_format` is appended as an extension.
2493 Otherwise `fh` is used as a stream for writing.
2494 table_format: None or str
2495 The format to be used for output.
2496 One of 'out', 'dat', 'ascii', 'csv', 'rtai', 'md', 'tex', 'html'.
2497 If None or 'auto' then the format is set to the extension of the
2498 filename given by `fh`. If the filename does not have an extension
2499 the format is set to 'csv'. If `fh` is a stream the format is set
2500 to 'out'.
2501 delimiter: str
2502 String or character separating columns, if supported by the
2503 `table_format`.
2504 If None or 'auto' use the default for the specified `table_format`.
2505 unit_style: None or str
2506 - None or 'auto': use default of the specified `table_format`.
2507 - 'row': write an extra row to the table header specifying the
2508 units of the columns.
2509 - 'header': add the units to the column headers.
2510 - 'none': do not specify the units.
2511 column_numbers: str or None
2512 Add a row specifying the column index:
2513 - 'index': indices are integers, first column is 0.
2514 - 'num': indices are integers, first column is 1.
2515 - 'aa': use 'a', 'b', 'c', ..., 'z', 'aa', 'ab', ... for indexing
2516 - 'AA': use 'A', 'B', 'C', ..., 'Z', 'AA', 'AB', ... for indexing
2517 - None or 'none': do not add a row with column indices
2518 TableData.column_numbering is a list with the supported styles.
2519 sections: None or int
2520 Number of section levels to be printed.
2521 If `None` or 'auto' use default of selected `table_format`.
2522 align_columns: boolean
2523 - `True`: set width of column formats to make them align.
2524 - `False`: set width of column formats to 0 - no unnecessary spaces.
2525 - None or 'auto': Use default of the selected `table_format`.
2526 shrink_width: boolean
2527 If `True` disregard width specified by the format strings,
2528 such that columns can become narrower.
2529 missing: str
2530 Indicate missing data by this string.
2531 center_columns: boolean
2532 If True center all columns (markdown, html, and latex).
2533 latex_unit_package: None or 'siunitx' or 'SIunit'
2534 Translate units for the specified LaTeX package.
2535 If None set sub- and superscripts in text mode.
2536 If 'siunitx', also use `S` columns for numbers to align
2537 them on the decimal point.
2538 latex_label_command: str
2539 LaTeX command for formatting header labels.
2540 E.g. 'textbf' for making the header labels bold.
2541 latex_merge_std: str
2542 Merge header of columns with standard deviations with
2543 previous column (LaTeX tables only), but separate them
2544 with $\\pm$. Valid labels for standard deviations are
2545 listed in `TableData.stdev_labels`.
2546 descriptions_name: None or str
2547 If not None and if `fh` is a file path, then write the column
2548 descriptions to a file with the same name as `fh`, but with
2549 `descriptions_name` appended.
2550 section_headings: None or int
2551 How to treat header sections in the column descriptions.
2552 If set, set header sections as headings with the top-level
2553 section at the level as specified. 0 is the top level.
2554 If False, just produce a nested list.
2555 maxc: int
2556 Maximum character count for each line in the column descriptions.
2558 Returns
2559 -------
2560 file_name: Path or None
2561 The full name of the file into which the data were written.
2563 Supported file formats
2564 ----------------------
2566 ## `dat`: data text file
2567 ``` plain
2568 # info reaction
2569 # size weight delay jitter
2570 # m kg ms mm
2571 2.34 123 98.7 23
2572 56.70 3457 54.3 45
2573 8.90 43 67.9 345
2574 ```
2576 ## `ascii`: ascii-art table
2577 ``` plain
2578 |---------------------------------|
2579 | info | reaction |
2580 | size | weight | delay | jitter |
2581 | m | kg | ms | mm |
2582 |-------|--------|-------|--------|
2583 | 2.34 | 123 | 98.7 | 23 |
2584 | 56.70 | 3457 | 54.3 | 45 |
2585 | 8.90 | 43 | 67.9 | 345 |
2586 |---------------------------------|
2587 ```
2589 ## `csv`: comma separated values
2590 ``` plain
2591 size/m,weight/kg,delay/ms,jitter/mm
2592 2.34,123,98.7,23
2593 56.70,3457,54.3,45
2594 8.90,43,67.9,345
2595 ```
2597 ## `rtai`: rtai-style table
2598 ``` plain
2599 RTH| info | reaction
2600 RTH| size | weight| delay| jitter
2601 RTH| m | kg | ms | mm
2602 RTD| 2.34| 123| 98.7| 23
2603 RTD| 56.70| 3457| 54.3| 45
2604 RTD| 8.90| 43| 67.9| 345
2605 ```
2607 ## `md`: markdown
2608 ``` plain
2609 | size/m | weight/kg | delay/ms | jitter/mm |
2610 |------:|-------:|------:|-------:|
2611 | 2.34 | 123 | 98.7 | 23 |
2612 | 56.70 | 3457 | 54.3 | 45 |
2613 | 8.90 | 43 | 67.9 | 345 |
2614 ```
2616 ## `tex`: latex tabular
2617 ``` tex
2618 \\begin{tabular}{rrrr}
2619 \\hline
2620 \\multicolumn{2}{l}{info} & \\multicolumn{2}{l}{reaction} \\
2621 \\multicolumn{1}{l}{size} & \\multicolumn{1}{l}{weight} & \\multicolumn{1}{l}{delay} & \\multicolumn{1}{l}{jitter} \\
2622 \\multicolumn{1}{l}{m} & \\multicolumn{1}{l}{kg} & \\multicolumn{1}{l}{ms} & \\multicolumn{1}{l}{mm} \\
2623 \\hline
2624 2.34 & 123 & 98.7 & 23 \\
2625 56.70 & 3457 & 54.3 & 45 \\
2626 8.90 & 43 & 67.9 & 345 \\
2627 \\hline
2628 \\end{tabular}
2629 ```
2631 ## `html`: html
2632 ``` html
2633 <table>
2634 <thead>
2635 <tr class="header">
2636 <th align="left" colspan="2">info</th>
2637 <th align="left" colspan="2">reaction</th>
2638 </tr>
2639 <tr class="header">
2640 <th align="left">size</th>
2641 <th align="left">weight</th>
2642 <th align="left">delay</th>
2643 <th align="left">jitter</th>
2644 </tr>
2645 <tr class="header">
2646 <th align="left">m</th>
2647 <th align="left">kg</th>
2648 <th align="left">ms</th>
2649 <th align="left">mm</th>
2650 </tr>
2651 </thead>
2652 <tbody>
2653 <tr class"odd">
2654 <td align="right">2.34</td>
2655 <td align="right">123</td>
2656 <td align="right">98.7</td>
2657 <td align="right">23</td>
2658 </tr>
2659 <tr class"even">
2660 <td align="right">56.70</td>
2661 <td align="right">3457</td>
2662 <td align="right">54.3</td>
2663 <td align="right">45</td>
2664 </tr>
2665 <tr class"odd">
2666 <td align="right">8.90</td>
2667 <td align="right">43</td>
2668 <td align="right">67.9</td>
2669 <td align="right">345</td>
2670 </tr>
2671 </tbody>
2672 </table>
2673 ```
2675 """
2676 # fix parameter:
2677 if table_format is not None:
2678 table_format = table_format.lower()
2679 if table_format == 'auto':
2680 table_format = None
2681 if delimiter == 'auto':
2682 delimiter = None
2683 if unit_style == 'auto':
2684 unit_style = None
2685 if column_numbers == 'none':
2686 column_numbers = None
2687 if sections == 'auto':
2688 sections = None
2689 if align_columns == 'auto':
2690 align_columns = None
2691 # open file:
2692 own_file = False
2693 file_name = None
2694 if not hasattr(fh, 'write'):
2695 fh = Path(fh)
2696 ext = fh.suffix
2697 if table_format is None:
2698 if len(ext) > 1 and ext[1:] in self.ext_formats:
2699 table_format = self.ext_formats[ext[1:]]
2700 else:
2701 table_format = 'csv'
2702 if not ext or not ext[1:].lower() in self.ext_formats:
2703 fh = fh.with_suffix('.' + self.extensions[table_format])
2704 file_name = fh
2705 try:
2706 fh = open(os.fspath(fh), 'w')
2707 except AttributeError:
2708 fh = open(str(fh), 'w')
2709 own_file = True
2710 if table_format is None:
2711 table_format = 'out'
2712 # set style:
2713 if table_format[0] == 'd':
2714 align_columns = True
2715 begin_str = ''
2716 end_str = ''
2717 header_start = '# '
2718 header_sep = ' '
2719 header_close = ''
2720 header_end = '\n'
2721 data_start = ' '
2722 data_sep = ' '
2723 data_close = ''
2724 data_end = '\n'
2725 top_line = False
2726 header_line = False
2727 bottom_line = False
2728 if delimiter is not None:
2729 header_sep = delimiter
2730 data_sep = delimiter
2731 if sections is None:
2732 sections = 1000
2733 elif table_format[0] == 'a':
2734 align_columns = True
2735 begin_str = ''
2736 end_str = ''
2737 header_start = '| '
2738 header_sep = ' | '
2739 header_close = ''
2740 header_end = ' |\n'
2741 data_start = '| '
2742 data_sep = ' | '
2743 data_close = ''
2744 data_end = ' |\n'
2745 top_line = True
2746 header_line = True
2747 bottom_line = True
2748 if delimiter is not None:
2749 header_sep = delimiter
2750 data_sep = delimiter
2751 if sections is None:
2752 sections = 1000
2753 elif table_format[0] == 'c':
2754 # csv according to http://www.ietf.org/rfc/rfc4180.txt :
2755 column_numbers=None
2756 if unit_style is None:
2757 unit_style = 'header'
2758 if align_columns is None:
2759 align_columns = False
2760 begin_str = ''
2761 end_str = ''
2762 header_start=''
2763 header_sep = ','
2764 header_close = ''
2765 header_end='\n'
2766 data_start=''
2767 data_sep = ','
2768 data_close = ''
2769 data_end='\n'
2770 top_line = False
2771 header_line = False
2772 bottom_line = False
2773 if delimiter is not None:
2774 header_sep = delimiter
2775 data_sep = delimiter
2776 if sections is None:
2777 sections = 0
2778 elif table_format[0] == 'r':
2779 align_columns = True
2780 begin_str = ''
2781 end_str = ''
2782 header_start = 'RTH| '
2783 header_sep = '| '
2784 header_close = ''
2785 header_end = '\n'
2786 data_start = 'RTD| '
2787 data_sep = '| '
2788 data_close = ''
2789 data_end = '\n'
2790 top_line = False
2791 header_line = False
2792 bottom_line = False
2793 if sections is None:
2794 sections = 1000
2795 elif table_format[0] == 'm':
2796 if unit_style is None or unit_style == 'row':
2797 unit_style = 'header'
2798 align_columns = True
2799 begin_str = ''
2800 end_str = ''
2801 header_start='| '
2802 header_sep = ' | '
2803 header_close = ''
2804 header_end=' |\n'
2805 data_start='| '
2806 data_sep = ' | '
2807 data_close = ''
2808 data_end=' |\n'
2809 top_line = False
2810 header_line = True
2811 bottom_line = False
2812 if sections is None:
2813 sections = 0
2814 elif table_format[0] == 'h':
2815 align_columns = False
2816 begin_str = '<table>\n<thead>\n'
2817 end_str = '</tbody>\n</table>\n'
2818 if center_columns:
2819 header_start=' <tr>\n <th align="center"'
2820 header_sep = '</th>\n <th align="center"'
2821 else:
2822 header_start=' <tr>\n <th align="left"'
2823 header_sep = '</th>\n <th align="left"'
2824 header_close = '>'
2825 header_end='</th>\n </tr>\n'
2826 data_start=' <tr>\n <td'
2827 data_sep = '</td>\n <td'
2828 data_close = '>'
2829 data_end='</td>\n </tr>\n'
2830 top_line = False
2831 header_line = False
2832 bottom_line = False
2833 if sections is None:
2834 sections = 1000
2835 elif table_format[0] == 't':
2836 if align_columns is None:
2837 align_columns = False
2838 begin_str = '\\begin{tabular}'
2839 end_str = '\\end{tabular}\n'
2840 header_start=' '
2841 header_sep = ' & '
2842 header_close = ''
2843 header_end=' \\\\\n'
2844 data_start=' '
2845 data_sep = ' & '
2846 data_close = ''
2847 data_end=' \\\\\n'
2848 top_line = True
2849 header_line = True
2850 bottom_line = True
2851 if sections is None:
2852 sections = 1000
2853 else:
2854 if align_columns is None:
2855 align_columns = True
2856 begin_str = ''
2857 end_str = ''
2858 header_start = ''
2859 header_sep = ' '
2860 header_close = ''
2861 header_end = '\n'
2862 data_start = ''
2863 data_sep = ' '
2864 data_close = ''
2865 data_end = '\n'
2866 top_line = False
2867 header_line = False
2868 bottom_line = False
2869 if sections is None:
2870 sections = 1000
2871 # check units:
2872 if unit_style is None:
2873 unit_style = 'row'
2874 have_units = False
2875 for u in self.units:
2876 if u and u != '1' and u != '-':
2877 have_units = True
2878 break
2879 if not have_units:
2880 unit_style = 'none'
2881 # find std columns:
2882 stdev_col = np.zeros(len(self.header), dtype=bool)
2883 for c in range(len(self.header) - 1):
2884 if self.header[c+1][0].lower() in self.stdev_labels and \
2885 not self.hidden[c+1]:
2886 stdev_col[c] = True
2887 # begin table:
2888 fh.write(begin_str)
2889 if table_format[0] == 't':
2890 fh.write('{')
2891 merged = False
2892 for h, f, s in zip(self.hidden, self.formats, stdev_col):
2893 if merged:
2894 fh.write('l')
2895 merged = False
2896 continue
2897 if h:
2898 continue
2899 if latex_merge_std and s:
2900 fh.write('r@{$\\,\\pm\\,$}')
2901 merged = True
2902 elif center_columns:
2903 fh.write('c')
2904 elif f[1] == '-':
2905 fh.write('l')
2906 else:
2907 if latex_unit_package is not None and \
2908 latex_unit_package.lower() == 'siunitx':
2909 fh.write('S')
2910 else:
2911 fh.write('r')
2912 fh.write('}\n')
2913 # retrieve column formats and widths:
2914 widths = [] # width from format string
2915 widths_pos = [] # start and end index of width specifyer in format
2916 for c, f in enumerate(self.formats):
2917 w = 0
2918 # position of width specification:
2919 i0 = 1
2920 if len(f) > 1 and f[1] == '-' :
2921 i0 = 2
2922 i1 = f.find('.')
2923 if i1 < 0:
2924 i1 = i0
2925 while i1 < len(f) and f[i1].isdigit():
2926 i1 += 1
2927 if not shrink_width and i1 > i0:
2928 if f[i0:i1]:
2929 w = int(f[i0:i1])
2930 widths_pos.append((i0, i1))
2931 # adapt width to header label:
2932 hw = len(self.header[c][0])
2933 if unit_style == 'header' and self.units[c] and \
2934 self.units[c] != '1' and self.units[c] != '-':
2935 hw += 1 + len(self.units[c])
2936 if w < hw:
2937 w = hw
2938 # adapt width to data:
2939 if f[-1] == 's':
2940 for v in self.data[c]:
2941 if isinstance(v, str) and w < len(v):
2942 w = len(v)
2943 else:
2944 fs = f'{f[:i0]}0{f[i1:]}'
2945 for v in self.data[c]:
2946 if v is None or (isinstance(v, (float, np.floating)) and
2947 m.isnan(v)):
2948 s = missing
2949 else:
2950 try:
2951 s = fs % v
2952 except ValueError:
2953 s = missing
2954 except TypeError:
2955 s = str(v)
2956 if w < len(s):
2957 w = len(s)
2958 widths.append(w)
2959 # adapt width to sections:
2960 sec_indices = [0] * self.nsecs # previous column with this level
2961 sec_widths = [0] * self.nsecs # total width of section level
2962 sec_columns = [0] * self.nsecs # number of columns in section level
2963 for c in range(len(self.header)):
2964 w = widths[c]
2965 for l in range(min(self.nsecs, sections)):
2966 if 1 + l < len(self.header[c]):
2967 if c > 0 and sec_columns[l] > 0 and \
2968 1 + l < len(self.header[sec_indices[l]]) and \
2969 len(self.header[sec_indices[l]][1 + l]) > sec_widths[l]:
2970 dw = len(self.header[sec_indices[l]][1 + l]) - sec_widths[l]
2971 nc = sec_columns[l]
2972 ddw = np.zeros(nc, dtype=int) + dw // nc
2973 ddw[:dw % nc] += 1
2974 wk = 0
2975 for ck in range(sec_indices[l], c):
2976 if not self.hidden[ck]:
2977 widths[ck] += ddw[wk]
2978 wk += 1
2979 sec_indices[l] = c
2980 sec_widths[l] = 0
2981 sec_columns[l] = 0
2982 if not self.hidden[c]:
2983 if sec_widths[l] > 0:
2984 sec_widths[l] += len(header_sep)
2985 sec_widths[l] += w
2986 sec_columns[l] += 1
2987 # set width of format string:
2988 formats = []
2989 for c, (f, w) in enumerate(zip(self.formats, widths)):
2990 formats.append(f'{f[:widths_pos[c][0]]}{w}{f[widths_pos[c][1]:]}')
2991 # top line:
2992 if top_line:
2993 if table_format[0] == 't':
2994 fh.write(' \\hline \\\\[-2ex]\n')
2995 else:
2996 first = True
2997 fh.write(header_start.replace(' ', '-'))
2998 for c in range(len(self.header)):
2999 if self.hidden[c]:
3000 continue
3001 if not first:
3002 fh.write('-'*len(header_sep))
3003 first = False
3004 fh.write(header_close)
3005 w = widths[c]
3006 fh.write(w*'-')
3007 fh.write(header_end.replace(' ', '-'))
3008 # section and column headers:
3009 nsec0 = self.nsecs - sections
3010 if nsec0 < 0:
3011 nsec0 = 0
3012 for ns in range(nsec0, self.nsecs+1):
3013 nsec = self.nsecs - ns
3014 first = True
3015 last = False
3016 merged = False
3017 fh.write(header_start)
3018 for c in range(len(self.header)):
3019 if nsec < len(self.header[c]):
3020 # section width and column count:
3021 sw = -len(header_sep)
3022 columns = 0
3023 if not self.hidden[c]:
3024 sw = widths[c]
3025 columns = 1
3026 for k in range(c+1, len(self.header)):
3027 if nsec < len(self.header[k]):
3028 break
3029 if self.hidden[k]:
3030 continue
3031 sw += len(header_sep) + widths[k]
3032 columns += 1
3033 else:
3034 last = True
3035 if len(header_end.strip()) == 0:
3036 sw = 0 # last entry needs no width
3037 if columns == 0:
3038 continue
3039 if not first and not merged:
3040 fh.write(header_sep)
3041 first = False
3042 if table_format[0] == 'c':
3043 sw -= len(header_sep)*(columns - 1)
3044 elif table_format[0] == 'h':
3045 if columns>1:
3046 fh.write(' colspan="%d"' % columns)
3047 elif table_format[0] == 't':
3048 if merged:
3049 merged = False
3050 continue
3051 if latex_merge_std and nsec == 0 and stdev_col[c]:
3052 merged = True
3053 fh.write('\\multicolumn{%d}{c}{' % (columns+1))
3054 elif center_columns:
3055 fh.write('\\multicolumn{%d}{c}{' % columns)
3056 else:
3057 fh.write('\\multicolumn{%d}{l}{' % columns)
3058 if latex_label_command:
3059 fh.write('\\%s{' % latex_label_command)
3060 fh.write(header_close)
3061 hs = self.header[c][nsec]
3062 if nsec == 0 and unit_style == 'header':
3063 if self.units[c] and self.units[c] != '1' and self.units[c] != '-':
3064 hs += '/' + self.units[c]
3065 if align_columns and not table_format[0] in 'th':
3066 f = '%%-%ds' % sw
3067 fh.write(f % hs)
3068 else:
3069 fh.write(hs)
3070 if table_format[0] == 'c':
3071 if not last:
3072 fh.write(header_sep*(columns - 1))
3073 elif table_format[0] == 't':
3074 if latex_label_command:
3075 fh.write('}')
3076 fh.write('}')
3077 fh.write(header_end)
3078 # units:
3079 if unit_style == 'row':
3080 first = True
3081 merged = False
3082 fh.write(header_start)
3083 for c in range(len(self.header)):
3084 if self.hidden[c] or merged:
3085 merged = False
3086 continue
3087 if not first:
3088 fh.write(header_sep)
3089 first = False
3090 fh.write(header_close)
3091 unit = self.units[c]
3092 if not unit:
3093 unit = '-'
3094 if table_format[0] == 't':
3095 if latex_merge_std and stdev_col[c]:
3096 merged = True
3097 fh.write('\\multicolumn{2}{c}{%s}' % latex_unit(unit, latex_unit_package))
3098 elif center_columns:
3099 fh.write('\\multicolumn{1}{c}{%s}' % latex_unit(unit, latex_unit_package))
3100 else:
3101 fh.write('\\multicolumn{1}{l}{%s}' % latex_unit(unit, latex_unit_package))
3102 else:
3103 if align_columns and not table_format[0] in 'h':
3104 f = '%%-%ds' % widths[c]
3105 fh.write(f % unit)
3106 else:
3107 fh.write(unit)
3108 fh.write(header_end)
3109 # column numbers:
3110 if column_numbers is not None:
3111 first = True
3112 fh.write(header_start)
3113 for c in range(len(self.header)):
3114 if self.hidden[c]:
3115 continue
3116 if not first:
3117 fh.write(header_sep)
3118 first = False
3119 fh.write(header_close)
3120 i = c
3121 if column_numbers == 'num':
3122 i = c+1
3123 aa = index2aa(c, 'a')
3124 if column_numbers == 'AA':
3125 aa = index2aa(c, 'A')
3126 if table_format[0] == 't':
3127 if column_numbers == 'num' or column_numbers == 'index':
3128 fh.write('\\multicolumn{1}{l}{%d}' % i)
3129 else:
3130 fh.write('\\multicolumn{1}{l}{%s}' % aa)
3131 else:
3132 if column_numbers == 'num' or column_numbers == 'index':
3133 if align_columns:
3134 f = '%%%dd' % widths[c]
3135 fh.write(f % i)
3136 else:
3137 fh.write('%d' % i)
3138 else:
3139 if align_columns:
3140 f = '%%-%ds' % widths[c]
3141 fh.write(f % aa)
3142 else:
3143 fh.write(aa)
3144 fh.write(header_end)
3145 # header line:
3146 if header_line:
3147 if table_format[0] == 'm':
3148 fh.write('|')
3149 for c in range(len(self.header)):
3150 if self.hidden[c]:
3151 continue
3152 w = widths[c]+2
3153 if center_columns:
3154 fh.write(':' + (w-2)*'-' + ':|')
3155 elif formats[c][1] == '-':
3156 fh.write(w*'-' + '|')
3157 else:
3158 fh.write((w - 1)*'-' + ':|')
3159 fh.write('\n')
3160 elif table_format[0] == 't':
3161 fh.write(' \\hline \\\\[-2ex]\n')
3162 else:
3163 first = True
3164 fh.write(header_start.replace(' ', '-'))
3165 for c in range(len(self.header)):
3166 if self.hidden[c]:
3167 continue
3168 if not first:
3169 fh.write(header_sep.replace(' ', '-'))
3170 first = False
3171 fh.write(header_close)
3172 w = widths[c]
3173 fh.write(w*'-')
3174 fh.write(header_end.replace(' ', '-'))
3175 # start table data:
3176 if table_format[0] == 'h':
3177 fh.write('</thead>\n<tbody>\n')
3178 # data:
3179 for k in range(self.rows()):
3180 first = True
3181 merged = False
3182 fh.write(data_start)
3183 for c, f in enumerate(formats):
3184 if self.hidden[c] or merged:
3185 merged = False
3186 continue
3187 if not first:
3188 fh.write(data_sep)
3189 first = False
3190 if table_format[0] == 'h':
3191 if center_columns:
3192 fh.write(' align="center"')
3193 elif f[1] == '-':
3194 fh.write(' align="left"')
3195 else:
3196 fh.write(' align="right"')
3197 fh.write(data_close)
3198 if k >= len(self.data[c]) or self.data[c][k] is None or \
3199 (isinstance(self.data[c][k], (float, np.floating)) and m.isnan(self.data[c][k])):
3200 # missing data:
3201 if table_format[0] == 't' and latex_merge_std and stdev_col[c]:
3202 merged = True
3203 fh.write('\\multicolumn{2}{c}{%s}' % missing)
3204 elif align_columns:
3205 if f[1] == '-':
3206 fn = '%%-%ds' % widths[c]
3207 else:
3208 fn = '%%%ds' % widths[c]
3209 fh.write(fn % missing)
3210 else:
3211 fh.write(missing)
3212 else:
3213 # data value:
3214 try:
3215 ds = f % self.data[c][k]
3216 except ValueError:
3217 ds = missing
3218 except TypeError:
3219 ds = str(self.data[c][k])
3220 if not align_columns:
3221 ds = ds.strip()
3222 fh.write(ds)
3223 fh.write(data_end)
3224 # bottom line:
3225 if bottom_line:
3226 if table_format[0] == 't':
3227 fh.write(' \\hline\n')
3228 else:
3229 first = True
3230 fh.write(header_start.replace(' ', '-'))
3231 for c in range(len(self.header)):
3232 if self.hidden[c]:
3233 continue
3234 if not first:
3235 fh.write('-'*len(header_sep))
3236 first = False
3237 fh.write(header_close)
3238 w = widths[c]
3239 fh.write(w*'-')
3240 fh.write(header_end.replace(' ', '-'))
3241 # end table:
3242 fh.write(end_str)
3243 # close file:
3244 if own_file:
3245 fh.close()
3246 # write descriptions:
3247 if file_name is not None and descriptions_name:
3248 write_descriptions = False
3249 for c in range(len(self.descriptions)):
3250 if self.descriptions[c]:
3251 write_descriptions = True
3252 if write_descriptions:
3253 descr_path = file_name.with_name(file_name.stem +
3254 descriptions_name)
3255 if table_format[0] not in 'th': # neither tex nore html
3256 table_format = 'md'
3257 if len(descr_path.suffix) <= 1:
3258 descr_path = descr_path.with_suffix('.' + self.extensions[table_format])
3259 self.write_descriptions(descr_path, table_format=table_format,
3260 sections=sections,
3261 section_headings=section_headings,
3262 latex_unit_package=latex_unit_package,
3263 maxc=maxc)
3264 # return file name:
3265 return file_name
def write_file_stream(self, base_name, file_name, **kwargs):
    """Write table to file or stream and return appropriate file name.

    Parameters
    ----------
    base_name: str, Path, or stream
        If str or Path, path and basename of file.
        `file_name` and an extension are appended.
        If stream, write table data into this stream.
    file_name: str
        Name of file that is appended to `base_name`.
    kwargs:
        Arguments passed on to `TableData.write()`.
        In particular, 'table_format' is used to set the file extension
        that is appended to the returned `file_name`.

    Returns
    -------
    file_name: Path
        Path and full name of the written file in case of `base_name`
        being a string or Path. Otherwise, the file name and extension
        that should be appended to a base name.
    """
    if hasattr(base_name, 'write'):
        # writing into a stream: only report the name the file would get
        table_format = kwargs.get('table_format', None)
        if table_format is None or table_format == 'auto':
            table_format = 'csv'
        file_name = Path(file_name)
        file_name = file_name.with_suffix('.' + self.extensions[table_format])
        self.write(base_name, **kwargs)
        return file_name
    # `file_name` is appended to the last path component, not added as
    # a new component.  os.fspath() makes the string concatenation work
    # for Path objects as well as for plain strings.
    base_name = Path(os.fspath(base_name) + file_name)
    return self.write(base_name, **kwargs)
def __str__(self):
    """Return the table as a string, written with table format 'out'.
    """
    with StringIO() as buffer:
        self.write(buffer, table_format='out')
        text = buffer.getvalue()
    return text
3310 def write_descriptions(self, fh=sys.stdout, table_format=None,
3311 sections=None, section_headings=None,
3312 latex_unit_package=None, maxc=80):
3313 """Write column descriptions of the table to a file or stream.
3315 Parameters
3316 ----------
3317 fh: filename or stream
3318 If not a stream, the file with path `fh` is opened.
3319 If `fh` does not have an extension,
3320 the `table_format` is appended as an extension.
3321 Otherwise `fh` is used as a stream for writing.
3322 table_format: None or str
3323 The format to be used for output.
3324 One of 'md', 'tex', or 'html'.
3325 If None or 'auto' then the format is set to the extension
3326 of the filename given by `fh`.
3327 If `fh` is a stream the format is set to 'md'.
3328 sections: None or int
3329 Number of section levels to be printed.
3330 If `None` or 'auto' use default of selected `table_format`.
3331 section_headings: None or int
3332 If set, set header sections as headings with the top-level
3333 section at the level as specified. 0 is the top level.
3334 If False, just produce a nested list.
3335 latex_unit_package: None or 'siunitx' or 'SIunit'
3336 Translate units for the specified LaTeX package.
3337 If None set sub- and superscripts in text mode.
3338 If 'siunitx', also use `S` columns for numbers to align
3339 them on the decimal point.
3340 maxc: int
3341 Maximum character count for each line.
3342 """
# Returns the Path of the written file, or None if `fh` is a stream.
# Raises ValueError for a `table_format` other than 'md', 'html', 'tex'.
# NOTE(review): the nested-list layout is selected below by
# `section_headings is None`.  Passing False (as the docstring suggests)
# is not None and enters the headings branch, where it is used in integer
# arithmetic like 0.
# NOTE(review): `sections` is only checked against None before being used
# in min(); the documented value 'auto' is not handled in this function.
3343 # fix parameter:
3344 if table_format == 'auto':
3345 table_format = None
3346 if sections is None:
3347 sections = 1000
3348 nsecs = min(self.nsecs, sections)
# A non-stream `fh` is turned into a Path.  Without an explicit
# `table_format` the format is looked up from the file extension;
# with an explicit format, a missing or unknown extension is replaced
# by the extension registered for that format.
3349 # open file:
3350 own_file = False
3351 file_name = None
3352 if not hasattr(fh, 'write'):
3353 fh = Path(fh)
3354 ext = fh.suffix
3355 if table_format is None:
3356 if len(ext) > 1 and ext[1:] in self.ext_formats:
3357 table_format = self.ext_formats[ext[1:]]
3358 elif not ext or not ext[1:].lower() in self.ext_formats:
3359 fh = fh.with_suffix('.' + self.extensions[table_format])
3360 file_name = fh
3361 try:
3362 fh = open(os.fspath(fh), 'w')
3363 except AttributeError:
3364 fh = open(str(fh), 'w')
3365 own_file = True
# fall back to markdown if no format could be determined:
3366 if table_format is None:
3367 table_format = 'md'
# `headers` receives the header entries of the current column:
# `headers[0]` is written as the column label, `headers[k + 1]` as the
# section labels.  `prev_headers` keeps the entries of the previously
# written column, so that a section label is written again only when it
# (or a section processed before it in the reversed loop) has changed.
3368 # write descriptions:
3369 headers = ['']*(1 + nsecs)
3370 prev_headers = ['']*(1 + nsecs)
# markdown: sections are written as list items or as '#' headings,
# each visible column as a bold list item followed by its unit in
# brackets and the description wrapped by break_text().
3371 if table_format == 'md':
3372 for c in range(len(self.header)):
3373 headers[:len(self.header[c])] = self.header[c]
3374 if not self.hidden[c]:
3375 changed = False
3376 for k in reversed(range(nsecs)):
3377 if changed or prev_headers[k + 1] != headers[k + 1]:
3378 changed = True
3379 if section_headings is None:
3380 fh.write(f'{" "*2*(nsecs - k - 1)}- '
3381 f'{headers[k + 1]}\n')
3382 else:
3383 level = nsecs - k - 1 + section_headings + 1
3384 fh.write(f'\n{"#"*level} {headers[k + 1]}\n')
3385 prev_headers[k + 1] = headers[k + 1]
3386 indent = 2*nsecs if section_headings is None else 0
3387 fh.write(f'{" "*indent}- **{headers[0]}**')
3388 if self.units[c]:
3389 fh.write(f' [{self.units[c]}]')
3390 fh.write(' \n')
3391 break_text(fh, self.descriptions[c], maxc,
3392 indent=indent + 2)
3393 prev_headers[0] = headers[0]
# html: sections are written as <ul> lists or as <h?> headings.
# `level` counts the currently open <ul> elements minus one (-1: none);
# the lists that are still open are closed after the last column.
3394 elif table_format == 'html':
3395 level = -1
3396 for c in range(len(self.header)):
3397 headers[:len(self.header[c])] = self.header[c]
3398 if not self.hidden[c]:
3399 changed = False
3400 for k in reversed(range(nsecs)):
3401 if changed or prev_headers[k + 1] != headers[k + 1]:
3402 new_level = nsecs - k - 1
3403 if not changed:
3404 if section_headings is None:
3405 while level > new_level:
3406 fh.write(f'{" "*2*level}</ul>\n')
3407 level -= 1
3408 elif level >= 0:
3409 fh.write(f'{" "*2*level}</ul>\n')
3410 level -= 1
3411 changed = True
3412 if section_headings is None:
3413 while level < new_level:
3414 level += 1
3415 fh.write(f'{" "*2*level}<ul>\n')
3416 fh.write(f'{" "*2*(level + 1)}<li><b>{headers[k + 1]}</b></li>\n')
3417 else:
3418 fh.write(f'\n<h{new_level + section_headings + 1}>{headers[k + 1]}</h{new_level + section_headings + 1}>\n')
3419 prev_headers[k + 1] = headers[k + 1]
3420 if changed:
3421 level += 1
3422 fh.write(f'{" "*2*level}<ul>\n')
3424 fh.write(f'{" "*2*(level + 1)}<li><b>{headers[0]}</b>')
3425 if self.units[c]:
3426 fh.write(f'[{self.units[c]}]')
3427 fh.write('<br>\n')
3428 break_text(fh, self.descriptions[c], maxc,
3429 indent=2*(level + 1))
3430 fh.write(f'{" "*2*(level + 1)}</li>\n')
3431 prev_headers[0] = headers[0]
3432 while level >= 0:
3433 fh.write(f'{" "*2*level}</ul>\n')
3434 level -= 1
# tex: same bookkeeping as for html, using enumerate environments or
# the sectioning commands listed in `headings`; units are translated
# by latex_unit().
# NOTE(review): `headings` has five entries, so
# `headings[new_level + section_headings]` raises IndexError for an
# index of 5 or more.
3435 elif table_format == 'tex':
3436 headings = [r'\section', r'\subsection', r'\subsubsection',
3437 r'\paragraph', r'\subparagraph']
3438 level = -1
3439 for c in range(len(self.header)):
3440 headers[:len(self.header[c])] = self.header[c]
3441 if not self.hidden[c]:
3442 changed = False
3443 for k in reversed(range(nsecs)):
3444 if changed or prev_headers[k + 1] != headers[k + 1]:
3445 new_level = nsecs - k - 1
3446 if not changed:
3447 if section_headings is None:
3448 while level > new_level:
3449 fh.write(f'{" "*2*level}\\end{{enumerate}}\n')
3450 level -= 1
3451 elif level >= 0:
3452 fh.write(f'{" "*2*level}\\end{{enumerate}}\n')
3453 level -= 1
3454 changed = True
3455 if section_headings is None:
3456 while level < new_level:
3457 level += 1
3458 fh.write(f'{" "*2*level}\\begin{{enumerate}}\n')
3459 fh.write(f'{" "*2*(level + 1)}\\item \\textbf{{{headers[k + 1]}}}\n')
3460 else:
3461 fh.write(f'\n{headings[new_level + section_headings]}{{{headers[k + 1]}}}\n')
3462 prev_headers[k + 1] = headers[k + 1]
3463 if changed:
3464 level += 1
3465 fh.write(f'{" "*2*level}\\begin{{enumerate}}\n')
3466 fh.write(f'{" "*2*(level + 1)}\\item \\textbf{{{headers[0]}}}')
3467 if self.units[c]:
3468 fh.write(f' [{latex_unit(self.units[c], latex_unit_package)}]')
3469 fh.write('\n')
3470 break_text(fh, self.descriptions[c], maxc,
3471 indent=2*(level + 1))
3472 prev_headers[0] = headers[0]
3473 while level >= 0:
3474 fh.write(f'{" "*2*level}\\end{{enumerate}}\n')
3475 level -= 1
# NOTE(review): when this error is raised, a file opened above has
# already been created and is not closed by this function.
3476 else:
3477 raise ValueError(f'File format "{table_format}" not supported for writing column descriptions')
3478 # close file:
3479 if own_file:
3480 fh.close()
3481 # return file name:
3482 return file_name
def load_descriptions(fh):
    """Load column descriptions from file or stream.

    Loading is not implemented yet: a warning is printed and the
    markdown headings and list items are scanned, but the parsed
    section names, labels and units are neither stored nor returned.

    Parameters
    ----------
    fh: str, Path, or stream
        If not a stream, the file with path `fh` is opened for reading.
    """
    # NOTE(review): this function takes no `self` argument - confirm
    # whether it is meant to be called on the class rather than on an
    # instance.
    # open file:
    own_file = False
    if not hasattr(fh, 'readline'):
        try:
            fh = open(os.fspath(fh), 'r')
        except AttributeError:
            fh = open(str(fh), 'r')
        own_file = True
    try:
        # read file:
        print('WARNING: load_descriptions() not implemented yet')
        for line in fh:
            if line.startswith('#'):
                # heading: '## section name'
                heading_level = len(line.split()[0])
                section_name = line[heading_level + 1:]
            elif line.startswith('-'):
                # column item: '- **label** [unit]'
                lp = line.split('**')
                if len(lp) < 3:
                    # plain list item without bold label, e.g. a
                    # section entry of a nested list
                    continue
                label = lp[1]
                unit = lp[-1].strip().lstrip('[').rstrip(']')
    finally:
        # close file, also if scanning failed:
        if own_file:
            fh.close()
3514 def load(self, fh, missing=default_missing_inputs, sep=None, stop=None):
3515 """Load table from file or stream.
3517 File type and properties are automatically inferred.
3519 Parameters
3520 ----------
3521 fh: str, Path, or stream
3522 If not a stream, the file with path `fh` is opened for reading.
3523 missing: str or list of str
3524 Missing data are indicated by this string and
3525 are translated to np.nan.
3526 sep: str or None
3527 Column separator.
3528 stop: str or None
3529 If a line matches `stop`, stop reading the file. `stop`
3530 can be an empty string to stop reading at the first empty
3531 line.
3533 Raises
3534 ------
3535 FileNotFoundError:
3536 If `fh` is a path that does not exist.
3538 """
# Helper: split one header (key) line into column labels and positions.
# - Without separator, labels are runs of text separated by at least
#   two spaces; positions are character offsets in the stripped line.
# - For 'csv', the line is split at `sep`, empty fields are dropped and
#   positions are field indices.
# - Otherwise labels are runs of non-separator characters and positions
#   are character offsets.
# For 'tex', positions are recomputed as column counts, advancing by n
# for a \multicolumn{n}{..}{label} entry.  For all other formats outer
# '|' (non-csv only), surrounding white space and enclosing double
# quotes are removed from the labels.
3540 def read_key_line(line, sep, table_format):
3541 if sep is None:
3542 cols, indices = zip(*[(m.group(0), m.start()) for m in re.finditer(r'( ?[\S]+)+(?=[ ][ ]+|\Z)', line.strip())])
3543 elif table_format == 'csv':
3544 cols, indices = zip(*[(c.strip(), i) for i, c in enumerate(line.strip().split(sep)) if c.strip()])
3545 else:
3546 seps = r'[^'+re.escape(sep)+']+'
3547 cols, indices = zip(*[(m.group(0), m.start()) for m in re.finditer(seps, line.strip())])
3548 colss = []
3549 indicess = []
3550 if table_format == 'tex':
3551 i = 0
3552 for c in cols:
3553 if 'multicolumn' in c:
3554 fields = c.split('{')
3555 n = int(fields[1].strip().rstrip('}').rstrip())
3556 colss.append(fields[3].strip().rstrip('}').rstrip())
3557 indicess.append(i)
3558 i += n
3559 else:
3560 colss.append(c.strip())
3561 indicess.append(i)
3562 i += 1
3563 else:
3564 for k, (c, i) in enumerate(zip(cols, indices)):
3565 if table_format != 'csv':
3566 if k == 0:
3567 c = c.lstrip('|')
3568 if k == len(cols) - 1:
3569 c = c.rstrip('|')
3570 cs = c.strip()
3571 if len(cs) >= 2 and cs[0] == '"' and cs[-1] == '"':
3572 cs = cs.strip('"')
3573 colss.append(cs)
3574 indicess.append(i)
3575 return colss, indicess
# Helper: parse one data line, add its values to the table via
# self.add() and update the per-column statistics that are used at the
# end of load() for choosing the column formats:
#   post  - largest number of digits after the decimal point
#   precd - largest count of digits with leading zeros of the integer
#           part and trailing zeros of the fraction removed
#   alld  - largest width of an entry in characters
#   numc  - set to True once a number was read in this column
#   exped - set to False once a number without 'e' was read
#   fixed - set to False once the number of decimals differs
#   strf  - set to True once a non-numeric, non-missing string was read
#   nans  - the missing-data string encountered in this column
3577 def read_data_line(line, sep, post, precd, alld, numc, exped,
3578 fixed, strf, missing, nans):
3579 # read line:
3580 cols = []
3581 if sep is None:
3582 cols = [m.group(0) for m in re.finditer(r'\S+', line.strip())]
3583 else:
3584 if sep.isspace():
3585 seps = r'[^'+re.escape(sep)+']+'
3586 cols = [m.group(0) for m in re.finditer(seps, line.strip())]
3587 else:
3588 cols = line.split(sep)
3589 if len(cols) > 0 and len(cols[0]) == 0:
3590 cols = cols[1:]
3591 if len(cols) > 0 and len(cols[-1]) == 0:
3592 cols = cols[:-1]
3593 if len(cols) > 0:
3594 cols[0] = cols[0].lstrip('|').lstrip()
3595 cols[-1] = cols[-1].rstrip('|').rstrip()
3596 cols = [c.strip() for c in cols if c != '|']
3597 # read columns:
3598 for k, c in enumerate(cols):
3599 try:
3600 v = float(c)
3601 ad = 0
3602 ve = c.split('e')
3603 if len(ve) <= 1:
3604 exped[k] = False
3605 else:
3606 ad = len(ve[1])+1
3607 vc = ve[0].split('.')
3608 ad += len(vc[0])
3609 prec = len(vc[0].lstrip('-').lstrip('+').lstrip('0'))
3610 if len(vc) == 2:
3611 if numc[k] and post[k] != len(vc[1]):
3612 fixed[k] = False
3613 if post[k] < len(vc[1]):
3614 post[k] = len(vc[1])
3615 ad += len(vc[1])+1
3616 prec += len(vc[1].rstrip('0'))
3617 if precd[k] < prec:
3618 precd[k] = prec
3619 if alld[k] < ad:
3620 alld[k] = ad
3621 numc[k] = True
3622 except ValueError:
3623 if c in missing:
3624 v = np.nan
3625 nans[k] = c
3626 elif len(c) == 0 and not strf[k]:
3627 v = np.nan
3628 else:
3629 strf[k] = True
3630 if alld[k] < len(c):
3631 alld[k] = len(c)
3632 if len(c) >= 2 and c[0] == '"' and c[-1] == '"':
3633 v = c.strip('"')
3634 else:
3635 v = c
3636 self.add(v, k)
3637 self.fill_data()
# All existing content of the table is discarded before loading.
3639 # initialize:
3640 if isinstance(missing, str):
3641 missing = [missing]
3642 self.data = []
3643 self.ndim = 2
3644 self.shape = (0, 0)
3645 self.header = []
3646 self.nsecs = 0
3647 self.units = []
3648 self.formats = []
3649 self.descriptions = []
3650 self.hidden = []
3651 self.setcol = 0
3652 self.addcol = 0
# NOTE(review): a file opened here is closed only at the very end of
# this function, not if an exception is raised in between.
3653 # open file:
3654 own_file = False
3655 if not hasattr(fh, 'readline'):
3656 try:
3657 fh = open(os.fspath(fh), 'r')
3658 except AttributeError:
3659 fh = open(str(fh), 'r')
3660 own_file = True
# The first lines are sorted into `key` (header lines) and `data`
# (data lines) while the table format is guessed from markers:
# '\begin{tabular' (tex), a leading '#' (dat), 'RTH'/'RTD' prefixes
# (rtai), and '|--'/'|:-' ruler lines (ascii or md).  The scan stops at
# `stop`, at an empty line, or once more than 5 data lines are collected.
3661 # read inital lines of file:
3662 key = []
3663 data = []
3664 target = data
3665 comment = False
3666 table_format='dat'
3667 for line in fh:
3668 line = line.rstrip()
3669 if line == stop:
3670 break;
3671 if line:
3672 if r'\begin{tabular' in line:
3673 table_format='tex'
3674 target = key
3675 continue
3676 if table_format == 'tex':
3677 if r'\end{tabular' in line:
3678 break
3679 if r'\hline' in line:
3680 if key:
3681 target = data
3682 continue
3683 line = line.rstrip(r'\\')
3684 if line[0] == '#':
3685 comment = True
3686 table_format='dat'
3687 target = key
3688 line = line.lstrip('#')
3689 elif comment:
3690 target = data
3691 if line[0:3] == 'RTH':
3692 target = key
3693 line = line[3:]
3694 table_format='rtai'
3695 elif line[0:3] == 'RTD':
3696 target = data
3697 line = line[3:]
3698 table_format='rtai'
3699 if (line[0:3] == '|--' or line[0:3] == '|:-') and \
3700 (line[-3:] == '--|' or line[-3:] == '-:|'):
3701 if not data and not key:
3702 table_format='ascii'
3703 target = key
3704 continue
3705 elif not key:
3706 table_format='md'
3707 key = data
3708 data = []
3709 target = data
3710 continue
3711 elif not data:
3712 target = data
3713 continue
3714 else:
3715 break
3716 target.append(line)
3717 else:
3718 break
3719 if len(data) > 5:
3720 break
# For each candidate separator the number of fields per sampled data
# line is counted.  Among the candidates with a median count above 1.5
# the one with the smallest standard deviation of the counts is chosen;
# if no candidate yields at least two columns, `sep` is set to None and
# a single column is assumed.
3721 # find column separator of data and number of columns:
3722 col_seps = ['|', ',', ';', ':', '\t', '&', None]
3723 if sep is not None:
3724 col_seps = [sep]
3725 colstd = np.zeros(len(col_seps))
3726 colnum = np.zeros(len(col_seps), dtype=int)
3727 for k, sep in enumerate(col_seps):
3728 cols = []
3729 s = 5 if len(data) >= 8 else len(data) - 3
3730 if s < 0 or key:
3731 s = 0
3732 for line in data[s:]:
3733 cs = line.strip().split(sep)
3734 if not cs[0]:
3735 cs = cs[1:]
3736 if cs and not cs[-1]:
3737 cs = cs[:-1]
3738 cols.append(len(cs))
3739 colstd[k] = np.std(cols)
3740 colnum[k] = np.median(cols)
3741 if np.max(colnum) < 2:
3742 sep = None
3743 colnum = 1
3744 else:
3745 ci = np.where(np.array(colnum) > 1.5)[0]
3746 ci = ci[np.argmin(colstd[ci])]
3747 sep = col_seps[ci]
3748 colnum = int(colnum[ci])
3749 # fix key:
3750 if not key and sep is not None and sep in ',;:\t|':
3751 table_format = 'csv'
3752 # read key:
3753 key_cols = []
3754 key_indices = []
3755 for line in key:
3756 cols, indices = read_key_line(line, sep, table_format)
3757 key_cols.append(cols)
3758 key_indices.append(indices)
# Without marked header lines, leading data lines whose first entry is
# not a number are taken as header lines.  If every collected line
# qualifies, only the first one is used as the header.
3759 if not key_cols:
3760 # no obviously marked table key:
3761 key_num = 0
3762 for line in data:
3763 cols, indices = read_key_line(line, sep, table_format)
3764 numbers = 0
3765 for c in cols:
3766 try:
3767 v = float(c)
3768 numbers += 1
3769 except ValueError:
3770 break
3771 if numbers == 0:
3772 key_cols.append(cols)
3773 key_indices.append(indices)
3774 key_num += 1
3775 else:
3776 break
3777 if len(key_cols) == len(data):
3778 key_num = 1
3779 key_cols = key_cols[:1]
3780 key_indices = key_indices[:1]
3781 colnum = len(key_cols[0])
3782 data = data[key_num:]
# `kr` indexes the header line that is interpreted next, starting with
# the last one.  A last line of consecutive integers or consecutive
# alphabetical indices is a column-number row and is skipped.
3783 kr = len(key_cols) - 1
3784 # check for key with column indices:
3785 if kr >= 0:
3786 cols = key_cols[kr]
3787 numrow = True
3788 try:
3789 pv = int(cols[0])
3790 for c in cols[1:]:
3791 v = int(c)
3792 if v != pv+1:
3793 numrow = False
3794 break
3795 pv = v
3796 except ValueError:
3797 try:
3798 pv = aa2index(cols[0])
3799 for c in cols[1:]:
3800 v = aa2index(c)
3801 if v != pv+1:
3802 numrow = False
3803 break
3804 pv = v
3805 except ValueError:
3806 numrow = False
3807 if numrow:
3808 kr -= 1
# A header line with as many entries as the line before it is taken as
# the line of units.
3809 # check for unit line:
3810 units = None
3811 if kr > 0 and len(key_cols[kr]) == len(key_cols[kr - 1]):
3812 units = key_cols[kr]
3813 kr -= 1
# Without a separate unit line, units are split off the labels:
# 'label (unit)' or 'label/unit'.
3814 # column labels:
3815 if kr >= 0:
3816 if units is None:
3817 # units may be part of the label:
3818 labels = []
3819 units = []
3820 for c in key_cols[kr]:
3821 if c[-1] == ')':
3822 lu = c[:-1].split('(')
3823 if len(lu) >= 2:
3824 labels.append(lu[0].strip())
3825 units.append('('.join(lu[1:]).strip())
3826 continue
3827 lu = c.split('/')
3828 if len(lu) >= 2:
3829 labels.append(lu[0].strip())
3830 units.append('/'.join(lu[1:]).strip())
3831 else:
3832 labels.append(c)
3833 units.append('')
3834 else:
3835 labels = key_cols[kr]
3836 indices = key_indices[kr]
3837 # init table columns:
3838 for k in range(colnum):
3839 self.append(labels[k], units[k], '%g')
# Section labels of the remaining header lines are assigned to the
# column whose label starts at the same position.
3840 # read in sections:
3841 while kr > 0:
3842 kr -= 1
3843 for sec_label, sec_inx in zip(key_cols[kr], key_indices[kr]):
3844 col_inx = indices.index(sec_inx)
3845 self.header[col_inx].append(sec_label)
3846 if self.nsecs < len(self.header[col_inx]) - 1:
3847 self.nsecs = len(self.header[col_inx]) - 1
# First the data lines already collected during the initial scan are
# parsed, then the rest of the file.
3848 # read data:
3849 post = np.zeros(colnum, dtype=int)
3850 precd = np.zeros(colnum, dtype=int)
3851 alld = np.zeros(colnum, dtype=int)
3852 numc = [False] * colnum
3853 exped = [True] * colnum
3854 fixed = [True] * colnum
3855 strf = [False] * colnum
3856 nans = [None] * colnum # for each column the missing string that was encountered.
3857 for line in data:
3858 read_data_line(line, sep, post, precd, alld, numc, exped, fixed,
3859 strf, missing, nans)
3860 # read remaining data:
3861 for line in fh:
3862 line = line.rstrip()
3863 if line == stop:
3864 break;
3865 if table_format == 'tex':
3866 if r'\end{tabular' in line or r'\hline' in line:
3867 break
3868 line = line.rstrip(r'\\')
3869 if (line[0:3] == '|--' or line[0:3] == '|:-') and \
3870 (line[-3:] == '--|' or line[-3:] == '-:|'):
3871 break
3872 if line[0:3] == 'RTD':
3873 line = line[3:]
3874 read_data_line(line, sep, post, precd, alld, numc, exped, fixed,
3875 strf, missing, nans)
# Column formats are derived from the collected statistics:
# left-aligned '%-Ns' for columns containing strings, '%N.Me' if all
# numbers were written with exponent, '%N.Mf' if all numbers had the
# same number of decimals, and '%N.Mg' otherwise.
3876 # set formats:
3877 for k in range(len(alld)):
3878 if strf[k]:
3879 self.set_format(k, '%%-%ds' % alld[k])
3880 # make sure all elements are strings:
3881 for i in range(len(self.data[k])):
3882 if self.data[k][i] is np.nan:
3883 self.data[k][i] = nans[k]
3884 else:
3885 self.data[k][i] = str(self.data[k][i])
3886 elif exped[k]:
3887 self.set_format(k, '%%%d.%de' % (alld[k], post[k]))
3888 elif fixed[k]:
3889 self.set_format(k, '%%%d.%df' % (alld[k], post[k]))
3890 else:
3891 self.set_format(k, '%%%d.%dg' % (alld[k], precd[k]))
3892 # close file:
3893 if own_file:
3894 fh.close()
def add_write_table_config(cfg, table_format=None, delimiter=None,
                           unit_style=None, column_numbers=None,
                           sections=None, align_columns=None,
                           shrink_width=True, missing='-',
                           center_columns=False,
                           latex_label_command='',
                           latex_merge_std=False):
    """Add parameter specifying how to write a table to a file as a new
    section to a configuration.

    Each argument sets the default value of one configuration entry.

    Parameters
    ----------
    cfg: ConfigFile
        The configuration.
    table_format: None or str
        Default of 'fileFormat'. 'auto' if not specified.
    delimiter: None or str
        Default of 'fileDelimiter'. 'auto' if not specified.
    unit_style: None or str
        Default of 'fileUnitStyle'. 'auto' if not specified.
    column_numbers: None or str
        Default of 'fileColumnNumbers'. 'none' if not specified.
    sections: None or int or str
        Default of 'fileSections'. 'auto' if None.
        A value of 0 is kept.
    align_columns: None or bool or str
        Default of 'fileAlignColumns'. 'auto' if None.
        A value of False is kept.
    shrink_width: bool
        Default of 'fileShrinkColumnWidth'.
    missing: str
        Default of 'fileMissing'.
    center_columns: bool
        Default of 'fileCenterColumns'.
    latex_label_command: str
        Default of 'fileLaTeXLabelCommand'.
    latex_merge_std: bool
        Default of 'fileLaTeXMergeStd'.
    """
    # 0 and False are meaningful settings for these two parameters,
    # so only None is replaced by 'auto':
    if sections is None:
        sections = 'auto'
    if align_columns is None:
        align_columns = 'auto'
    cfg.add_section('File format for storing analysis results:')
    cfg.add('fileFormat', table_format or 'auto', '', 'Default file format used to store analysis results.\nOne of %s.' % ', '.join(TableData.formats))
    cfg.add('fileDelimiter', delimiter or 'auto', '', 'String used to separate columns or "auto".')
    cfg.add('fileUnitStyle', unit_style or 'auto', '', 'Add units as extra row ("row"), add units to header label separated by "/" ("header"), do not print out units ("none"), or "auto".')
    cfg.add('fileColumnNumbers', column_numbers or 'none', '', 'Add line with column indices ("index", "num", "aa", "AA", or "none")')
    cfg.add('fileSections', sections, '', 'Maximum number of section levels or "auto"')
    cfg.add('fileAlignColumns', align_columns, '', 'If True, write all data of a column using the same width, if False write the data without any white space, or "auto".')
    cfg.add('fileShrinkColumnWidth', shrink_width, '', 'Allow to make columns narrower than specified by the corresponding format strings.')
    cfg.add('fileMissing', missing, '', 'String used to indicate missing data values.')
    cfg.add('fileCenterColumns', center_columns, '', 'Center content of all columns instead of left align columns of strings and right align numbers (markdown, html, and latex).')
    cfg.add('fileLaTeXLabelCommand', latex_label_command, '', 'LaTeX command name for formatting column labels of the table header.')
    cfg.add('fileLaTeXMergeStd', latex_merge_std, '', 'Merge header of columns with standard deviations with previous column (LaTeX tables only).')
def write_table_args(cfg):
    """Translate a configuration into key-word arguments for writing
    a table to a file.

    The returned dictionary can be passed as key-word arguments to
    TableData.write().

    Parameters
    ----------
    cfg: ConfigFile
        The configuration.

    Returns
    -------
    a: dict
        Dictionary with names of arguments of the `TableData.write`
        function and their values as supplied by `cfg`.
    """
    # argument name of TableData.write() -> key in the configuration:
    arg_keys = {
        'table_format': 'fileFormat',
        'delimiter': 'fileDelimiter',
        'unit_style': 'fileUnitStyle',
        'column_numbers': 'fileColumnNumbers',
        'sections': 'fileSections',
        'align_columns': 'fileAlignColumns',
        'shrink_width': 'fileShrinkColumnWidth',
        'missing': 'fileMissing',
        'center_columns': 'fileCenterColumns',
        'latex_label_command': 'fileLaTeXLabelCommand',
        'latex_merge_std': 'fileLaTeXMergeStd',
    }
    args = cfg.map(arg_keys)
    # the number of sections is an integer unless it is left at 'auto':
    if 'sections' in args and args['sections'] != 'auto':
        args['sections'] = int(args['sections'])
    return args
def latex_unit(unit, unit_package=None):
    """Translate unit string into LaTeX code.

    Parameters
    ----------
    unit: str
        String denoting a unit.
    unit_package: None or 'siunitx' or 'SIunit'
        Translate unit string for the specified LaTeX package
        (case insensitive).
        If None set sub- and superscripts in text mode.

    Returns
    -------
    unit: str
        Unit string as valid LaTeX code.
        With a unit package, a `unit` that already contains a
        backslash is returned unchanged.

    Raises
    ------
    ValueError:
        Unknown `unit_package`.
    """
    si_prefixes = {'y': '\\yocto',
                   'z': '\\zepto',
                   'a': '\\atto',
                   'f': '\\femto',
                   'p': '\\pico',
                   'n': '\\nano',
                   'u': '\\micro',
                   'm': '\\milli',
                   'c': '\\centi',
                   'd': '\\deci',
                   'h': '\\hecto',
                   'k': '\\kilo',
                   'M': '\\mega',
                   'G': '\\giga',
                   'T': '\\tera',
                   'P': '\\peta',
                   'E': '\\exa',
                   'Z': '\\zetta',
                   'Y': '\\yotta'}
    # NOTE: 'C' used to be listed twice (coulomb and celsius).  In a
    # dict literal the last entry wins, so 'C' always translated to
    # celsius; this behavior is kept and the dead entry removed.
    si_units = {'m': '\\metre',
                'g': '\\gram',
                's': '\\second',
                'A': '\\ampere',
                'K': '\\kelvin',
                'mol': '\\mole',
                'M': '\\mole',
                'cd': '\\candela',
                'Hz': '\\hertz',
                'N': '\\newton',
                'Pa': '\\pascal',
                'J': '\\joule',
                'W': '\\watt',
                'V': '\\volt',
                'F': '\\farad',
                'O': '\\ohm',
                'S': '\\siemens',
                'Wb': '\\weber',
                'T': '\\tesla',
                'H': '\\henry',
                'C': '\\celsius',
                'lm': '\\lumen',
                'lx': '\\lux',
                'Bq': '\\becquerel',
                'Gy': '\\gray',
                'Gv': '\\gray',    # former misspelling, kept for compatibility
                'Sv': '\\sievert'}
    other_units = {"'": '\\arcminute',
                   "''": '\\arcsecond',
                   'a': '\\are',
                   'd': '\\dday',
                   'eV': '\\electronvolt',
                   'ha': '\\hectare',
                   'h': '\\hour',
                   'L': '\\liter',
                   'l': '\\litre',
                   'min': '\\minute',
                   'Np': '\\neper',
                   'rad': '\\rad',
                   't': '\\ton',
                   '%': '\\%'}
    unit_powers = {'^2': '\\squared',
                   '^3': '\\cubed',
                   '/': '\\per',
                   '^-1': '\\power{}{-1}',
                   '^-2': '\\rpsquared',
                   '^-3': '\\rpcubed'}
    if unit_package is None:
        # without any unit package:
        units = ''
        k = 0
        while k < len(unit):
            if unit[k] == '^':
                # superscript: sign and digits following the '^'
                j = k + 1
                while j < len(unit) and (unit[j] == '-' or unit[j].isdigit()):
                    j += 1
                units = units + '$^{\\text{' + unit[k + 1:j] + '}}$'
                k = j
            elif unit[k] == '_':
                # subscript: everything up to the next white space
                j = k + 1
                while j < len(unit) and not unit[j].isspace():
                    j += 1
                units = units + '$_{\\text{' + unit[k + 1:j] + '}}$'
                k = j
            else:
                units = units + unit[k]
                k += 1
    elif unit_package.lower() in ['siunit', 'siunitx']:
        # use SIunit package:
        if '\\' in unit:   # this string is already translated!
            return unit
        units = ''
        # The unit string is translated from its end towards its
        # beginning; `j` is the end of the part not yet translated.
        j = len(unit)
        while j > 0:
            # try the longest (3 characters) token first:
            for k in range(-3, 0):
                if j + k < 0:
                    continue
                uss = unit[j + k:j]
                if uss in unit_powers:
                    units = unit_powers[uss] + units
                    break
                elif uss in other_units:
                    units = other_units[uss] + units
                    break
                elif uss in si_units:
                    units = si_units[uss] + units
                    j = j + k
                    k = 0
                    # a single character in front of an SI unit
                    # may be an SI prefix:
                    if j - 1 >= 0:
                        uss = unit[j - 1:j]
                        if uss in si_prefixes:
                            units = si_prefixes[uss] + units
                            k = -1
                    break
            else:
                # nothing matched: copy a single character verbatim
                k = -1
                units = unit[j + k:j] + units
            j = j + k
        if unit_package.lower() == 'siunitx':
            units = '\\unit{' + units + '}'
    else:
        raise ValueError(f'latex_unit(): invalid unit_package={unit_package}!')
    return units
def break_text(stream, text, maxc=80, indent=0):
    """Write text to stream and break lines at maximum character count.

    The text is split at white space. Words are never split: a single
    word that does not fit is written on a line of its own, even if
    that line then exceeds `maxc`.

    Parameters
    ----------
    stream: io
        Stream into which the text is written.
    text: str
        The text to be written to the stream.
    maxc: int
        Maximum character count for each line, including the indent.
    indent: int
        Number of characters each line is indented.
    """
    nc = indent    # number of characters on the current line
    nw = 0         # number of words on the current line
    stream.write(' '*indent)
    for word in text.split():
        # Break only if the line already holds a word (otherwise an
        # empty line would be produced) and if the word together with
        # its separating space does not fit anymore:
        if nw > 0 and nc + 1 + len(word) > maxc:
            stream.write('\n')
            stream.write(' '*indent)
            nc = indent
            nw = 0
        if nw > 0:
            stream.write(' ')
            nc += 1
        stream.write(word)
        nc += len(word)
        nw += 1
    stream.write('\n')
def index2aa(n, a='a'):
    """Convert an integer into an alphabetical representation.

    The integer number is converted into 'a', 'b', 'c', ..., 'z',
    'aa', 'ab', 'ac', ..., 'az', 'ba', 'bb', ...

    Inspired by https://stackoverflow.com/a/37604105

    Parameters
    ----------
    n: int
        A non-negative integer to be converted into alphabetical
        representation.
    a: str ('a' or 'A')
        Use upper or lower case characters.

    Returns
    -------
    ns: str
        Alphabetical representation of an integer.

    Raises
    ------
    ValueError:
        `n` is negative.
    """
    if n < 0:
        # the recursive formulation never terminated for negative numbers
        raise ValueError('index2aa(): n=%d must not be negative.' % n)
    letters = []
    while True:
        n, r = divmod(n, 26)
        letters.append(chr(ord(a) + r))
        if n == 0:
            break
        # there is no zero digit: 'aa' directly follows 'z'
        n -= 1
    return ''.join(reversed(letters))
def aa2index(s):
    """Convert an alphabetical representation to an index.

    The alphabetical representation 'a', 'b', 'c', ..., 'z',
    'aa', 'ab', 'ac', ..., 'az', 'ba', 'bb', ...
    is converted to an index starting with 0.
    Upper case characters are treated like lower case ones.

    Parameters
    ----------
    s: str
        Alphabetical representation of an index.

    Returns
    -------
    index: int
        The corresponding index.

    Raises
    ------
    ValueError:
        Invalid character in input string.
    """
    first = ord('a')
    base = ord('z') - first + 1
    total = 0
    for ch in s.lower():
        digit = ord(ch) - first
        if digit < 0 or digit >= base:
            raise ValueError('invalid character "%s" in string.' % ch)
        # each character is a digit in the range 1 to 26:
        total = total*base + digit + 1
    return total - 1
class IndentStream(object):
    """Wrap an output stream and indent every line written to it.

    Each line that is started on the wrapped stream is preceded by
    `indent` spaces. Attributes not defined by this class are looked
    up on the wrapped stream.
    """

    def __init__(self, stream, indent=4):
        self.stream = stream
        self.indent = indent
        # True if the next write starts a new line that needs an indent:
        self.pending = True

    def __getattr__(self, attr_name):
        # only called for attributes not found on the wrapper itself
        return getattr(self.stream, attr_name)

    def write(self, data):
        """Write `data` to the wrapped stream, indenting each line."""
        if not data:
            return
        pad = ' '*self.indent
        if self.pending:
            self.stream.write(pad)
            self.pending = False
        body = data.rstrip('\n')
        trailing = data[len(body):]
        if body:
            # indent all lines started inside of the data:
            self.stream.write(('\n' + pad).join(body.split('\n')))
        if trailing:
            # the indent following trailing newlines is postponed
            # until the next write:
            self.stream.write(trailing)
            self.pending = True

    def flush(self):
        """Flush the wrapped stream."""
        self.stream.flush()
def main():
    """Demonstrate the usage of `TableData`.

    Sets up an example table with nested headers, units, formats and
    column descriptions, fills it with fixed and random numbers, and
    prints to standard output: the table in each of the formats
    listed in `TableData.formats`, the column descriptions, and the
    results of some aggregate, statistics and groupby calls.
    """
    # setup a table:
    df = TableData()
    df.append(["data", "specimen", "ID"], "", "%-s", value=list('ABCBAACB'))
    df.append("size", "m", "%6.2f", value=[2.34, 56.7, 8.9])
    df.append("full weight", "kg", "%.0f", value=122.8)
    df.append_section("all measures")
    df.append("speed", "m/s", "%.3g", value=98.7)
    df.append("median jitter", "mm", "%.1f", value=23)
    df.append("size", "g", "%.2e", value=1.234)
    df.set_descriptions({'ID': 'A unique identifier of a snake.',
                         'size': 'The total length of each snake.',
                         'full weight': 'Weight of each snake',
                         'speed': 'Maximum speed the snake can climb a tree.',
                         'median jitter': 'The jitter around a given path the snake should follow.',
                         'all measures>size': 'Weight of mouse the snake has eaten before.',
                         })
    df.add(np.nan, 2)  # single value
    df.add([0.543, 45, 1.235e2])  # remaining row
    df.add([43.21, 6789.1, 3405, 1.235e-4], 2)  # next row
    a = 0.5*np.arange(1, 6)*np.random.randn(5, 5) + 10.0 + np.arange(5)
    df.add(a.T, 1)  # rest of table
    df.insert('median jitter', 's.d.', 'm/s', '%.3g',
              'Standard deviation of all speeds',
              value=2*np.random.rand(df.rows()))

    # write out in all formats:
    for tf in TableData.formats:
        print(' - `%s`: %s' % (tf, TableData.descriptions[tf]))
        print(' ```')
        iout = IndentStream(sys.stdout, 4+2)
        df.write(iout, table_format=tf, latex_unit_package='siunitx',
                 latex_merge_std=True)
        print(' ```')
        print()

    # write descriptions:
    for tf in ['md', 'html', 'tex']:
        df.write_descriptions(table_format=tf, maxc=40)
        print()

    # aggregate demos:
    print(df)
    print(df.aggregate(np.mean, numbers_only=True))
    print(df.aggregate(dict(count=len, maximum=np.max), numbers_only=True))
    print(df.aggregate([np.mean, len, max],
                       ['size', 'full weight', 'speed'], 'statistics',
                       remove_nans=True, single_row=False))
    # the quantiles need to match the column labels:
    print(df.aggregate({('25%', '50%', '75%'):
                        (np.quantile, ([0.25, 0.5, 0.75],))},
                       numbers_only=True))

    print(df.statistics(single_row=False))
    print(df.statistics(single_row=True, remove_nans=True))
    print(df.statistics(remove_nans=True, by='ID'))

    # groupby demo:
    for name, values in df.groupby('ID'):
        print(name)
        print(values)
        print()

    # aggregate on groups demo:
    print(df.aggregate(np.mean, by='ID'))
    print()

    # write descriptions without section headings:
    df.write_descriptions(table_format='md', section_headings=0)
    print()
# Run the demo only when this module is executed as a script:
if __name__ == '__main__':
    main()