Coverage for src / thunderlab / tabledata.py: 86%
2101 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-20 21:54 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-20 21:54 +0000
1"""Tables with hierarchical headers and units
3## Classes
5- `class TableData`: tables with hierarchical header including units
6 and column-specific formats. Kind of similar to a pandas data frame,
7 but without index column and with intuitive numpy-style indexing and
8 nicely formatted output to csv, markdown, html, and latex.
11## Helper functions
13- `write()`: shortcut for constructing and writing a TableData.
14- `latex_unit()`: translate unit string into SIunit LaTeX code.
15- `index2aa()`: convert an integer into an alphabetical representation.
16- `aa2index()`: convert an alphabetical representation to an index.
19## Configuration
21- `add_write_table_config()`: add parameter specifying how to write a table to a file as a new section to a configuration.
22- `write_table_args()`: translates a configuration to the respective parameter names for writing a table to a file.
24"""
26import sys
27import re
28import math as m
29import numpy as np
30try:
31 import pandas as pd
32 has_pandas = True
33except ImportError:
34 has_pandas = False
36from pathlib import Path
37from itertools import product
38from io import StringIO
# Special methods of TableData that should show up in the pdoc documentation.
__pdoc__ = {
    'TableData.__contains__': True,
    'TableData.__len__': True,
    'TableData.__iter__': True,
    'TableData.__next__': True,
    'TableData.__setupkey__': True,
    'TableData.__call__': True,
    'TableData.__getitem__': True,
    'TableData.__setitem__': True,
    'TableData.__delitem__': True,
    'TableData.__str__': True,
}


default_missing_str = '-'
"""Default string indicating nan data elements when outputting data."""

default_missing_inputs = ['na', 'NA', 'nan', 'NAN', '-']
"""Default strings that are translated to nan when loading table data."""
61class TableData(object):
62 """Table with numpy-style indexing and hierarchical header including units and formats.
64 Parameters
65 ----------
66 data: str, stream, ndarray
67 - a filename: load table from file with name `data`.
68 - a stream/file handle: load table from that stream.
69 - 1-D or 2-D ndarray of data: the data of the table.
70 Requires also a specified `header`.
71 - pandas data frame.
72 header: TableData, dict, list of str, list of list of str
73 Header labels for each column.
74 See `set_labels()` for details.
75 units: None, TableData, dict, list of str, str
76 Optional unit strings for each column.
77 See `set_units()' for details.
78 formats: None, TableData, dict, list of str, str
79 Optional format strings for each column.
80 See `set_formats()' for details.
81 descriptions: None, TableData, dict, list of str, str
82 Optional description strings for each column.
83 See `set_descriptions()' for details.
84 missing: list of str
85 Missing data are indicated by one of these strings.
86 sep: str or None
87 If `data` is a file, force `sep` as column separator.
88 stop: str or None
89 If a line matches `stop`, stop reading the file. `stop`
90 can be an empty string to stop reading at the first empty
91 line.
93 Manipulate table header
94 -----------------------
96 Each column of the table has a label (the name of the column), a
97 unit, and a format specifier. Sections group columns into a hierarchy.
99 - `__init__()`: initialize a TableData from data or a file.
100 - `append()`: append column to the table.
101 - `insert()`: insert a table column at a given position.
102 - `remove()`: remove columns from the table.
103 - `section()`: the section name of a specified column.
104 - `set_section()`: set a section name.
105 - `append_section()`: add sections to the table header.
106 - `insert_section()`: insert a section at a given position of the table header.
107 - `label()`: the name of a column.
108 - `set_label()`: set the name of a column.
109 - `set_labels()`: set the labels of some columns.
110 - `unit()`: the unit of a column.
111 - `set_unit()`: set the unit of a column.
112 - `set_units()`: set the units of some columns.
113 - `format()`: the format string of the column.
114 - `set_format()`: set the format string of a column.
115 - `set_formats()`: set the format strings of some columns.
116 - `description()`: the description of a column.
117 - `set_description()`: set the description of a column.
118 - `set_descriptions()`: set the descriptions of some columns.
120 For example:
121 ```
122 tf = TableData('data.csv')
123 ```
124 loads a table directly from a file. See `load()` for details.
125 ```
126 tf = TableData(np.random.randn(4,3), header=['aaa', 'bbb', 'ccc'], units=['m', 's', 'g'], formats='%.2f')
127 ```
128 results in
129 ``` plain
130 aaa bbb ccc
131 m s g
132 1.45 0.01 0.16
133 -0.74 -0.58 -1.34
134 -2.06 0.08 1.47
135 -0.43 0.60 1.38
136 ```
138 A more elaborate way to construct a table is:
139 ```
140 df = TableData()
141 # first column with section names and 3 data values:
142 df.append(["data", "specimen", "size"], "m", "%6.2f",
143 [2.34, 56.7, 8.9])
144 # next columns with single data values:
145 df.append("full weight", "kg", "%.0f", 122.8)
146 df.append_section("all measures")
147 df.append("speed", "m/s", "%.3g", 98.7)
148 df.append("median jitter", "mm", "%.1f", 23)
149 df.append("size", "g", "%.2e", 1.234)
150 # add a missing value to the second column:
151 df.add(np.nan, 1)
152 # fill up the remaining columns of the row:
153 df.add((0.543, 45, 1.235e2))
154 # add data to the next row starting at the second column:
155 df.add([43.21, 6789.1, 3405, 1.235e-4], 1) # next row
156 ```
157 results in
158 ``` plain
159 data
160 specimen all measures
161 size full weight speed median jitter size
162 m kg m/s mm g
163 2.34 123 98.7 23.0 1.23e+00
164 56.70 - 0.543 45.0 1.24e+02
165 8.90 43 6.79e+03 3405.0 1.23e-04
166 ```
168 Table columns
169 -------------
171 Columns can be specified by an index or by the name of a column. In
172 table headers with sections the column can be specified by the
173 section names and the column name separated by '>'.
175 - `index()`: the column index of a column specifier.
176 - `__contains__()`: check for existence of a column.
177 - `find_col()`: find the start and end index of a column specification.
178 - `column_spec()`: full specification of a column with all its section names.
179 - `column_head()`: the name, unit, and format of a column.
180 - `table_header()`: the header of the table without content.
182 For example:
183 ```
184 df.index('all measures>size') # returns 4
185 'speed' in df # is True
186 ```
188 Iterating over columns
189 ----------------------
191 A table behaves like an ordered dictionary with column names as
192 keys and the data of each column as values.
193 Iterating over a table goes over columns.
194 Note, however, that the len() of a table is the number of rows,
195 not the number of columns!
197 - `keys()`: list of unique column keys for all available columns.
198 - `values()`: list of column data corresponding to keys().
199 - `items()`: generator over column names and the corresponding data.
200 - `__iter__()`: initialize iteration over data columns.
201 - `__next__()`: return unique column key of next column.
202 - `data`: the table data as a list over columns each containing a list of data elements.
204 For example:
205 ```
206 print('column specifications:')
207 for c in range(df.columns()):
208 print(df.column_spec(c))
209 print('iterating over column specifications:')
210 for c, k in enumerate(df):
211 print(f'{c}: {k}')
212 print('keys():')
213 for c, k in enumerate(df.keys()):
214 print(f'{c}: {k}')
215 print('values():')
216 for c, v in enumerate(df.values()):
217 print(v)
218 print('iterating over the table:')
219 for v in df:
220 print(v)
221 ```
222 results in
223 ``` plain
224 column specifications:
225 data>specimen>size
226 data>specimen>full weight
227 data>all measures>speed
228 data>all measures>median jitter
229 data>all measures>size
230 iterating over column specifications:
231 0: data>specimen>size
232 1: data>specimen>full weight
233 2: data>all measures>speed
234 3: data>all measures>median jitter
235 4: data>all measures>size
236 keys():
237 0: data>specimen>size
238 1: data>specimen>full weight
239 2: data>all measures>speed
240 3: data>all measures>median jitter
241 4: data>all measures>size
242 values():
243 [2.34, 56.7, 8.9]
244 [122.8, nan, 43.21]
245 [98.7, 0.543, 6789.1]
246 [23, 45, 3405]
247 [1.234, 123.5, 0.0001235]
248 iterating over the table:
249 [2.34, 56.7, 8.9]
250 [122.8, nan, 43.21]
251 [98.7, 0.543, 6789.1]
252 [23, 45, 3405]
253 [1.234, 123.5, 0.0001235]
254 ```
256 Accessing data
257 --------------
259 In contrast to the iterator functions the [] operator treats the
260 table as a 2D-array where the first index indicates the row and
261 the second index the column.
263 Rows are indexed by integer row numbers or boolean arrays.
264 Columns are also indexed by integer column numbers, but in
265 addition can be index by their names.
267 A single index selects rows, unless it is specified by
268 strings. Since strings can only specify column names, this selects
269 whole columns.
271 Like a numpy array the table can be sliced, and logical indexing can
272 be used to select specific parts of the table.
274 As for any function, columns can be specified as indices or strings.
276 - `rows()`: the number of rows.
277 - `columns()`: the number of columns.
278 - `__len__()`: the number of rows.
279 - `ndim`: always 2.
280 - `size`: number of elements (sum of length of all data columns), can be smaller than `columns()*rows()`.
281 - `shape`: number of rows and columns.
282 - `row()`: a single row of the table as TableData.
283 - `row_list()`: a single row of the table as list.
284 - `row_data()`: a generator for iterating over rows of the table.
285 - `row_dict()`: a single row of the table as dictionary.
286 - `col()`: a single column of the table as TableData.
287 - `__call__()`: a single column of the table as ndarray.
288 - `__getitem__()`: data elements specified by slice.
289 - `__setitem__()`: assign values to data elements specified by slice.
290 - `__delitem__()`: delete data elements or whole columns or rows.
291 - `array()`: the table data as a ndarray.
292 - `data_frame()`: the table data as a pandas DataFrame.
293 - `dicts()`: the table as a list of dictionaries.
294 - `dict()`: the table as a dictionary.
295 - `add()`: add data elements row-wise.
296 - `append_data_column()`: append data elements to a column.
297 - `set_column()`: set the column where to add data.
298 - `fill_data()`: fill up all columns with missing data.
299 - `clear_data()`: clear content of the table but keep header.
300 - `clear()`: clear the table of any content and header information.
301 - `key_value()`: a data element returned as a key-value pair.
302 - `aggregate()`: apply functions to columns.
303 - `groupby()`: iterate through unique values of columns.
305 - `sort()`: sort the table rows in place.
306 - `statistics()`: descriptive statistics of each column.
308 For example:
309 ```
310 # single column:
311 df('size') # data of 'size' column as ndarray
312 df['size'] # data of 'size' column as ndarray
313 df[:, 'size'] # data of 'size' column as ndarray
314 df.col('size') # table with the single column 'size'
316 # single row:
317 df[2, :] # table with data of only the third row
318 df.row(2) # table with data of only the third row
320 # slices:
321 df[2:5,['size','jitter']] # sub-table
322 df[2:5,['size','jitter']].array() # ndarray with data only
324 # logical indexing:
325 df[df['speed'] > 100.0, 'size'] = 0.0 # set size to 0 if speed is > 100
327 # delete:
328 del df[3:6, 'weight'] # delete rows 3-6 from column 'weight'
329 del df[3:5, :] # delete rows 3-5 completely
330 del df[:, 'speed'] # remove column 'speed' from table
331 del df['speed'] # remove column 'speed' from table
332 df.remove('weight') # remove column 'weight' from table
334 # sort and statistics:
335 df.sort(['weight', 'jitter'])
336 df.statistics()
337 ```
338 statistics() returns a table with standard descriptive statistics:
339 ``` plain
340 statistics data
341 - specimen all measures
342 - size full weight speed median jitter size
343 - m kg m/s mm g
344 mean 22.65 83 2.3e+03 1157.7 4.16e+01
345 std 24.23 40 3.18e+03 1589.1 5.79e+01
346 min 2.34 43 0.543 23.0 1.23e-04
347 quartile1 5.62 83 49.6 34.0 6.17e-01
348 median 8.90 123 98.7 45.0 1.23e+00
349 quartile3 32.80 - 3.44e+03 1725.0 6.24e+01
350 max 56.70 123 6.79e+03 3405.0 1.24e+02
351 count 3.00 2 3 3.0 3.00e+00
352 ```
354 Write and load tables
355 ---------------------
357 Table data can be written to a variety of text-based formats
358 including comma separated values, latex and html files. Which
359 columns are written can be controlled by the hide() and show()
360 functions. TableData can be loaded from all the written file formats
361 (except html), also directly via the constructor.
363 - `hide()`: hide a column or a range of columns.
364 - `hide_all()`: hide all columns.
365 - `hide_empty_columns()`: hide all columns that do not contain data.
366 - `show()`: show a column or a range of columns.
367 - `write()`: write table to a file or stream.
368 - `write_file_stream()`: write table to file or stream and return appropriate file name.
369 - `__str__()`: write table to a string.
370 - `write_descriptions()`: write column descriptions of the table to a file or stream.
371 - `load()`: load table from file or stream.
372 - `formats`: list of supported file formats for writing.
373 - `descriptions`: dictionary with descriptions of the supported file formats.
374 - `extensions`: dictionary with default filename extensions for each of the file formats.
375 - `ext_formats`: dictionary mapping filename extensions to file formats.
377 See documentation of the `write()` function for examples of the supported file formats.
379 """
    # Class-level registries of the text file formats a table can be
    # written to. The keys of `descriptions` and `extensions` are exactly
    # the entries of `formats`.
    formats = ['dat', 'ascii', 'csv', 'rtai', 'md', 'tex', 'html']
    """list of strings: Supported output formats."""

    descriptions = {'dat': 'data text file', 'ascii': 'ascii-art table',
                    'csv': 'comma separated values', 'rtai': 'rtai-style table',
                    'md': 'markdown', 'tex': 'latex tabular',
                    'html': 'html markup'}
    """dict: Decription of output formats corresponding to `formats`."""

    # Note that 'dat' and 'rtai' share the same default extension.
    extensions = {'dat': 'dat', 'ascii': 'txt', 'csv': 'csv', 'rtai': 'dat',
                  'md': 'md', 'tex': 'tex', 'html': 'html'}
    """dict: Default file extensions for the output `formats`. """

    # Lower and upper case extensions are listed explicitly. No extension
    # maps to 'ascii' or 'rtai'; 'txt' maps to 'dat'.
    ext_formats = {'dat': 'dat', 'DAT': 'dat', 'txt': 'dat', 'TXT': 'dat',
                   'csv': 'csv', 'CSV': 'csv', 'md': 'md', 'MD': 'md',
                   'tex': 'tex', 'TEX': 'tex', 'html': 'html', 'HTML': 'html'}
    """dict: Mapping of file extensions to the output formats."""

    # Used elsewhere in the class (not visible in this part of the file).
    stdev_labels = ['sd', 'std', 's.d.', 'stdev', 'error']
    """list: column labels recognized as standard deviations."""
402 def __init__(self, data=None, header=None, units=None, formats=None,
403 descriptions=None, missing=default_missing_inputs,
404 sep=None, stop=None):
405 self.clear()
406 if data is None:
407 ncols = 0
408 for stuff in [header, units, formats, descriptions]:
409 if stuff is not None:
410 if isinstance(stuff, TableData):
411 n = stuff.columns()
412 elif isinstance(stuff, (list, tuple, np.ndarray)) and not \
413 (isinstance(stuff, np.ndarray) and \
414 len(stuff.shape) == 0):
415 n = len(stuff)
416 elif not isinstance(stuff, dict):
417 n = 1
418 if n > ncols:
419 ncols = n
420 # initialize empty table:
421 if ncols > 0:
422 for c in range(ncols):
423 self.data.append([])
424 else:
425 if isinstance(data, TableData):
426 self.ndim = data.ndim
427 self.size = data.size
428 self.shape = data.shape
429 self.nsecs = data.nsecs
430 self.setcol = data.setcol
431 self.addcol = data.addcol
432 for c in range(data.columns()):
433 self.header.append(list(data.header[c]))
434 self.units.append(data.units[c])
435 self.formats.append(data.formats[c])
436 self.descriptions.append(data.descriptions[c])
437 self.hidden.append(data.hidden[c])
438 self.data.append(list(data.data[c]))
439 elif has_pandas and isinstance(data, pd.DataFrame):
440 for c, key in enumerate(data.keys()):
441 new_key = key
442 new_unit = ''
443 if '/' in key:
444 p = key.split('/')
445 new_key = p[0].strip()
446 new_unit = '/'.join(p[1:])
447 formats = '%s' if isinstance(values[0], str) else '%g'
448 values = data[key].tolist()
449 self.append(new_key, new_unit, formats, value=values)
450 elif isinstance(data, (list, tuple, np.ndarray)) and not \
451 (isinstance(data, np.ndarray) and len(data.shape) == 0):
452 if len(data) > 0 and \
453 isinstance(data[0], (list, tuple, np.ndarray)) and not \
454 (isinstance(data[0], np.ndarray) and \
455 len(data[0].shape) == 0):
456 # 2D list, rows first:
457 for c in range(len(data[0])):
458 self.data.append([])
459 for row in data:
460 for c, val in enumerate(row):
461 self.data[c].append(val)
462 elif len(data) > 0 and isinstance(data[0], dict):
463 # list of dictionaries:
464 for d in data:
465 self._add_dict(d, True)
466 self.fill_data()
467 else:
468 # 1D list:
469 for val in data:
470 self.data.append([val])
471 elif isinstance(data, dict):
472 self._add_dict(data, True)
473 self.fill_data()
474 else:
475 self.load(data, missing, sep, stop)
476 self.set_labels(header)
477 self.set_units(units)
478 self.set_formats(formats)
479 self.set_descriptions(descriptions)
480 self.hidden = [False]*len(self.data)
481 self.addcol = len(self.data)
482 self.__recompute_shape()
484 def __recompute_shape(self):
485 self.size = sum(map(len, self.data))
486 self.shape = (self.rows(), self.columns())
    def append(self, label, unit=None, formats=None, description=None,
               value=None, fac=None, key=None):
        """Append column to the table.

        If a section has been opened by `append_section()` that does
        not have a column yet, the header of that pending column is
        completed instead of adding a new column.

        Parameters
        ----------
        label: str or list of str
            Optional section titles and the name of the column.
            In a list, the name of the column is the last element.
        unit: str or None
            The unit of the column contents.
        formats: str or None
            The C-style format string used for printing out the column content, e.g.
            '%g', '%.2f', '%s', etc.
            If None, the format is set to '%g'.
        description: str or None
            The description of the column contents.
        value: None, float, int, str, etc. or list thereof, or list of dict
            If not None, data for the column.
            If list of dictionaries, extract from each dictionary in the list
            the value specified by `key`. If `key` is `None` use `label` as
            the key. Dictionaries without that key contribute a nan.
        fac: float
            If not None and not zero, multiply the data values by this number.
        key: None or key of a dictionary
            If not None and `value` is a list of dictionaries,
            extract from each dictionary in the list the value specified
            by `key` and assign the resulting list as data to the column.

        Returns
        -------
        self: TableData
            This TableData
        """
        if self.addcol >= len(self.data):
            # no pending section column: add a new column
            if isinstance(label, (list, tuple, np.ndarray)):
                # headers are stored with the column name first,
                # followed by the section names from inner to outer level:
                label = list(reversed(label))
                # number of sections larger than what we have so far:
                n = max(0, len(label) - 1 - self.nsecs)
                # find matching sections:
                found = False
                for s in range(1, len(label)):
                    # go backwards to the most recent column
                    # that defines a section at this level:
                    for c in range(len(self.header) - 1, -1, -1):
                        if len(self.header[c]) > s - n:
                            if s - n >= 0 and \
                               self.header[c][s - n] == label[s]:
                                # remove matching sections:
                                label = label[:s]
                                found = True
                            # NOTE(review): nesting of this break was
                            # reconstructed from the logic (only the most
                            # recent section of a level is compared);
                            # confirm against the original file.
                            break
                    if found:
                        break
                # add label and unique sections:
                self.header.append(label)
                label = label[0]
                if n > 0:
                    # lift previous header label and sections:
                    for c in range(len(self.header) - 1):
                        self.header[c] = ['-']*n + self.header[c]
            else:
                self.header.append([label])
            self.units.append(unit or '')
            self.formats.append(formats or '%g')
            self.descriptions.append(description or '')
            self.hidden.append(False)
            self.data.append([])
            self.nsecs = max(map(len, self.header)) - 1
        else:
            # complete the header of the pending section column:
            if isinstance(label, (list, tuple, np.ndarray)):
                self.header[self.addcol] = list(reversed(label)) + self.header[self.addcol]
                label = label[-1]
            else:
                self.header[self.addcol] = [label] + self.header[self.addcol]
            self.units[self.addcol] = unit or ''
            self.formats[self.addcol] = formats or '%g'
            self.descriptions[self.addcol] = description or ''
            if self.nsecs < len(self.header[self.addcol]) - 1:
                self.nsecs = len(self.header[self.addcol]) - 1
        # at this point label is the plain name of the column:
        if not key:
            key = label
        if value is not None:
            if isinstance(value, (list, tuple, np.ndarray)):
                if key and len(value) > 0 and isinstance(value[0], dict):
                    # pick the value for key from each dictionary:
                    value = [d[key] if key in d else float('nan') for d in value]
                self.data[-1].extend(value)
            else:
                self.data[-1].append(value)
        if fac:
            # scale all data of the column in place:
            for k in range(len(self.data[-1])):
                self.data[-1][k] *= fac
        # the next append() adds a new column:
        self.addcol = len(self.data)
        self.__recompute_shape()
        return self
581 def insert(self, column, label, unit=None, formats=None, description=None,
582 value=None, fac=None, key=None):
583 """Insert a table column at a given position.
585 .. WARNING::
586 If no `value` is given, the inserted column is an empty list.
588 Parameters
589 ----------
590 columns int or str
591 Column before which to insert the new column.
592 Column can be specified by index or name,
593 see `index()` for details.
594 label: str or list of str
595 Optional section titles and the name of the column.
596 unit: str or None
597 The unit of the column contents.
598 formats: str or None
599 The C-style format string used for printing out the column content, e.g.
600 '%g', '%.2f', '%s', etc.
601 If None, the format is set to '%g'.
602 description: str or None
603 The description of the column contents.
604 value: None, float, int, str, etc. or list thereof, or list of dict
605 If not None, data for the column.
606 If list of dictionaries, extract from each dictionary in the list
607 the value specified by `key`. If `key` is `None` use `label` as
608 the key.
609 fac: float
610 If not None, multiply the data values by this number.
611 key: None or key of a dictionary
612 If not None and `value` is a list of dictionaries,
613 extract from each dictionary in the list the value specified
614 by `key` and assign the resulting list as data to the column.
616 Returns
617 -------
618 index: int
619 The index of the inserted column.
621 Raises
622 ------
623 self: TableData
624 This TableData
625 """
626 col = self.index(column)
627 if col is None:
628 raise IndexError(f'Cannot insert before non-existing column "{column}"')
629 if isinstance(label, (list, tuple, np.ndarray)):
630 self.header.insert(col, list(reversed(label)))
631 else:
632 self.header.insert(col, [label])
633 self.units.insert(col, unit or '')
634 self.formats.insert(col, formats or '%g')
635 self.descriptions.insert(col, description or '')
636 self.hidden.insert(col, False)
637 self.data.insert(col, [])
638 if self.nsecs < len(self.header[col]) - 1:
639 self.nsecs = len(self.header[col]) - 1
640 if not key:
641 key = label
642 if value is not None:
643 if isinstance(value, (list, tuple, np.ndarray)):
644 if key and len(value) > 0 and isinstance(value[0], dict):
645 value = [d[key] if key in d else float('nan') for d in value]
646 self.data[col].extend(value)
647 else:
648 self.data[col].append(value)
649 if fac:
650 for k in range(len(self.data[col])):
651 self.data[col][k] *= fac
652 self.addcol = len(self.data)
653 self.__recompute_shape()
654 return self
656 def remove(self, columns):
657 """Remove columns from the table.
659 Parameters
660 -----------
661 columns: int or str or list of int or str
662 Columns can be specified by index or name,
663 see `index()` for details.
665 Raises
666 ------
667 IndexError:
668 If an invalid column was specified.
669 """
670 # fix columns:
671 if not isinstance(columns, (list, tuple, np.ndarray)):
672 columns = [ columns ]
673 if not columns:
674 return
675 # remove:
676 for col in columns:
677 c = self.index(col)
678 if c is None:
679 if isinstance(col, (np.integer, int)):
680 col = '%d' % col
681 raise IndexError('Cannot remove non-existing column ' + col)
682 continue
683 if c+1 < len(self.header):
684 self.header[c+1].extend(self.header[c][len(self.header[c+1]):])
685 del self.header[c]
686 del self.units[c]
687 del self.formats[c]
688 del self.descriptions[c]
689 del self.hidden[c]
690 del self.data[c]
691 if self.setcol >= len(self.data):
692 self.setcol = 0
693 self.__recompute_shape()
695 def section(self, column, level):
696 """The section name of a specified column.
698 Parameters
699 ----------
700 column: None, int, or str
701 A specification of a column.
702 See self.index() for more information on how to specify a column.
703 level: int
704 The level of the section to be returned. The column label itself is level=0.
706 Returns
707 -------
708 name: str
709 The name of the section at the specified level containing
710 the column.
711 index: int
712 The column index that contains this section
713 (equal or smaller thant `column`).
715 Raises
716 ------
717 IndexError:
718 If `level` exceeds the maximum possible level.
719 """
720 if level < 0 or level > self.nsecs:
721 raise IndexError('Invalid section level')
722 column = self.index(column)
723 while len(self.header[column]) <= level:
724 column -= 1
725 return self.header[column][level], column
727 def set_section(self, column, label, level):
728 """Set a section name.
730 Parameters
731 ----------
732 column: None, int, or str
733 A specification of a column.
734 See self.index() for more information on how to specify a column.
735 label: str
736 The new name to be used for the section.
737 level: int
738 The level of the section to be set. The column label itself is level=0.
739 """
740 column = self.index(column)
741 self.header[column][level] = label
742 return column
744 def append_section(self, label):
745 """Add sections to the table header.
747 Each column of the table has a header label. Columns can be
748 grouped into sections. Sections can be nested arbitrarily.
750 Parameters
751 ----------
752 label: stri or list of str
753 The name(s) of the section(s).
755 Returns
756 -------
757 index: int
758 The column index where the section was appended.
759 """
760 if self.addcol >= len(self.data):
761 if isinstance(label, (list, tuple, np.ndarray)):
762 self.header.append(list(reversed(label)))
763 else:
764 self.header.append([label])
765 self.units.append('')
766 self.formats.append('')
767 self.descriptions.append('')
768 self.hidden.append(False)
769 self.data.append([])
770 else:
771 if isinstance(label, (list, tuple, np.ndarray)):
772 self.header[self.addcol] = list(reversed(label)) + self.header[self.addcol]
773 else:
774 self.header[self.addcol] = [label] + self.header[self.addcol]
775 if self.nsecs < len(self.header[self.addcol]):
776 self.nsecs = len(self.header[self.addcol])
777 self.addcol = len(self.data) - 1
778 self.__recompute_shape()
779 return self.addcol
781 def insert_section(self, column, section):
782 """Insert a section at a given position of the table header.
784 Parameters
785 ----------
786 columns int or str
787 Column before which to insert the new section.
788 Column can be specified by index or name,
789 see `index()` for details.
790 section: str
791 The name of the section.
793 Returns
794 -------
795 index: int
796 The index of the column where the section was inserted.
798 Raises
799 ------
800 IndexError:
801 If an invalid column was specified.
802 """
803 col = self.index(column)
804 if col is None:
805 if isinstance(column, (np.integer, int)):
806 column = '%d' % column
807 raise IndexError('Cannot insert at non-existing column ' + column)
808 self.header[col].append(section)
809 if self.nsecs < len(self.header[col]) - 1:
810 self.nsecs = len(self.header[col]) - 1
811 return col
813 def label(self, column):
814 """The name of a column.
816 Parameters
817 ----------
818 column: None, int, or str
819 A specification of a column.
820 See self.index() for more information on how to specify a column.
822 Returns
823 -------
824 self: TableData
825 This TableData
826 """
827 column = self.index(column)
828 return self.header[column][0]
830 def set_label(self, column, label):
831 """Set the name of a column.
833 Parameters
834 ----------
835 column: None, int, or str
836 A specification of a column.
837 See self.index() for more information on how to specify a column.
838 label: str
839 The new name to be used for the column.
841 Returns
842 -------
843 self: TableData
844 This TableData
845 """
846 column = self.index(column)
847 self.header[column][0] = label
848 return self
850 def set_labels(self, labels):
851 """Set the labels of some columns.
853 Parameters
854 ----------
855 labels: TableData, dict, list of str, list of list of str, None
856 The new labels to be used.
857 If TableData, take the labels of the respective column indices.
858 If dict, keys are column labels (see self.index() for more
859 information on how to specify a column), and values are
860 the new labels for the respective columns as str or list of str.
861 If list of str or list of list of str,
862 set labels of the first successive columns to the list elements.
863 If `None`, do nothing.
865 Returns
866 -------
867 self: TableData
868 This TableData
869 """
870 while len(self.header) < len(self.data):
871 self.header.append([f'C{len(self.header) + 1}'])
872 if isinstance(labels, TableData):
873 for c in range(min(self.columns(), labels.columns())):
874 self.header[c] = labels.header[c]
875 elif isinstance(labels, dict):
876 for c in labels:
877 i = self.index(c)
878 if i is None:
879 continue
880 l = labels[c]
881 if isinstance(l, (list, tuple)):
882 self.header[i] = l
883 else:
884 self.header[i] = [l]
885 elif isinstance(labels, (list, tuple, np.ndarray)) and not \
886 (isinstance(labels, np.ndarray) and len(labels.shape) == 0):
887 for c, l in enumerate(labels):
888 if isinstance(l, (list, tuple)):
889 self.header[c] = l
890 else:
891 self.header[c] = [l]
892 return self
894 def unit(self, column):
895 """The unit of a column.
897 Parameters
898 ----------
899 column: None, int, or str
900 A specification of a column.
901 See self.index() for more information on how to specify a column.
903 Returns
904 -------
905 unit: str
906 The unit.
907 """
908 column = self.index(column)
909 return self.units[column]
911 def set_unit(self, column, unit):
912 """Set the unit of a column.
914 Parameters
915 ----------
916 column: None, int, or str
917 A specification of a column.
918 See self.index() for more information on how to specify a column.
919 unit: str
920 The new unit to be used for the column.
922 Returns
923 -------
924 self: TableData
925 This TableData
926 """
927 column = self.index(column)
928 self.units[column] = unit
929 return self
def set_units(self, units):
    """Assign units to several columns at once.

    Parameters
    ----------
    units: TableData, dict, list of str, str, None
        Source of the new units.
        - TableData: for every column key of `units` that resolves in
          this table, copy the unit from `units`.
        - dict: keys are column specifiers (see `self.index()`),
          values are the units; keys that do not resolve are skipped.
        - list, tuple or non-scalar ndarray: the k-th element becomes
          the unit of the k-th column.
        - None: nothing is assigned.
        - anything else: used as the unit of every column.

    Returns
    -------
    self: TableData
        This table, so calls can be chained.
    """
    # Guarantee one unit slot per data column before assigning.
    n_missing = len(self.data) - len(self.units)
    if n_missing > 0:
        self.units.extend([''] * n_missing)
    if isinstance(units, TableData):
        for spec in units:
            idx = self.index(spec)
            if idx is not None:
                self.units[idx] = units.unit(spec)
        return self
    if isinstance(units, dict):
        for spec in units:
            idx = self.index(spec)
            if idx is not None:
                self.units[idx] = units[spec]
        return self
    # A 0-dimensional ndarray is treated like a scalar, not a sequence.
    is_sequence = isinstance(units, (list, tuple)) or \
        (isinstance(units, np.ndarray) and units.ndim > 0)
    if is_sequence:
        for pos, new_unit in enumerate(units):
            self.units[pos] = new_unit
    elif units is not None:
        self.units[:] = [units] * len(self.units)
    return self
def format(self, column):
    """Look up the format string of one column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved by `self.index()`.

    Returns
    -------
    format: str
        The entry of `self.formats` belonging to the resolved column.
    """
    col_idx = self.index(column)
    return self.formats[col_idx]
def set_format(self, column, format):
    """Replace the format string of one column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved by `self.index()`.
    format: str
        Format string to store for that column.

    Returns
    -------
    self: TableData
        This table, so calls can be chained.
    """
    col_idx = self.index(column)
    self.formats[col_idx] = format
    return self
def set_formats(self, formats):
    """Assign format strings to several columns at once.

    Parameters
    ----------
    formats: TableData, dict, list of str, str, None
        Source of the new format strings.
        - TableData: for every column key of `formats` that resolves
          in this table, copy the format string from `formats`.
        - dict: keys are column specifiers (see `self.index()`),
          values are format strings; keys that do not resolve are
          skipped.
        - list, tuple or non-scalar ndarray: the k-th element becomes
          the format of the k-th column.
        - None: nothing is assigned.
        - anything else: used as the format of every column.
        Falsy values taken from a dict, a sequence or a scalar are
        replaced by '%g'.

    Returns
    -------
    self: TableData
        This table, so calls can be chained.
    """
    # Guarantee one format slot per data column before assigning.
    n_missing = len(self.data) - len(self.formats)
    if n_missing > 0:
        self.formats.extend(['%g'] * n_missing)
    if isinstance(formats, TableData):
        for spec in formats:
            idx = self.index(spec)
            if idx is not None:
                self.formats[idx] = formats.format(spec)
        return self
    if isinstance(formats, dict):
        for spec in formats:
            idx = self.index(spec)
            if idx is not None:
                self.formats[idx] = formats[spec] or '%g'
        return self
    # A 0-dimensional ndarray is treated like a scalar, not a sequence.
    is_sequence = isinstance(formats, (list, tuple)) or \
        (isinstance(formats, np.ndarray) and formats.ndim > 0)
    if is_sequence:
        for pos, fmt in enumerate(formats):
            self.formats[pos] = fmt or '%g'
    elif formats is not None:
        self.formats[:] = [formats or '%g' for _ in self.formats]
    return self
def description(self, column):
    """Look up the description of one column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved by `self.index()`.

    Returns
    -------
    description: str
        The entry of `self.descriptions` belonging to the resolved column.
    """
    col_idx = self.index(column)
    return self.descriptions[col_idx]
def set_description(self, column, description):
    """Replace the description of one column.

    Parameters
    ----------
    column: None, int, or str
        Column specifier, resolved by `self.index()`.
    description: str
        Description to store for that column.

    Returns
    -------
    self: TableData
        This table, so calls can be chained.
    """
    col_idx = self.index(column)
    self.descriptions[col_idx] = description
    return self
def set_descriptions(self, descriptions):
    """Assign descriptions to several columns at once.

    Parameters
    ----------
    descriptions: TableData, dict, list of str, None
        Source of the new descriptions.
        - TableData: for every column key of `descriptions` that
          resolves in this table, copy the description.
        - dict: keys are column specifiers (see `self.index()`),
          values are descriptions; keys that do not resolve are
          skipped.
        - list, tuple or non-scalar ndarray: the k-th element becomes
          the description of the k-th column.
        - any other value (including None or a plain str): no
          description is changed.

    Returns
    -------
    self: TableData
        This table, so calls can be chained.
    """
    # Guarantee one description slot per data column before assigning.
    n_missing = len(self.data) - len(self.descriptions)
    if n_missing > 0:
        self.descriptions.extend([''] * n_missing)
    if isinstance(descriptions, TableData):
        for spec in descriptions:
            idx = self.index(spec)
            if idx is not None:
                self.descriptions[idx] = descriptions.description(spec)
        return self
    if isinstance(descriptions, dict):
        for spec in descriptions:
            idx = self.index(spec)
            if idx is not None:
                self.descriptions[idx] = descriptions[spec]
        return self
    # A 0-dimensional ndarray is not treated as a sequence.
    is_sequence = isinstance(descriptions, (list, tuple)) or \
        (isinstance(descriptions, np.ndarray) and descriptions.ndim > 0)
    if is_sequence:
        for pos, text in enumerate(descriptions):
            self.descriptions[pos] = text
    return self
def table_header(self):
    """Build a table that has this table's header but no data.

    Returns
    -------
    data: TableData
        A new TableData with the same columns and section names and
        the same `nsecs` as this table.
    """
    table = TableData()
    # Per section level, the second value last returned by
    # self.section(); a change means a new section begins here.
    last_seen = [-1] * self.nsecs
    for col in range(self.columns()):
        table.append(*self.column_head(col))
        for level in range(1, self.nsecs + 1):
            name, sec_idx = self.section(col, level)
            if sec_idx != last_seen[level - 1]:
                table.header[-1].append(name)
                last_seen[level - 1] = sec_idx
    table.nsecs = self.nsecs
    return table
def column_head(self, column, secs=False):
    """The name, unit, format, and description of a column.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    secs: bool
        If True, return all section names in addition to the column label.

    Returns
    -------
    name: str or list of str
        The column label (`secs=False`), or a list with the section
        names from the top-most level down to the column label
        (`secs=True`).
    unit: str
        The unit.
    format: str
        The format string.
    description: str
        The description of the data column.
    """
    column = self.index(column)
    if not secs:
        return (self.header[column][0], self.units[column],
                self.formats[column], self.descriptions[column])
    # Work on a copy: extending self.header[column] itself would
    # silently add section entries to the table's header on every query.
    header = list(self.header[column])
    c = column - 1
    # Walk to the left and pick up the section names that preceding
    # columns define for the levels this column does not define itself.
    while len(header) < self.nsecs + 1 and c >= 0:
        if len(self.header[c]) > len(header):
            header.extend(self.header[c][len(header):])
        c -= 1
    return (list(reversed(header)), self.units[column],
            self.formats[column], self.descriptions[column])
def column_spec(self, column):
    """Compose the full key of a column from its sections and label.

    Parameters
    ----------
    column: int or str
        Column specifier, resolved by `self.index()`.

    Returns
    -------
    s: str
        The section names from the highest level down, followed by the
        column label, joined by '>'.
    """
    col = self.index(column)
    # Collected bottom-up: label first, then section levels 1..nsecs.
    parts = [self.header[col][0]]
    for level in range(1, self.nsecs + 1):
        sec_name = self.section(col, level)[0]
        parts.append(sec_name)
    parts.reverse()
    return '>'.join(parts)
def find_col(self, column):
    """Find the start and end index of a column specification.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    c0: int or None
        A valid column index or None that is specified by `column`.
    c1: int or None
        A valid column index or None of the column following the range specified
        by `column`.
    """

    def find_column_indices(ss, si, minns, maxns, c0, strict=True):
        # Match the components ss[si:] against the header entries,
        # starting at column c0.  Returns (c0, c1, ns0, None) on a
        # complete match, (None, c0, ns0, si) with the state reached so
        # far otherwise, or four Nones if there is nothing to match.
        if si >= len(ss):
            return None, None, None, None
        ns0 = 0
        for ns in range(minns, maxns+1):
            # index into self.header[c]; runs from maxns - minns down to 0,
            # where entry 0 is the column label:
            nsec = maxns - ns
            if ss[si] == '':
                si += 1
                continue
            for c in range(c0, len(self.header)):
                # strict: exact match, otherwise substring match:
                if nsec < len(self.header[c]) and \
                    ((strict and self.header[c][nsec] == ss[si]) or
                     (not strict and ss[si] in self.header[c][nsec])):
                    ns0 = ns
                    c0 = c
                    si += 1
                    if si >= len(ss):
                        # all components matched; the range ends at the
                        # next column that has an entry at this level:
                        c1 = len(self.header)
                        for c in range(c0+1, len(self.header)):
                            if nsec < len(self.header[c]):
                                c1 = c
                                break
                        return c0, c1, ns0, None
                    elif nsec > 0:
                        # a section matched, continue with the next lower level:
                        break
        return None, c0, ns0, si

    if column is None:
        return None, None
    # strings made of digits are taken as column indices:
    if not isinstance(column, (np.integer, int)) and column.isdigit():
        column = int(column)
    if isinstance(column, (np.integer, int)):
        if column >= 0 and column < len(self.header):
            return column, column + 1
        else:
            return None, None
    # find column by header:
    ss = column.rstrip('>').split('>')
    maxns = self.nsecs
    si0 = 0
    # each leading empty component (leading '>') lowers the level
    # at which matching starts:
    while si0 < len(ss) and ss[si0] == '':
        maxns -= 1
        si0 += 1
    if maxns < 0:
        maxns = 0
    # first try exact matches ...
    c0, c1, ns, si = find_column_indices(ss, si0, 0, maxns, 0, True)
    if c0 is None and c1 is not None:
        # ... then resume from where the strict search stopped,
        # accepting substring matches:
        c0, c1, ns, si = find_column_indices(ss, si, ns, maxns, c1, False)
    return c0, c1
def index(self, column):
    """Resolve a column specifier to a column index.

    Parameters
    ----------
    column: None, int, or str
        A specification of a column.
        - None: no column is specified
        - int: the index of the column (first column is zero), e.g. `index(2)`.
        - a string representing an integer is converted into the column index,
          e.g. `index('2')`
        - a string specifying a column by its header.
          Header names of descending hierarchy are separated by '>'.

    Returns
    -------
    index: int or None
        The first value returned by `self.find_col(column)`:
        a valid column index or None.
    """
    first, _ = self.find_col(column)
    return first
def __contains__(self, column):
    """Tell whether a column specifier resolves to a column.

    Parameters
    ----------
    column: None, int, or str
        The column to be checked.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    contains: bool
        True if `self.index(column)` yields a column index.
    """
    resolved = self.index(column)
    return resolved is not None
def keys(self):
    """Column keys of all columns, in column order.

    Returns
    -------
    keys: list of str
        `self.column_spec(c)` for every column index `c`.
    """
    specs = []
    for col in range(self.columns()):
        specs.append(self.column_spec(col))
    return specs
def values(self):
    """Column data in the order of `keys()`.

    Returns
    -------
    data: list of list of values
        The `self.data` object itself (not a copy).
        First index is columns!
    """
    column_data = self.data
    return column_data
def items(self):
    """Iterate over pairs of column key and column data.

    Yields
    ------
    item: tuple
        `(self.column_spec(c), self.data[c])` for every column index `c`.
    """
    for col in range(self.columns()):
        spec = self.column_spec(col)
        yield spec, self.data[col]
def __len__(self):
    """The number of rows, as reported by `self.rows()`.

    Returns
    -------
    rows: int
        The number of rows contained in the table.
    """
    n_rows = self.rows()
    return n_rows
def __iter__(self):
    """Start an iteration over the column keys.

    The position is kept in `self.iter_counter` on the table itself,
    which is reset here; the table is its own iterator.
    """
    self.iter_counter = -1
    return self
def __next__(self):
    """Advance the iteration and return the next column key.

    Returns
    -------
    s: str
        `self.column_spec()` of the column the iteration now points to.

    Raises
    ------
    StopIteration:
        When the counter has moved past the last column.
    """
    self.iter_counter += 1
    if self.iter_counter < self.columns():
        return self.column_spec(self.iter_counter)
    raise StopIteration
def rows(self):
    """The number of rows.

    Returns
    -------
    rows: int
        Length of the longest column in `self.data`,
        or 0 if there are no columns.
    """
    if not self.data:
        return 0
    return max(len(col_data) for col_data in self.data)
def columns(self):
    """The number of columns.

    Returns
    -------
    columns: int
        The number of entries in `self.header`.
    """
    n_cols = len(self.header)
    return n_cols
def row(self, index):
    """Extract one row as a table of its own.

    Parameters
    ----------
    index: int
        The index of the row to be returned.

    Returns
    -------
    data: TableData
        A new TableData with this table's header and sections and the
        values of row `index` as its only row.
    """
    table = TableData()
    # Per section level, the second value last returned by
    # self.section(); a change means a new section begins here.
    last_seen = [-1] * self.nsecs
    for col in range(self.columns()):
        table.append(*self.column_head(col))
        for level in range(1, self.nsecs + 1):
            name, sec_idx = self.section(col, level)
            if sec_idx != last_seen[level - 1]:
                table.header[-1].append(name)
                last_seen[level - 1] = sec_idx
        table.data[-1] = [self.data[col][index]]
    table.nsecs = self.nsecs
    return table
def row_list(self, index):
    """Extract one row as a plain list.

    Parameters
    ----------
    index: int
        The index of the row to be returned.

    Returns
    -------
    data: list
        The value at row `index` of every column, in column order.
    """
    return [self.data[col][index] for col in range(self.columns())]
def row_data(self):
    """Iterate over the rows of the table.

    Yields
    ------
    data: list
        `self.row_list(r)` for every row index `r`.
    """
    n_rows = self.rows()
    for row_idx in range(n_rows):
        yield self.row_list(row_idx)
def row_dict(self, index):
    """Extract one row as a dictionary.

    Parameters
    ----------
    index: int
        The index of the row to be returned.

    Returns
    -------
    data: dict
        Maps `self.column_spec(c)` of every column to its value
        at row `index`.
    """
    record = {}
    for col in range(self.columns()):
        cell = self.data[col][index]
        record[self.column_spec(col)] = cell
    return record
def column(self, col):
    """Extract one column as a table of its own.

    Parameters
    ----------
    col: None, int, or str
        The column to be returned.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    table: TableData
        A new TableData without sections whose single column shares
        its data list with this table's column.
    """
    table = TableData()
    col_idx = self.index(col)
    label, unit, fmt, descr = self.column_head(col_idx)
    table.append(label, unit, fmt, descr)
    table.data = [self.data[col_idx]]
    table.nsecs = 0
    return table
def __call__(self, column):
    """Return the data of one column as an ndarray.

    Parameters
    ----------
    column: None, int, or str
        The column to be returned.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    data: ndarray
        `np.asarray()` of the column's data.
    """
    col_data = self.data[self.index(column)]
    return np.asarray(col_data)
def __setupkey(self, key):
    """Translate an indexing key into row and column selections.

    A key that is not a tuple selects columns if it is a str or a
    slice whose start and stop are both str; otherwise it selects rows
    of all columns.  A tuple is taken as `(rows, columns)`.

    Returns
    -------
    rows: list of int, slice, None
        The row selection.  A boolean ndarray is converted into the
        indices of its True elements, or into None if it has none.
    cols: list of int
        Indices of the selected columns (a `range` if the columns were
        given as a slice).  A stop column given by name is included.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    if type(key) is tuple:
        rows, cols = key[0], key[1]
    elif isinstance(key, str) or \
            (isinstance(key, slice) and isinstance(key.start, str)
             and isinstance(key.stop, str)):
        # columns by name, all rows:
        cols = key
        rows = slice(0, self.rows(), 1)
    else:
        # rows only, all columns:
        rows = key
        cols = range(self.columns())
    if isinstance(cols, slice):
        first = cols.start
        if first is not None:
            first = self.index(first)
            if first is None:
                raise IndexError('"%s" is not a valid column index' % cols.start)
        last = cols.stop
        if last is not None:
            by_name = isinstance(last, str)
            last = self.index(last)
            if last is None:
                raise IndexError('"%s" is not a valid column index' % cols.stop)
            if by_name:
                # a stop column given by name is inclusive:
                last += 1
        cols = range(self.columns())[slice(first, last, cols.step)]
    else:
        if not isinstance(cols, (list, tuple, np.ndarray, range)):
            cols = [cols]
        resolved = [self.index(spec) for spec in cols]
        if None in resolved:
            raise IndexError('"%s" is not a valid column index' % cols[resolved.index(None)])
        cols = resolved
    if isinstance(rows, np.ndarray) and rows.dtype == np.dtype(bool):
        rows = np.where(rows)[0]
        if len(rows) == 0:
            rows = None
    return rows, cols
def __getitem__(self, key):
    """Return the data elements selected by `key`.

    Parameters
    -----------
    key:
        First key specifies row, (optional) second key the column.
        Columns can be specified by index or name,
        see `index()` for details.
        A single key of strings selects columns by their names: `td[:, 'col'] == td['col']`
        If a stop column is specified by name, it is inclusive!

    Returns
    -------
    data:
        - A single data value if a single row and a single column is specified.
        - A ndarray of data elements if a single column is specified.
        - A TableData object for multiple columns.
        - None if no row is selected (e.g. by a logical index that nowhere is True)
          or if a single selected element lies outside the column's data.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    rows, cols = self.__setupkey(key)
    index_seq = (list, tuple, np.ndarray)
    if len(cols) == 1:
        col = cols[0]
        if col >= self.columns() or rows is None:
            return None
        col_data = self.data[col]
        if isinstance(rows, slice):
            return np.asarray(col_data[rows])
        if isinstance(rows, index_seq):
            # row indices beyond the column's data are dropped:
            return np.asarray([col_data[r] for r in rows if r < len(col_data)])
        if rows < len(col_data):
            return col_data[rows]
        return None
    table = TableData()
    for col in cols:
        table.append(*self.column_head(col, secs=True))
        if rows is None:
            continue
        if isinstance(rows, index_seq):
            for r in rows:
                table.data[-1].append(self.data[col][r])
            continue
        try:
            picked = self.data[col][rows]
            if isinstance(picked, index_seq):
                table.data[-1].extend(picked)
            else:
                table.data[-1].append(picked)
        except IndexError:
            # a single row beyond the column's data reads as missing:
            table.data[-1].append(np.nan)
    table.nsecs = self.nsecs
    return table
def __setitem__(self, key, value):
    """Assign values to the data elements selected by `key`.

    Parameters
    -----------
    key:
        First key specifies row, (optional) second one the column.
        Columns can be specified by index or name,
        see `index()` for details.
        A single key of strings selects columns by their names: `td[:, 'col'] == td['col']`
        If a stop column is specified by name, it is inclusive!
    value: TableData, list, ndarray, float, ...
        Value(s) to assign to the table elements specified by `key`.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    rows, cols = self.__setupkey(key)
    if rows is None:
        return
    seq_types = (list, tuple, np.ndarray)
    if isinstance(value, TableData):
        # If the row selection yields several elements, whole columns of
        # `value` are assigned, otherwise only their first element.
        whole = isinstance(self.data[cols[0]][rows], seq_types)
        for k, c in enumerate(cols):
            self.data[c][rows] = value.data[k] if whole else value.data[k][0]
        return
    if len(cols) == 1:
        target = self.data[cols[0]]
        if isinstance(rows, seq_types):
            if len(rows) == 1:
                target[rows[0]] = value
            elif isinstance(value, seq_types):
                for k, r in enumerate(rows):
                    target[r] = value[k]
            else:
                for r in rows:
                    target[r] = value
        elif isinstance(value, seq_types) or isinstance(rows, (np.integer, int)):
            target[rows] = value
        else:
            # scalar assigned to a slice of rows:
            n = len(target[rows])
            target[rows] = [value]*n if n > 1 else value
        return
    if isinstance(self.data[0][rows], seq_types):
        # several rows of several columns: one column of `value` each
        for k, c in enumerate(cols):
            self.data[c][rows] = value[:,k]
    elif isinstance(value, seq_types):
        for k, c in enumerate(cols):
            self.data[c][rows] = value[k]
    else:
        for c in cols:
            self.data[c][rows] = value
def __delitem__(self, key):
    """Delete data elements or whole columns or rows.

    Parameters
    -----------
    key:
        First key specifies row, (optional) second one the column.
        Columns can be specified by index or name,
        see `index()` for details.
        A single key of strings selects columns by their names: `td[:, 'col'] == td['col']`
        If a stop column is specified by name, it is inclusive!
        If all rows are selected, then the specified columns are removed from the table.
        Otherwise only data values are removed.
        If all columns are selected then entire rows of data values are removed.
        Otherwise only data values in the specified rows are removed.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    rows, cols = self.__setupkey(key)
    if rows is None:
        return
    n_rows = self.rows()
    selected = np.arange(n_rows, dtype=int)[rows]
    if not isinstance(selected, np.ndarray):
        # a single row index:
        for c in cols:
            del self.data[c][selected]
        self.__recompute_shape()
        return
    if len(selected) == n_rows:
        # delete whole columns:
        self.remove(cols)
    elif len(selected) > 0:
        # delete from the end so that remaining indices stay valid:
        for r in sorted(selected, reverse=True):
            for c in cols:
                if r < len(self.data[c]):
                    del self.data[c][r]
        self.__recompute_shape()
def array(self, row=None):
    """Return table data as an ndarray.

    Parameters
    ----------
    row: int or None
        If specified, a 1D ndarray of that row will be returned.

    Returns
    -------
    data: 2D or 1D ndarray
        Without `row`, the transpose of `np.array(self.data)`,
        i.e. rows first.
        With `row`, the element `row` of every column.
    """
    if row is not None:
        return np.array([col_data[row] for col_data in self.data])
    return np.array(self.data).T
def data_frame(self):
    """Return the table data as a pandas DataFrame.

    Returns
    -------
    data: pandas.DataFrame
        A DataFrame built from `self.dict()`.
    """
    columns = self.dict()
    return pd.DataFrame(columns)
def dicts(self, raw_values=True, missing=default_missing_str):
    """Return the table as a list of dictionaries, one per row.

    Parameters
    ----------
    raw_values: bool
        If True, use raw table values as values,
        else format the values and add unit string.
    missing: str
        String used for float nan elements when values are formatted.

    Returns
    -------
    table: list of dict
        For each row a dictionary with the column labels
        (`self.header[col][0]`) as keys.
    """
    n_cols = len(self.header)
    table = []
    for row in range(self.rows()):
        record = {}
        for col in range(n_cols):
            value = self.data[col][row]
            if not raw_values:
                if isinstance(value, (float, np.floating)) and m.isnan(value):
                    value = missing
                else:
                    unit = self.units[col]
                    # no unit suffix for '', '1', '-', '1-' and 'a.u.':
                    suffix = '' if (unit in '1-' or unit == 'a.u.') else unit
                    value = (self.formats[col] % value) + suffix
            record[self.header[col][0]] = value
        table.append(record)
    return table
def dict(self):
    """Return the table as a dictionary of columns.

    Returns
    -------
    table: dict
        Maps every key yielded by `self.items()` to the data
        of the corresponding column.
    """
    table = {}
    for spec, col_data in self.items():
        table[spec] = col_data
    return table
def _add_table_data(self, data, add_all):
    """Append the column data of another TableData.

    Parameters
    ----------
    data: TableData
        Table with the data to be added.
    add_all: bool
        If False, then only data of columns that already exist in
        the table are added to the table. If the table is empty or
        `add_all` is set to `True` then all data is added and if
        necessary new columns are appended to the table.
    """
    table_is_empty = self.shape[1] == 0
    if table_is_empty:
        add_all = True
    n_rows = self.rows()
    for spec in data.keys():
        target = self.index(spec)
        if table_is_empty or target is None:
            if not add_all:
                continue
            # New column, padded with nan for the rows already present.
            self.append(*data.column_head(spec, secs=True),
                        value=[np.nan]*n_rows)
            target = len(self.data) - 1
        source = data.index(spec)
        self.data[target].extend(data.data[source])
    self.__recompute_shape()
def _add_dict(self, data, add_all):
    """Append the values of a dictionary to the matching columns.

    Parameters
    ----------
    data: dict
        Keys are column labels and values are single values or
        lists of values to be added to the corresponding table columns.
        In a key containing '/', the text before the first '/' is
        taken as the label and the remainder as the unit of a newly
        created column.
    add_all: bool
        If False, then only data of columns that already exist in
        the table are added to the table. If the table is empty or
        `add_all` is set to `True` then all data is added and if
        necessary new columns are appended to the table.
    """
    table_is_empty = self.shape[1] == 0
    if table_is_empty:
        add_all = True
    n_rows = self.rows()
    for key in data:
        label, unit = key, ''
        if '/' in key:
            head, _, unit = key.partition('/')
            label = head.strip()
        target = self.index(label)
        if table_is_empty or target is None:
            if not add_all:
                continue
            # New column, padded with nan for the rows already present.
            self.append(label, unit, value=[np.nan]*n_rows)
            target = len(self.data) - 1
        new_values = data[key]
        if isinstance(new_values, (list, tuple, np.ndarray)):
            self.data[target].extend(new_values)
        else:
            self.data[target].append(new_values)
    self.__recompute_shape()
def add(self, data, column=None, add_all=False):
    """Add data elements to successive columns.

    The current column is set behind the added columns.

    Parameters
    ----------
    data: float, int, str, etc. or list thereof or list of list thereof or dict or list of dict or TableData
        Data values to be appended to successive columns:
        - A single value is simply appended to the specified
          column of the table.
        - A 1D-list of values is appended to successive columns of the table
          starting with the specified column.
        - The columns (second index) of a 2D-list of values are
          appended to successive columns of the table starting
          with the specified column.
        - Values of a dictionary or of a list of dictionaries are
          added to the columns specified by the keys. Dictionary
          values can also be lists of values. Their values are
          added to successive rows of the columns specified by the
          dictionary keys. Does not affect the current column.
        - All elements of a TableData are added to matching columns.
          Does not affect the current column.
    column: None, int, or str
        The first column to which the data should be appended,
        if `data` does not specify columns.
        If None, append to the current column.
        See self.index() for more information on how to specify a column.
    add_all: bool
        If the data are given as dictionaries or TableData, then
        only data of columns that already exist in the table are
        added to the table. If the table is empty or `add_all` is
        set to `True` then all data is added and if necessary new
        columns are appended to the table.
    """
    def is_sequence(obj):
        # lists, tuples and ndarrays, except 0-dimensional ndarrays
        return isinstance(obj, (list, tuple)) or \
            (isinstance(obj, np.ndarray) and obj.ndim > 0)

    if self.shape[1] == 0:
        add_all = True
    start = self.index(column)
    if start is None:
        start = self.setcol
    if isinstance(data, TableData):
        self._add_table_data(data, add_all)
    elif isinstance(data, dict):
        # dictionary with values:
        self._add_dict(data, add_all)
    elif not is_sequence(data):
        # single value:
        self.data[start].append(data)
        self.setcol = start + 1
    elif len(data) > 0 and is_sequence(data[0]):
        # 2D list, rows first:
        for row_values in data:
            for offset, val in enumerate(row_values):
                self.data[start + offset].append(val)
        self.setcol = start + len(data[0])
    elif len(data) > 0 and isinstance(data[0], dict):
        # list of dictionaries:
        for record in data:
            self._add_dict(record, add_all)
    else:
        # 1D list:
        for offset, val in enumerate(data):
            self.data[start + offset].append(val)
        self.setcol = start + len(data)
    # wrap around behind the last column:
    if self.setcol >= len(self.data):
        self.setcol = 0
    self.__recompute_shape()
def append_data_column(self, data, column=None):
    """Append data elements to a single column.

    Afterwards the current column is the one following `column`,
    wrapping around to the first column behind the last one.

    Parameters
    ----------
    data: float, int, str, etc. or list thereof
        Data values to be appended to a column.
    column: None, int, or str
        The column to which the data should be appended.
        If None, append to the current column.
        See self.index() for more information on how to specify a column.
    """
    col = self.index(column)
    if col is None:
        col = self.setcol
    target = self.data[col]
    if isinstance(data, (list, tuple, np.ndarray)):
        target.extend(data)
    else:
        target.append(data)
    following = col + 1
    self.setcol = following if following < len(self.data) else 0
    self.__recompute_shape()
def set_column(self, column):
    """Set the column where to add data.

    Parameters
    ----------
    column: int or str
        The column to which data elements should be appended.
        See self.index() for more information on how to specify a column.

    Returns
    -------
    col: int
        The index of the column that is now the current column.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    col = self.index(column)
    if col is None:
        if isinstance(column, (np.integer, int)):
            name = '%d' % column
        else:
            # str() so that specifiers like None still produce the
            # documented IndexError instead of a TypeError.
            name = str(column)
        raise IndexError('column ' + name + ' not found or invalid')
    self.setcol = col
    return col
def fill_data(self):
    """Pad all columns with nan to the length of the longest column.

    Also resets the current column to the first one.
    """
    n_rows = self.rows()
    for col_data in self.data:
        shortfall = n_rows - len(col_data)
        if shortfall > 0:
            col_data.extend([np.nan] * shortfall)
    self.setcol = 0
    self.__recompute_shape()
def clear_data(self):
    """Remove all data values but keep the header.

    Every column gets a fresh empty list and the current column is
    reset to the first one.
    """
    self.data[:] = [[] for _ in self.data]
    self.setcol = 0
    self.__recompute_shape()
def clear(self):
    """Reset the table to an empty one without header or data.
    """
    # header information:
    self.header = []
    self.nsecs = 0
    self.units = []
    self.formats = []
    self.descriptions = []
    self.hidden = []
    # content and its shape:
    self.data = []
    self.ndim = 2
    self.size = 0
    self.shape = (0, 0)
    # column cursors:
    self.setcol = 0
    self.addcol = 0
def sort(self, columns, reverse=False):
    """Sort the table rows in place.

    Parameters
    ----------
    columns: int or str or list of int or str
        A column specifier or a list of column specifiers of the columns
        to be sorted.
    reverse: boolean
        If `True` sort in descending order.

    Raises
    ------
    IndexError:
        If an invalid column was specified.
    """
    # fix columns:
    if not isinstance(columns, (list, tuple, np.ndarray)):
        columns = [columns]
    # len() instead of truthiness: the truth value of an ndarray with
    # more than one element is ambiguous and raises a ValueError.
    if len(columns) == 0:
        return
    cols = []
    for col in columns:
        c = self.index(col)
        if c is None:
            if isinstance(col, (np.integer, int)):
                name = '%d' % col
            else:
                # str() so that specifiers like None still produce the
                # documented IndexError instead of a TypeError.
                name = str(col)
            raise IndexError('sort column ' + name + ' not found')
        cols.append(c)

    def sort_key(r):
        # One entry per sort column; nan values are replaced by -inf
        # so that they compare and sort before all other numbers.
        key = []
        for c in cols:
            v = self.data[c][r]
            key.append(float('-inf') if v is np.nan or v != v else v)
        return key

    # get sorted row indices:
    row_inx = sorted(range(self.rows()), key=sort_key, reverse=reverse)
    # sort table according to indices:
    for c in range(self.columns()):
        self.data[c] = [self.data[c][r] for r in row_inx]
def key_value(self, row, col, missing=default_missing_str):
    """Return a data element as a pair of column label and text.

    Parameters
    ----------
    row: int
        Specifies the row from which the data element should be retrieved.
    col: None, int, or str
        A specification of a column.
        See self.index() for more information on how to specify a column.
    missing: str
        String used for float nan elements.

    Returns
    -------
    key: str
        Header label of the column
    value: str
        A textual representation of the data element according to the format
        of the column, followed by the unit of the column.
        If `col` does not specify a column, a single empty string
        is returned instead of the pair.
    """
    col = self.index(col)
    if col is None:
        return ''
    value = self.data[col][row]
    if isinstance(value, (float, np.floating)) and m.isnan(value):
        text = missing
    else:
        unit = self.units[col]
        # no unit suffix for '', '1', '-', '1-' and 'a.u.':
        suffix = '' if (unit in '1-' or unit == 'a.u.') else unit
        text = (self.formats[col] % value) + suffix
    return self.header[col][0], text
def _aggregate(self, funcs, columns=None, label=None,
               numbers_only=False, remove_nans=False, single_row=False,
               keep_columns=None):
    """Apply functions to columns.

    Parameters
    ----------
    funcs: function, list of function, dict
        Functions that are applied to columns of the table.
        - a single function that is applied to the `columns`.
          The results are named according to the function's `__name__`.
        - a list or tuple of functions.
          The results are named according to the functions' `__name__`.
        - a dictionary. The results are named after the provided keys,
          the functions are given by the values.
          If the function returns more than one value, then the
          corresponding key in the dictionary needs to be a tuple
          (not a list!) of names for each of the returned values.
        Functions in lists or dictionaries can be just a plain
        function, like `max` or `np.mean`. In case a function
        needs further arguments, then you need to supply a tuple
        with the first elements being the function, the second
        element being another tuple holding positional arguments,
        and an optional third argument holding a dictionary for
        key-word arguments.
    columns: None, int or str or list of int or str
        Columns of the table on which functions are applied.
        If None apply functions on all columns.
    label: str or list of str
        Column label and optional section names of the first
        column with the function labels (if `single_row` is `False`).
        Defaults to 'property'.
    numbers_only: bool
        If True, skip columns that do not contain numbers.
        Only the first element of each column is inspected.
    remove_nans: bool
        If True, remove nans before passing column values to function.
    single_row: bool
        If False, add for each function a row to the table.
        If True, add function values in a single row.
    keep_columns: None, int or str or list of int or str
        Columns of the table from which to simply keep the first value.
        Only if single_row is True. Useful for grouped tables.
        Order of columns and keep_columns are kept from the original table.

    Returns
    -------
    dest: TableData
        A new table with the column headers specified by `columns`.
        If not `single_row`, a first column is inserted with the
        function labels.
        The functions are applied to all columns specified by `columns`
        and their return values are written into the new table.
    """
    # standardize functions dictionary:
    if not isinstance(funcs, (list, tuple, dict)):
        funcs = [funcs]
    if isinstance(funcs, (list, tuple)):
        # entries are plain functions or (function, args[, kwargs])
        # tuples, both are named after the function itself:
        named = {}
        for f in funcs:
            func = f[0] if isinstance(f, tuple) else f
            named[func.__name__] = f
        funcs = named
    specs = {}
    for key, spec in funcs.items():
        names = key if isinstance(key, tuple) else (key,)
        # complete missing positional and key-word arguments:
        if not isinstance(spec, tuple):
            spec = (spec, (), {})
        elif len(spec) == 1:
            spec = (spec[0], (), {})
        elif len(spec) == 2:
            spec = (spec[0], spec[1], {})
        specs[names] = spec
    funcs = specs
    # standardize columns:
    if columns is None:
        columns = list(range(self.shape[1]))
    if not isinstance(columns, (list, tuple, np.ndarray)):
        columns = [columns]
    if numbers_only:
        cols = []
        for c in columns:
            c = self.index(c)
            if len(self.data[c]) > 0 and \
               isinstance(self.data[c][0],
                          (float, int, np.floating, np.integer)):
                cols.append(c)
        columns = cols
    if label is None:
        label = 'property'
    dest = TableData()
    if single_row:
        if keep_columns is None:
            keep_columns = []
        elif not isinstance(keep_columns, (list, tuple)):
            keep_columns = [keep_columns]
        keep_columns = [self.index(c) for c in keep_columns]
        columns = [self.index(c) for c in columns]
        columns = [c for c in columns if not c in keep_columns]
        # mark which of the merged columns are just kept:
        keep = np.zeros(len(keep_columns) + len(columns), dtype=bool)
        keep[:len(keep_columns)] = True
        columns = keep_columns + columns
        # walk through the columns in the order of the original table:
        idx = np.argsort(columns)
        for i in idx:
            c = columns[i]
            name, unit, fmt, descr = self.column_head(c, secs=True)
            if keep[i]:
                dest.append(name + ['-'], unit, fmt, descr,
                            value=self.data[c][0])
                continue
            values = self[:, c]
            if remove_nans:
                values = values[np.isfinite(values)]
            for names, spec in funcs.items():
                v = spec[0](values, *spec[1], **spec[2])
                if len(names) == 1:
                    dest.append(name + [names[0]], unit, fmt, descr,
                                value=v)
                else:
                    # one column for each of the returned values:
                    for j in range(len(names)):
                        dest.append(name + [names[j]], unit, fmt, descr,
                                    value=v[j])
        dest.fill_data()
    else:
        # first column holds the names of the functions:
        dest.append(label, '', '%-s')
        for c in columns:
            dest.append(*self.column_head(c, secs=True))
        for names, spec in funcs.items():
            for j in range(len(names)):
                dest.add(names[j], 0)
            for i, c in enumerate(columns):
                values = self[:, c]
                if remove_nans:
                    values = values[np.isfinite(values)]
                v = spec[0](values, *spec[1], **spec[2])
                if len(names) == 1:
                    dest.add(v, i + 1)
                else:
                    # one row for each of the returned values:
                    for j in range(len(names)):
                        dest.add(v[j], i + 1)
        dest.fill_data()
    return dest
def aggregate(self, funcs, columns=None, label=None,
              numbers_only=False, remove_nans=False,
              single_row=False, by=None):
    """Apply functions to columns.

    Parameters
    ----------
    funcs: function, list of function, dict
        Functions that are applied to columns of the table.
        - a single function that is applied to the `columns`.
          The results are named according to the function's `__name__`.
        - a list or tuple of functions.
          The results are named according to the functions' `__name__`.
        - a dictionary. The results are named after the provided keys,
          the functions are given by the values.
          If the function returns more than one value, then the
          corresponding key in the dictionary needs to be a tuple
          (not a list!) of names for each of the returned values.
        Functions in lists or dictionaries can be just a plain
        function, like `max` or `np.mean`. In case a function
        needs further arguments, then you need to supply a tuple
        with the first elements being the function, the second
        element being another tuple holding positional arguments,
        and an optional third argument holding a dictionary for
        key-word arguments.
    columns: None, int or str or list of int or str
        Columns of the table on which functions are applied.
        If None apply functions on all columns.
    label: str or list of str
        Column label and optional section names of the first
        column with the function labels (if `single_row` is `False`).
    numbers_only: bool
        If True, skip columns that do not contain numbers.
    remove_nans: bool
        If True, remove nans before passing column values to function.
    single_row: bool
        If False, add for each function a row to the table.
        If True, add function values in a single row.
        Ignored when grouping with `by`: each group is always
        written as a single row.
    by: None, int or str or list of int or str
        Group the table by the specified columns and apply the functions
        to each resulting sub-table separately.
        Combinations of group values without any rows are skipped.

    Returns
    -------
    dest: TableData
        A new table with the column headers specified by `columns`.
        A first column is inserted with the function labels
        (if not `single_row`).
        The functions are applied to all columns specified by `columns`
        and their return values are written into the new table.
    """
    if by is not None:
        # aggregate on grouped table:
        if not isinstance(by, (list, tuple)):
            by = [by]
        if len(by) > 0:
            gd = TableData()
            for _, group in self.groupby(*by):
                # groupby() walks through all combinations of the
                # unique column values. Combinations without rows
                # have no first value to keep and nothing to
                # aggregate, so skip them:
                if len(group) == 0:
                    continue
                ad = group._aggregate(funcs, columns, label,
                                      numbers_only=numbers_only,
                                      remove_nans=remove_nans,
                                      single_row=True, keep_columns=by)
                gd.add(ad)
            return gd
    # aggregate on whole table:
    return self._aggregate(funcs, columns, label,
                           numbers_only=numbers_only,
                           remove_nans=remove_nans,
                           single_row=single_row,
                           keep_columns=None)
def statistics(self, columns=None, label=None,
               remove_nans=False, single_row=False, by=None):
    """Compute basic descriptive statistics for columns with numbers.

    Mean, standard deviation, minimum, the three quartiles, maximum,
    and the number of elements are computed via `aggregate()`.
    Afterwards the formats of the resulting columns are adapted:
    floating point formats with an explicit precision get one more
    digit, all other formats are replaced by '%.1f'.

    Parameters
    ----------
    columns: None, int or str or list of int or str
        Columns of the table on which statistics should be computed.
        If None apply functions on all columns.
    label: str or list of str
        Column label and optional section names of the first
        column with the function labels (if `single_row` is `False`).
        Defaults to 'statistics'.
    remove_nans: bool
        If True, remove nans before passing column values to function.
    single_row: bool
        If False, add for each function a row to the table.
        If True, add function values in a single row.
    by: None, int or str or list of int or str
        Group the table by the specified columns and compute statistics
        to each resulting sub-table separately.

    Returns
    -------
    dest: TableData
        A new table with the column headers specified by `columns`.
        For each column that contains numbers some basic
        descriptive statistics is computed.
    """
    funcs = {'mean': np.mean,
             'std': np.std,
             'min': np.min,
             ('quartile1', 'median', 'quartile2'):
             (np.quantile, ([0.25, 0.5, 0.75],)),
             'max': np.max,
             'count': len}
    ds = self.aggregate(funcs, columns,
                        'statistics' if label is None else label,
                        numbers_only=True, remove_nans=remove_nans,
                        single_row=single_row, by=by)
    # with a non-empty `by`, the result is treated as single-row:
    if by is not None:
        groups = by if isinstance(by, (list, tuple)) else [by]
        if len(groups) > 0:
            single_row = True
    first = 0
    if not single_row:
        # the first column holds the names of the statistics:
        ds.set_format(0, '%-10s')
        first = 1
    for c in range(first, ds.shape[1]):
        fmt = ds.formats[c]
        if single_row and ds.label(c) == 'count':
            # counts are plain integers without unit:
            ds.set_unit(c, '')
            ds.set_format(c, '%d')
            continue
        if fmt[-1] not in 'fge':
            ds.set_format(c, '%.1f')
            continue
        # one more digit of precision than the original data:
        dot = fmt.find('.')
        if dot > 0:
            precision = int(fmt[dot + 1:-1])
            fmt = f'{fmt[:dot + 1]}{precision + 1}{fmt[-1]}'
        ds.set_format(c, fmt)
    return ds
def groupby(self, *columns):
    """Iterate through unique values of columns.

    All combinations of the unique values of the specified columns
    are visited, including combinations for which no row exists.
    For those the yielded sub table is selected by an all-`False`
    row mask.

    Parameters
    ----------
    columns: int or str
        One or several columns used to group the data.
        See self.index() for more information on how to specify a column.
        If one of the columns does not exist, nothing is yielded.

    Yields
    ------
    values: float or str or tuple of float or str
        The values of the specified columns.
        A single value if a single column was specified,
        a tuple otherwise.
    data: TableData
        The sub table where the specified columns equals `values`.
    """
    # check column indices and values:
    cols = []
    vals = []
    for col in columns:
        c = self.index(col)
        if c is None:
            # A plain return ends the generator. Raising StopIteration
            # inside a generator is turned into a RuntimeError (PEP 479).
            return
        cols.append(c)
        vals.append(np.unique(self.data[c]))
    for values in product(*vals):
        # select rows matching the values in all columns:
        mask = np.ones(len(self), dtype=bool)
        for c, v in zip(cols, values):
            mask &= self[:, c] == v
        if len(cols) == 1:
            yield values[0], self[mask]
        else:
            yield values, self[mask]
def hide(self, column):
    """Mark a column or a range of columns as hidden.

    Hidden columns will not be printed out by the write() function.
    Nothing happens if the column is not found.

    Parameters
    ----------
    column: int or str
        The column to be hidden.
        See self.index() for more information on how to specify a column.
    """
    first, stop = self.find_col(column)
    if first is None:
        return
    for c in range(first, stop):
        self.hidden[c] = True
def hide_all(self):
    """Mark every column of the table as hidden.

    Hidden columns will not be printed out by the write() function.
    """
    for c, _ in enumerate(self.hidden):
        self.hidden[c] = True
def hide_empty_columns(self, missing=default_missing_inputs):
    """Hide all columns that contain nothing but missing data.

    A column is considered empty if each of its elements is either
    a float nan or contained in `missing`. Columns without any
    elements are hidden as well.
    Hidden columns will not be printed out by the write() function.

    Parameters
    ----------
    missing: list of str
        Strings indicating missing data.
    """
    def is_missing(v):
        # floats are missing only if they are nan,
        # everything else if it is listed in `missing`:
        if isinstance(v, (float, np.floating)):
            return m.isnan(v)
        return v in missing

    for c, column in enumerate(self.data):
        if all(is_missing(v) for v in column):
            self.hidden[c] = True
def show(self, column):
    """Make a column or a range of columns visible again.

    Undoes hiding of a column.
    Nothing happens if the column is not found.

    Parameters
    ----------
    column: int or str
        The column to be shown.
        See self.index() for more information on how to specify a column.
    """
    first, stop = self.find_col(column)
    if first is None:
        return
    for c in range(first, stop):
        self.hidden[c] = False
2482 def write(self, fh=sys.stdout, table_format=None, delimiter=None,
2483 unit_style=None, column_numbers=None, sections=None,
2484 align_columns=None, shrink_width=True,
2485 missing=default_missing_str, center_columns=False,
2486 latex_unit_package=None, latex_label_command='',
2487 latex_merge_std=False, descriptions_name='-description',
2488 section_headings=None, maxc=80):
2489 """Write the table to a file or stream.
2491 Parameters
2492 ----------
2493 fh: str or Path or file object
2494 If not a file object, the file with path `fh` is opened.
2495 If `fh` does not have an extension,
2496 the `table_format` is appended as an extension.
2497 Otherwise `fh` is used as a stream for writing.
2498 table_format: None or str
2499 The format to be used for output.
2500 One of 'out', 'dat', 'ascii', 'csv', 'rtai', 'md', 'tex', 'html'.
2501 If None or 'auto' then the format is set to the extension of the
2502 filename given by `fh`. If the filename does not have an extension
2503 `fh` is set to 'csv'. if `fh` is a stream the format is set
2504 to 'out'.
2505 delimiter: str
2506 String or character separating columns, if supported by the
2507 `table_format`.
2508 If None or 'auto' use the default for the specified `table_format`.
2509 unit_style: None or str
2510 - None or 'auto': use default of the specified `table_format`.
2511 - 'row': write an extra row to the table header specifying the
2512 units of the columns.
2513 - 'header': add the units to the column headers.
2514 - 'none': do not specify the units.
2515 column_numbers: str or None
2516 Add a row specifying the column index:
2517 - 'index': indices are integers, first column is 0.
2518 - 'num': indices are integers, first column is 1.
2519 - 'aa': use 'a', 'b', 'c', ..., 'z', 'aa', 'ab', ... for indexing
2520 - 'aa': use 'A', 'B', 'C', ..., 'Z', 'AA', 'AB', ... for indexing
2521 - None or 'none': do not add a row with column indices
2522 TableData.column_numbering is a list with the supported styles.
2523 sections: None or int
2524 Number of section levels to be printed.
2525 If `None` or 'auto' use default of selected `table_format`.
2526 align_columns: boolean
2527 - `True`: set width of column formats to make them align.
2528 - `False`: set width of column formats to 0 - no unnecessary spaces.
2529 - None or 'auto': Use default of the selected `table_format`.
2530 shrink_width: boolean
2531 If `True` disregard width specified by the format strings,
2532 such that columns can become narrower.
2533 missing: str
2534 Indicate missing data by this string.
2535 center_columns: boolean
2536 If True center all columns (markdown, html, and latex).
2537 latex_unit_package: None or 'siunitx' or 'SIunit'
2538 Translate units for the specified LaTeX package.
2539 If None set sub- and superscripts in text mode.
2540 If 'siunitx', also use `S` columns for numbers to align
2541 them on the decimal point.
2542 latex_label_command: str
2543 LaTeX command for formatting header labels.
2544 E.g. 'textbf' for making the header labels bold.
2545 latex_merge_std: str
2546 Merge header of columns with standard deviations with
2547 previous column (LaTeX tables only), but separate them
2548 with $\\pm$. Valid labels for standrad deviations are
2549 listed in `TableData.stdev_labels`.
2550 descriptions_name: None or str
2551 If not None and if `fh` is a file path, then write the column
2552 descriptions to a file with the same name as `fh`, but with
2553 `descriptions_name` appended.
2554 section_headings: None or int
2555 How to write treat header sections in the column descriptions.
2556 If set, set header sections as headings with the top-level
2557 section at the level as specified. 0 is the top level.
2558 If False, just produce a nested list.
2559 maxc: int
2560 Maximum character count for each line in the column descriptions.
2562 Returns
2563 -------
2564 file_name: Path or None
2565 The full name of the file into which the data were written.
2567 Supported file formats
2568 ----------------------
2570 ## `dat`: data text file
2571 ``` plain
2572 # info reaction
2573 # size weight delay jitter
2574 # m kg ms mm
2575 2.34 123 98.7 23
2576 56.70 3457 54.3 45
2577 8.90 43 67.9 345
2578 ```
2580 ## `ascii`: ascii-art table
2581 ``` plain
2582 |---------------------------------|
2583 | info | reaction |
2584 | size | weight | delay | jitter |
2585 | m | kg | ms | mm |
2586 |-------|--------|-------|--------|
2587 | 2.34 | 123 | 98.7 | 23 |
2588 | 56.70 | 3457 | 54.3 | 45 |
2589 | 8.90 | 43 | 67.9 | 345 |
2590 |---------------------------------|
2591 ```
2593 ## `csv`: comma separated values
2594 ``` plain
2595 size/m,weight/kg,delay/ms,jitter/mm
2596 2.34,123,98.7,23
2597 56.70,3457,54.3,45
2598 8.90,43,67.9,345
2599 ```
2601 ## `rtai`: rtai-style table
2602 ``` plain
2603 RTH| info | reaction
2604 RTH| size | weight| delay| jitter
2605 RTH| m | kg | ms | mm
2606 RTD| 2.34| 123| 98.7| 23
2607 RTD| 56.70| 3457| 54.3| 45
2608 RTD| 8.90| 43| 67.9| 345
2609 ```
2611 ## `md`: markdown
2612 ``` plain
2613 | size/m | weight/kg | delay/ms | jitter/mm |
2614 |------:|-------:|------:|-------:|
2615 | 2.34 | 123 | 98.7 | 23 |
2616 | 56.70 | 3457 | 54.3 | 45 |
2617 | 8.90 | 43 | 67.9 | 345 |
2618 ```
2620 ## `tex`: latex tabular
2621 ``` tex
2622 \\begin{tabular}{rrrr}
2623 \\hline
2624 \\multicolumn{2}{l}{info} & \\multicolumn{2}{l}{reaction} \\
2625 \\multicolumn{1}{l}{size} & \\multicolumn{1}{l}{weight} & \\multicolumn{1}{l}{delay} & \\multicolumn{1}{l}{jitter} \\
2626 \\multicolumn{1}{l}{m} & \\multicolumn{1}{l}{kg} & \\multicolumn{1}{l}{ms} & \\multicolumn{1}{l}{mm} \\
2627 \\hline
2628 2.34 & 123 & 98.7 & 23 \\
2629 56.70 & 3457 & 54.3 & 45 \\
2630 8.90 & 43 & 67.9 & 345 \\
2631 \\hline
2632 \\end{tabular}
2633 ```
2635 ## `html`: html
2636 ``` html
2637 <table>
2638 <thead>
2639 <tr class="header">
2640 <th align="left" colspan="2">info</th>
2641 <th align="left" colspan="2">reaction</th>
2642 </tr>
2643 <tr class="header">
2644 <th align="left">size</th>
2645 <th align="left">weight</th>
2646 <th align="left">delay</th>
2647 <th align="left">jitter</th>
2648 </tr>
2649 <tr class="header">
2650 <th align="left">m</th>
2651 <th align="left">kg</th>
2652 <th align="left">ms</th>
2653 <th align="left">mm</th>
2654 </tr>
2655 </thead>
2656 <tbody>
2657 <tr class"odd">
2658 <td align="right">2.34</td>
2659 <td align="right">123</td>
2660 <td align="right">98.7</td>
2661 <td align="right">23</td>
2662 </tr>
2663 <tr class"even">
2664 <td align="right">56.70</td>
2665 <td align="right">3457</td>
2666 <td align="right">54.3</td>
2667 <td align="right">45</td>
2668 </tr>
2669 <tr class"odd">
2670 <td align="right">8.90</td>
2671 <td align="right">43</td>
2672 <td align="right">67.9</td>
2673 <td align="right">345</td>
2674 </tr>
2675 </tbody>
2676 </table>
2677 ```
2679 """
2680 # fix parameter:
2681 if table_format is not None:
2682 table_format = table_format.lower()
2683 if table_format == 'auto':
2684 table_format = None
2685 if delimiter == 'auto':
2686 delimiter = None
2687 if unit_style == 'auto':
2688 unit_style = None
2689 if column_numbers == 'none':
2690 column_numbers = None
2691 if sections == 'auto':
2692 sections = None
2693 if align_columns == 'auto':
2694 align_columns = None
2695 # open file:
2696 own_file = False
2697 file_name = None
2698 if not hasattr(fh, 'write'):
2699 fh = Path(fh)
2700 ext = fh.suffix
2701 if table_format is None:
2702 if len(ext) > 1 and ext[1:] in self.ext_formats:
2703 table_format = self.ext_formats[ext[1:]]
2704 else:
2705 table_format = 'csv'
2706 if not ext or not ext[1:].lower() in self.ext_formats:
2707 fh = fh.with_suffix('.' + self.extensions[table_format])
2708 file_name = fh
2709 fh = open(file_name, 'w')
2710 own_file = True
2711 if table_format is None:
2712 table_format = 'out'
2713 # set style:
2714 if table_format[0] == 'd':
2715 align_columns = True
2716 begin_str = ''
2717 end_str = ''
2718 header_start = '# '
2719 header_sep = ' '
2720 header_close = ''
2721 header_end = '\n'
2722 data_start = ' '
2723 data_sep = ' '
2724 data_close = ''
2725 data_end = '\n'
2726 top_line = False
2727 header_line = False
2728 bottom_line = False
2729 if delimiter is not None:
2730 header_sep = delimiter
2731 data_sep = delimiter
2732 if sections is None:
2733 sections = 1000
2734 elif table_format[0] == 'a':
2735 align_columns = True
2736 begin_str = ''
2737 end_str = ''
2738 header_start = '| '
2739 header_sep = ' | '
2740 header_close = ''
2741 header_end = ' |\n'
2742 data_start = '| '
2743 data_sep = ' | '
2744 data_close = ''
2745 data_end = ' |\n'
2746 top_line = True
2747 header_line = True
2748 bottom_line = True
2749 if delimiter is not None:
2750 header_sep = delimiter
2751 data_sep = delimiter
2752 if sections is None:
2753 sections = 1000
2754 elif table_format[0] == 'c':
2755 # csv according to http://www.ietf.org/rfc/rfc4180.txt :
2756 column_numbers=None
2757 if unit_style is None:
2758 unit_style = 'header'
2759 if align_columns is None:
2760 align_columns = False
2761 begin_str = ''
2762 end_str = ''
2763 header_start=''
2764 header_sep = ','
2765 header_close = ''
2766 header_end='\n'
2767 data_start=''
2768 data_sep = ','
2769 data_close = ''
2770 data_end='\n'
2771 top_line = False
2772 header_line = False
2773 bottom_line = False
2774 if delimiter is not None:
2775 header_sep = delimiter
2776 data_sep = delimiter
2777 if sections is None:
2778 sections = 0
2779 elif table_format[0] == 'r':
2780 align_columns = True
2781 begin_str = ''
2782 end_str = ''
2783 header_start = 'RTH| '
2784 header_sep = '| '
2785 header_close = ''
2786 header_end = '\n'
2787 data_start = 'RTD| '
2788 data_sep = '| '
2789 data_close = ''
2790 data_end = '\n'
2791 top_line = False
2792 header_line = False
2793 bottom_line = False
2794 if sections is None:
2795 sections = 1000
2796 elif table_format[0] == 'm':
2797 if unit_style is None or unit_style == 'row':
2798 unit_style = 'header'
2799 align_columns = True
2800 begin_str = ''
2801 end_str = ''
2802 header_start='| '
2803 header_sep = ' | '
2804 header_close = ''
2805 header_end=' |\n'
2806 data_start='| '
2807 data_sep = ' | '
2808 data_close = ''
2809 data_end=' |\n'
2810 top_line = False
2811 header_line = True
2812 bottom_line = False
2813 if sections is None:
2814 sections = 0
2815 elif table_format[0] == 'h':
2816 align_columns = False
2817 begin_str = '<table>\n<thead>\n'
2818 end_str = '</tbody>\n</table>\n'
2819 if center_columns:
2820 header_start=' <tr>\n <th align="center"'
2821 header_sep = '</th>\n <th align="center"'
2822 else:
2823 header_start=' <tr>\n <th align="left"'
2824 header_sep = '</th>\n <th align="left"'
2825 header_close = '>'
2826 header_end='</th>\n </tr>\n'
2827 data_start=' <tr>\n <td'
2828 data_sep = '</td>\n <td'
2829 data_close = '>'
2830 data_end='</td>\n </tr>\n'
2831 top_line = False
2832 header_line = False
2833 bottom_line = False
2834 if sections is None:
2835 sections = 1000
2836 elif table_format[0] == 't':
2837 if align_columns is None:
2838 align_columns = False
2839 begin_str = '\\begin{tabular}'
2840 end_str = '\\end{tabular}\n'
2841 header_start=' '
2842 header_sep = ' & '
2843 header_close = ''
2844 header_end=' \\\\\n'
2845 data_start=' '
2846 data_sep = ' & '
2847 data_close = ''
2848 data_end=' \\\\\n'
2849 top_line = True
2850 header_line = True
2851 bottom_line = True
2852 if sections is None:
2853 sections = 1000
2854 else:
2855 if align_columns is None:
2856 align_columns = True
2857 begin_str = ''
2858 end_str = ''
2859 header_start = ''
2860 header_sep = ' '
2861 header_close = ''
2862 header_end = '\n'
2863 data_start = ''
2864 data_sep = ' '
2865 data_close = ''
2866 data_end = '\n'
2867 top_line = False
2868 header_line = False
2869 bottom_line = False
2870 if sections is None:
2871 sections = 1000
2872 # check units:
2873 if unit_style is None:
2874 unit_style = 'row'
2875 have_units = False
2876 for u in self.units:
2877 if u and u != '1' and u != '-':
2878 have_units = True
2879 break
2880 if not have_units:
2881 unit_style = 'none'
2882 # find std columns:
2883 stdev_col = np.zeros(len(self.header), dtype=bool)
2884 for c in range(len(self.header) - 1):
2885 if self.header[c + 1][0].lower() in self.stdev_labels and \
2886 not self.hidden[c+1]:
2887 stdev_col[c] = True
2888 # begin table:
2889 fh.write(begin_str)
2890 if table_format[0] == 't':
2891 fh.write('{')
2892 merged = False
2893 for h, f, s in zip(self.hidden, self.formats, stdev_col):
2894 if merged:
2895 fh.write('l')
2896 merged = False
2897 continue
2898 if h:
2899 continue
2900 if latex_merge_std and s:
2901 fh.write('r@{$\\,\\pm\\,$}')
2902 merged = True
2903 elif center_columns:
2904 fh.write('c')
2905 elif f[1] == '-':
2906 fh.write('l')
2907 else:
2908 if latex_unit_package is not None and \
2909 latex_unit_package.lower() == 'siunitx':
2910 fh.write('S')
2911 else:
2912 fh.write('r')
2913 fh.write('}\n')
2914 # retrieve column formats and widths:
2915 widths = [] # width from format string
2916 widths_pos = [] # start and end index of width specifyer in format
2917 for c, f in enumerate(self.formats):
2918 w = 0
2919 # position of width specification:
2920 i0 = 1
2921 if len(f) > 1 and f[1] == '-' :
2922 i0 = 2
2923 i1 = f.find('.')
2924 if i1 < 0:
2925 i1 = i0
2926 while i1 < len(f) and f[i1].isdigit():
2927 i1 += 1
2928 if not shrink_width and i1 > i0:
2929 if f[i0:i1]:
2930 w = int(f[i0:i1])
2931 widths_pos.append((i0, i1))
2932 # adapt width to header label:
2933 hw = len(self.header[c][0])
2934 if unit_style == 'header' and self.units[c] and \
2935 self.units[c] != '1' and self.units[c] != '-':
2936 hw += 1 + len(self.units[c])
2937 if w < hw:
2938 w = hw
2939 # adapt width to data:
2940 if f[-1] == 's':
2941 for v in self.data[c]:
2942 if isinstance(v, str) and w < len(v):
2943 w = len(v)
2944 else:
2945 fs = f'{f[:i0]}0{f[i1:]}'
2946 for v in self.data[c]:
2947 if v is None or (isinstance(v, (float, np.floating)) and
2948 m.isnan(v)):
2949 s = missing
2950 else:
2951 try:
2952 s = fs % v
2953 except ValueError:
2954 s = missing
2955 except TypeError:
2956 s = str(v)
2957 if w < len(s):
2958 w = len(s)
2959 widths.append(w)
2960 # adapt width to sections:
2961 sec_indices = [0] * self.nsecs # previous column with this level
2962 sec_widths = [0] * self.nsecs # total width of section level
2963 sec_columns = [0] * self.nsecs # number of columns in section level
2964 for c in range(len(self.header)):
2965 w = widths[c]
2966 for l in range(min(self.nsecs, sections)):
2967 if 1 + l < len(self.header[c]):
2968 if c > 0 and sec_columns[l] > 0 and \
2969 1 + l < len(self.header[sec_indices[l]]) and \
2970 len(self.header[sec_indices[l]][1 + l]) > sec_widths[l]:
2971 dw = len(self.header[sec_indices[l]][1 + l]) - sec_widths[l]
2972 nc = sec_columns[l]
2973 ddw = np.zeros(nc, dtype=int) + dw // nc
2974 ddw[:dw % nc] += 1
2975 wk = 0
2976 for ck in range(sec_indices[l], c):
2977 if not self.hidden[ck]:
2978 widths[ck] += ddw[wk]
2979 wk += 1
2980 sec_indices[l] = c
2981 sec_widths[l] = 0
2982 sec_columns[l] = 0
2983 if not self.hidden[c]:
2984 if sec_widths[l] > 0:
2985 sec_widths[l] += len(header_sep)
2986 sec_widths[l] += w
2987 sec_columns[l] += 1
2988 # set width of format string:
2989 formats = []
2990 for c, (f, w) in enumerate(zip(self.formats, widths)):
2991 formats.append(f'{f[:widths_pos[c][0]]}{w}{f[widths_pos[c][1]:]}')
2992 # top line:
2993 if top_line:
2994 if table_format[0] == 't':
2995 fh.write(' \\hline \\\\[-2ex]\n')
2996 else:
2997 first = True
2998 fh.write(header_start.replace(' ', '-'))
2999 for c in range(len(self.header)):
3000 if self.hidden[c]:
3001 continue
3002 if not first:
3003 fh.write('-'*len(header_sep))
3004 first = False
3005 fh.write(header_close)
3006 w = widths[c]
3007 fh.write(w*'-')
3008 fh.write(header_end.replace(' ', '-'))
3009 # section and column headers:
3010 nsec0 = self.nsecs - sections
3011 if nsec0 < 0:
3012 nsec0 = 0
3013 for ns in range(nsec0, self.nsecs+1):
3014 nsec = self.nsecs - ns
3015 first = True
3016 last = False
3017 merged = False
3018 fh.write(header_start)
3019 for c in range(len(self.header)):
3020 if nsec < len(self.header[c]):
3021 # section width and column count:
3022 sw = -len(header_sep)
3023 columns = 0
3024 if not self.hidden[c]:
3025 sw = widths[c]
3026 columns = 1
3027 for k in range(c+1, len(self.header)):
3028 if nsec < len(self.header[k]):
3029 break
3030 if self.hidden[k]:
3031 continue
3032 sw += len(header_sep) + widths[k]
3033 columns += 1
3034 else:
3035 last = True
3036 if len(header_end.strip()) == 0:
3037 sw = 0 # last entry needs no width
3038 if columns == 0:
3039 continue
3040 if not first and not merged:
3041 fh.write(header_sep)
3042 first = False
3043 if table_format[0] == 'c':
3044 sw -= len(header_sep)*(columns - 1)
3045 elif table_format[0] == 'h':
3046 if columns>1:
3047 fh.write(' colspan="%d"' % columns)
3048 elif table_format[0] == 't':
3049 if merged:
3050 merged = False
3051 continue
3052 if latex_merge_std and nsec == 0 and stdev_col[c]:
3053 merged = True
3054 fh.write('\\multicolumn{%d}{c}{' % (columns+1))
3055 elif center_columns:
3056 fh.write('\\multicolumn{%d}{c}{' % columns)
3057 else:
3058 fh.write('\\multicolumn{%d}{l}{' % columns)
3059 if latex_label_command:
3060 fh.write('\\%s{' % latex_label_command)
3061 fh.write(header_close)
3062 hs = self.header[c][nsec]
3063 if nsec == 0 and unit_style == 'header':
3064 if self.units[c] and self.units[c] != '1' and self.units[c] != '-':
3065 hs += '/' + self.units[c]
3066 if align_columns and not table_format[0] in 'th':
3067 f = '%%-%ds' % sw
3068 fh.write(f % hs)
3069 else:
3070 fh.write(hs)
3071 if table_format[0] == 'c':
3072 if not last:
3073 fh.write(header_sep*(columns - 1))
3074 elif table_format[0] == 't':
3075 if latex_label_command:
3076 fh.write('}')
3077 fh.write('}')
3078 fh.write(header_end)
3079 # units:
3080 if unit_style == 'row':
3081 first = True
3082 merged = False
3083 fh.write(header_start)
3084 for c in range(len(self.header)):
3085 if self.hidden[c] or merged:
3086 merged = False
3087 continue
3088 if not first:
3089 fh.write(header_sep)
3090 first = False
3091 fh.write(header_close)
3092 unit = self.units[c]
3093 if not unit:
3094 unit = '-'
3095 if table_format[0] == 't':
3096 if latex_merge_std and stdev_col[c]:
3097 merged = True
3098 fh.write('\\multicolumn{2}{c}{%s}' % latex_unit(unit, latex_unit_package))
3099 elif center_columns:
3100 fh.write('\\multicolumn{1}{c}{%s}' % latex_unit(unit, latex_unit_package))
3101 else:
3102 fh.write('\\multicolumn{1}{l}{%s}' % latex_unit(unit, latex_unit_package))
3103 else:
3104 if align_columns and not table_format[0] in 'h':
3105 f = '%%-%ds' % widths[c]
3106 fh.write(f % unit)
3107 else:
3108 fh.write(unit)
3109 fh.write(header_end)
3110 # column numbers:
3111 if column_numbers is not None:
3112 first = True
3113 fh.write(header_start)
3114 for c in range(len(self.header)):
3115 if self.hidden[c]:
3116 continue
3117 if not first:
3118 fh.write(header_sep)
3119 first = False
3120 fh.write(header_close)
3121 i = c
3122 if column_numbers == 'num':
3123 i = c+1
3124 aa = index2aa(c, 'a')
3125 if column_numbers == 'AA':
3126 aa = index2aa(c, 'A')
3127 if table_format[0] == 't':
3128 if column_numbers == 'num' or column_numbers == 'index':
3129 fh.write('\\multicolumn{1}{l}{%d}' % i)
3130 else:
3131 fh.write('\\multicolumn{1}{l}{%s}' % aa)
3132 else:
3133 if column_numbers == 'num' or column_numbers == 'index':
3134 if align_columns:
3135 f = '%%%dd' % widths[c]
3136 fh.write(f % i)
3137 else:
3138 fh.write('%d' % i)
3139 else:
3140 if align_columns:
3141 f = '%%-%ds' % widths[c]
3142 fh.write(f % aa)
3143 else:
3144 fh.write(aa)
3145 fh.write(header_end)
3146 # header line:
3147 if header_line:
3148 if table_format[0] == 'm':
3149 fh.write('|')
3150 for c in range(len(self.header)):
3151 if self.hidden[c]:
3152 continue
3153 w = widths[c]+2
3154 if center_columns:
3155 fh.write(':' + (w-2)*'-' + ':|')
3156 elif formats[c][1] == '-':
3157 fh.write(w*'-' + '|')
3158 else:
3159 fh.write((w - 1)*'-' + ':|')
3160 fh.write('\n')
3161 elif table_format[0] == 't':
3162 fh.write(' \\hline \\\\[-2ex]\n')
3163 else:
3164 first = True
3165 fh.write(header_start.replace(' ', '-'))
3166 for c in range(len(self.header)):
3167 if self.hidden[c]:
3168 continue
3169 if not first:
3170 fh.write(header_sep.replace(' ', '-'))
3171 first = False
3172 fh.write(header_close)
3173 w = widths[c]
3174 fh.write(w*'-')
3175 fh.write(header_end.replace(' ', '-'))
3176 # start table data:
3177 if table_format[0] == 'h':
3178 fh.write('</thead>\n<tbody>\n')
3179 # data:
3180 for k in range(self.rows()):
3181 first = True
3182 merged = False
3183 fh.write(data_start)
3184 for c, f in enumerate(formats):
3185 if self.hidden[c] or merged:
3186 merged = False
3187 continue
3188 if not first:
3189 fh.write(data_sep)
3190 first = False
3191 if table_format[0] == 'h':
3192 if center_columns:
3193 fh.write(' align="center"')
3194 elif f[1] == '-':
3195 fh.write(' align="left"')
3196 else:
3197 fh.write(' align="right"')
3198 fh.write(data_close)
3199 if k >= len(self.data[c]) or self.data[c][k] is None or \
3200 (isinstance(self.data[c][k], (float, np.floating)) and m.isnan(self.data[c][k])):
3201 # missing data:
3202 if table_format[0] == 't' and latex_merge_std and stdev_col[c]:
3203 merged = True
3204 fh.write('\\multicolumn{2}{c}{%s}' % missing)
3205 elif align_columns:
3206 if f[1] == '-':
3207 fn = '%%-%ds' % widths[c]
3208 else:
3209 fn = '%%%ds' % widths[c]
3210 fh.write(fn % missing)
3211 else:
3212 fh.write(missing)
3213 else:
3214 # data value:
3215 try:
3216 ds = f % self.data[c][k]
3217 except ValueError:
3218 ds = missing
3219 except TypeError:
3220 ds = str(self.data[c][k])
3221 if not align_columns:
3222 ds = ds.strip()
3223 fh.write(ds)
3224 fh.write(data_end)
3225 # bottom line:
3226 if bottom_line:
3227 if table_format[0] == 't':
3228 fh.write(' \\hline\n')
3229 else:
3230 first = True
3231 fh.write(header_start.replace(' ', '-'))
3232 for c in range(len(self.header)):
3233 if self.hidden[c]:
3234 continue
3235 if not first:
3236 fh.write('-'*len(header_sep))
3237 first = False
3238 fh.write(header_close)
3239 w = widths[c]
3240 fh.write(w*'-')
3241 fh.write(header_end.replace(' ', '-'))
3242 # end table:
3243 fh.write(end_str)
3244 # close file:
3245 if own_file:
3246 fh.close()
3247 # write descriptions:
3248 if file_name is not None and descriptions_name:
3249 write_descriptions = False
3250 for c in range(len(self.descriptions)):
3251 if self.descriptions[c]:
3252 write_descriptions = True
3253 if write_descriptions:
3254 descr_path = file_name.with_name(file_name.stem +
3255 descriptions_name)
3256 if table_format[0] not in 'th': # neither tex nore html
3257 table_format = 'md'
3258 if len(descr_path.suffix) <= 1:
3259 descr_path = descr_path.with_suffix('.' + self.extensions[table_format])
3260 self.write_descriptions(descr_path, table_format=table_format,
3261 sections=sections,
3262 section_headings=section_headings,
3263 latex_unit_package=latex_unit_package,
3264 maxc=maxc)
3265 # return file name:
3266 return file_name
def write_file_stream(self, base_name, file_name, **kwargs):
    """Write table to file or stream and return appropriate file name.

    Parameters
    ----------
    base_name: str, Path, or stream
        If str or Path, path and basename of file.
        `file_name` and an extension are appended.
        If stream, write table data into this stream.
    file_name: str
        Name of file that is appended to `base_name`.
    kwargs:
        Arguments passed on to `TableData.write()`.
        In particular, 'table_format' is used to set the file extension
        that is appended to the returned `file_name`.

    Returns
    -------
    file_name: Path
        Path and full name of the written file in case of `base_name`
        being a string or Path. Otherwise, the file name and extension
        that should be appended to a base name.
    """
    if hasattr(base_name, 'write'):
        # stream: the table goes into the stream, only the name is computed.
        table_format = kwargs.get('table_format', None)
        if table_format is None or table_format == 'auto':
            table_format = 'csv'
        file_name = Path(file_name)
        file_name = file_name.with_suffix('.' + self.extensions[table_format])
        self.write(base_name, **kwargs)
        return file_name
    # Convert to str before concatenating: `Path + str` is not defined
    # and raised a TypeError for the documented Path input.
    base_name = Path(str(base_name) + str(file_name))
    return self.write(base_name, **kwargs)
def __str__(self):
    """Return the table as a string.

    The table is written with `table_format='out'` into an in-memory
    stream whose content is returned.
    """
    with StringIO() as buffer:
        self.write(buffer, table_format='out')
        return buffer.getvalue()
def write_descriptions(self, fh=sys.stdout, table_format=None,
                       sections=None, section_headings=None,
                       latex_unit_package=None, maxc=80):
    """Write column descriptions of the table to a file or stream.

    Parameters
    ----------
    fh: str or Path or file object
        If not a stream, the file with path `fh` is opened.
        If `fh` does not have an extension,
        the `table_format` is appended as an extension.
        Otherwise `fh` is used as a stream for writing.
    table_format: None or str
        The format to be used for output.
        One of 'md', 'tex', or 'html'.
        If None or 'auto' then the format is set to the extension
        of the filename given by `fh`.
        If `fh` is a stream the format is set to 'md'.
    sections: None or int
        Number of section levels to be printed.
        If `None` or 'auto' print all section levels.
    section_headings: None or int
        If set, set header sections as headings with the top-level
        section at the level as specified. 0 is the top level.
        If None, just produce a nested list.
    latex_unit_package: None or 'siunitx' or 'SIunit'
        Translate units for the specified LaTeX package.
        If None set sub- and superscripts in text mode.
    maxc: int
        Maximum character count for each line.

    Returns
    -------
    file_name: Path or None
        The path of the written file, or None if `fh` is a stream.

    Raises
    ------
    ValueError:
        `table_format` is not one of 'md', 'html', or 'tex'.
    """
    # fix parameter:
    if table_format == 'auto':
        table_format = None
    if sections is None or sections == 'auto':
        # 'auto' is documented but was passed on to min() before.
        sections = 1000
    nsecs = min(self.nsecs, sections)
    # resolve file name and format (the file is opened further below):
    own_file = False
    file_name = None
    if not hasattr(fh, 'write'):
        fh = Path(fh)
        ext = fh.suffix
        if table_format is None:
            if len(ext) > 1 and ext[1:] in self.ext_formats:
                table_format = self.ext_formats[ext[1:]]
        elif not ext or not ext[1:].lower() in self.ext_formats:
            fh = fh.with_suffix('.' + self.extensions[table_format])
        file_name = fh
    if table_format is None:
        table_format = 'md'
    # Validate before opening, so that an unsupported format neither
    # truncates an existing file nor leaves a file handle open.
    if table_format not in ('md', 'html', 'tex'):
        raise ValueError(f'File format "{table_format}" not supported for writing column descriptions')
    # open file:
    if file_name is not None:
        fh = open(file_name, 'w')
        own_file = True
    try:
        # write descriptions:
        # headers[0] is the column label, headers[1:] are its sections.
        headers = ['']*(1 + nsecs)
        prev_headers = ['']*(1 + nsecs)
        if table_format == 'md':
            for c in range(len(self.header)):
                headers[:len(self.header[c])] = self.header[c]
                if not self.hidden[c]:
                    changed = False
                    # from the top-level section down to the lowest one:
                    for k in reversed(range(nsecs)):
                        if changed or prev_headers[k + 1] != headers[k + 1]:
                            changed = True
                            if section_headings is None:
                                fh.write(f'{" "*2*(nsecs - k - 1)}- '
                                         f'{headers[k + 1]}\n')
                            else:
                                level = nsecs - k - 1 + section_headings + 1
                                fh.write(f'\n{"#"*level} {headers[k + 1]}\n')
                            prev_headers[k + 1] = headers[k + 1]
                    indent = 2*nsecs if section_headings is None else 0
                    fh.write(f'{" "*indent}- **{headers[0]}**')
                    if self.units[c]:
                        fh.write(f' [{self.units[c]}]')
                    # two trailing blanks make a markdown hard line break
                    # between the label and its description:
                    fh.write('  \n')
                    break_text(fh, self.descriptions[c], maxc,
                               indent=indent + 2)
                    prev_headers[0] = headers[0]
        elif table_format == 'html':
            level = -1       # nesting depth of the currently open <ul>
            for c in range(len(self.header)):
                headers[:len(self.header[c])] = self.header[c]
                if not self.hidden[c]:
                    changed = False
                    for k in reversed(range(nsecs)):
                        if changed or prev_headers[k + 1] != headers[k + 1]:
                            new_level = nsecs - k - 1
                            if not changed:
                                # first section that differs: close lists
                                # of the previous sections.
                                if section_headings is None:
                                    while level > new_level:
                                        fh.write(f'{" "*2*level}</ul>\n')
                                        level -= 1
                                elif level >= 0:
                                    fh.write(f'{" "*2*level}</ul>\n')
                                    level -= 1
                                changed = True
                            if section_headings is None:
                                while level < new_level:
                                    level += 1
                                    fh.write(f'{" "*2*level}<ul>\n')
                                fh.write(f'{" "*2*(level + 1)}<li><b>{headers[k + 1]}</b></li>\n')
                            else:
                                h = new_level + section_headings + 1
                                fh.write(f'\n<h{h}>{headers[k + 1]}</h{h}>\n')
                            prev_headers[k + 1] = headers[k + 1]
                    # Also open a list if none is open yet (tables
                    # without sections), so that no <li> is written
                    # outside of a <ul>.
                    if changed or level < 0:
                        level += 1
                        fh.write(f'{" "*2*level}<ul>\n')

                    fh.write(f'{" "*2*(level + 1)}<li><b>{headers[0]}</b>')
                    if self.units[c]:
                        fh.write(f'[{self.units[c]}]')
                    fh.write('<br>\n')
                    break_text(fh, self.descriptions[c], maxc,
                               indent=2*(level + 1))
                    fh.write(f'{" "*2*(level + 1)}</li>\n')
                    prev_headers[0] = headers[0]
            # close all lists that are still open:
            while level >= 0:
                fh.write(f'{" "*2*level}</ul>\n')
                level -= 1
        else:
            # table_format == 'tex':
            headings = [r'\section', r'\subsection', r'\subsubsection',
                        r'\paragraph', r'\subparagraph']
            level = -1       # nesting depth of the open enumerate
            for c in range(len(self.header)):
                headers[:len(self.header[c])] = self.header[c]
                if not self.hidden[c]:
                    changed = False
                    for k in reversed(range(nsecs)):
                        if changed or prev_headers[k + 1] != headers[k + 1]:
                            new_level = nsecs - k - 1
                            if not changed:
                                if section_headings is None:
                                    while level > new_level:
                                        fh.write(f'{" "*2*level}\\end{{enumerate}}\n')
                                        level -= 1
                                elif level >= 0:
                                    fh.write(f'{" "*2*level}\\end{{enumerate}}\n')
                                    level -= 1
                                changed = True
                            if section_headings is None:
                                while level < new_level:
                                    level += 1
                                    fh.write(f'{" "*2*level}\\begin{{enumerate}}\n')
                                fh.write(f'{" "*2*(level + 1)}\\item \\textbf{{{headers[k + 1]}}}\n')
                            else:
                                fh.write(f'\n{headings[new_level + section_headings]}{{{headers[k + 1]}}}\n')
                            prev_headers[k + 1] = headers[k + 1]
                    # Also open a list if none is open yet, so that no
                    # \item is written outside of an enumerate.
                    if changed or level < 0:
                        level += 1
                        fh.write(f'{" "*2*level}\\begin{{enumerate}}\n')
                    fh.write(f'{" "*2*(level + 1)}\\item \\textbf{{{headers[0]}}}')
                    if self.units[c]:
                        fh.write(f' [{latex_unit(self.units[c], latex_unit_package)}]')
                    fh.write('\n')
                    break_text(fh, self.descriptions[c], maxc,
                               indent=2*(level + 1))
                    prev_headers[0] = headers[0]
            # close all lists that are still open:
            while level >= 0:
                fh.write(f'{" "*2*level}\\end{{enumerate}}\n')
                level -= 1
    finally:
        # close file, also if writing failed:
        if own_file:
            fh.close()
    # return file name:
    return file_name
def load_descriptions(fh):
    """Load column descriptions from file or stream.

    Not implemented yet: the markdown file is scanned for section
    headings (lines starting with '#') and column items (lines
    starting with '-' that contain a '**label**'), but the parsed
    values are not stored anywhere. A warning is printed.

    Parameters
    ----------
    fh: str, Path, or stream
        If not a stream, the file with path `fh` is opened for reading.
    """
    # NOTE(review): the signature has no `self`. If this is meant to be
    # called on an instance it needs a @staticmethod decorator - confirm.
    # open file:
    own_file = False
    if not hasattr(fh, 'readline'):
        fh = open(fh, 'r')
        own_file = True
    try:
        # read file:
        print('WARNING: load_descriptions() not implemented yet')
        for line in fh:
            if line.startswith('#'):
                heading_level = len(line.split()[0])
                section_name = line[heading_level + 1:]
            elif line.startswith('-'):
                lp = line.split('**')
                if len(lp) < 3:
                    # list item without a '**label**', e.g. a section
                    # name of a nested list; indexing lp[1] would fail.
                    continue
                label = lp[1]
                unit = lp[-1].strip().lstrip('[').rstrip(']')
    finally:
        # close file, also if reading failed:
        if own_file:
            fh.close()
def load(self, fh, missing=default_missing_inputs, sep=None, stop=None):
    """Load table from file or stream.

    File type and properties are automatically inferred.

    All previous content of the table (data, header, units, formats,
    descriptions, hidden flags) is discarded before reading.

    Parameters
    ----------
    fh: str, Path, or stream
        If not a stream, the file with path `fh` is opened for reading.
    missing: str or list of str
        Missing data are indicated by this string and
        are translated to np.nan.
    sep: str or None
        Column separator. If None, the separator is guessed from the
        first data lines.
    stop: str or None
        If a line matches `stop`, stop reading the file. `stop`
        can be an empty string to stop reading at the first empty
        line.

    Raises
    ------
    FileNotFoundError:
        If `fh` is a path that does not exist.

    """

    def read_key_line(line, sep, table_format):
        # Split a header line into column labels and the positions of
        # these labels. Returns the two lists `colss` and `indicess`.
        if sep is None:
            # labels may contain single blanks, columns are separated
            # by at least two blanks; positions are character offsets:
            cols, indices = zip(*[(m.group(0), m.start()) for m in re.finditer(r'( ?[\S]+)+(?=[ ][ ]+|\Z)', line.strip())])
        elif table_format == 'csv':
            # positions are field indices, empty fields are skipped:
            cols, indices = zip(*[(c.strip(), i) for i, c in enumerate(line.strip().split(sep)) if c.strip()])
        else:
            # positions are character offsets:
            seps = r'[^'+re.escape(sep)+']+'
            cols, indices = zip(*[(m.group(0), m.start()) for m in re.finditer(seps, line.strip())])
        colss = []
        indicess = []
        if table_format == 'tex':
            # positions are column indices; a \multicolumn{n}{..}{label}
            # advances the column index by n:
            i = 0
            for c in cols:
                if 'multicolumn' in c:
                    fields = c.split('{')
                    n = int(fields[1].strip().rstrip('}').rstrip())
                    colss.append(fields[3].strip().rstrip('}').rstrip())
                    indicess.append(i)
                    i += n
                else:
                    colss.append(c.strip())
                    indicess.append(i)
                    i += 1
        else:
            for k, (c, i) in enumerate(zip(cols, indices)):
                if table_format != 'csv':
                    # remove table borders from first and last label:
                    if k == 0:
                        c = c.lstrip('|')
                    if k == len(cols) - 1:
                        c = c.rstrip('|')
                cs = c.strip()
                # remove enclosing double quotes:
                if len(cs) >= 2 and cs[0] == '"' and cs[-1] == '"':
                    cs = cs.strip('"')
                colss.append(cs)
                indicess.append(i)
        return colss, indicess

    def read_data_line(line, sep, post, precd, alld, numc, exped,
                       fixed, strf, missing, nans):
        # Split a data line into its elements, add them to the table,
        # and update the per-column statistics that are used further
        # below for setting the column formats:
        # post: maximum number of digits after the decimal point,
        # precd: maximum number of significant digits,
        # alld: maximum width in characters,
        # numc: column contained a number,
        # exped: all numbers so far were written with an exponent,
        # fixed: all numbers so far had the same number of decimals,
        # strf: column contains strings,
        # nans: the missing string that was encountered.
        # read line:
        cols = []
        if sep is None:
            cols = [m.group(0) for m in re.finditer(r'\S+', line.strip())]
        else:
            if sep.isspace():
                seps = r'[^'+re.escape(sep)+']+'
                cols = [m.group(0) for m in re.finditer(seps, line.strip())]
            else:
                cols = line.split(sep)
            # remove empty elements resulting from leading and
            # trailing separators:
            if len(cols) > 0 and len(cols[0]) == 0:
                cols = cols[1:]
            if len(cols) > 0 and len(cols[-1]) == 0:
                cols = cols[:-1]
            # remove table borders:
            if len(cols) > 0:
                cols[0] = cols[0].lstrip('|').lstrip()
                cols[-1] = cols[-1].rstrip('|').rstrip()
        cols = [c.strip() for c in cols if c != '|']
        # read columns:
        for k, c in enumerate(cols):
            try:
                v = float(c)
                # analyze how the number was written:
                ad = 0
                ve = c.split('e')
                if len(ve) <= 1:
                    exped[k] = False
                else:
                    ad = len(ve[1])+1
                vc = ve[0].split('.')
                ad += len(vc[0])
                prec = len(vc[0].lstrip('-').lstrip('+').lstrip('0'))
                if len(vc) == 2:
                    if numc[k] and post[k] != len(vc[1]):
                        fixed[k] = False
                    if post[k] < len(vc[1]):
                        post[k] = len(vc[1])
                    ad += len(vc[1])+1
                    prec += len(vc[1].rstrip('0'))
                if precd[k] < prec:
                    precd[k] = prec
                if alld[k] < ad:
                    alld[k] = ad
                numc[k] = True
            except ValueError:
                # not a number:
                if c in missing:
                    v = np.nan
                    nans[k] = c
                elif len(c) == 0 and not strf[k]:
                    # empty element in a column without strings so far:
                    v = np.nan
                else:
                    strf[k] = True
                    if alld[k] < len(c):
                        alld[k] = len(c)
                    # remove enclosing double quotes:
                    if len(c) >= 2 and c[0] == '"' and c[-1] == '"':
                        v = c.strip('"')
                    else:
                        v = c
            self.add(v, k)
        self.fill_data()

    # initialize:
    if isinstance(missing, str):
        missing = [missing]
    self.data = []
    self.ndim = 2
    self.shape = (0, 0)
    self.header = []
    self.nsecs = 0
    self.units = []
    self.formats = []
    self.descriptions = []
    self.hidden = []
    self.setcol = 0
    self.addcol = 0
    # open file:
    # NOTE(review): the file is closed only at the very end of this
    # function, not if an exception is raised in between.
    own_file = False
    if not hasattr(fh, 'readline'):
        fh = open(fh, 'r')
        own_file = True
    # read initial lines of file:
    # Lines are collected either as header lines in `key` or as data
    # lines in `data`, `target` is the list the next line is added to.
    key = []
    data = []
    target = data
    comment = False
    table_format='dat'
    for line in fh:
        line = line.rstrip()
        if line == stop:
            break;
        if line:
            if r'\begin{tabular' in line:
                table_format='tex'
                target = key
                continue
            if table_format == 'tex':
                if r'\end{tabular' in line:
                    break
                if r'\hline' in line:
                    # a horizontal line after the key starts the data:
                    if key:
                        target = data
                    continue
                line = line.rstrip(r'\\')
            if line[0] == '#':
                # comment lines make up the key:
                comment = True
                table_format='dat'
                target = key
                line = line.lstrip('#')
            elif comment:
                # first line without comment character after the key:
                target = data
            if line[0:3] == 'RTH':
                target = key
                line = line[3:]
                table_format='rtai'
            elif line[0:3] == 'RTD':
                target = data
                line = line[3:]
                table_format='rtai'
            if (line[0:3] == '|--' or line[0:3] == '|:-') and \
               (line[-3:] == '--|' or line[-3:] == '-:|'):
                # horizontal line of a table with borders:
                if not data and not key:
                    # top line, the key follows:
                    table_format='ascii'
                    target = key
                    continue
                elif not key:
                    # the lines read so far are the key:
                    table_format='md'
                    key = data
                    data = []
                    target = data
                    continue
                elif not data:
                    # line between key and data:
                    target = data
                    continue
                else:
                    # bottom line:
                    break
            target.append(line)
        else:
            # stop at first empty line:
            break
        # a few data lines are sufficient for analyzing the file:
        if len(data) > 5:
            break
    # find column separator of data and number of columns:
    # For each candidate separator compute the median and the standard
    # deviation of the number of columns it yields on the data lines.
    col_seps = ['|', ',', ';', ':', '\t', '&', None]
    if sep is not None:
        col_seps = [sep]
    colstd = np.zeros(len(col_seps))
    colnum = np.zeros(len(col_seps), dtype=int)
    for k, sep in enumerate(col_seps):
        cols = []
        # without a key, skip leading lines that might be header lines:
        s = 5 if len(data) >= 8 else len(data) - 3
        if s < 0 or key:
            s = 0
        for line in data[s:]:
            cs = line.strip().split(sep)
            if not cs[0]:
                cs = cs[1:]
            if cs and not cs[-1]:
                cs = cs[:-1]
            cols.append(len(cs))
        colstd[k] = np.std(cols)
        colnum[k] = np.median(cols)
    if np.max(colnum) < 2:
        # just a single column:
        sep = None
        colnum = 1
    else:
        # among the separators resulting in more than one column take
        # the one with the most consistent number of columns:
        ci = np.where(np.array(colnum) > 1.5)[0]
        ci = ci[np.argmin(colstd[ci])]
        sep = col_seps[ci]
        colnum = int(colnum[ci])
    # fix key:
    if not key and sep is not None and sep in ',;:\t|':
        table_format = 'csv'
    # read key:
    key_cols = []
    key_indices = []
    for line in key:
        cols, indices = read_key_line(line, sep, table_format)
        key_cols.append(cols)
        key_indices.append(indices)
    if not key_cols:
        # no obviously marked table key:
        # leading data lines without any number are taken as the key.
        key_num = 0
        for line in data:
            cols, indices = read_key_line(line, sep, table_format)
            numbers = 0
            for c in cols:
                try:
                    v = float(c)
                    numbers += 1
                except ValueError:
                    pass
            if numbers == 0:
                key_cols.append(cols)
                key_indices.append(indices)
                key_num += 1
            else:
                break
        if len(key_cols) == len(data):
            # no line contains numbers, only the first line is the key:
            key_num = 1
            key_cols = key_cols[:1]
            key_indices = key_indices[:1]
            colnum = len(key_cols[0])
        data = data[key_num:]
    # index of the key line that is analyzed next, starting with the last:
    kr = len(key_cols) - 1
    # check for key with column indices:
    if kr >= 0:
        cols = key_cols[kr]
        numrow = True
        try:
            # consecutive integers?
            pv = int(cols[0])
            for c in cols[1:]:
                v = int(c)
                if v != pv+1:
                    numrow = False
                    break
                pv = v
        except ValueError:
            try:
                # consecutive alphabetical indices?
                pv = aa2index(cols[0])
                for c in cols[1:]:
                    v = aa2index(c)
                    if v != pv+1:
                        numrow = False
                        break
                    pv = v
            except ValueError:
                numrow = False
        if numrow:
            # skip the line with the column indices:
            kr -= 1
    # check for unit line:
    # a line with as many elements as the line before is taken as units.
    units = None
    if kr > 0 and len(key_cols[kr]) == len(key_cols[kr - 1]):
        units = key_cols[kr]
        kr -= 1
    # column labels:
    if kr >= 0:
        if units is None:
            # units may be part of the label:
            # either as 'label (unit)' or as 'label/unit'.
            labels = []
            units = []
            for c in key_cols[kr]:
                if c[-1] == ')':
                    lu = c[:-1].split('(')
                    if len(lu) >= 2:
                        labels.append(lu[0].strip())
                        units.append('('.join(lu[1:]).strip())
                        continue
                lu = c.split('/')
                if len(lu) >= 2:
                    labels.append(lu[0].strip())
                    units.append('/'.join(lu[1:]).strip())
                else:
                    labels.append(c)
                    units.append('')
        else:
            labels = key_cols[kr]
        indices = key_indices[kr]
        # init table columns:
        for k in range(colnum):
            self.append(labels[k], units[k], '%g')
        # read in sections:
        # a section label is assigned to the column whose label starts
        # at the same position.
        while kr > 0:
            kr -= 1
            for sec_label, sec_inx in zip(key_cols[kr], key_indices[kr]):
                col_inx = indices.index(sec_inx)
                self.header[col_inx].append(sec_label)
                if self.nsecs < len(self.header[col_inx]) - 1:
                    self.nsecs = len(self.header[col_inx]) - 1
    # read data:
    post = np.zeros(colnum, dtype=int)
    precd = np.zeros(colnum, dtype=int)
    alld = np.zeros(colnum, dtype=int)
    numc = [False] * colnum
    exped = [True] * colnum
    fixed = [True] * colnum
    strf = [False] * colnum
    nans = [None] * colnum # for each column the missing string that was encountered.
    for line in data:
        read_data_line(line, sep, post, precd, alld, numc, exped, fixed,
                       strf, missing, nans)
    # read remaining data:
    for line in fh:
        line = line.rstrip()
        if line == stop:
            break;
        if table_format == 'tex':
            if r'\end{tabular' in line or r'\hline' in line:
                break
            line = line.rstrip(r'\\')
        # bottom line of a table with borders:
        if (line[0:3] == '|--' or line[0:3] == '|:-') and \
           (line[-3:] == '--|' or line[-3:] == '-:|'):
            break
        if line[0:3] == 'RTD':
            line = line[3:]
        read_data_line(line, sep, post, precd, alld, numc, exped, fixed,
                       strf, missing, nans)
    # set formats:
    for k in range(len(alld)):
        if strf[k]:
            self.set_format(k, '%%-%ds' % alld[k])
            # make sure all elements are strings:
            for i in range(len(self.data[k])):
                if self.data[k][i] is np.nan:
                    self.data[k][i] = nans[k]
                else:
                    self.data[k][i] = str(self.data[k][i])
        elif exped[k]:
            self.set_format(k, '%%%d.%de' % (alld[k], post[k]))
        elif fixed[k]:
            self.set_format(k, '%%%d.%df' % (alld[k], post[k]))
        else:
            self.set_format(k, '%%%d.%dg' % (alld[k], precd[k]))
    # close file:
    if own_file:
        fh.close()
def add_write_table_config(cfg, table_format=None, delimiter=None,
                           unit_style=None, column_numbers=None,
                           sections=None, align_columns=None,
                           shrink_width=True, missing='-',
                           center_columns=False,
                           latex_label_command='',
                           latex_merge_std=False):
    """Add parameter specifying how to write a table to a file as a new
    section to a configuration.

    Parameters
    ----------
    cfg: ConfigFile
        The configuration.
    table_format: None or str
        Default value of the 'fileFormat' entry. None is stored as 'auto'.
    delimiter: None or str
        Default value of the 'fileDelimiter' entry.
        None is stored as 'auto'.
    unit_style: None or str
        Default value of the 'fileUnitStyle' entry.
        None is stored as 'auto'.
    column_numbers: None or str
        Default value of the 'fileColumnNumbers' entry.
        None is stored as 'none'.
    sections: None or int
        Default value of the 'fileSections' entry.
        None is stored as 'auto'.
    align_columns: None or bool
        Default value of the 'fileAlignColumns' entry.
        None is stored as 'auto'.
    shrink_width: bool
        Default value of the 'fileShrinkColumnWidth' entry.
    missing: str
        Default value of the 'fileMissing' entry.
    center_columns: bool
        Default value of the 'fileCenterColumns' entry.
    latex_label_command: str
        Default value of the 'fileLaTeXLabelCommand' entry.
    latex_merge_std: bool
        Default value of the 'fileLaTeXMergeStd' entry.
    """
    # Only None means 'auto' for these two. Using `or` also replaced
    # the valid values `False` and `0` by 'auto'.
    if sections is None:
        sections = 'auto'
    if align_columns is None:
        align_columns = 'auto'

    cfg.add_section('File format for storing analysis results:')
    cfg.add('fileFormat', table_format or 'auto', '', 'Default file format used to store analysis results.\nOne of %s.' % ', '.join(TableData.formats))
    cfg.add('fileDelimiter', delimiter or 'auto', '', 'String used to separate columns or "auto".')
    cfg.add('fileUnitStyle', unit_style or 'auto', '', 'Add units as extra row ("row"), add units to header label separated by "/" ("header"), do not print out units ("none"), or "auto".')
    cfg.add('fileColumnNumbers', column_numbers or 'none', '', 'Add line with column indices ("index", "num", "aa", "AA", or "none")')
    cfg.add('fileSections', sections, '', 'Maximum number of section levels or "auto"')
    cfg.add('fileAlignColumns', align_columns, '', 'If True, write all data of a column using the same width, if False write the data without any white space, or "auto".')
    cfg.add('fileShrinkColumnWidth', shrink_width, '', 'Allow to make columns narrower than specified by the corresponding format strings.')
    cfg.add('fileMissing', missing, '', 'String used to indicate missing data values.')
    cfg.add('fileCenterColumns', center_columns, '', 'Center content of all columns instead of left align columns of strings and right align numbers (markdown, html, and latex).')
    cfg.add('fileLaTeXLabelCommand', latex_label_command, '', 'LaTeX command name for formatting column labels of the table header.')
    cfg.add('fileLaTeXMergeStd', latex_merge_std, '', 'Merge header of columns with standard deviations with previous column (LaTeX tables only).')
def write_table_args(cfg):
    """Translate a configuration into key-word arguments for writing a table.

    The returned dictionary can be passed as key-word arguments to
    `TableData.write()`.

    Parameters
    ----------
    cfg: ConfigFile
        The configuration.

    Returns
    -------
    a: dict
        Dictionary with names of arguments of the `TableData.write` function
        and their values as supplied by `cfg`.
    """
    # argument name of TableData.write() -> key in the configuration:
    arg_to_key = {
        'table_format': 'fileFormat',
        'delimiter': 'fileDelimiter',
        'unit_style': 'fileUnitStyle',
        'column_numbers': 'fileColumnNumbers',
        'sections': 'fileSections',
        'align_columns': 'fileAlignColumns',
        'shrink_width': 'fileShrinkColumnWidth',
        'missing': 'fileMissing',
        'center_columns': 'fileCenterColumns',
        'latex_label_command': 'fileLaTeXLabelCommand',
        'latex_merge_std': 'fileLaTeXMergeStd',
    }
    args = cfg.map(arg_to_key)
    # the number of sections is an integer unless it is 'auto':
    if 'sections' in args and args['sections'] != 'auto':
        args['sections'] = int(args['sections'])
    return args
def latex_unit(unit, unit_package=None):
    """Translate unit string into LaTeX code.

    Parameters
    ----------
    unit: str
        String denoting a unit.
    unit_package: None or 'siunitx' or 'SIunit'
        Translate unit string for the specified LaTeX package.
        If None set sub- and superscripts in text mode.

    Returns
    -------
    unit: str
        Unit string as valid LaTeX code.

    Raises
    ------
    ValueError:
        `unit_package` is neither None, 'siunitx', nor 'SIunit'.
    """
    si_prefixes = {'y': '\\yocto',
                   'z': '\\zepto',
                   'a': '\\atto',
                   'f': '\\femto',
                   'p': '\\pico',
                   'n': '\\nano',
                   'u': '\\micro',
                   'm': '\\milli',
                   'c': '\\centi',
                   'd': '\\deci',
                   'h': '\\hecto',
                   'k': '\\kilo',
                   'M': '\\mega',
                   'G': '\\giga',
                   'T': '\\tera',
                   'P': '\\peta',
                   'E': '\\exa',
                   'Z': '\\zetta',
                   'Y': '\\yotta' }
    # NOTE(review): 'C' used to be listed twice, as coulomb and as
    # celsius. In a dict literal the last entry wins, so 'C' has always
    # been translated to celsius. The shadowed coulomb entry was removed,
    # the effective translation is unchanged.
    si_units = {'m': '\\metre',
                'g': '\\gram',
                's': '\\second',
                'A': '\\ampere',
                'K': '\\kelvin',
                'mol': '\\mole',
                'M': '\\mole',
                'cd': '\\candela',
                'Hz': '\\hertz',
                'N': '\\newton',
                'Pa': '\\pascal',
                'J': '\\joule',
                'W': '\\watt',
                'V': '\\volt',
                'F': '\\farad',
                'O': '\\ohm',
                'S': '\\siemens',
                'Wb': '\\weber',
                'T': '\\tesla',
                'H': '\\henry',
                'C': '\\celsius',
                'lm': '\\lumen',
                'lx': '\\lux',
                'Bq': '\\becquerel',
                'Gy': '\\gray',   # the symbol of gray is Gy ...
                'Gv': '\\gray',   # ... the misspelled key is kept for compatibility
                'Sv': '\\sievert'}
    other_units = {"'": '\\arcminute',
                   "''": '\\arcsecond',
                   'a': '\\are',
                   'd': '\\dday',
                   'eV': '\\electronvolt',
                   'ha': '\\hectare',
                   'h': '\\hour',
                   'L': '\\liter',
                   'l': '\\litre',
                   'min': '\\minute',
                   'Np': '\\neper',
                   'rad': '\\rad',
                   't': '\\ton',
                   '%': '\\%'}
    unit_powers = {'^2': '\\squared',
                   '^3': '\\cubed',
                   '/': '\\per',
                   '^-1': '\\power{}{-1}',
                   '^-2': '\\rpsquared',
                   '^-3': '\\rpcubed'}
    if unit_package is None:
        # without any unit package:
        units = ''
        k = 0
        while k < len(unit):
            if unit[k] == '^':
                # superscript: sign and digits following the '^'
                j = k + 1
                while j < len(unit) and (unit[j] == '-' or unit[j].isdigit()):
                    j += 1
                units = units + '$^{\\text{' + unit[k + 1:j] + '}}$'
                k = j
            elif unit[k] == '_':
                # subscript: everything up to the next white space
                j = k + 1
                while j < len(unit) and not unit[j].isspace():
                    j += 1
                units = units + '$_{\\text{' + unit[k + 1:j] + '}}$'
                k = j
            else:
                units = units + unit[k]
                k += 1
    elif unit_package.lower() in ['siunit', 'siunitx']:
        # use SIunit package:
        if '\\' in unit:   # this string is already translated!
            return unit
        # Parse the unit string from its end towards its beginning.
        # `j` is the end of the part that still needs to be translated.
        units = ''
        j = len(unit)
        while j >= 0:
            # try the longest candidate (three characters) first:
            for k in range(-3, 0):
                if j+k < 0:
                    continue
                uss = unit[j+k:j]
                if uss in unit_powers:
                    units = unit_powers[uss] + units
                    break
                elif uss in other_units:
                    units = other_units[uss] + units
                    break
                elif uss in si_units:
                    units = si_units[uss] + units
                    j = j+k
                    k = 0
                    # a single character in front of an SI unit
                    # might be a prefix:
                    if j - 1 >= 0:
                        uss = unit[j - 1:j]
                        if uss in si_prefixes:
                            units = si_prefixes[uss] + units
                            k = -1
                    break
            else:
                # nothing known, copy a single character:
                k = -1
                units = unit[j+k:j] + units
            j = j + k
        if unit_package.lower() == 'siunitx':
            units = '\\unit{' + units + '}'
    else:
        raise ValueError(f'latex_unit(): invalid unit_package={unit_package}!')
    return units
def break_text(stream, text, maxc=80, indent=0):
    """Write text to stream and break lines at maximum character count.

    The text is split at white space. Words are never split, a single
    word that is longer than `maxc` is written on a line of its own.
    Each line, including the last one, is terminated by a newline.
    For an empty text just the indentation and a newline are written.

    Parameters
    ----------
    stream: io
        Stream into which the text is written.
    text: str
        The text to be written to the stream.
    maxc: int
        Maximum character count for each line, including indentation.
    indent: int
        Number of characters each line is indented.
    """
    pad = ' '*indent
    lines = []
    words = []       # words of the current line
    nc = indent      # number of characters of the current line
    for word in text.split():
        # The blank separating the word from the previous one counts
        # as well, otherwise lines get one character too long.
        # Only break a line that already has words, otherwise a too
        # long word results in a line containing the indentation only.
        if words and nc + 1 + len(word) > maxc:
            lines.append(words)
            words = []
            nc = indent
        if words:
            nc += 1
        words.append(word)
        nc += len(word)
    lines.append(words)
    for words in lines:
        stream.write(pad + ' '.join(words) + '\n')
def index2aa(n, a='a'):
    """Convert an integer into an alphabetical representation.

    The integer number is converted into 'a', 'b', 'c', ..., 'z',
    'aa', 'ab', 'ac', ..., 'az', 'ba', 'bb', ...

    Inspired by https://stackoverflow.com/a/37604105

    Parameters
    ----------
    n: int
        A non-negative integer to be converted into alphabetical
        representation.
    a: str ('a' or 'A')
        Use upper or lower case characters.

    Returns
    -------
    ns: str
        Alphabetical representation of an integer.

    Raises
    ------
    ValueError:
        `n` is negative.
    """
    if n < 0:
        # a negative number never terminated the former recursion.
        raise ValueError('index2aa(): n=%d must not be negative.' % n)
    base = ord(a)
    chars = []
    while True:
        n, r = divmod(n, 26)
        chars.append(chr(base + r))
        if n == 0:
            break
        # There is no zero digit: 'a' following 'z' is 'aa', not 'ba'.
        n -= 1
    return ''.join(reversed(chars))
def aa2index(s):
    """Convert an alphabetical representation to an index.

    The alphabetical representation 'a', 'b', 'c', ..., 'z',
    'aa', 'ab', 'ac', ..., 'az', 'ba', 'bb', ...
    is converted to an index starting with 0.
    Upper and lower case characters are treated the same.

    Parameters
    ----------
    s: str
        Alphabetical representation of an index.

    Returns
    -------
    index: int
        The corresponding index.

    Raises
    ------
    ValueError:
        Invalid character in input string.
    """
    first = ord('a')
    base = ord('z') - first + 1
    total = 0
    for char in s.lower():
        digit = ord(char) - first
        if not 0 <= digit < base:
            raise ValueError('invalid character "%s" in string.' % char)
        # each character is a digit in the range from 1 to 26:
        total = total*base + digit + 1
    return total - 1
class IndentStream(object):
    """Filter an output stream and start each newline with a number of
    spaces.

    Attributes that are not defined by this class are looked up
    on the wrapped stream.
    """

    def __init__(self, stream, indent=4):
        self.stream = stream
        self.indent = indent
        # True if the next write starts a new line that needs indentation:
        self.pending = True

    def __getattr__(self, attr_name):
        # only called for attributes that are not found on this object.
        return getattr(self.stream, attr_name)

    def write(self, data):
        """Write `data` to the stream with each line indented."""
        if not data:
            return
        pad = ' '*self.indent
        if self.pending:
            self.stream.write(pad)
            self.pending = False
        # Trailing newlines are written without indentation.
        # The indentation of the following line is deferred to the
        # next call.
        text = data.rstrip('\n')
        n_newlines = len(data) - len(text)
        if text:
            self.stream.write(text.replace('\n', '\n' + pad))
        if n_newlines:
            self.stream.write('\n'*n_newlines)
            self.pending = True

    def flush(self):
        """Flush the wrapped stream."""
        self.stream.flush()
def main():
    """Demonstrate the usage of `TableData` on a small example table.

    Sets up a table with hierarchical header, units, formats and
    descriptions, fills it with fixed and random values, and prints it
    to standard output in each format listed in `TableData.formats`.
    Then the column descriptions are written and examples for
    `aggregate()`, `statistics()` and `groupby()` are printed.
    """
    # setup a table:
    df = TableData()
    df.append(["data", "specimen", "ID"], "", "%-s", value=list('ABCBAACB'))
    df.append("size", "m", "%6.2f", value=[2.34, 56.7, 8.9])
    df.append("full weight", "kg", "%.0f", value=122.8)
    df.append_section("all measures")
    df.append("speed", "m/s", "%.3g", value=98.7)
    df.append("median jitter", "mm", "%.1f", value=23)
    df.append("size", "g", "%.2e", value=1.234)
    df.set_descriptions({'ID': 'A unique identifier of a snake.',
                         'size': 'The total length of each snake.',
                         'full weight': 'Weight of each snake',
                         'speed': 'Maximum speed the snake can climb a tree.',
                         'median jitter': 'The jitter around a given path the snake should follow.',
                         'all measures>size': 'Weight of mouse the snake has eaten before.',
                         })
    df.add(np.nan, 2)  # single value
    df.add([0.543, 45, 1.235e2])  # remaining row
    df.add([43.21, 6789.1, 3405, 1.235e-4], 2)  # next row
    a = 0.5*np.arange(1, 6)*np.random.randn(5, 5) + 10.0 + np.arange(5)
    df.add(a.T, 1)  # rest of table
    df.insert('median jitter', 's.d.', 'm/s', '%.3g',
              'Standard deviation of all speeds',
              value=2*np.random.rand(df.rows()))

    # write out in all formats:
    for tf in TableData.formats:
        print(' - `%s`: %s' % (tf, TableData.descriptions[tf]))
        print(' ```')
        iout = IndentStream(sys.stdout, 4+2)
        df.write(iout, table_format=tf, latex_unit_package='siunitx',
                 latex_merge_std=True)
        print(' ```')
        print()

    # write descriptions:
    for tf in ['md', 'html', 'tex']:
        df.write_descriptions(table_format=tf, maxc=40)
        print()

    # aggregate demos:
    print(df)
    print(df.aggregate(np.mean, numbers_only=True))
    print(df.aggregate(dict(count=len, maximum=np.max), numbers_only=True))
    print(df.aggregate([np.mean, len, max],
                       ['size', 'full weight', 'speed'], 'statistics',
                       remove_nans=True, single_row=False))
    # The quantiles need to match the labels of the resulting columns:
    print(df.aggregate({('25%', '50%', '75%'):
                        (np.quantile, ([0.25, 0.5, 0.75],))},
                       numbers_only=True))

    print(df.statistics(single_row=False))
    print(df.statistics(single_row=True, remove_nans=True))
    print(df.statistics(remove_nans=True, by='ID'))

    # groupby demo:
    for name, values in df.groupby('ID'):
        print(name)
        print(values)
        print()

    # aggregate on groups demo:
    print(df.aggregate(np.mean, by='ID'))
    print()

    # write descriptions:
    df.write_descriptions(table_format='md', section_headings=0)
    print()
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()