# Source code for pygaps.parsing.csv

"""
Parse to and from a CSV string/file format for isotherms.

The _parser_version variable documents any changes to the format,
and is used to check for any deprecations.

"""

from io import StringIO

import pandas

from pygaps import logger
from pygaps.core.baseisotherm import BaseIsotherm
from pygaps.core.modelisotherm import ModelIsotherm
from pygaps.core.pointisotherm import PointIsotherm
from pygaps.modelling import model_from_dict
from pygaps.parsing import _PARSER_PRECISION
from pygaps.utilities.exceptions import ParsingError
from pygaps.utilities.string_utilities import _from_list
from pygaps.utilities.string_utilities import _to_string
from pygaps.utilities.string_utilities import cast_string

# Version of the CSV layout handled by this module. It is written into every
# exported file as the `file_version` entry, and files that lack it or carry a
# lower number trigger a warning when they are loaded.
_parser_version = "3.0"


def isotherm_to_csv(isotherm, path=None, separator=','):
    """
    Serialise an isotherm to CSV text.

    The text is written to `path` when one is given; otherwise it is
    handed back to the caller.

    Parameters
    ----------
    isotherm : Isotherm
        Isotherm to be written to csv.
    path : str, None
        Path to the file to be written.
    separator : str, optional
        Separator used in the csv file. Defaults to ``,``.

    Returns
    -------
    str: optional
        String representation of the CSV, if path not provided.

    """
    def _row(key, text):
        # A single "key<separator>value" line of the metadata/model sections.
        return key + separator + text + '\n'

    buffer = StringIO()

    metadata = isotherm.to_dict()
    metadata['file_version'] = _parser_version  # version

    # A material given as a dict is flattened: its name stays under
    # 'material', every other property becomes a '_material_<key>' entry.
    mat = metadata['material']
    if isinstance(mat, dict):
        metadata['material'] = mat.pop('name')
        for prop, value in mat.items():
            metadata[f"_material_{prop}"] = value

    for key, value in metadata.items():
        buffer.write(_row(key, _to_string(value)))

    if isinstance(isotherm, PointIsotherm):
        # Work on a copy and swap the branch flags for adsorption terminology
        points = isotherm.data_raw.copy()
        branch_names = points['branch'].replace(0, 'ads')
        points['branch'] = branch_names.replace(1, 'des')

        buffer.write('data:[pressure,loading,branch,(otherdata)]\n')
        rounded = points.round(_PARSER_PRECISION)
        rounded.to_csv(buffer, sep=separator, index=False, header=True)

    elif isinstance(isotherm, ModelIsotherm):
        model = isotherm.model

        buffer.write('model:[name and parameters]\n')
        buffer.writelines([
            _row('name', model.name),
            _row('rmse', _to_string(model.rmse)),
            _row('pressure range', _to_string(model.pressure_range)),
            _row('loading range', _to_string(model.loading_range)),
        ])
        # One line per model parameter, in the order the model stores them
        buffer.writelines([_row(name, str(model.params[name])) for name in model.params])

    text = buffer.getvalue()
    if not path:
        return text

    with open(path, mode='w', newline='\n', encoding='utf-8') as file:
        file.write(text)


def isotherm_from_csv(str_or_path, separator=',', **isotherm_parameters):
    """
    Load an isotherm from a CSV file.

    Parameters
    ----------
    str_or_path : str
        The isotherm in a CSV string format or a path to where one can be read.
    separator : str, optional
        Separator used in the csv file. Defaults to `,`.
    isotherm_parameters :
        Any other options to be overridden in the isotherm creation.

    Returns
    -------
    Isotherm
        The isotherm contained in the csv string or file.

    Raises
    ------
    ParsingError
        If the input can neither be opened nor wrapped as text, or if the
        metadata or model section of the CSV is malformed.

    """
    # First treat the argument as a path; if it cannot be opened,
    # fall back to treating it as the CSV text itself.
    try:
        with open(str_or_path, encoding='utf-8') as f:
            raw_csv = StringIO(f.read())
    except OSError:
        try:
            raw_csv = StringIO(str_or_path)
        except Exception as err:
            raise ParsingError(
                "Could not parse CSV isotherm. "
                "The `str_or_path` is invalid or does not exist. "
            ) from err

    # Metadata: "key<separator>value" lines up to the data/model marker
    # (or an empty line / end of input).
    line = raw_csv.readline().rstrip()
    raw_dict = {}

    try:
        while not (line.startswith(('data', 'model')) or line == ""):
            values = line.strip().split(sep=separator)

            if len(values) > 2:
                raise ParsingError(f"The isotherm metadata {values} contains more than two values.")
            key, val = values
            raw_dict[key] = cast_string(val)
            line = raw_csv.readline().rstrip()
    except Exception as err:
        raise ParsingError(
            "Could not parse CSV isotherm. "
            f"The format may be wrong, check for errors in line {line}."
        ) from err

    # version check
    version = raw_dict.pop("file_version", None)
    try:
        outdated = not version or float(version) < float(_parser_version)
    except (TypeError, ValueError):
        # A version that is not a plain number cannot be compared;
        # warn about it instead of failing with an unrelated error.
        outdated = True
    if outdated:
        logger.warning(
            f"The file version is {version} while the parser uses version {_parser_version}. "
            "Strange things might happen, so double check your data."
        )

    # check if material needs parsing: gather the flattened
    # "_material_<key>" entries back into a single material dictionary
    prefix = "_material_"
    material_keys = [key for key in raw_dict if key.startswith(prefix)]
    if material_keys:
        # Strip only the leading prefix so the popped key always exists
        material = {key[len(prefix):]: raw_dict.pop(key) for key in material_keys}
        material['name'] = raw_dict['material']
        raw_dict['material'] = material

    # Update dictionary with any user parameters
    raw_dict.update(isotherm_parameters)

    # Now read specific type of isotherm (Point, Model, Base)
    if line.startswith('data'):
        data = pandas.read_csv(raw_csv, sep=separator)

        # process isotherm branches if they exist
        if 'branch' in data.columns:
            data['branch'] = data['branch'].apply(lambda x: 0 if x == 'ads' else 1)
        else:
            raw_dict['branch'] = 'guess'

        isotherm = PointIsotherm(
            isotherm_data=data,
            pressure_key=data.columns[0],
            loading_key=data.columns[1],
            **raw_dict
        )

    elif line.startswith('model'):
        model = {}
        try:
            # The four fixed lines are read by position, not by their key
            line = raw_csv.readline().rstrip()
            model['name'] = line.split(sep=separator)[1]
            line = raw_csv.readline().rstrip()
            # Cast with the same helper used for metadata values so that the
            # RMSE is not left as raw text while the parameters are numbers.
            model['rmse'] = cast_string(line.split(sep=separator)[1])
            line = raw_csv.readline().rstrip()
            model['pressure_range'] = _from_list(line.split(sep=separator)[1])
            line = raw_csv.readline().rstrip()
            model['loading_range'] = _from_list(line.split(sep=separator)[1])

            # Remaining lines, up to an empty line or end of input,
            # are "parameter<separator>value" pairs.
            model['parameters'] = {}
            line = raw_csv.readline().rstrip()
            while line != "":
                values = line.split(sep=separator)
                model['parameters'][values[0]] = float(values[1])
                line = raw_csv.readline().rstrip()
        except (IndexError, ValueError) as err:
            # Report a malformed model section the same way as bad metadata
            raise ParsingError(
                "Could not parse CSV isotherm. "
                f"The format may be wrong, check for errors in line {line}."
            ) from err

        isotherm = ModelIsotherm(
            model=model_from_dict(model),
            **raw_dict,
        )

    else:
        isotherm = BaseIsotherm(**raw_dict)

    return isotherm