Source code for pygaps.parsing.aif

"""
An AIF (adsorption information file) parsing implementation.

Format developed in this publication:

Evans, Jack D., Volodymyr Bon, Irena Senkovska, and Stefan Kaskel.
‘A Universal Standard Archive File for Adsorption Data’. Langmuir, 2 April 2021,
acs.langmuir.1c00122. https://doi.org/10.1021/acs.langmuir.1c00122.

"""
import os
import pathlib

import pandas
from adsorption_file_parser.utils.unit_parsing import parse_loading_string
from adsorption_file_parser.utils.unit_parsing import parse_pressure_string
from adsorption_file_parser.utils.unit_parsing import parse_temperature_string
from gemmi import cif

from pygaps import logger
from pygaps.core.baseisotherm import BaseIsotherm
from pygaps.core.modelisotherm import ModelIsotherm
from pygaps.core.pointisotherm import PointIsotherm
from pygaps.modelling import model_from_dict
from pygaps.parsing import _PARSER_PRECISION
from pygaps.utilities.exceptions import ParsingError
from pygaps.utilities.string_utilities import cast_string

# AIF version identifier: written to `_audit_aif_version` on export and
# compared against the value found in a file on import.
_parser_version = "d546195"

# AIF metadata tag -> isotherm property name ('text') and the callable used
# to convert the raw string value ('type'). Order matters: it is the order
# in which optional metadata is written out.
_META_DICT = {
    tag: {'text': text, 'type': kind}
    for tag, text, kind in (
        ('_exptl_temperature', 'temperature', float),
        ('_exptl_adsorptive', 'adsorbate', str),
        ('_adsnt_material_id', 'material', str),
        ('_exptl_operator', 'user', str),
        ('_exptl_date', 'date', str),
        ('_exptl_instrument', 'instrument', str),
        ('_adsnt_sample_mass', 'material_mass', float),
        ('_units_mass', 'material_mass_unit', str),
        ('_adsnt_degas_temperature', 'activation_temperature', float),
        ('_adsnt_sample_id', 'material_batch', str),
    )
}
# AIF loop column name (tag with the `_adsorp_`/`_desorp_` prefix removed)
# -> column name used in the isotherm data.
_DATA_DICT = dict(
    pressure='pressure',
    p0='pressure_saturation',
    amount='loading',
    enthalpy='enthalpy',
)
# Unit tags which are handled separately from the generic metadata.
# Despite the name this is a list, as it is concatenated with other lists.
_UNITS_DICT = [
    f"_units_{quantity}"
    for quantity in ("pressure", "loading", "mass", "temperature")
]
# Legacy metadata tags: no longer part of the format, but still found in
# some existing AIF files. Same layout as the current metadata mapping:
# tag -> property name ('text') and conversion callable ('type').
_META_DICT_OLD = {
    tag: {'text': text, 'type': kind}
    for tag, text, kind in (
        ('_sample_material_id', 'material', str),
        ('_exptl_sample_mass', 'material_mass', float),
        ('_exptl_activation_temperature', 'activation_temperature', float),
        ('_sample_id', 'material_batch', str),
    )
}


def isotherm_to_aif(isotherm: PointIsotherm, path: str = None):
    """
    Serialise an isotherm into the AIF format [#]_.

    When a path is given the result is written to disk (the extension is
    always replaced with ``.aif``) and nothing is returned. Without a path
    the AIF text is returned instead.

    Parameters
    ----------
    isotherm : Isotherm
        Isotherm to be written to AIF.
    path : str, None
        Path to the file to be written.

    Returns
    -------
    str: optional
        String representation of the AIF, if path not provided.

    References
    ----------
    .. [#] Evans, Jack D., Volodymyr Bon, Irena Senkovska, and Stefan Kaskel.
       ‘A Universal Standard Archive File for Adsorption Data’. Langmuir, 2 April 2021,
       acs.langmuir.1c00122. https://doi.org/10.1021/acs.langmuir.1c00122.

    """
    meta = isotherm.to_dict()

    # A material given as a dictionary is flattened: its name becomes the
    # material entry, every other property is stored as a `sample_*` key.
    mat = meta['material']
    if isinstance(mat, dict):
        meta['material'] = mat.pop('name')
        for prop, value in mat.items():
            meta[f"sample_{prop}"] = value

    # One document holding a single block named after the isotherm id
    document = cif.Document()
    document.add_new_block(str(isotherm.iso_id))
    block = document.sole_block()

    # Audit information
    block.set_pair('_audit_aif_version', _parser_version)
    block.set_pair('_audit_creation_method', 'pyGAPS')

    # Properties every isotherm must have
    block.set_pair('_exptl_adsorptive', f"'{meta.pop('adsorbate')}'")
    block.set_pair('_exptl_temperature', f"{meta.pop('temperature')}")
    block.set_pair('_adsnt_material_id', f"'{meta.pop('material')}'")

    # Optional properties with a standard AIF tag
    for tag, spec in _META_DICT.items():
        name = spec['text']
        if name in meta:
            block.set_pair(tag, f"'{meta.pop(name)}'")

    # Units
    block.set_pair('_units_temperature', f"'{isotherm.temperature_unit}'")

    if isotherm.pressure_mode == 'absolute':
        pressure_units = isotherm.pressure_unit
    else:
        pressure_units = isotherm.pressure_mode
    block.set_pair('_units_pressure', pressure_units)

    basis = isotherm.loading_basis
    if basis == 'fraction':
        loading_units = f"'fraction {isotherm.material_basis}'"
    elif basis == 'percent':
        loading_units = f"'% {isotherm.material_basis}'"
    else:
        loading_units = f"'{isotherm.loading_unit}/{isotherm.material_unit}'"
    block.set_pair('_units_loading', loading_units)

    # TODO introduce these as standard in AIF
    # Exact pyGAPS unit parameters, stored so they can be restored on import
    for unit in BaseIsotherm._unit_params:
        block.set_pair(f"_pygaps_{unit}", f"'{meta.pop(unit)}'")

    # Whatever is left has no standard tag and goes under a pyGAPS prefix
    for name, value in meta.items():
        block.set_pair(f"_pygaps_{name.replace(' ', '_')}", f"'{value}'")

    # Data: one loop per available branch for point isotherms,
    # model description and parameters for model isotherms
    if isinstance(isotherm, PointIsotherm):
        extra = isotherm.other_keys
        tags = ['pressure', 'amount'] + extra
        columns = [isotherm.pressure_key, isotherm.loading_key] + extra

        for branch, prefix in (('ads', '_adsorp_'), ('des', '_desorp_')):
            if not isotherm.has_branch(branch):
                continue
            loop = block.init_loop(prefix, tags)
            frame = isotherm.data(branch=branch)[columns]
            # values are written column by column, as strings
            loop.set_all_values(
                frame.round(_PARSER_PRECISION).astype("string").values.T.tolist()
            )

    elif isinstance(isotherm, ModelIsotherm):
        model = isotherm.model
        block.set_pair("_pygaps_model_name", model.name)
        block.set_pair("_pygaps_model_rmse", f"{model.rmse}")

        p_range = model.pressure_range
        block.set_pair("_pygaps_model_pressure_range_min", f"{p_range[0]}")
        block.set_pair("_pygaps_model_pressure_range_max", f"{p_range[1]}")

        l_range = model.loading_range
        block.set_pair("_pygaps_model_loading_range_min", f"{l_range[0]}")
        block.set_pair("_pygaps_model_loading_range_max", f"{l_range[1]}")

        for param, value in model.params.items():
            block.set_pair(f"_pygaps_model_param_{param}", f"{value}")

    if not path:
        return document.as_string()

    document.write_file(f"{os.path.splitext(path)[0]}.aif")
def _numeric_where_possible(column):
    """Convert a column to a numeric dtype, returning it unchanged if that fails."""
    try:
        return pandas.to_numeric(column)
    except (ValueError, TypeError):
        return column


def _read_unit_string(block, tag):
    """Return the unquoted value stored under a units tag, or raise ParsingError if absent."""
    value = block.find_value(tag)
    if not value:
        raise ParsingError(
            f"Could not parse AIF isotherm. The required `{tag}` entry is missing."
        )
    return value.strip("'")


def isotherm_from_aif(str_or_path: str, **isotherm_parameters: dict):
    """
    Parse an isotherm from an AIF format (file or raw string) [#]_.

    Parameters
    ----------
    str_or_path : str
        The isotherm in a AIF string format or a path to where one can be read.
    isotherm_parameters :
        Any other options to be overridden in the isotherm creation.
        The special ``_parse_units`` option forces units to be read from
        the AIF ``_units_*`` entries; it is not passed on to the isotherm.

    Returns
    -------
    Isotherm
        The isotherm contained in the AIF file or string. A ``PointIsotherm``
        if data loops were found, a ``ModelIsotherm`` if model entries were
        found, otherwise a ``BaseIsotherm``.

    Raises
    ------
    ParsingError
        If the input is neither an existing file nor a parsable AIF string,
        or if units have to be parsed and a ``_units_*`` entry is missing.

    References
    ----------
    .. [#] Evans, Jack D., Volodymyr Bon, Irena Senkovska, and Stefan Kaskel.
       ‘A Universal Standard Archive File for Adsorption Data’. Langmuir, 2 April 2021,
       acs.langmuir.1c00122. https://doi.org/10.1021/acs.langmuir.1c00122.

    """
    # Raw AIF content is not a valid file name: the existence check itself
    # may raise (e.g. name too long, embedded null) instead of returning False.
    try:
        is_file = pathlib.Path(str_or_path).exists()
    except (OSError, ValueError):
        is_file = False

    if is_file:
        aif = cif.read_file(str(str_or_path))
    else:
        try:
            aif = cif.read_string(str_or_path)
        except Exception as ex:
            raise ParsingError(
                "Could not parse AIF isotherm. "
                "The `path/string` is invalid or does not exist. "
            ) from ex

    block = aif.sole_block()
    raw_dict = {}

    # read version
    # a missing or different version enables the lookup of legacy tags
    version = block.find_value('_audit_aif_version')
    try_other_version = not version or version.strip("'") != _parser_version
    if try_other_version:
        logger.warning(
            f"The file version is {version} while the parser uses version {_parser_version}. "
            "Strange things might happen, so double check your data."
        )

    # creation method (excluded if created in pygaps)
    cmethod = block.find_value('_audit_creation_method')
    if cmethod and cmethod.strip("'") != "pyGAPS":
        raw_dict["_audit_creation_method"] = cmethod.strip("'")

    # read data and metadata through sequential iteration
    # some properties are special and read separately
    excluded = [
        "_audit_aif_version",
        "_audit_creation_method",
    ] + _UNITS_DICT

    for item in block:

        # metadata handling
        if item.pair is not None:
            key, val = item.pair
            val = val.strip("'")

            if key in _META_DICT:
                try:
                    raw_dict[_META_DICT[key]['text']] = _META_DICT[key]['type'](val)
                except ValueError:
                    logger.warning(
                        f"""Could not parse parameter {_META_DICT[key]['text']}, currently {val}"""
                    )
            elif key.startswith('_pygaps_'):
                # strip the `_pygaps_` prefix
                raw_dict[key[8:]] = cast_string(val)
            elif key not in excluded:
                if try_other_version and key in _META_DICT_OLD:
                    try:
                        raw_dict[_META_DICT_OLD[key]['text']] = _META_DICT_OLD[key]['type'](val)
                    except ValueError:
                        logger.warning(
                            f"""Could not parse parameter {_META_DICT_OLD[key]['text']}, currently {val}"""
                        )
                # unknown keys are always kept under their original name
                raw_dict[key] = cast_string(val)

        # data handling
        elif item.loop is not None:
            loop = item.loop
            loop_data = block.find(loop.tags)

            # check for adsorption or desorption
            branch = 1 if loop.tags[0].startswith('_desorp_') else 0

            # column names are derived for each loop from its own tags,
            # after removing the 8 character `_adsorp_`/`_desorp_` prefix
            columns = [_DATA_DICT.get(tag[8:], tag[8:]) for tag in loop.tags]

            # data is often as strings
            # convert whichever columns are numeric, leave the rest untouched
            data_df = pandas.DataFrame(
                loop_data,
                columns=columns,
            ).apply(_numeric_where_possible)
            data_df['branch'] = branch
            raw_dict[f"data{branch:d}"] = data_df

    # deal with units gracefully
    # if the AIF was created with pygaps, exact backup units are created
    parse_units = any(unit_name not in raw_dict for unit_name in BaseIsotherm._unit_params)
    # `_parse_units` is optional and must not reach the isotherm constructor
    if isotherm_parameters.pop("_parse_units", False):
        parse_units = True

    if parse_units:
        # pressure units
        raw_dict.update(parse_pressure_string(_read_unit_string(block, '_units_pressure')))

        # loading/material units
        raw_dict.update(parse_loading_string(_read_unit_string(block, '_units_loading')))

        # temperature units
        raw_dict['temperature_unit'] = parse_temperature_string(
            _read_unit_string(block, '_units_temperature')
        )

    # check if material needs parsing
    # `sample_*` entries are gathered in a material dictionary
    sample_prefix = "sample_"
    sample_keys = [key for key in raw_dict if key.startswith(sample_prefix)]
    if sample_keys:
        material = {key[len(sample_prefix):]: raw_dict.pop(key) for key in sample_keys}
        material['name'] = raw_dict['material']
        raw_dict['material'] = material

    # update anything needed
    raw_dict.update(isotherm_parameters)

    # only the keys created from data loops count as isotherm data
    if "data0" in raw_dict or "data1" in raw_dict:
        ads_branch = raw_dict.pop("data0", None)
        des_branch = raw_dict.pop("data1", None)

        if des_branch is not None:
            ads_branch = pandas.concat([ads_branch, des_branch], ignore_index=True)

        # generate the isotherm
        return PointIsotherm(
            isotherm_data=ads_branch,
            pressure_key='pressure',
            loading_key='loading',
            **raw_dict,
        )

    if any(a.startswith("model") for a in raw_dict):
        model = {}
        model['name'] = raw_dict.pop("model_name")
        model['rmse'] = raw_dict.pop("model_rmse")
        model['pressure_range'] = [
            raw_dict.pop("model_pressure_range_min"),
            raw_dict.pop("model_pressure_range_max"),
        ]
        model['loading_range'] = [
            raw_dict.pop("model_loading_range_min"),
            raw_dict.pop("model_loading_range_max"),
        ]

        # remaining `model_param_*` entries are the model parameters
        param_keys = [key for key in raw_dict if key.startswith("model_param")]
        model["parameters"] = {key[12:]: raw_dict.pop(key) for key in param_keys}

        return ModelIsotherm(
            model=model_from_dict(model),
            **raw_dict,
        )

    return BaseIsotherm(**raw_dict)