# Source code for the module of tools for working with IAMC-structured data.

"""Tools for working with IAMC-structured data."""
from typing import Optional

import pandas as pd
import sdmx.model.v21 as m
from sdmx.message import StructureMessage

def describe(data: pd.DataFrame, extra: Optional[str] = None) -> StructureMessage:
    """Build SDMX code lists describing the labels found in IAMC-format `data`.

    Parameters
    ----------
    data :
        Data in "wide" or "long" IAMC format. The columns "MODEL", "SCENARIO",
        "REGION", "VARIABLE", and "UNIT" are read.
    extra : str, optional
        Text used in the description of each Codelist in place of the generic
        word "data".

    Returns
    -------
    sdmx.message.StructureMessage
        Message holding one :class:`.Codelist` for each of the MODEL, SCENARIO,
        REGION, VARIABLE, and UNIT dimensions. Every Code in the VARIABLE code
        list carries an annotation with :py:`id="preferred-unit-measure"` whose
        text is the comma-separated UNIT label(s) found alongside that VARIABLE.
    """
    message = StructureMessage()
    # Name of the described data set, as it appears in every description.
    described = extra or "data"

    def _new_codelist(dimension: str) -> m.Codelist:
        """Create an empty code list for `dimension` and add it to the message."""
        codelist = m.Codelist(
            id=dimension,
            description=(
                f"Codes appearing in the {dimension!r} dimension of {described}."
            ),
            is_final=True,
            is_external_reference=False,
        )
        message.add(codelist)
        return codelist

    # Dimensions that are independent of one another: one code per distinct label,
    # in sorted order.
    for dimension in ("MODEL", "SCENARIO", "REGION"):
        codelist = _new_codelist(dimension)
        labels = sorted(data[dimension].unique())
        for label in labels:
            codelist.append(m.Code(id=label))

    # "VARIABLE" and "UNIT" are processed together so that each variable can be
    # annotated with the unit(s) it is reported in.
    paired = ["VARIABLE", "UNIT"]
    variable_codes = _new_codelist("VARIABLE")
    unit_codes = _new_codelist("UNIT")

    distinct_pairs = data[paired].sort_values(paired).drop_duplicates()
    by_variable = distinct_pairs.groupby("VARIABLE")

    for variable, pairs in by_variable:
        units = pairs["UNIT"].unique()

        unit_note = m.Annotation(id="preferred-unit-measure", text=", ".join(units))
        variable_codes.append(m.Code(id=variable, annotations=[unit_note]))

        for unit in units:
            try:
                unit_codes.append(m.Code(id=unit))
            except ValueError:
                # Presumably raised when this unit is already in the code list
                # (one unit can occur with several variables) -- TODO confirm
                # against sdmx. Skip it and keep the first entry.
                continue

    return message