Source code for message_ix_models.tests.tools.iea.test_web

"""Tests of :mod:`.tools`."""

from importlib.metadata import version
from typing import TYPE_CHECKING

import pandas as pd
import pytest
from genno import Computer
from packaging.version import parse

from message_ix_models.testing import GHA
from message_ix_models.tools.exo_data import prepare_computer
from message_ix_models.tools.iea.web import (
    DIMS,
    generate_code_lists,
    get_mapping,
    load_data,
)
from message_ix_models.util import HAS_MESSAGE_DATA

if TYPE_CHECKING:
    from collections.abc import Generator

# Dask < 2024.4.1 is incompatible with Python >= 3.11.9, but dask is pinned in that
# range for tests of message_ix < 3.7.0. Apply this mark to skip the affected tests.
# NB the condition inspects only the installed message_ix version, not the Python
#    version, so marked tests are skipped on every Python when message_ix < 3.7.0.
MARK_DASK_PYTHON = pytest.mark.skipif(
    parse(version("message_ix")) < parse("3.7.0"),
    reason="Pinned dask version and certain Python versions are incompatible",
)


@pytest.fixture
def user_local_data(pytestconfig, request) -> "Generator":  # pragma: no cover
    """Symlink :path:`…/iea/` in the test local data directory to the user's.

    If the requesting test does not also use the ``test_context`` fixture, nothing is
    set up or torn down. Otherwise, a symlink named ``iea`` is created in the
    ``core.local_data`` directory of the test context, pointing to the ``iea``
    directory under ``pytestconfig.user_local_data``; the link is removed again when
    the test finishes, whether it passes or fails.
    """
    if "test_context" not in request.fixturenames:
        # A generator fixture must yield exactly once, even when there is nothing to
        # set up: returning before the first ``yield`` makes pytest fail the test
        # with "… did not yield a value".
        yield
        return

    test_local_data = request.getfixturevalue("test_context").core.local_data
    # NB named `user_dir` to avoid shadowing this fixture function's own name
    user_dir = pytestconfig.user_local_data

    source = test_local_data.joinpath("iea")
    source.symlink_to(user_dir.joinpath("iea"))

    try:
        yield
    finally:
        # Always remove the link so the user's data are not visible to other tests
        source.unlink()
class TestIEA_EWEB:
    """Tests of the "IEA_EWEB" source, used via :func:`.exo_data.prepare_computer`."""

    @MARK_DASK_PYTHON
    # Uncomment the following line to use the full data files from a local copy
    # @pytest.mark.usefixtures("user_local_data")
    @pytest.mark.parametrize("source", ("IEA_EWEB",))
    @pytest.mark.parametrize(
        "source_kw",
        (
            {
                "provider": "OECD",
                "edition": "2021",
                "product": ["CHARCOAL"],
                "flow": ["RESIDENT"],
            },
            # All flows related to transport
            {
                "provider": "OECD",
                "edition": "2022",
                "flow": [
                    "DOMESAIR",
                    "DOMESNAV",
                    "PIPELINE",
                    "RAIL",
                    "ROAD",
                    "TOTTRANS",
                    "TRNONSPE",
                    "WORLDAV",
                    "WORLDMAR",
                ],
            },
            # Unrecognized keyword argument
            pytest.param(
                {"provider": "IEA", "edition": "2023", "extra_kw": "FOO"},
                marks=pytest.mark.xfail(raises=ValueError),
            ),
            {"provider": "IEA", "edition": "2024", "flow": ["AVBUNK"]},
            pytest.param(
                {"provider": "IEA", "edition": "2024", "transform": "B"},
                marks=pytest.mark.xfail(
                    raises=ValueError, reason="Missing regions= kwarg"
                ),
            ),
            {"provider": "IEA", "edition": "2024", "transform": "B", "regions": "R12"},
        ),
    )
    def test_prepare_computer(self, test_context, source, source_kw):
        """Data for `source` and `source_kw` can be prepared and retrieved."""
        # FIXME The following should be redundant, but appears mutable on GHA linux and
        # Windows runners.
        test_context.model.regions = "R14"

        computer = Computer()
        keys = prepare_computer(test_context, computer, source, source_kw)

        # Preparation of data runs successfully
        data = computer.get(keys[0])
        # print(data.to_string())

        # Data has the expected dimensions
        assert set(data.dims) == {"n", "y", "product", "flow"}

        # Data contain expected coordinates
        # NB cannot test total counts here because the fuzzed test data does not
        #    necessarily include ≥1 data point from each (n, y)
        n = source_kw.get("regions", "R14")
        nodes = set(data.coords["n"].data)
        assert {f"{n}_AFR", f"{n}_WEU"} < nodes
        periods = set(data.coords["y"].data)
        assert {1980, 2018} < periods
# (provider, edition) pairs used to parametrize the tests below.
#
# NB once there is a fuzzed version of the (IEA, 2023) data available, usage of this
#    variable can be replaced with list(FILES.keys())
PROVIDER_EDITION = (
    # Expected to fail on GitHub Actions or without message_data
    pytest.param(
        "IEA",
        "2023",
        marks=pytest.mark.xfail(
            GHA or not HAS_MESSAGE_DATA, reason="No fuzzed version of this data"
        ),
    ),
    ("IEA", "2024"),
    *(("OECD", edition) for edition in ("2021", "2022", "2023")),
)
@MARK_DASK_PYTHON
@pytest.mark.parametrize("provider, edition", PROVIDER_EDITION)
def test_load_data(test_context, tmp_path, provider, edition):
    """:func:`.load_data` returns a data frame with the expected columns.

    `test_context` and `tmp_path` are requested as fixtures but not otherwise used,
    except in the commented-out lines below.
    """
    # # Store in the temporary directory for this test
    # test_context.cache_path = tmp_path.joinpath("cache")

    result = load_data(provider, edition)

    # Result has the correct type
    assert isinstance(result, pd.DataFrame)

    # print(result.head().to_string())  # DEBUG
    # print(result.head(1).transpose())  # DEBUG

    # Data have the expected dimensions: every one of DIMS, plus "Value".
    # - Union (|), not intersection (&): the intersection is at most {"Value"}, which
    #   would leave the DIMS columns unchecked.
    # - Non-strict subset (<=), so that a frame with exactly these columns passes.
    assert (set(DIMS) | {"Value"}) <= set(result.columns)
@MARK_DASK_PYTHON
@pytest.mark.parametrize("provider, edition", PROVIDER_EDITION)
def test_generate_code_lists(tmp_path, provider, edition):
    """Smoke test: :func:`.generate_code_lists` completes without raising.

    The pytest-provided `tmp_path` is passed as the third argument.
    """
    generate_code_lists(provider, edition, tmp_path)
@pytest.mark.parametrize("provider, edition", PROVIDER_EDITION)
def test_get_mapping(provider, edition) -> None:
    """:func:`.get_mapping` returns an adapter that maps only the "n" dimension."""
    # Expected number of mapped values for each (provider, edition)
    expected_count = {
        ("IEA", "2023"): 191,
        ("IEA", "2024"): 191,
        ("OECD", "2021"): 190,
        ("OECD", "2022"): 185,
        ("OECD", "2023"): 191,
    }

    # MappingAdapter can be generated
    adapter = get_mapping(provider, edition)

    # Only "COUNTRY" labels are mapped
    assert set(adapter.maps) == {"n"}

    # Expected number of values are mapped
    assert expected_count[(provider, edition)] == len(adapter.maps["n"])
@pytest.mark.parametrize(
    "urn, N",
    (
        # COUNTRY code lists
        ("IEA:COUNTRY_IEA(2023)", 191),
        ("IEA:COUNTRY_IEA(2024)", 191),
        ("IEA:COUNTRY_OECD(2021)", 190),
        ("IEA:COUNTRY_OECD(2022)", 185),
        ("IEA:COUNTRY_OECD(2023)", 191),
        # FLOW code lists
        ("IEA:FLOW_IEA(2023)", 108),
        ("IEA:FLOW_IEA(2024)", 108),
        ("IEA:FLOW_OECD(2021)", 108),
        ("IEA:FLOW_OECD(2022)", 108),
        ("IEA:FLOW_OECD(2023)", 108),
        # PRODUCT code lists
        ("IEA:PRODUCT_IEA(2023)", 68),
        ("IEA:PRODUCT_IEA(2024)", 68),
        ("IEA:PRODUCT_OECD(2021)", 68),
        ("IEA:PRODUCT_OECD(2022)", 68),
        ("IEA:PRODUCT_OECD(2023)", 68),
    ),
)
def test_load_codelists(urn, N):
    """The code list identified by `urn` can be read and contains `N` codes."""
    from message_ix_models.util.sdmx import read

    # Code list can be read using its URN
    codelist = read(urn)

    # Code list has the expected number of codes
    assert len(codelist) == N