#!/usr/bin/env python3
"""
Run integration-e2e inside the shadow network simulator. This can be helpful
vs running it directly for several reasons.

* shadow simulates time, and can collapse idle time. This speeds up the
  network bootstrapping step in particular.
* shadow tries to be deterministic. There are some gaps, but in general
  there *should* be less nondeterministic flakiness under shadow
  than when running natively.
"""

import argparse
import os
import pathlib
import shlex
import shutil
import subprocess
import sys
from pathlib import Path
from typing import Any

import yaml

import common

# This file is only meant to be executed as a script: the directory lookup
# below relies on sys.argv[0] naming this file, which does not hold when the
# module is imported from somewhere else.
assert __name__ == "__main__", "Can't determine _SCRIPT_DIR"
# Resolved directory containing this script; used to locate the sibling
# `integration-e2e` script.
_SCRIPT_DIR = Path(sys.argv[0]).parent.resolve()

# Files and directory used for a shadow run. These are relative paths; main()
# changes to the top level of the git checkout before using them.
# Shadow's data directory (removed before each run).
SHADOW_DATA_DIR = "shadow.chutney.data"
# Generated shadow configuration file.
SHADOW_CONFIG_FILE = "shadow.chutney.yaml"
# File that receives shadow's stdout.
SHADOW_LOG_FILE = "shadow.log"


def gen_shadow_config(seed: int) -> dict[str, Any]:
    """
    Generate a shadow configuration for running integration-e2e.

    The configuration is returned as a plain dict (not a string); the caller
    is responsible for serializing it to YAML.

    Args:
        seed: Value for shadow's `general.seed` option.

    Returns:
        The shadow configuration. It defines two simulated hosts: `host`,
        which runs the `integration-e2e` script found in `_SCRIPT_DIR` via
        `sh -c`, and a host named `common.TEST_DOMAIN`, which runs python's
        `http.server` on port 80.
    """

    env: dict[str, str] = {
        common.RUNNING_IN_SHADOW_ENV: "yes",
        # AF_UNIX sockets aren't supported in shadow
        "CHUTNEY_ENABLE_CONTROLSOCKET": "no",
        # ipv6 isn't supported in shadow
        "CHUTNEY_DISABLE_IPV6": "yes",
        # sandboxing isn't supported in shadow
        "CHUTNEY_TOR_SANDBOX": "no",
        # re-export PATH. The test scripts assume that
        # usual shell utilities are on it. Fall back to the platform's
        # default search path if PATH is unset, so that we never emit a
        # null environment value into the config.
        "PATH": os.getenv("PATH", os.defpath),
    }

    # The args string is split by shadow using shell-like rules, and the
    # resulting `-c` argument is then interpreted by `sh`. Quote once for each
    # layer so that a checkout path containing spaces or quotes survives both.
    # For ordinary paths this yields exactly `-c '<script> 2>&1'`.
    script = shlex.quote(str(_SCRIPT_DIR.joinpath("integration-e2e")))
    sh_args = "-c " + shlex.quote(f"{script} 2>&1")

    return {
        "general": {
            "stop_time": "10m",
            "model_unblocked_syscall_latency": True,
            "seed": seed,
        },
        "network": {
            "graph": {"type": "1_gbit_switch"},
        },
        "experimental": {
            # shadow only actually increments simulated time (and potentially
            # switches threads) if this much time would have been consumed by an
            # unbroken sequence of unblocked syscalls. Using a relatively large
            # value here (vs the default 1us) makes the simulation scheduling
            # more stable and predictable; e.g. adding additional logging to
            # debug an issue is less likely to make the issue disappear.
            #
            # The primary tradeoffs are:
            # * Larger values can result in managed processes measuring elapsed
            #   time where not much happens as *zero*, which may not be handled
            #   gracefully. e.g. in c-tor, using values of 1 ms or more here can
            #   result in a flood of warnings "compute_drain_rate(): Bug:
            #   Computing stream drain rate with zero time delta".
            # * Time will move forward at a larger granularity when unblocked
            #   syscall latency is applied. The value used here is still small
            #   enough though that this shouldn't be terribly strange; e.g.
            #   larger time jumps are likely to be observed on over-loaded
            #   systems with normal preemptive scheduling.
            # * when the simulation does hit a
            #   busy loop, it may spend a bit longer "spinning" before moving
            #   time forward, potentially causing the simulation to take a bit
            #   longer to run. (if it would have otherwise timed out earlier
            #   than this threshold)
            "max_unapplied_cpu_latency": "100us",
        },
        "hosts": {
            "host": {
                "network_node_id": 0,
                "processes": [
                    {
                        "path": "sh",
                        "args": sh_args,
                        "environment": env,
                        # Give the web server below a little time to start.
                        "start_time": "5s",
                    }
                ],
            },
            common.TEST_DOMAIN: {
                "network_node_id": 0,
                "processes": [
                    {
                        "path": "python3",
                        "args": "-m http.server 80",
                        "start_time": "0",
                        # The server is not expected to exit on its own.
                        "expected_final_state": "running",
                    }
                ],
            },
        },
    }


def main() -> None:
    """
    Generate a shadow config and run integration-e2e under shadow.

    Changes the working directory to the top level of the enclosing git
    checkout, writes the generated config to SHADOW_CONFIG_FILE, removes any
    existing SHADOW_DATA_DIR, and then runs `shadow` with its stdout
    redirected to SHADOW_LOG_FILE.

    Raises:
        subprocess.CalledProcessError: if `git rev-parse` or `shadow` exits
            with a nonzero status.
    """
    parser = argparse.ArgumentParser(
        prog="integration-e2e-shadow",
        description="Runs integration-e2e inside a shadow simulation",
    )
    parser.add_argument(
        "-s", "--seed", type=int, default=1, help="Simulation PRNG seed"
    )
    args = parser.parse_args()

    # Run git directly with an argv list; no shell is needed for this.
    git_output = subprocess.check_output(["git", "rev-parse", "--show-toplevel"])
    # Drop only the trailing newline git prints, so that a path with leading
    # or trailing whitespace is left intact.
    toplevel = Path(os.fsdecode(git_output).rstrip("\n"))
    os.chdir(toplevel)

    # Write out shadow config. We could just pipe it directly to the shadow
    # process below, but writing it out is useful for debugging.
    shadow_config = gen_shadow_config(args.seed)
    with open(SHADOW_CONFIG_FILE, "w") as f:
        yaml.safe_dump(shadow_config, f)

    # Remove shadow's data dir. (It will bail if the directory already exists)
    if os.path.isdir(SHADOW_DATA_DIR):
        shutil.rmtree(SHADOW_DATA_DIR)

    shadow_args = [
        "shadow",
        "--data-directory=" + SHADOW_DATA_DIR,
        "--progress=true",
        SHADOW_CONFIG_FILE,
    ]
    # Only stdout is captured in the log file; stderr is left attached to
    # wherever this script's stderr goes.
    with open(SHADOW_LOG_FILE, "w") as shadow_log_file:
        subprocess.run(shadow_args, check=True, stdout=shadow_log_file)


# Script entry point: parse the command line and run the simulation.
if __name__ == "__main__":
    main()
