Carbon Bridge Analysis — data pipeline and figure builder.

Loads all tests that contain carbon bridge / pont de carbone data, normalises heterogeneous column names, converts raw cell values into a “time without carbon bridge” score (seconds, higher = better).

Axes: x = Energy per pulse (mJ); y = Frequency (kHz); color = Time without carbon bridge (s) [red = 0 s → green = 3600 s].

Public API#

build_dataframe() -> pd.DataFrame

Run the full pipeline and return the analysis DataFrame.

build_figure(df, selected_tests, selected_generators) -> plotly.graph_objects.Figure

Build a Plotly figure (heatmap + scatter + iso-power lines) from the DataFrame returned by build_dataframe().

Standalone use#

Run directly to build the DataFrame and display an interactive Plotly chart:

python examples/carbon_bridge_analysis.py

For the full Dash dashboard with live filter controls:

python examples/carbon_bridge_analysis_dash.py

Imports#

import re
import warnings

import numpy as np
import pandas as pd

import sda
from sda.analysis import (
    ENERGY_COLS,
    FREQ_COLS,
    VOLTAGE_COLS,
    background_heatmap_trace,
    calibrate_k,
    concat_notes,
    get_row_generator,
    iso_power_traces,
    load_supplementary_xlsx,
    parse_plasma_homogene,
)

# NOTE(review): blanket, module-wide warning suppression — presumably to hide
# noisy warnings raised while loading heterogeneous spreadsheets; consider
# narrowing to specific categories.
warnings.filterwarnings("ignore")

Configuration#

# Test campaigns that contain carbon bridge / pont de carbone data.
TESTS = [
    "T116",
    "T126",
    "T156",
    "T158",
    "T165",
    "T183",
    "T192",
    "T288",
    "T314",
    "T324",
    "T330",
    "T339",
    "T341",
    "T343",
    "T344",
    "T345",
    "T346",
    "T364",
]

# Candidate carbon-bridge column names, tried in order; the first match becomes
# the primary column and the next distinct match the fallback (see build_dataframe).
CARBON_COLS_PRIORITY = [
    "pont de carbone",
    "pont de carbone2",
    "Carbon bridge (yes/no)",
    "carbon bridge [yes/no]_x000a_Position",
    "Pont de carbone",
    "Nombre de pont de carbone",
]

# Mixed NRP+DC tests — exclude from V→E calibration
CALIB_EXCLUDE = {"T314"}

# Score cap: anything ≥ 3600 s = "never formed" → full green
SCORE_CAP = 3600

# Log-scale colormap ticks (original seconds → displayed label)
_LOG_TICK_S = [0, 30, 60, 120, 300, 600, 1200, 1800, 3600]
_LOG_TICK_LABELS = [
    "0",
    "30 s",
    "1 min",
    "2 min",
    "5 min",
    "10 min",
    "20 min",
    "30 min",
    "≥60 min",
]
# Upper bound of the log1p-transformed colour axis (log1p(SCORE_CAP)).
_LOG_CMAX = float(np.log1p(SCORE_CAP))


def _score_to_log(s: float) -> float:
    """Map a score onto the log1p colour axis (preserves 0, expands low range)."""
    # Saturate at SCORE_CAP before the transform; min(s, cap) keeps NaN as-is.
    clipped = min(s, SCORE_CAP)
    return float(np.log1p(clipped))

Step 2: Score converter — Path B (single-value)#

def _parse_carbon_bridge(value, description: str | None = None) -> float:
    """Convert raw carbon bridge value to time-without-bridge score in seconds."""

    def _from_text(s: str) -> float | None:
        if re.search(r"instantann|début pdc|pdc en cours|pdc direct|pdc →|pdc->", s):
            return 0.0
        m = re.search(r"(\d{1,2}):(\d{2})\s*(?:pdc|début|formation|arrêt)", s)
        if m:
            return int(m.group(1)) * 60 + int(m.group(2))
        if re.search(r"pdc\s*[<≤]\s*\d+\s*min", s):
            return 0.0
        m2 = re.search(r"pdc\s+(\d+)\s*min", s)
        if m2:
            return int(m2.group(1)) * 60
        m3 = re.search(
            r"after\s+(\d+)\s*min.*?(start|carbon bridge|pdc)|"
            r"(start|formation)\s+(?:of\s+)?(?:carbon bridge|pdc).*?(\d+)\s*min",
            s,
            re.IGNORECASE,
        )
        if m3:
            mins = int(m3.group(1) or m3.group(4))
            return mins * 60
        m4 = re.search(
            r"after\s+(\d+)\s*min.*?no\s+carbon|zero\s+apr[eè]s\s+(\d+)\s*min",
            s,
            re.IGNORECASE,
        )
        if m4:
            return int(m4.group(1) or m4.group(2)) * 60
        m5 = re.search(r"(\d+)\s*min", s)
        if m5:
            return int(m5.group(1)) * 60
        return None

    raw_score: float | None = None

    if value is None or (isinstance(value, float) and np.isnan(value)):
        raw_score = None
    elif isinstance(value, pd.Timestamp):
        raw_score = 0.0
    else:
        s = str(value).strip().strip("'\"")
        sl = s.lower()
        if not s or sl in ("nan", "none", ""):
            raw_score = None
        elif re.match(r"\d{4}-\d{2}-\d{2}", sl):
            raw_score = 0.0
        elif sl in ("no", "non", "none", "zéro", "zero"):
            raw_score = float(SCORE_CAP)
        elif sl in ("yes", "oui", "yes ", "yes?"):
            raw_score = 0.0
        else:
            try:
                n = float(s)
                raw_score = float(SCORE_CAP) if n == 0 else 0.0
            except ValueError:
                extracted = _from_text(sl)
                if extracted is not None:
                    raw_score = extracted
                elif "yes" in sl or "oui" in sl or "pdc" in sl:
                    raw_score = 0.0
                elif "no" in sl or "non" in sl:
                    raw_score = float(SCORE_CAP)
                else:
                    raw_score = None

    if raw_score is None and description:
        desc_l = str(description).lower()
        extracted = _from_text(desc_l)
        if extracted is not None:
            raw_score = extracted
        elif "pdc" in desc_l or "carbon bridge" in desc_l:
            raw_score = 0.0

    return float(raw_score) if raw_score is not None else np.nan

Step 3: Time-series scorer — Path A#

def _compute_timeseries_score(df: pd.DataFrame, bridge_col: str) -> float:
    """Mean inter-bridge interval from a timestamped yes/no time series."""
    intervals: list[float] = []
    last_yes: float | None = None
    for t_val, b_val in zip(df["Time (s)"].tolist(), df[bridge_col].tolist()):
        t_num = pd.to_numeric(t_val, errors="coerce")
        if pd.isna(t_num):
            # Session reset marker (e.g. "Jour 2")
            last_yes = None
            continue
        b_str = str(b_val).strip().lower() if pd.notna(b_val) else ""
        if b_str in ("yes", "oui"):
            if last_yes is None:
                intervals.append(float(t_num))
            else:
                intervals.append(float(t_num) - last_yes)
            last_yes = float(t_num)

    return float(np.mean(intervals)) if intervals else np.nan


def _is_timeseries_test(df: pd.DataFrame, bridge_col: str) -> bool:
    if "Time (s)" not in df.columns:
        return False
    yes_mask = df[bridge_col].astype(str).str.lower().isin(["yes", "oui"])
    return bool(
        pd.to_numeric(df.loc[yes_mask, "Time (s)"], errors="coerce").notna().any()
    )

Step 4: Description extractor#

def _extract_description(row: pd.Series, df_cols: list[str]) -> str | None:
    """Collect note fields that mention PDC (carbon bridge) for hover tooltips."""
    notes = concat_notes(row, df_cols, keyword_filter="pdc")
    return notes


def _parse_plasma_homogene_cb(value) -> float:
    """Map 'Plasma homogène' to a carbon-bridge score (seconds).

    Delegates raw normalisation to :func:`~sda.analysis.scorers.parse_plasma_homogene`
    then maps the [0, 1] homogeneity score to the SCORE_CAP time scale:
        1.0 → SCORE_CAP (3600 s)  — fully homogeneous, no bridge
        0.5 → SCORE_CAP / 2       — partial
        0.0 → 0 s                 — non-homogeneous, bridge likely
    """
    homogeneity = parse_plasma_homogene(value)
    if np.isnan(homogeneity):
        return np.nan
    return float(SCORE_CAP) * homogeneity


def _heure_to_sec(h) -> float | None:
    """Convert an 'HH:MM' cell value (str or datetime.time) to seconds since midnight."""
    import datetime

    if h is None:
        return None
    try:
        if pd.isna(h):
            return None
    except (TypeError, ValueError):
        pass
    if isinstance(h, (datetime.time, datetime.datetime)):
        t = h if isinstance(h, datetime.time) else h.time()
        return t.hour * 3600 + t.minute * 60 + t.second
    s = str(h).strip()
    parts = s.split(":")
    if len(parts) >= 2:
        try:
            return int(parts[0]) * 3600 + int(parts[1]) * 60
        except ValueError:
            pass
    return None


def _parse_pdc_present_with_elapsed(pdc_val, elapsed_s: float | None) -> float:
    """Map PDC présent + elapsed time → carbon-bridge score (seconds).

    PDC présent = "Non"  → SCORE_CAP (no bridge at this observation)
    PDC présent = "Oui"  → elapsed_s (time from run start = apparition time)
    PDC présent = "—"/NaN → NaN
    """
    if pdc_val is None:
        return np.nan
    try:
        if pd.isna(pdc_val):
            return np.nan
    except (TypeError, ValueError):
        # Non-scalar input that pd.isna rejects — fall through to string handling.
        pass
    text = str(pdc_val).strip()
    # Dash / en-dash / em-dash placeholders mean "not recorded".
    if text in ("\u2014", "\u2013", "-", "—", "–", ""):
        return np.nan
    lowered = text.lower()
    if lowered in ("non", "no"):
        return float(SCORE_CAP)
    if lowered in ("oui", "yes"):
        return 0.0 if elapsed_s is None else float(elapsed_s)
    return np.nan


def _process_t344_ep(df_all: pd.DataFrame, k_map: dict) -> pd.DataFrame:
    """Load T344_analyse_EP.xlsx and append one row per observation to df_all.

    Score = elapsed seconds from run start when PDC appeared (Oui),
            SCORE_CAP when PDC not present (Non), NaN otherwise.

    Returns df_all unchanged when the supplementary file is missing or has
    no usable rows; otherwise returns df_all with the T344-EP rows appended.
    """
    df_ep = load_supplementary_xlsx("T344", "T344_analyse_EP.xlsx", label="T344-EP")
    if df_ep.empty:
        print("  T344-EP: file not found or empty")
        return df_all

    # Column sniffing: header spellings vary between sheets, so each column
    # is located by case-insensitive substring match (None when absent).
    col_pdc = next(
        (c for c in df_ep.columns if "pdc" in c.lower() and "pr" in c.lower()), None
    )
    col_heure = next((c for c in df_ep.columns if "heure" in c.lower()), None)
    col_run = next((c for c in df_ep.columns if "run" in c.lower()), None)
    col_volt = next(
        (c for c in df_ep.columns if "voltage" in c.lower() or "tension" in c.lower()),
        None,
    )
    col_freq = next((c for c in df_ep.columns if "freq" in c.lower()), None)
    col_notes = next(
        (c for c in df_ep.columns if "note" in c.lower() or "observ" in c.lower()), None
    )

    # The PDC-present column is the one indispensable input.
    if col_pdc is None:
        print("  T344-EP: 'PDC présent' column not found")
        return df_all

    # NOTE(review): assumes 0.07 mJ/kV² is a reasonable last-resort k when
    # neither "EP" nor "default" is calibrated — confirm against calibrate_k.
    _k_t344 = k_map.get("EP", k_map.get("default", 0.07))

    _t344_rows: list[dict] = []
    current_run: str | None = None  # run label carried forward across blank cells
    run_start_sec: float | None = None  # wall-clock start of the current run

    # Walk rows in sheet order; a new run label resets the run start time.
    for _, row in df_ep.iterrows():
        run_val = str(row[col_run]).strip() if col_run is not None else None
        if run_val and run_val not in ("nan", "", "None"):
            if run_val != current_run:
                current_run = run_val
                run_start_sec = _heure_to_sec(row[col_heure]) if col_heure else None

        pdc_raw = row[col_pdc] if col_pdc else None
        heure_sec = _heure_to_sec(row[col_heure]) if col_heure else None

        if run_start_sec is not None and heure_sec is not None:
            elapsed = heure_sec - run_start_sec
            if elapsed < 0:
                # Observation time wrapped past midnight relative to run start.
                elapsed += 86400
        else:
            elapsed = None

        cb_score = _parse_pdc_present_with_elapsed(pdc_raw, elapsed)
        if np.isnan(cb_score):
            # No usable PDC observation on this row — skip it entirely.
            continue

        volt_val = row[col_volt] if col_volt is not None else np.nan
        try:
            volt_f = float(volt_val)
        except (TypeError, ValueError):
            volt_f = np.nan

        freq_val = row[col_freq] if col_freq is not None else np.nan
        try:
            freq_f = float(freq_val)
        except (TypeError, ValueError):
            freq_f = np.nan

        # Infer pulse energy from voltage when available: E = k·V².
        energy_mj = np.nan
        analysis_note = "T344 EP observations log"
        if pd.notna(volt_f) and _k_t344 is not None:
            energy_mj = _k_t344 * volt_f**2
            analysis_note += f"; E=k·V² (k={_k_t344:.4f} mJ/kV², V={volt_f:.1f} kV)"

        notes_str: str | None = None
        if col_notes is not None:
            raw_note = row[col_notes]
            try:
                if not pd.isna(raw_note):
                    notes_str = str(raw_note).strip() or None
            except (TypeError, ValueError):
                # pd.isna rejected the value; keep whatever string it renders as.
                notes_str = str(raw_note).strip() or None

        pdc_label = str(pdc_raw).strip() if pdc_raw is not None else ""
        elapsed_label = f"{int(elapsed)}s" if elapsed is not None else "?"

        _t344_rows.append(
            {
                "test": "T344_sup",
                "run": current_run,
                "generator": "EP",
                "energy_per_pulse_mJ": energy_mj,
                "frequency_kHz": freq_f,
                "carbon_bridge_score_s": cb_score,
                "carbon_bridge_raw": f"PDC={pdc_label} @{elapsed_label}",
                "carbon_bridge_description": notes_str,
                "analysis_note": analysis_note,
            }
        )

    if _t344_rows:
        df_all = pd.concat([df_all, pd.DataFrame(_t344_rows)], ignore_index=True)
        # Summary counts: scores below the cap mean the bridge appeared (Oui).
        _oui = sum(1 for r in _t344_rows if r["carbon_bridge_score_s"] < SCORE_CAP)
        _non = sum(1 for r in _t344_rows if r["carbon_bridge_score_s"] == SCORE_CAP)
        print(
            f"  Merged {len(_t344_rows)} T344-EP rows | PDC Oui={_oui}, PDC Non={_non}"
        )
    else:
        print("  T344-EP: no usable rows")

    return df_all

Step 5: Main data loading pipeline#

def build_dataframe() -> pd.DataFrame:
    """Run the full carbon bridge pipeline and return the analysis DataFrame.

    Steps performed:
    1. Calibrate k = E/V² per generator type from tests that have both columns.
    2. Load each test in TESTS, score rows for carbon bridge susceptibility.
    3. Append T346 supplementary EP pilot data.
    4. Append the T344 supplementary EP observations log.

    Returns
    -------
    pd.DataFrame
        One row per run/condition.  Columns:
        test, run, generator, energy_per_pulse_mJ, frequency_kHz,
        carbon_bridge_score_s, carbon_bridge_raw, carbon_bridge_description,
        analysis_note.
    """
    print("Calibrating E = k·V²…")
    k_map = calibrate_k(TESTS, exclude=CALIB_EXCLUDE)
    print(f"  k values: {k_map}")

    print("\nLoading tests…")
    all_rows: list[dict] = []

    for test_name in TESTS:
        print(f"  {test_name}…", end=" ", flush=True)
        try:
            df = sda.load_test(test_name)
        except Exception as e:
            # Best-effort: a test that fails to load is reported and skipped.
            print(f"ERROR: {e}")
            continue

        # Resolve heterogeneous column names against the known alias lists.
        df_cols = list(df.columns)
        ecol = next((c for c in ENERGY_COLS if c in df_cols), None)
        vcol = next((c for c in VOLTAGE_COLS if c in df_cols), None)
        fcol = next((c for c in FREQ_COLS if c in df_cols), None)
        run_col = next((c for c in ["run", "Run"] if c in df_cols), None)

        # Carbon bridge columns: first alias found is primary, next distinct
        # alias (if any) is kept as a fallback for timestamp-mangled cells.
        bridge_col: str | None = None
        bridge_col2: str | None = None
        for cname in CARBON_COLS_PRIORITY:
            if cname in df_cols:
                if bridge_col is None:
                    bridge_col = cname
                elif bridge_col2 is None and cname != bridge_col:
                    bridge_col2 = cname
                    break

        is_ts = bridge_col is not None and _is_timeseries_test(df, bridge_col)

        # ── Path A: timestamped yes/no series → one aggregate row per test ──
        if is_ts:
            assert bridge_col is not None
            score = _compute_timeseries_score(df, bridge_col)
            energy_val = np.nan
            freq_val = np.nan
            if fcol:
                # Use the frequency only when it is constant across the test.
                fv = pd.to_numeric(df[fcol], errors="coerce").dropna().unique()
                freq_val = float(fv[0]) if len(fv) == 1 else np.nan
            analysis_note = (
                f"Time-series scoring: mean inter-bridge interval = {score:.0f} s"
                if not np.isnan(score)
                else ""
            )
            # Representative generator for the whole time-series test
            first_row = df.iloc[0] if len(df) > 0 else pd.Series(dtype=object)
            gen = get_row_generator(first_row, df_cols, test_name, df)
            # Infer energy from voltage for time-series tests
            if vcol and np.isnan(energy_val):
                # NOTE(review): assumes calibrate_k always returns a "default"
                # key — otherwise this raises KeyError; confirm.
                k = k_map.get(gen, k_map["default"])
                v_vals = pd.to_numeric(df[vcol], errors="coerce").dropna()
                if len(v_vals) > 0:
                    # Median voltage is taken as representative of the test.
                    v_med = float(v_vals.median())
                    energy_val = k * v_med**2
                    suffix = (
                        f"Energy inferred: E=k·V² (gen={gen}, k={k:.4f} mJ/kV²,"
                        f" V={v_med:.1f} kV)"
                    )
                    analysis_note = (
                        f"{analysis_note}; {suffix}" if analysis_note else suffix
                    )

            all_rows.append(
                {
                    "test": test_name,
                    "run": None,
                    "generator": gen,
                    "energy_per_pulse_mJ": energy_val,
                    "frequency_kHz": freq_val,
                    "carbon_bridge_score_s": score,
                    "carbon_bridge_raw": f"[time-series: {len(df)} rows]",
                    "carbon_bridge_description": None,
                    "analysis_note": analysis_note,
                }
            )
            print(f"time-series score={score:.0f}s")
            continue

        # ── Path B: row-by-row single-value scoring ──
        row_count = 0
        for _, row in df.iterrows():
            energy_val = np.nan
            analysis_note = ""

            if ecol:
                ev = pd.to_numeric(row.get(ecol), errors="coerce")
                energy_val = float(ev) if pd.notna(ev) else np.nan

            # Generator per row (used for energy inference and tooltip)
            generator = get_row_generator(row, df_cols, test_name, df)

            # Fall back to E = k·V² when the energy column is missing/blank.
            if np.isnan(energy_val) and vcol:
                v_raw = pd.to_numeric(row.get(vcol), errors="coerce")
                if pd.notna(v_raw) and float(v_raw) > 0:
                    v = float(v_raw)
                    k = k_map.get(generator, k_map["default"])
                    energy_val = k * v**2
                    analysis_note = (
                        f"Energy inferred: E=k·V² (gen={generator},"
                        f" k={k:.4f} mJ/kV², V={v:.1f} kV)"
                    )

            freq_val = np.nan
            if fcol:
                fv = pd.to_numeric(row.get(fcol), errors="coerce")
                freq_val = float(fv) if pd.notna(fv) else np.nan

            # Normalise the run identifier ("3.0" → "3"; otherwise keep as text).
            run_val = None
            if run_col:
                rv = row.get(run_col)
                if pd.notna(rv):
                    try:
                        run_val = str(int(float(rv)))
                    except (ValueError, TypeError):
                        run_val = str(rv)

            # Carbon bridge raw value with T339–T345 fallback
            raw_val = None
            if bridge_col:
                raw_val = row.get(bridge_col)
                if bridge_col2:
                    primary_str = (
                        str(raw_val).strip().strip("'\"").lower()
                        if pd.notna(raw_val)
                        else ""
                    )
                    # Primary cell mangled into a timestamp/date or empty →
                    # prefer the secondary bridge column for this row.
                    _ts_like = bool(re.match(r"\d{4}-\d{2}-\d{2}", primary_str))
                    if (
                        isinstance(raw_val, pd.Timestamp)
                        or _ts_like
                        or primary_str in ("none", "", "nan")
                    ):
                        alt = row.get(bridge_col2)
                        if pd.notna(alt):
                            raw_val = alt

            description = _extract_description(row, df_cols)
            score = _parse_carbon_bridge(raw_val, description)

            # Drop rows carrying no plottable information at all.
            if pd.isna(score) and pd.isna(energy_val) and pd.isna(freq_val):
                continue

            all_rows.append(
                {
                    "test": test_name,
                    "run": run_val,
                    "generator": generator,
                    "energy_per_pulse_mJ": energy_val,
                    "frequency_kHz": freq_val,
                    "carbon_bridge_score_s": score,
                    "carbon_bridge_raw": str(raw_val) if raw_val is not None else None,
                    "carbon_bridge_description": description,
                    "analysis_note": analysis_note,
                }
            )
            row_count += 1

        print(f"{row_count} rows")

    df_all = pd.DataFrame(all_rows)
    print(f"\nTotal rows: {len(df_all)}")
    _plottable = df_all.dropna(subset=["energy_per_pulse_mJ", "frequency_kHz"]).shape[0]
    print(f"Plottable rows (energy+freq): {_plottable}")
    print(f"Rows with score: {df_all['carbon_bridge_score_s'].notna().sum()}")

    # ── Step 5b: Load T346_analyse_EP.xlsx (supplementary) ───────────────────
    print("\nLoading T346 pilot campaign (supplementary)…")
    _df_ep_raw = load_supplementary_xlsx("T346", label="T346-EP")

    if not _df_ep_raw.empty:
        # Fixed column names: the T346 pilot sheet has a stable layout.
        _EP_FREQ_COL = "Frequency (kHz)"
        _EP_VOLT_COL = "Voltage input (kV)"
        _EP_HOMOG_COL = "Plasma homogène"
        _EP_NOTES_COL = "Notes"
        _EP_REACTOR_COL = "Réacteur"
        _EP_POINT_COL = "Point Test"
        _ep_cols = list(_df_ep_raw.columns)

        _k_ep = k_map.get("default", 0.07)
        _ep_rows: list[dict] = []

        for _, row in _df_ep_raw.iterrows():
            freq_ep = (
                pd.to_numeric(row.get(_EP_FREQ_COL), errors="coerce")
                if _EP_FREQ_COL in _ep_cols
                else np.nan
            )
            volt_ep = (
                pd.to_numeric(row.get(_EP_VOLT_COL), errors="coerce")
                if _EP_VOLT_COL in _ep_cols
                else np.nan
            )
            energy_ep = (
                _k_ep * float(volt_ep) ** 2
                if pd.notna(volt_ep) and float(volt_ep) > 0
                else np.nan
            )

            # Score proxy: plasma homogeneity stands in for bridge formation.
            homog_raw = row.get(_EP_HOMOG_COL) if _EP_HOMOG_COL in _ep_cols else None
            cb_score = _parse_plasma_homogene_cb(homog_raw)

            notes = (
                str(row.get(_EP_NOTES_COL, "")).strip()
                if _EP_NOTES_COL in _ep_cols
                else ""
            )
            reactor = (
                str(row.get(_EP_REACTOR_COL, "")).strip()
                if _EP_REACTOR_COL in _ep_cols
                else ""
            )
            point = (
                str(row.get(_EP_POINT_COL, "")).strip()
                if _EP_POINT_COL in _ep_cols
                else ""
            )

            if pd.isna(energy_ep) and pd.isna(cb_score):
                continue

            # Build the hover description; notes kept only when PDC-relevant.
            desc_parts = []
            if reactor and reactor not in ("nan", ""):
                desc_parts.append(f"Reactor: {reactor}")
            if point and point not in ("nan", ""):
                desc_parts.append(f"Point: {point}")
            if notes and notes not in ("nan", ""):
                if re.search(
                    r"pdc|pont de carbone|carbon bridge", notes, re.IGNORECASE
                ):
                    desc_parts.append(notes[:120])

            _ep_rows.append(
                {
                    "test": "T346_sup",
                    "run": point if point and point not in ("nan", "") else None,
                    "generator": "EP",
                    "energy_per_pulse_mJ": energy_ep,
                    "frequency_kHz": float(freq_ep) if pd.notna(freq_ep) else np.nan,
                    "carbon_bridge_score_s": cb_score,
                    "carbon_bridge_raw": str(homog_raw)
                    if homog_raw is not None
                    else None,
                    "carbon_bridge_description": (
                        " | ".join(desc_parts) if desc_parts else None
                    ),
                    "analysis_note": (
                        f"T346 pilot (EP); score from Plasma homogène='{homog_raw}'; "
                        f"E=k·V² (k={_k_ep:.4f} mJ/kV², V={float(volt_ep):.1f} kV)"
                        if pd.notna(volt_ep)
                        else f"T346 pilot (EP); score from Plasma homogène='{homog_raw}'"
                    ),
                }
            )

        if _ep_rows:
            df_all = pd.concat([df_all, pd.DataFrame(_ep_rows)], ignore_index=True)
            # Summary counts map homogeneity levels to score values (see
            # _parse_plasma_homogene_cb): full / partial / none.
            _ep_oui = sum(
                1 for r in _ep_rows if r["carbon_bridge_score_s"] == SCORE_CAP
            )
            _ep_non = sum(1 for r in _ep_rows if r["carbon_bridge_score_s"] == 0.0)
            _ep_part = sum(
                1 for r in _ep_rows if r["carbon_bridge_score_s"] == SCORE_CAP / 2
            )
            print(
                f"  Merged {len(_ep_rows)} T346-EP rows | "
                f"Oui={_ep_oui}, Partiel={_ep_part}, Non={_ep_non}"
            )
        else:
            print("  T346-EP: no usable rows (no energy or plasma homogène data)")
    else:
        print("  T346-EP: file not found or empty")

    # 5c. T344 supplementary EP log (T344_analyse_EP.xlsx)
    print("\nStep 5c: T344 EP observations log …")
    df_all = _process_t344_ep(df_all, k_map)

    return df_all

Step 6: Figure helpers (iso-power lines + heatmap) delegated to sda.analysis.viz

Step 7: Build figure for a given selection#

def build_figure(
    df: pd.DataFrame, selected_tests: list[str], selected_generators: list[str]
):
    """Build Plotly figure with heatmap + iso-power lines + scatter.

    Parameters
    ----------
    df:
        DataFrame returned by :func:`build_dataframe`.
    selected_tests:
        Subset of test names to display (e.g. from a Dash checklist).
    selected_generators:
        Subset of generator names to display.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.graph_objects as go

    # FIX: build_dataframe() emits a lowercase "test" column; the previous
    # df["Test"] lookup raised KeyError on every call.
    sub = df[
        df["test"].isin(selected_tests) & df["generator"].isin(selected_generators)
    ].copy()
    plot = sub.dropna(
        subset=["energy_per_pulse_mJ", "frequency_kHz", "carbon_bridge_score_s"]
    ).copy()
    # Apply log1p transform for the colormap (expands the 0-10 min region)
    plot["score_capped"] = plot["carbon_bridge_score_s"].clip(upper=SCORE_CAP)
    plot["score_log"] = plot["score_capped"].apply(_score_to_log)

    traces = []

    # Use overall (unfiltered) data range for stable axes and iso-power lines
    base = df.dropna(subset=["energy_per_pulse_mJ", "frequency_kHz"])
    x_all = base["energy_per_pulse_mJ"].values
    y_all = base["frequency_kHz"].values
    x_min, x_max = float(x_all.min()), float(x_all.max())
    y_min, y_max = float(y_all.min()), float(y_all.max())

    _log_tick_vals = [float(np.log1p(t)) for t in _LOG_TICK_S]

    # --- Layer 1: Background heatmap (interpolated, log-scaled) ---
    heatmap = background_heatmap_trace(
        x_pts=plot["energy_per_pulse_mJ"].values,
        y_pts=plot["frequency_kHz"].values,
        z_pts=plot["score_log"].values,
        x_range=(x_min, x_max),
        y_range=(y_min, y_max),
        colorscale="RdYlGn",
        zmin=0,
        zmax=_LOG_CMAX,
    )
    if heatmap is not None:
        traces.append(heatmap)

    # --- Layer 2: Iso-power lines (P = E × f diagonals) ---
    power_line_traces, power_annotations = iso_power_traces(x_min, x_max, y_min, y_max)
    traces.extend(power_line_traces)

    # --- Layer 3: Scatter points ---
    def _fmt(val, unit="", fallback="—", decimals=2):
        """Format a possibly-missing numeric value with a unit for hover text."""
        if val is None or (isinstance(val, float) and np.isnan(val)):
            return fallback
        try:
            return f"{float(val):.{decimals}f} {unit}".strip()
        except (TypeError, ValueError):
            return f"{val} {unit}".strip()

    hover_texts = []
    for _, row in plot.iterrows():
        e = row["energy_per_pulse_mJ"]
        f = row["frequency_kHz"]
        power_str = f"{e * f:.1f} W" if (pd.notna(e) and pd.notna(f)) else "—"
        lines = [
            f"<b>Test:</b> {row['test']}",
            f"<b>Run:</b> {row['run'] if row['run'] is not None else '—'}",
            f"<b>Generator:</b> {row['generator']}",
            f"<b>Energy:</b> {_fmt(e, 'mJ')}",
            f"<b>Frequency:</b> {_fmt(f, 'kHz')}",
            f"<b>Power:</b> {power_str}",
            f"<b>Score:</b> {_fmt(row['carbon_bridge_score_s'], 's', decimals=0)}",
            f"<b>Raw value:</b> {row['carbon_bridge_raw'] or '—'}",
        ]
        if row["carbon_bridge_description"] and not pd.isna(
            row["carbon_bridge_description"]
        ):
            lines.append(f"<b>Description:</b> {row['carbon_bridge_description']}")
        if row["analysis_note"]:
            lines.append(f"<b>Note:</b> {row['analysis_note']}")
        hover_texts.append("<br>".join(lines))

    traces.append(
        go.Scatter(
            x=plot["energy_per_pulse_mJ"],
            y=plot["frequency_kHz"],
            mode="markers",
            marker=dict(
                size=12,
                color=plot["score_log"],  # log-transformed for perceptual spread
                colorscale="RdYlGn",
                cmin=0,
                cmax=_LOG_CMAX,
                colorbar=dict(
                    title="Time without<br>carbon bridge<br>(log scale)",
                    tickvals=_log_tick_vals,
                    ticktext=_LOG_TICK_LABELS,
                    thickness=18,
                ),
                line=dict(width=0.8, color="black"),
            ),
            text=hover_texts,
            hovertemplate="%{text}<extra></extra>",
        )
    )

    n = len(plot)
    title_suffix = f" ({n} point{'s' if n != 1 else ''})"
    # 10% axis padding; `or` supplies a fallback pad when the range collapses to 0.
    x_pad_ax = (x_max - x_min) * 0.10 or 1
    y_pad_ax = (y_max - y_min) * 0.10 or 0.5
    fig = go.Figure(traces)
    fig.update_layout(
        title=(
            "Carbon Bridge Susceptibility"
            + title_suffix
            + "<br><sup>Color = time without carbon bridge — red=bad, green=good"
            " (cap=60 min) | Diagonals = constant power (P = E × f)</sup>"
        ),
        xaxis=dict(
            title="Energy per pulse (mJ)",
            zeroline=False,
            range=[x_min - x_pad_ax, x_max + x_pad_ax],
        ),
        yaxis=dict(
            title="Frequency (kHz)",
            zeroline=False,
            range=[y_min - y_pad_ax, y_max + y_pad_ax],
        ),
        hoverlabel=dict(bgcolor="white", font_size=12),
        plot_bgcolor="white",
        width=None,
        height=600,
        margin=dict(l=60, r=20, t=90, b=60),
        annotations=power_annotations,
    )
    return fig

Standalone entry-point — static Plotly chart (no Dash)#

if __name__ == "__main__":
    _df = build_dataframe()
    # FIX: the DataFrame column is lowercase "test" (see build_dataframe's row
    # dicts) — _df["Test"] raised KeyError here.
    _all_tests = sorted(_df["test"].unique().tolist())
    _all_gens = sorted(_df["generator"].unique().tolist())
    print(f"\nOpening interactive Plotly chart ({len(_all_tests)} tests)…")
    build_figure(_df, _all_tests, _all_gens).show()