#!/usr/bin/env python3
"""
transform_csv.py — CPG In-Store Execution Platform data transformer

Reads product_pick_status CSV exports and writes data.json for the prototype.

INF (Item Not Found) definition:
  to_refund  → item not found, order refunded             INF event + lost ABI sale
  replaced   → item not found, replaced with non-ABI item INF event + lost ABI sale
  INF rate   = (to_refund + replaced) / total

All replaced events are treated as lost ABI sales: this CSV contains only ABI
pick records, so any substitute is by definition a non-ABI product.

Usage:
    cd "/Users/samanthahoy/Claude Cursor/In-store conditions"
    python3 transform_csv.py
    python3 -m http.server 8000
    # Open: http://localhost:8000/index.html?data=http://localhost:8000/data.json
"""

import csv
import json
import math
import os
import sys
from collections import defaultdict
from datetime import datetime, timedelta

# ─────────────────────────────────────────────────────────────────────────────
# Tunable constants
# ─────────────────────────────────────────────────────────────────────────────

# Input CSV exports, merged in listed order; files missing on disk are skipped
# with a warning at load time.
CSV_FILES = [
    'product_pick_status_2026_03_06_193723.csv',
    'product_pick_status_2026_03_06_194846.csv',
    'cpg_staging_f_ES_65526.csv',             # Jan–Mar 2026 with INITIAL_CHARGE_AMT / LOST_SALES
]
OUTPUT_FILE = 'data.json'

# Risk score weights — must sum to 1.0
WEIGHT_LOST_SALES = 0.40  # Est. lost sales (primary revenue impact signal — highest weight)
WEIGHT_INF_RATE   = 0.30  # Not found rate
WEIGHT_VOLUME     = 0.20  # Pick volume relative to max
WEIGHT_TREND      = 0.10  # Worsening trend adds risk (clamped at 0 in compute_risk — improvement does not subtract)

# Risk tier thresholds (composite score 0–100); scores below TIER_MEDIUM are 'low'
TIER_CRITICAL = 60   # Deploy field rep within 24h
TIER_HIGH     = 50   # Priority visit within 48h
TIER_MEDIUM   = 25   # Schedule this week

# Minimum total sales (sum of INITIAL_CHARGE_AMT across all picks) required to
# include a retailer/store/SKU. Filters out dirty data with negligible volume.
MIN_RETAILER_SALES = 1_000   # $1k total sales

# Recurrence: flag a store as chronic if INF rate > threshold on MIN_DAYS of last LOOKBACK_DAYS
RECURRENCE_INF_THRESHOLD = 5.0   # INF rate % per day
RECURRENCE_MIN_DAYS      = 3     # days above threshold required
RECURRENCE_LOOKBACK_DAYS = 14    # rolling window in days

# Trend comparison window in days
TREND_WINDOW_DAYS = 7

# Number of weeks shown in multi-series charts (byRetailer / bySKU / byCategory)
SERIES_WEEKS = 12

# Top N entities per multi-series chart
TOP_RETAILERS_CHART  = 5
TOP_SKUS_CHART       = 5
TOP_CATEGORIES_CHART = 5

# Chart series colors (cycles if more than 7 series)
SERIES_COLORS = ['#DE3534', '#F89F1A', '#108910', '#01A3AC', '#5D5FEF', '#7008A1', '#EC269A']

# Abbreviation → full state name, used for heat-map display labels.
US_STATES = {
    'AL': 'Alabama',        'AK': 'Alaska',         'AZ': 'Arizona',
    'AR': 'Arkansas',       'CA': 'California',     'CO': 'Colorado',
    'CT': 'Connecticut',    'DE': 'Delaware',        'DC': 'District of Columbia',
    'FL': 'Florida',        'GA': 'Georgia',         'HI': 'Hawaii',
    'ID': 'Idaho',          'IL': 'Illinois',        'IN': 'Indiana',
    'IA': 'Iowa',           'KS': 'Kansas',          'KY': 'Kentucky',
    'LA': 'Louisiana',      'ME': 'Maine',           'MD': 'Maryland',
    'MA': 'Massachusetts',  'MI': 'Michigan',        'MN': 'Minnesota',
    'MS': 'Mississippi',    'MO': 'Missouri',        'MT': 'Montana',
    'NE': 'Nebraska',       'NV': 'Nevada',          'NH': 'New Hampshire',
    'NJ': 'New Jersey',     'NM': 'New Mexico',      'NY': 'New York',
    'NC': 'North Carolina', 'ND': 'North Dakota',    'OH': 'Ohio',
    'OK': 'Oklahoma',       'OR': 'Oregon',          'PA': 'Pennsylvania',
    'RI': 'Rhode Island',   'SC': 'South Carolina',  'SD': 'South Dakota',
    'TN': 'Tennessee',      'TX': 'Texas',           'UT': 'Utah',
    'VT': 'Vermont',        'VA': 'Virginia',        'WA': 'Washington',
    'WV': 'West Virginia',  'WI': 'Wisconsin',       'WY': 'Wyoming',
}

# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────

def parse_dt(r):
    """Parse a record's local pick timestamp (first 19 chars) into a datetime."""
    raw = r['PICKED_DATE_TIME_LOCAL']
    return datetime.strptime(raw[:19], '%Y-%m-%d %H:%M:%S')

def is_inf(status):
    """True when *status* marks an Item Not Found event (refund or substitution)."""
    return status == 'to_refund' or status == 'replaced'

def inf_rate_pct(recs):
    """Percentage of records flagged INF; 0.0 for an empty list."""
    if not recs:
        return 0.0
    hits = [r for r in recs if r['_inf']]
    return len(hits) / len(recs) * 100

def found_rate_pct(recs):
    """Percentage of records NOT flagged INF; 0.0 for an empty list."""
    total = len(recs)
    if total == 0:
        return 0.0
    found = total - sum(1 for r in recs if r['_inf'])
    return found / total * 100

def risk_tier(score):
    """Map a composite 0–100 risk score onto its tier name ('critical'..'low')."""
    for threshold, tier in ((TIER_CRITICAL, 'critical'),
                            (TIER_HIGH, 'high'),
                            (TIER_MEDIUM, 'medium')):
        if score >= threshold:
            return tier
    return 'low'

def compute_risk(inf_r, max_inf_r, lost_sales, volume, max_volume, inf_last, inf_prior):
    """Composite 0–100 risk score: weighted blend of lost sales, INF rate,
    pick volume, and short-term trend (weights are the WEIGHT_* constants)."""
    # Lost sales: log scale anchored at $10k = 100 pts.
    # log10(10000) = 4.0  →  score = log10(max(1, ls)) / 4.0 * 100, capped at 100.
    # $100 → 50 pts   $1k → 75 pts   $10k and above → 100 pts (cap)
    ls_s    = min(100.0, math.log10(max(1.0, lost_sales)) / 4.0 * 100) if lost_sales > 0 else 0.0

    # INF rate confidence: dampen the signal for low-pick-count entities.
    # Saturating ramp v / (v + 20): ~33% confidence at 10 picks, 50% at 20 picks,
    # ~71% at 50 picks, ~91% at 200 picks.
    # Prevents a single INF out of 3 picks from dominating.
    confidence = volume / (volume + 20)
    inf_s   = (inf_r / max_inf_r * 100) * confidence if max_inf_r > 0 else 0.0

    vol_s   = (volume / max_volume * 100) if max_volume > 0 else 0.0
    # Clamped to [0, 100]: a worsening trend adds risk, but an improving trend
    # contributes 0 rather than reducing the score.
    trend_s = max(0.0, min(100.0, (inf_last - inf_prior) * 10))
    return round(
        ls_s    * WEIGHT_LOST_SALES +
        inf_s   * WEIGHT_INF_RATE   +
        vol_s   * WEIGHT_VOLUME     +
        trend_s * WEIGHT_TREND,
        1,
    )

def trend_dir_val(inf_last, inf_prior):
    """Return (direction, abs_delta). 'up' means worsening (INF rate increased);
    equal rates report 'down' with delta 0."""
    direction = 'up' if inf_last > inf_prior else 'down'
    return direction, round(abs(inf_last - inf_prior), 1)

def week_start(dt):
    """Return the Monday of dt's ISO week as a date."""
    monday = dt - timedelta(days=dt.weekday())
    return monday.date()

def fmt_week(d):
    """Format a week-start date as '6 Jan' (no leading zero on the day)."""
    text = d.strftime('%d %b')
    return text.lstrip('0')

def hour_label(h):
    """12-hour clock label for an hour 0–23 (0 → '12am', 13 → '1pm')."""
    suffix = 'am' if h < 12 else 'pm'
    base = h % 12
    return f'{base or 12}{suffix}'

def kpi_card(label, value, trend_str, trend_up):
    """Build one KPI card dict; trend_up selects the good/bad trend CSS class."""
    css = 'trend-up-good' if trend_up else 'trend-up-bad'
    return {'label': label, 'value': value, 'trend': trend_str, 'trendClass': css}

# Lost-sales helpers ──────────────────────────────────────────────────────────
# Formula: sum INITIAL_CHARGE_AMT for every INF event (to_refund or replaced).
# HAS_PRICE_DATA is set to True after CSV load if the column is present.
HAS_PRICE_DATA = False   # updated after records are loaded

def lost_sales_amt(recs):
    """Return total lost sales (float) for recs, or None if no price data.

    Sums INITIAL_CHARGE_AMT over INF records only; rows missing the column or
    with an empty value (old-schema CSVs) contribute 0 via the `or 0` fallback.
    """
    if not HAS_PRICE_DATA:
        return None
    return sum(float(r.get('INITIAL_CHARGE_AMT') or 0) for r in recs if r['_inf'])

def total_sales_amt(recs):
    """Return total sales (all picks, not just INF) — used for dirty-data filtering."""
    amounts = (float(r.get('INITIAL_CHARGE_AMT') or 0) for r in recs)
    return sum(amounts)

def fmt_lost_sales(val):
    """Format a lost-sales amount for display ('$1.5K', '$2.1M'), or 'N/A' for None."""
    if val is None:
        return 'N/A'
    for cutoff, suffix in ((1_000_000, 'M'), (1_000, 'K')):
        if val >= cutoff:
            return f'${val / cutoff:.1f}{suffix}'
    return f'${val:,.2f}'

def compute_perf_window(recs, window_days):
    """Compute windowed performance data for a filtered records subset.
    Used to populate performanceByWindow in each filterIndex slice so the client
    can apply the correct date-range performance when an entity filter is active.

    Windows are anchored to the module-level max_date (most recent date in the
    data): the current window covers the last *window_days* days, the prior
    window the *window_days* days immediately before it.  Relies on the
    pre-computed per-record fields _date / _week / _inf."""
    win_end   = max_date
    win_start = win_end - timedelta(days=window_days - 1)
    pri_end   = win_start - timedelta(days=1)
    pri_start = pri_end - timedelta(days=window_days - 1)
    recs_win = [r for r in recs if win_start <= r['_date'] <= win_end]
    recs_pri = [r for r in recs if pri_start <= r['_date'] <= pri_end]
    # No picks in the current window → empty structures so the client renders a blank state.
    if not recs_win:
        return {'foundRateTrend': [], 'categoryPerformance': [], 'infBreakdown': {}, 'dowFoundRate': []}
    # foundRateTrend — weekly buckets within the window
    by_week_w = defaultdict(list)
    for r in recs_win:
        by_week_w[r['_week']].append(r)
    found_rate_trend_w = [
        {'week': fmt_week(wb), 'label': fmt_week(wb), 'rate': round(found_rate_pct(by_week_w[wb]), 1)}
        for wb in sorted(by_week_w.keys())
    ]
    # categoryPerformance — found rate per category plus change vs the prior window
    by_cat_w = defaultdict(list)
    by_cat_w_pri = defaultdict(list)
    for r in recs_win: by_cat_w[r['CATEGORY']].append(r)
    for r in recs_pri: by_cat_w_pri[r['CATEGORY']].append(r)
    cat_perf_w = sorted([
        {'name': cat, 'rate': round(found_rate_pct(cr), 1),
         'change': round(found_rate_pct(cr) - found_rate_pct(by_cat_w_pri.get(cat, [])), 1)}
        for cat, cr in by_cat_w.items()
    ], key=lambda x: -x['rate'])
    # infBreakdown — split of INF events between refunds and substitutions
    to_refund_w = sum(1 for r in recs_win if r['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund')
    replaced_w  = sum(1 for r in recs_win if r['SYSTEM_ORDER_ITEM_STATUS'] == 'replaced')
    inf_tot_w   = to_refund_w + replaced_w
    inf_breakdown_w = {
        'toRefund':    to_refund_w,
        'replaced':    replaced_w,
        'total':       inf_tot_w,
        'toRefundPct': round(to_refund_w / inf_tot_w * 100, 1) if inf_tot_w else 0,
        'replacedPct': round(replaced_w  / inf_tot_w * 100, 1) if inf_tot_w else 0,
    }
    # dowFoundRate — found rate by weekday; by_dow_w is a defaultdict, so
    # weekdays with no picks report 0.0 (found_rate_pct of an empty list).
    _DAY_NAMES = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    by_dow_w = defaultdict(list)
    for r in recs_win:
        by_dow_w[r['_date'].weekday()].append(r)
    dow_found_rate_w = [
        {'day': _DAY_NAMES[d], 'rate': round(found_rate_pct(by_dow_w[d]), 1)}
        for d in range(7)
    ]
    return {
        'foundRateTrend':      found_rate_trend_w,
        'categoryPerformance': cat_perf_w,
        'infBreakdown':        inf_breakdown_w,
        'dowFoundRate':        dow_found_rate_w,
    }


def compute_slice(recs):
    """
    Compute chart/KPI data slices for a subset of records.
    Used to build filterIndex — pre-computed slices for each filter dimension
    so the client can filter charts without sending raw records.

    References module-level variables: max_date, window_start/end, prior_start/end,
    lyear_start/end, week_buckets, HAS_PRICE_DATA, in_window,
    TREND_WINDOW_DAYS, US_STATES.  All are assigned before this function is called.
    """
    # Empty slice → fully-shaped empty payload so the client renders a blank state.
    if not recs:
        return {
            'kpis':             {'mvp': [], 'ltv': []},
            'nilTrend':         {'byDay': [], 'byHour': []},
            'stateRisks':       [],
            'retailerRisk':     {'categories': [], 'riskValues': [], 'detected': [], 'confirmed': []},
            'performance':      {'kpis': [], 'foundRateTrend': [], 'categoryPerformance': [], 'retailerBenchmark': [], 'substitutionData': [], 'infBreakdown': {'toRefund': 0, 'replaced': 0, 'total': 0, 'toRefundPct': 0, 'replacedPct': 0}, 'dowFoundRate': []},
            'performanceByWindow': {'7d': {}, '14d': {}, '30d': {}, '90d': {}},
        }

    total_s      = len(recs)
    inf_s        = sum(1 for r in recs if r['_inf'])
    slice_inf_r  = inf_s / total_s * 100
    slice_fr     = (total_s - inf_s) / total_s * 100
    all_stores_s = {r['STORE_NUMBER'] for r in recs}
    imp_stores_s = {r['STORE_NUMBER'] for r in recs if r['_inf']}
    stores_pct_s = len(imp_stores_s) / len(all_stores_s) * 100 if all_stores_s else 0

    # Trend-window slices: last vs prior TREND_WINDOW_DAYS, plus same window a year ago.
    sl_last  = [r for r in recs if in_window(r, window_start, window_end)]
    sl_prior = [r for r in recs if in_window(r, prior_start, prior_end)]
    sl_lyear = [r for r in recs if in_window(r, lyear_start, lyear_end)]
    ir_l = inf_rate_pct(sl_last)
    ir_p = inf_rate_pct(sl_prior)
    ir_d = round(ir_l - ir_p, 1)
    fr_l = found_rate_pct(sl_last)
    fr_p = found_rate_pct(sl_prior)
    wow  = round(fr_l - fr_p, 1)
    fr_ly = found_rate_pct(sl_lyear)
    yoy_s = round(fr_l - fr_ly, 1)

    kpis_s = [
        kpi_card('Not found rate', f'{slice_inf_r:.1f}%',
                 f'{"↑" if ir_d > 0 else "↓"} {abs(ir_d):.1f}% vs prior {TREND_WINDOW_DAYS}d', ir_d < 0),
        kpi_card('Found rate', f'{slice_fr:.1f}%',
                 f'{"↑" if wow > 0 else "↓"} {abs(wow):.1f}% vs prior {TREND_WINDOW_DAYS}d', wow >= 0),
        kpi_card('Stores impacted', f'{len(imp_stores_s):,}',
                 f'{stores_pct_s:.0f}% of {len(all_stores_s)} stores', False),
        kpi_card(
            'Est. lost sales',
            fmt_lost_sales(lost_sales_amt(recs)),
            f'{inf_s:,} INF events' if HAS_PRICE_DATA else 'Price feed required',
            False,
        ),
    ]

    # byDay — 90 days so client can slice for any date-range option (7/14/30/90)
    by_date_s = defaultdict(list)
    for r in recs:
        by_date_s[r['_date']].append(r)
    nil_by_day_s = []
    for i in range(89, -1, -1):
        d = max_date - timedelta(days=i)
        day_recs = by_date_s.get(d, [])
        nil_by_day_s.append({'label': d.strftime('%d %b').lstrip('0'), 'rate': round(inf_rate_pct(day_recs), 2)})

    # byHour — INF rate per hour of day across the whole slice
    by_hour_s = defaultdict(list)
    for r in recs:
        by_hour_s[r['_hour']].append(r)
    nil_by_hour_s = [{'label': hour_label(h), 'rate': round(inf_rate_pct(by_hour_s[h]), 2)} for h in range(24)]

    # State heat-map — one entry per state, highest INF rate first
    by_state_s = defaultdict(list)
    for r in recs:
        by_state_s[r['STATE']].append(r)
    state_risks_s = sorted(
        [{'abbr': a, 'name': US_STATES.get(a, a), 'risk': round(inf_rate_pct(sr), 1)}
         for a, sr in by_state_s.items()],
        key=lambda x: -x['risk'],
    )

    by_ret_s = defaultdict(list)
    for r in recs:
        by_ret_s[r['WAREHOUSE_NAME']].append(r)
    # Compute composite risk score per retailer (same formula as priority table)
    by_ret_s_prior = defaultdict(list)
    for r in sl_prior:
        by_ret_s_prior[r['WAREHOUSE_NAME']].append(r)
    max_ir_s  = max((inf_rate_pct(v) for v in by_ret_s.values()), default=1) or 1
    max_vol_s = max((len(v) for v in by_ret_s.values()), default=1) or 1
    # NOTE(review): max_ls_s is never read below — appears to be a dead local;
    # confirm before removing.
    max_ls_s  = max((lost_sales_amt(v) or 0 for v in by_ret_s.values()), default=0) or 1
    ret_rows_s = []
    for ret, rr in by_ret_s.items():
        ir_ret    = inf_rate_pct(rr)
        if ir_ret >= 100.0:
            continue  # data artifact — 100% NFR
        if HAS_PRICE_DATA and total_sales_amt(rr) < MIN_RETAILER_SALES:
            continue  # dirty data — insufficient sales volume
        ir_ret_p  = inf_rate_pct(by_ret_s_prior[ret])
        ls_ret    = lost_sales_amt(rr) or 0
        # NOTE(review): passes the slice-wide rate (ir_ret) as inf_last, while the
        # module-level table passes the last-window rate — confirm this is intended.
        score_ret = compute_risk(ir_ret, max_ir_s, ls_ret, len(rr), max_vol_s, ir_ret, ir_ret_p)
        ret_rows_s.append({
            'retailer': ret,
            'score':    score_ret,
            'detected': sum(1 for r in rr if r['_inf']),
            'confirmed': sum(1 for r in rr if r['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund'),
        })
    ret_rows_s.sort(key=lambda x: -x['score'])
    retailer_risk_s = {
        'categories':    [x['retailer']       for x in ret_rows_s],
        'riskValues':    [x['score']          for x in ret_rows_s],
        'notFoundRates': [round(inf_rate_pct(by_ret_s[x['retailer']]), 1) for x in ret_rows_s],
        'storesAffected':[len({r['STORE_NUMBER'] for r in by_ret_s[x['retailer']] if r['_inf']}) for x in ret_rows_s],
        'detected':      [x['detected']       for x in ret_rows_s],
        'confirmed':     [x['confirmed']      for x in ret_rows_s],
    }

    # Performance section for this slice — weekly found-rate trend over week_buckets
    by_week_perf_s = defaultdict(list)
    for r in recs:
        by_week_perf_s[r['_week']].append(r)
    perf_trend_s = [
        {'week': fmt_week(wb), 'rate': round(found_rate_pct(by_week_perf_s.get(wb, [])), 1)}
        for wb in week_buckets
    ]

    # Category found rates with last-vs-prior window change
    by_cat_perf_s      = defaultdict(list)
    by_cat_perf_last_s = defaultdict(list)
    by_cat_perf_pri_s  = defaultdict(list)
    for r in recs:
        cat = r['CATEGORY']
        by_cat_perf_s[cat].append(r)
        if in_window(r, window_start, window_end): by_cat_perf_last_s[cat].append(r)
        if in_window(r, prior_start, prior_end):   by_cat_perf_pri_s[cat].append(r)
    perf_cat_s = sorted([
        {
            'name':   cat,
            'rate':   round(found_rate_pct(by_cat_perf_s[cat]), 1),
            'change': round(found_rate_pct(by_cat_perf_last_s[cat]) - found_rate_pct(by_cat_perf_pri_s[cat]), 1),
        }
        for cat in by_cat_perf_s
    ], key=lambda x: -x['rate'])

    # Retailer benchmark — found rate per retailer; avgIssueHrs not in the data
    perf_bench_s = sorted([
        {'retailer': ret, 'foundRate': round(found_rate_pct(rr), 1), 'avgIssueHrs': 0}
        for ret, rr in by_ret_s.items()
    ], key=lambda x: -x['foundRate'])

    fr_s = round(found_rate_pct(recs), 1)
    perf_kpis_s = [
        {'label': 'Found Rate',          'value': f'{fr_s:.1f}%',   'trend': f'{wow:+.1f}% WoW',                          'trendUp': wow >= 0},
        {'label': 'IC total found rate', 'value': 'N/A',            'trend': 'Platform benchmark not in data',             'trendUp': True},
        {'label': 'Week over Week',      'value': f'{wow:+.1f}%',   'trend': f'vs prior {TREND_WINDOW_DAYS} days',         'trendUp': wow >= 0},
        {'label': 'vs. Last Year',       'value': f'{yoy_s:+.1f}%', 'trend': f'same {TREND_WINDOW_DAYS}-day window, prior year', 'trendUp': yoy_s >= 0},
    ]

    # INF status breakdown (out-of-stock vs substituted)
    to_refund_s = sum(1 for r in recs if r['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund')
    replaced_s  = sum(1 for r in recs if r['SYSTEM_ORDER_ITEM_STATUS'] == 'replaced')
    inf_tot_s   = to_refund_s + replaced_s
    inf_breakdown_s = {
        'toRefund':    to_refund_s,
        'replaced':    replaced_s,
        'total':       inf_tot_s,
        'toRefundPct': round(to_refund_s / inf_tot_s * 100, 1) if inf_tot_s else 0,
        'replacedPct': round(replaced_s  / inf_tot_s * 100, 1) if inf_tot_s else 0,
    }

    # Day-of-week found rate (0=Mon … 6=Sun)
    _DAY_NAMES = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    by_dow_s = defaultdict(list)
    for r in recs:
        by_dow_s[r['_date'].weekday()].append(r)
    dow_found_rate_s = [
        {'day': _DAY_NAMES[d], 'rate': round(found_rate_pct(by_dow_s[d]), 1)}
        for d in range(7)
    ]

    return {
        'kpis':         {'mvp': kpis_s, 'ltv': []},
        'nilTrend':     {'byDay': nil_by_day_s, 'byHour': nil_by_hour_s},
        'stateRisks':   state_risks_s,
        'retailerRisk': retailer_risk_s,
        'performance':  {
            'kpis':                perf_kpis_s,
            'foundRateTrend':      perf_trend_s,
            'categoryPerformance': perf_cat_s,
            'retailerBenchmark':   perf_bench_s,
            'substitutionData':    [],
            'infBreakdown':        inf_breakdown_s,
            'dowFoundRate':        dow_found_rate_s,
        },
        'performanceByWindow': {f'{w}d': compute_perf_window(recs, w) for w in [7, 14, 30, 90]},
    }

# ─────────────────────────────────────────────────────────────────────────────
# Load & merge CSVs
# ─────────────────────────────────────────────────────────────────────────────

print('Loading CSV files...')
# Raw merged rows from every input CSV; values stay as strings until parsed below.
records = []
for fname in CSV_FILES:
    if not os.path.exists(fname):
        print(f'  WARNING: {fname} not found, skipping', file=sys.stderr)
        continue
    with open(fname, newline='', encoding='utf-8') as f:
        batch = []
        skipped = 0
        for row in csv.DictReader(f):
            # New-schema files use COUNTRY_ID (840=US, 124=Canada).
            # Filter to US only; old-schema files have no COUNTRY_ID so pass through.
            if 'COUNTRY_ID' in row and row['COUNTRY_ID'] != '840':
                skipped += 1
                continue
            batch.append(row)
        records.extend(batch)
        suffix = f' ({skipped:,} non-US rows skipped)' if skipped else ''
        print(f'  {fname}: {len(batch):,} rows{suffix}')

# Nothing loaded at all (all files missing/empty) — abort rather than emit an empty data.json.
if not records:
    print('ERROR: No records loaded. Check that CSV files are in the current directory.', file=sys.stderr)
    sys.exit(1)

print(f'Total: {len(records):,} records')

# Check if INITIAL_CHARGE_AMT is present in any record (new schema added by ES-65526).
# Records from old CSVs that lack the column contribute 0 via r.get(...) or 0.
HAS_PRICE_DATA = any('INITIAL_CHARGE_AMT' in r for r in records)
if HAS_PRICE_DATA:
    print('  ✓ INITIAL_CHARGE_AMT found — est. lost sales will be calculated')
else:
    print('  ⚠  INITIAL_CHARGE_AMT not in CSV — est. lost sales will show N/A')

# ─────────────────────────────────────────────────────────────────────────────
# Parse timestamps and pre-compute per-row flags
# ─────────────────────────────────────────────────────────────────────────────

print('Parsing timestamps...')
# Pre-compute derived fields on every record so downstream grouping is cheap:
# _dt (datetime), _date (date), _week (Monday of ISO week), _hour, _inf flag.
for r in records:
    dt = parse_dt(r)
    r['_dt']   = dt
    r['_date'] = dt.date()
    r['_week'] = week_start(dt)
    r['_hour'] = dt.hour
    r['_inf']  = is_inf(r['SYSTEM_ORDER_ITEM_STATUS'])

max_date = max(r['_date'] for r in records)
min_date = min(r['_date'] for r in records)
print(f'Date range: {min_date} → {max_date}')

# Reference windows — all relative to most recent date in the data.
# window = last TREND_WINDOW_DAYS days; prior = the TREND_WINDOW_DAYS days
# before that; lyear = the same window shifted back exactly 365 days.
window_end   = max_date
window_start = window_end   - timedelta(days=TREND_WINDOW_DAYS - 1)
prior_end    = window_start - timedelta(days=1)
prior_start  = prior_end    - timedelta(days=TREND_WINDOW_DAYS - 1)
lyear_end    = window_end   - timedelta(days=365)
lyear_start  = window_start - timedelta(days=365)

def in_window(r, start, end):
    """True when the record's _date falls inside [start, end], inclusive on both ends."""
    d = r['_date']
    return start <= d and d <= end

# Window slices over the full dataset, reused by the global KPIs below.
recs_last  = [r for r in records if in_window(r, window_start, window_end)]
recs_prior = [r for r in records if in_window(r, prior_start, prior_end)]
recs_lyear = [r for r in records if in_window(r, lyear_start, lyear_end)]

# ─────────────────────────────────────────────────────────────────────────────
# Global KPIs
# ─────────────────────────────────────────────────────────────────────────────

total        = len(records)
inf_count    = sum(1 for r in records if r['_inf'])
global_inf_r = inf_count / total * 100
global_fr    = (total - inf_count) / total * 100

all_stores      = set(r['STORE_NUMBER'] for r in records)
impacted_stores = set(r['STORE_NUMBER'] for r in records if r['_inf'])
stores_pct      = len(impacted_stores) / len(all_stores) * 100 if all_stores else 0

# INF-rate delta: last trend window vs the prior one (negative = improving).
ir_last  = inf_rate_pct(recs_last)
ir_prior = inf_rate_pct(recs_prior)
ir_delta = round(ir_last - ir_prior, 1)

# Found-rate week-over-window delta (positive = improving).
fr_last  = found_rate_pct(recs_last)
fr_prior = found_rate_pct(recs_prior)
wow_delta = round(fr_last - fr_prior, 1)

kpis_mvp = [
    kpi_card(
        'Not found rate',
        f'{global_inf_r:.1f}%',
        f'{"↑" if ir_delta > 0 else "↓"} {abs(ir_delta):.1f}% vs prior {TREND_WINDOW_DAYS}d',
        ir_delta < 0,  # improving = INF rate going down
    ),
    kpi_card(
        'Found rate',
        f'{global_fr:.1f}%',
        f'{"↑" if wow_delta > 0 else "↓"} {abs(wow_delta):.1f}% vs prior {TREND_WINDOW_DAYS}d',
        wow_delta >= 0,
    ),
    kpi_card(
        'Stores impacted',
        f'{len(impacted_stores):,}',
        f'{stores_pct:.0f}% of {len(all_stores)} stores',
        False,
    ),
    kpi_card(
        'Est. lost sales',
        fmt_lost_sales(lost_sales_amt(records)),
        f'{inf_count:,} INF events' if HAS_PRICE_DATA else 'Price feed required',
        False,
    ),
]

# ─────────────────────────────────────────────────────────────────────────────
# NIL trend — by day, by hour, multi-series by retailer/SKU/category
# ─────────────────────────────────────────────────────────────────────────────

# byDay: last 90 days grouped by calendar date (range(89, …) below); the client
# slices this series for the 7/14/30/90-day date-range options.
by_date_recs = defaultdict(list)
for r in records:
    by_date_recs[r['_date']].append(r)

nil_by_day = []
for i in range(89, -1, -1):
    d = max_date - timedelta(days=i)
    day_recs = by_date_recs.get(d, [])
    nil_by_day.append({
        'label': d.strftime('%d %b').lstrip('0'),  # e.g. '6 Jan'
        'rate':  round(inf_rate_pct(day_recs), 2),
    })

# byHour: INF rate per hour of day, averaged across all data
by_hour_recs = defaultdict(list)
for r in records:
    by_hour_recs[r['_hour']].append(r)

nil_by_hour = [
    {'label': hour_label(h), 'rate': round(inf_rate_pct(by_hour_recs[h]), 2)}
    for h in range(24)
]

# Week buckets for multi-series charts: last SERIES_WEEKS complete weeks
latest_week  = week_start(datetime.combine(max_date, datetime.min.time()))
week_buckets = [latest_week - timedelta(weeks=i) for i in range(SERIES_WEEKS - 1, -1, -1)]

def build_series(entity_col, top_n):
    """Build INF rate time series per top-N entities, aligned to week_buckets.

    Entities are ranked by total pick volume across all records; each series
    gets one rate per week bucket and a color cycled from SERIES_COLORS.
    """
    # Rank entities by total pick volume, descending.
    volume_by_entity = defaultdict(int)
    for rec in records:
        volume_by_entity[rec[entity_col]] += 1
    ranked = sorted(volume_by_entity, key=lambda k: -volume_by_entity[k])[:top_n]

    # Bucket records by (entity, week-start date).
    weekly = defaultdict(lambda: defaultdict(list))
    for rec in records:
        weekly[rec[entity_col]][rec['_week']].append(rec)

    series = []
    for idx, entity in enumerate(ranked):
        entity_weeks = weekly[entity]
        rates = [round(inf_rate_pct(entity_weeks.get(wb, [])), 2) for wb in week_buckets]
        series.append({
            'name':  entity,
            'data':  rates,
            'color': SERIES_COLORS[idx % len(SERIES_COLORS)],
        })
    return series

nil_by_retailer = build_series('WAREHOUSE_NAME',  TOP_RETAILERS_CHART)
nil_by_sku      = build_series('UPC_DESCRIPTION', TOP_SKUS_CHART)
nil_by_category = build_series('CATEGORY',        TOP_CATEGORIES_CHART)

# ─────────────────────────────────────────────────────────────────────────────
# State risks (heat map)
# ─────────────────────────────────────────────────────────────────────────────

by_state_recs = defaultdict(list)
for r in records:
    by_state_recs[r['STATE']].append(r)

# One entry per state, highest INF rate first; unknown abbreviations fall back
# to the abbreviation itself as the display name.
state_risks = sorted(
    [
        {
            'abbr': abbr,
            'name': US_STATES.get(abbr, abbr),
            'risk': round(inf_rate_pct(recs), 1),
        }
        for abbr, recs in by_state_recs.items()
    ],
    key=lambda x: -x['risk'],
)

# ─────────────────────────────────────────────────────────────────────────────
# Retailer risk distribution (bar chart)
# ─────────────────────────────────────────────────────────────────────────────

by_retailer_recs = defaultdict(list)
for r in records:
    by_retailer_recs[r['WAREHOUSE_NAME']].append(r)

# ─────────────────────────────────────────────────────────────────────────────
# Priority table — Retailer view
# ─────────────────────────────────────────────────────────────────────────────

# Trend-window slices per retailer (last vs prior TREND_WINDOW_DAYS).
retailer_last_recs  = defaultdict(list)
retailer_prior_recs = defaultdict(list)
for r in records:
    ret = r['WAREHOUSE_NAME']
    if in_window(r, window_start, window_end): retailer_last_recs[ret].append(r)
    if in_window(r, prior_start, prior_end):   retailer_prior_recs[ret].append(r)

# Per-retailer aggregates consumed by the risk score and table columns below.
retailer_stats = {}
for ret, recs in by_retailer_recs.items():
    all_skus = {r['UPC'] for r in recs}
    inf_skus = {r['UPC'] for r in recs if r['_inf']}
    inf_r       = inf_rate_pct(recs)
    inf_last_r  = inf_rate_pct(retailer_last_recs[ret])
    inf_prior_r = inf_rate_pct(retailer_prior_recs[ret])
    retailer_stats[ret] = {
        'inf_r':       inf_r,
        'lost_sales':  lost_sales_amt(recs) or 0,
        'total_sales': total_sales_amt(recs),
        'volume':      len(recs),
        'inf_last':    inf_last_r,
        'inf_prior':   inf_prior_r,
        'all_stores':  {r['STORE_NUMBER'] for r in recs},
        'inf_stores':  {r['STORE_NUMBER'] for r in recs if r['_inf']},
        'all_skus':    all_skus,
        'inf_skus':    inf_skus,
    }

# Normalization maxima for the composite risk score.
max_inf_retailer = max((s['inf_r']      for s in retailer_stats.values()), default=1)
max_vol_retailer = max((s['volume']     for s in retailer_stats.values()), default=1)
# NOTE(review): max_ls_retailer is not referenced in this section — possibly
# consumed further down the file; confirm before removing.
max_ls_retailer  = max((s['lost_sales'] for s in retailer_stats.values()), default=0) or 1

table_data = []
for ret, s in retailer_stats.items():
    if s['inf_r'] >= 100.0:
        continue  # data artifact — 100% NFR
    if HAS_PRICE_DATA and s['total_sales'] < MIN_RETAILER_SALES:
        continue  # dirty data — insufficient sales volume
    score = compute_risk(
        s['inf_r'], max_inf_retailer,
        s['lost_sales'],
        s['volume'], max_vol_retailer,
        s['inf_last'], s['inf_prior'],
    )
    t_dir, t_val = trend_dir_val(s['inf_last'], s['inf_prior'])
    table_data.append({
        'retailer':       ret,
        'storesCount':    len(s['all_stores']),
        'estLostSales':   lost_sales_amt(by_retailer_recs[ret]) or 0,
        'notFound':       round(s['inf_r'], 1),
        'compliance':     round(100 - s['inf_r'], 1),
        'storesAffected': len(s['inf_stores']),
        'skusAffected':   len(s['inf_skus']),
        'trendDir':       t_dir,
        'trendVal':       t_val,
        'riskScore':      score,
        'riskTier':       risk_tier(score),
    })

# Highest composite risk first; rank is 1-based for display.
table_data.sort(key=lambda x: -x['riskScore'])
for i, row in enumerate(table_data):
    row['rank'] = i + 1

# retailer_risk — built from table_data so chart uses same composite riskScore as the priority table
# (est. lost sales 40% + INF rate 30% + volume 20% + trend 10%), sorted highest-risk first
retailer_risk = {
    'categories':    [r['retailer']              for r in table_data],
    'riskValues':    [r['riskScore']             for r in table_data],
    'notFoundRates': [r['notFound']              for r in table_data],
    'storesAffected':[r['storesAffected']        for r in table_data],
    'detected':      [sum(1 for rec in by_retailer_recs[r['retailer']] if rec['_inf']) for r in table_data],
    'confirmed':     [
        sum(1 for rec in by_retailer_recs[r['retailer']] if rec['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund')
        for r in table_data
    ],
}

# ─────────────────────────────────────────────────────────────────────────────
# Priority table — Store view (field agent dispatch list)
# ─────────────────────────────────────────────────────────────────────────────

by_store_recs = defaultdict(list)
for r in records:
    by_store_recs[r['STORE_NUMBER']].append(r)

# Trend-window slices per store (mirrors the retailer view above).
store_last_recs  = defaultdict(list)
store_prior_recs = defaultdict(list)
for r in records:
    sn = r['STORE_NUMBER']
    if in_window(r, window_start, window_end): store_last_recs[sn].append(r)
    if in_window(r, prior_start, prior_end):   store_prior_recs[sn].append(r)

# Recurrence: per store, count days in lookback window where daily INF rate > threshold
lookback_start = max_date - timedelta(days=RECURRENCE_LOOKBACK_DAYS - 1)
store_day_recs = defaultdict(lambda: defaultdict(list))
for r in records:
    if r['_date'] >= lookback_start:
        store_day_recs[r['STORE_NUMBER']][r['_date']].append(r)

def recurrence(store_num):
    """Return (is_recurrent, chronic_days) for a store.

    chronic_days = number of days in the lookback window whose daily INF rate
    exceeded RECURRENCE_INF_THRESHOLD; the store counts as recurrent once at
    least RECURRENCE_MIN_DAYS such days exist.
    """
    chronic_days = 0
    for day_recs in store_day_recs[store_num].values():
        if inf_rate_pct(day_recs) > RECURRENCE_INF_THRESHOLD:
            chronic_days += 1
    return chronic_days >= RECURRENCE_MIN_DAYS, chronic_days

# Cache store metadata (retailer + address) from first occurrence
store_meta = {}
for r in records:
    sn = r['STORE_NUMBER']
    if sn not in store_meta:
        store_meta[sn] = {
            'retailer': r['WAREHOUSE_NAME'],
            'address':  f"{r['STREET']}, {r['CITY']}, {r['STATE']}",
        }

# Normalization maxima for the store-level composite risk score.
# `or 1` guards the degenerate all-zero case (e.g. no INF events at all) so
# compute_risk never divides by zero — consistent with the window-scoped
# maxima used inside compute_window_snapshot.
max_inf_store = max((inf_rate_pct(recs) for recs in by_store_recs.values()), default=1) or 1
max_vol_store = max((len(recs) for recs in by_store_recs.values()), default=1) or 1
max_ls_store  = max((lost_sales_amt(recs) or 0 for recs in by_store_recs.values()), default=0) or 1

# One priority-table row per store; 100%-NFR stores are dropped as data
# artifacts (every pick marked not-found is not a believable signal).
store_data = []
for sn, recs in by_store_recs.items():
    inf_skus   = {r['UPC'] for r in recs if r['_inf']}
    inf_r      = inf_rate_pct(recs)
    if inf_r >= 100.0:
        continue  # data artifact — 100% NFR
    inf_last_r  = inf_rate_pct(store_last_recs[sn])
    inf_prior_r = inf_rate_pct(store_prior_recs[sn])
    is_recurrent, chronic_days = recurrence(sn)
    ls_store    = lost_sales_amt(recs) or 0

    score = compute_risk(
        inf_r, max_inf_store,
        ls_store,
        len(recs), max_vol_store,
        inf_last_r, inf_prior_r,
    )
    t_dir, t_val = trend_dir_val(inf_last_r, inf_prior_r)
    meta = store_meta[sn]

    _tier = risk_tier(score)
    # Top SKUs by INF rate for this store — every SKU with at least one INF
    # event is included, so the list length matches the skusAffected count.
    by_upc_st = defaultdict(list)
    for r in recs:
        by_upc_st[r['UPC']].append(r)
    top_skus_st = sorted(
        # grouping values are never empty, so ur[0] is always safe
        [{'upc': upc, 'name': ur[0].get('UPC_DESCRIPTION', upc), 'infRate': round(inf_rate_pct(ur), 1)}
         for upc, ur in by_upc_st.items() if any(r['_inf'] for r in ur)],
        key=lambda x: -x['infRate'],
    )
    # INF status breakdown — shares are of ALL picks for the store, not of INF events
    to_refund_s = sum(1 for r in recs if r['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund')
    replaced_s  = sum(1 for r in recs if r['SYSTEM_ORDER_ITEM_STATUS'] == 'replaced')
    total_s     = len(recs)
    store_data.append({
        'storeNumber':       sn,
        'storeName':         f"{meta['retailer']} #{sn}",
        'retailer':          meta['retailer'],
        'address':           meta['address'],
        'riskLevel':         _tier.capitalize(),   # 'Critical' / 'High' / 'Medium' / 'Low'
        'riskTier':          _tier,
        'riskScore':         score,
        'estLostSales':      ls_store,             # reuse — already computed above
        'notFoundRate':      round(inf_r, 1),      # alias expected by table
        'notFound':          round(inf_r, 1),
        'pricingCompliance': round(100 - inf_r, 1),
        'compliance':        round(100 - inf_r, 1),
        'skusAffected':      len(inf_skus),
        'trendDir':          t_dir,
        'trendVal':          t_val,
        'recurrent':         is_recurrent,
        'chronicDays':       chronic_days,
        'topSkus':           top_skus_st,
        'infBreakdown': {
            'toRefund':    to_refund_s,
            'replaced':    replaced_s,
            'total':       total_s,
            'toRefundPct': round(to_refund_s / total_s * 100, 1) if total_s else 0,
            'replacedPct': round(replaced_s  / total_s * 100, 1) if total_s else 0,
        },
    })

store_data.sort(key=lambda x: -x['riskScore'])
for i, row in enumerate(store_data):
    row['rank'] = i + 1

# ─────────────────────────────────────────────────────────────────────────────
# Store index — lightweight per-store trend/performance data for client filter
# ─────────────────────────────────────────────────────────────────────────────
_ST_DAY_NAMES = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
# Lightweight per-store payload keyed by store number (as a string, for JSON):
# 30-day daily INF trend, day-of-week found rate, 12-week trend, and KPI cards.
store_index = {}
for sn, recs in by_store_recs.items():
    # Skip 100%-NFR stores — same data-artifact filter as the priority tables.
    if inf_rate_pct(recs) >= 100.0:
        continue
    # 30-day daily INF trend, oldest → newest; days with no picks get rate=None
    by_date_st = defaultdict(list)
    for r in recs:
        by_date_st[r['_date']].append(r)
    nil_by_day_st = []
    for i in range(29, -1, -1):
        d = max_date - timedelta(days=i)
        day_recs = by_date_st.get(d, [])
        nil_by_day_st.append({
            # NOTE(review): '%-d' (unpadded day) is a glibc/BSD strftime
            # extension — fails on Windows; confirm deployment targets.
            'label': d.strftime('%-d %b'),
            'rate':  round(inf_rate_pct(day_recs), 2) if day_recs else None,
        })
    # Day-of-week found rate (Mon..Sun); 0 when the store has no picks that day
    by_dow_st = defaultdict(list)
    for r in recs:
        by_dow_st[r['_date'].weekday()].append(r)
    dow_st = [{'day': _ST_DAY_NAMES[d], 'rate': round(found_rate_pct(by_dow_st[d]), 1) if by_dow_st[d] else 0} for d in range(7)]
    # 12-week found rate trend (most recent 12 weeks with data)
    by_week_st = defaultdict(list)
    for r in recs:
        by_week_st[r['_week']].append(r)
    weeks_st = sorted(by_week_st.keys())[-12:]
    trend_st = [{'week': fmt_week(w), 'rate': round(found_rate_pct(by_week_st[w]), 1)} for w in weeks_st]
    # Performance KPIs: overall found / not-found rate plus WoW delta
    total_st  = len(recs)
    inf_st    = sum(1 for r in recs if r['_inf'])
    fr_st     = round((total_st - inf_st) / total_st * 100, 1) if total_st else 0
    nfr_st    = round(inf_st / total_st * 100, 1) if total_st else 0
    sl_last_st  = [r for r in recs if in_window(r, window_start, window_end)]
    sl_prior_st = [r for r in recs if in_window(r, prior_start, prior_end)]
    wow_st = round(found_rate_pct(sl_last_st) - found_rate_pct(sl_prior_st), 1)
    # trendUp means "good": a rising found rate is up for both cards
    perf_kpis_st = [
        {'label': 'Found Rate',     'value': f'{fr_st}%',   'trend': f'{wow_st:+.1f}% WoW', 'trendUp': wow_st >= 0},
        {'label': 'Not Found Rate', 'value': f'{nfr_st}%',  'trend': f'{-wow_st:+.1f}% WoW', 'trendUp': wow_st >= 0},
        {'label': 'INF Events',     'value': f'{inf_st:,}', 'trend': '', 'trendUp': False},
    ]
    store_index[str(sn)] = {
        'nilByDay':      nil_by_day_st,
        'dowFoundRate':  dow_st,
        'foundRateTrend': trend_st,
        'perfKpis':      perf_kpis_st,
    }

# ─────────────────────────────────────────────────────────────────────────────
# State detail data — per-state breakdown for interactive map drill-down
# ─────────────────────────────────────────────────────────────────────────────

# Per-state drill-down payload keyed by state abbreviation.
state_detail = {}
for abbr, srecs in by_state_recs.items():
    # Retailers in this state, worst INF rate first (top 7 shown in drill-down)
    by_ret_st = defaultdict(list)
    for r in srecs:
        by_ret_st[r['WAREHOUSE_NAME']].append(r)
    retailers_st = sorted(
        [
            {
                'name':       ret,
                'infRate':    round(inf_rate_pct(recs), 1),
                'storeCount': len({r['STORE_NUMBER'] for r in recs}),
            }
            for ret, recs in by_ret_st.items()
        ],
        key=lambda x: -x['infRate'],
    )[:7]

    # Top stores by INF rate in this state (skip 100% NFR)
    by_store_st = defaultdict(list)
    for r in srecs:
        by_store_st[r['STORE_NUMBER']].append(r)
    store_rows_st = []
    for sn, recs in by_store_st.items():
        ir_st = inf_rate_pct(recs)
        if ir_st >= 100.0:
            continue  # data artifact — same filter as the priority tables
        meta_st = store_meta.get(sn, {'retailer': '', 'address': ''})
        city_st = recs[0].get('CITY', '') if recs else ''
        store_rows_st.append({
            'storeName': f"{meta_st['retailer']} #{sn}",
            'retailer':  meta_st['retailer'],
            'city':      city_st,
            'infRate':   round(ir_st, 1),
            'riskTier':  risk_tier(ir_st / 15 * 100),  # scale: 15%+ INF = critical
        })
    store_rows_st.sort(key=lambda x: -x['infRate'])

    # Top brands by INF rate in this state
    by_brand_st = defaultdict(list)
    for r in srecs:
        by_brand_st[r['BRAND']].append(r)
    top_brands_st = sorted(
        [{'name': brand, 'infRate': round(inf_rate_pct(recs), 1)}
         for brand, recs in by_brand_st.items()],
        key=lambda x: -x['infRate'],
    )[:5]

    # WoW delta (found rate change vs prior window); 0.0 when the state has no
    # records in the current window
    sl_st = [r for r in srecs if in_window(r, window_start, window_end)]
    sp_st = [r for r in srecs if in_window(r, prior_start, prior_end)]
    wow_st = round(found_rate_pct(sl_st) - found_rate_pct(sp_st), 1) if sl_st else 0.0

    all_stores_st = {r['STORE_NUMBER'] for r in srecs}
    inf_stores_st = {r['STORE_NUMBER'] for r in srecs if r['_inf']}

    state_detail[abbr] = {
        'infRate':        round(inf_rate_pct(srecs), 1),
        'infEvents':      sum(1 for r in srecs if r['_inf']),
        'storesTotal':    len(all_stores_st),
        'storesImpacted': len(inf_stores_st),
        'wowDelta':       wow_st,
        'retailers':      retailers_st,
        'topStores':      store_rows_st[:10],
        'topBrands':      top_brands_st,
    }

# ─────────────────────────────────────────────────────────────────────────────
# Priority table — SKU view (brand manager / sales lead)
# ─────────────────────────────────────────────────────────────────────────────

# Group every pick by UPC, plus the two trend windows for SKU-level trends.
by_upc_recs = defaultdict(list)
for r in records:
    by_upc_recs[r['UPC']].append(r)

sku_last_recs  = defaultdict(list)
sku_prior_recs = defaultdict(list)
for r in records:
    upc = r['UPC']
    if in_window(r, window_start, window_end): sku_last_recs[upc].append(r)
    if in_window(r, prior_start, prior_end):   sku_prior_recs[upc].append(r)

# UPC → display name (first seen)
upc_name = {}
for r in records:
    if r['UPC'] not in upc_name:
        upc_name[r['UPC']] = r['UPC_DESCRIPTION']

# Normalization maxima for the SKU-level composite risk score.
# `or 1` guards the all-zero case so compute_risk never divides by zero —
# consistent with the window-scoped maxima in compute_window_snapshot.
max_inf_sku = max((inf_rate_pct(recs) for recs in by_upc_recs.values()), default=1) or 1
max_vol_sku = max((len(recs) for recs in by_upc_recs.values()), default=1) or 1
max_ls_sku  = max((lost_sales_amt(recs) or 0 for recs in by_upc_recs.values()), default=0) or 1

# One priority-table row per SKU that has at least one INF event; 100%-NFR
# SKUs are dropped as data artifacts, matching the retailer/store tables.
sku_data = []
for upc, recs in by_upc_recs.items():
    if not any(r['_inf'] for r in recs):
        continue  # skip SKUs with zero INF events

    inf_stores_sku = {r['STORE_NUMBER'] for r in recs if r['_inf']}
    inf_r       = inf_rate_pct(recs)
    if inf_r >= 100.0:
        continue  # data artifact — 100% NFR
    inf_last_r  = inf_rate_pct(sku_last_recs[upc])
    inf_prior_r = inf_rate_pct(sku_prior_recs[upc])
    ls_sku      = lost_sales_amt(recs) or 0

    score = compute_risk(
        inf_r, max_inf_sku,
        ls_sku,
        len(recs), max_vol_sku,
        inf_last_r, inf_prior_r,
    )
    t_dir, t_val = trend_dir_val(inf_last_r, inf_prior_r)

    _tier = risk_tier(score)
    sku_data.append({
        'upc':               upc_name[upc],        # description for display in "UPC" column
        'upcCode':           upc,                  # raw code
        'sku':               upc_name[upc],
        'riskLevel':         _tier.capitalize(),   # 'Critical' / 'High' / 'Medium' / 'Low'
        'riskTier':          _tier,
        'riskScore':         score,
        'estLostSales':      ls_sku,               # reuse — already computed above
        'notFoundRate':      round(inf_r, 1),      # alias expected by table
        'notFound':          round(inf_r, 1),
        'pricingCompliance': round(100 - inf_r, 1),
        'compliance':        round(100 - inf_r, 1),
        'retailersAffected': len({r['WAREHOUSE_NAME'] for r in recs if r['_inf']}),
        'storesAffected':    len(inf_stores_sku),
        'trendDir':          t_dir,
        'trendVal':          t_val,
    })

sku_data.sort(key=lambda x: -x['riskScore'])
for i, row in enumerate(sku_data):
    row['rank'] = i + 1

# ─────────────────────────────────────────────────────────────────────────────
# Performance tab
# ─────────────────────────────────────────────────────────────────────────────

# Found rate trend by week
# Found rate trend by week — one point per bucket, empty buckets included.
by_week_recs = defaultdict(list)
for rec in records:
    by_week_recs[rec['_week']].append(rec)

found_rate_trend = []
for wb in week_buckets:
    week_recs = by_week_recs.get(wb, [])
    found_rate_trend.append({
        'week': fmt_week(wb),
        'rate': round(found_rate_pct(week_recs), 1),
    })

# Category performance: found rate + WoW delta, alphabetical by category name.
by_cat_recs  = defaultdict(list)
by_cat_last  = defaultdict(list)
by_cat_prior = defaultdict(list)
for rec in records:
    cat = rec['CATEGORY']
    by_cat_recs[cat].append(rec)
    if in_window(rec, window_start, window_end):
        by_cat_last[cat].append(rec)
    if in_window(rec, prior_start, prior_end):
        by_cat_prior[cat].append(rec)

category_performance = []
for cat in sorted(by_cat_recs):
    delta = found_rate_pct(by_cat_last[cat]) - found_rate_pct(by_cat_prior[cat])
    category_performance.append({
        'name':   cat,
        'rate':   round(found_rate_pct(by_cat_recs[cat]), 1),
        'change': round(delta, 1),
    })

# Retailer benchmark: found rate per retailer, sorted best first
retailer_benchmark = [
    {
        'retailer':    ret,
        'foundRate':   round(found_rate_pct(recs), 1),
        'avgIssueHrs': 0,  # not computable from CSV
    }
    for ret, recs in by_retailer_recs.items()
]
retailer_benchmark.sort(key=lambda x: -x['foundRate'])

# Performance KPIs
fr_lyear  = found_rate_pct(recs_lyear)
perf_wow  = round(fr_last - fr_prior, 1)   # found-rate delta vs prior window
perf_yoy  = round(fr_last - fr_lyear, 1)   # found-rate delta vs same window last year

def _perf_kpi(label, value, trend, trend_up):
    """Assemble one Performance-tab KPI card dict."""
    return {'label': label, 'value': value, 'trend': trend, 'trendUp': trend_up}

performance_kpis = [
    _perf_kpi('Found Rate', f'{global_fr:.1f}%',
              f'{perf_wow:+.1f}% WoW', perf_wow >= 0),
    _perf_kpi('IC total found rate', 'N/A',
              'Platform benchmark not in data', True),
    _perf_kpi('Week over Week', f'{perf_wow:+.1f}%',
              f'vs prior {TREND_WINDOW_DAYS} days', perf_wow >= 0),
    _perf_kpi('vs. Last Year', f'{perf_yoy:+.1f}%',
              f'same {TREND_WINDOW_DAYS}-day window, prior year', perf_yoy >= 0),
]

# INF status breakdown (global) — single pass over all records
to_refund_global = 0
replaced_global  = 0
for rec in records:
    status = rec['SYSTEM_ORDER_ITEM_STATUS']
    if status == 'to_refund':
        to_refund_global += 1
    elif status == 'replaced':
        replaced_global += 1
inf_tot_global = to_refund_global + replaced_global
inf_breakdown_global = {
    'toRefund':    to_refund_global,
    'replaced':    replaced_global,
    'total':       inf_tot_global,
    'toRefundPct': round(to_refund_global / inf_tot_global * 100, 1) if inf_tot_global else 0,
    'replacedPct': round(replaced_global  / inf_tot_global * 100, 1) if inf_tot_global else 0,
}

# Day-of-week found rate (global), Monday through Sunday
_DAY_NAMES = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
by_dow_global = defaultdict(list)
for rec in records:
    by_dow_global[rec['_date'].weekday()].append(rec)
dow_found_rate_global = []
for dow, day_name in enumerate(_DAY_NAMES):
    dow_found_rate_global.append(
        {'day': day_name, 'rate': round(found_rate_pct(by_dow_global[dow]), 1)})

# Substitution data: top 10 SKUs by replacement occurrences
by_upc_replaced = defaultdict(int)
for rec in records:
    if rec['SYSTEM_ORDER_ITEM_STATUS'] == 'replaced':
        by_upc_replaced[rec['UPC']] += 1

substitution_data = []
for upc, replaced_ct in by_upc_replaced.items():
    substitution_data.append({
        'yourProduct': upc_name.get(upc, upc),
        'substitute':  'Unknown — substitute identity not captured in this data',
        'rate':        round(replaced_ct / len(by_upc_recs[upc]) * 100, 1),
        'revenueLoss': 0,  # no price data; est = replaced_ct × avg_unit_price
        'occurrences': replaced_ct,
    })
substitution_data.sort(key=lambda x: -x['occurrences'])
substitution_data = substitution_data[:10]

# ─────────────────────────────────────────────────────────────────────────────
# AI Recommendations — top 4 store×SKU combos by INF rate (min volume/rate)
# ─────────────────────────────────────────────────────────────────────────────

# Group picks by (store, SKU) pair for per-combo INF scoring below.
by_store_sku = defaultdict(list)
for rec in records:
    combo = (rec['STORE_NUMBER'], rec['UPC'])
    by_store_sku[combo].append(rec)

def _ai_priority(inf_r):
    if inf_r >= 40: return ('Critical', 'priority-critical')
    if inf_r >= 25: return ('High',     'priority-high')
    return ('Medium', 'priority-medium')

def _ai_action(inf_r, trend_pct):
    if inf_r >= 40:       return 'Escalate to field rep for urgent restock'
    if trend_pct > 100:   return 'Verify with regional manager — rapid deterioration detected'
    if inf_r >= 25:       return 'Request store manager inspection'
    return 'Schedule visit this week and monitor trend'

# Score every (store, SKU) combo that has enough volume and a problematic INF rate.
scored_combos = []
for (sn, upc), recs in by_store_sku.items():
    if len(recs) < 10: continue          # too little data
    ir = inf_rate_pct(recs)
    if ir < 15: continue                 # not problematic enough
    last_r  = inf_rate_pct([r for r in recs if in_window(r, window_start, window_end)])
    prior_r = inf_rate_pct([r for r in recs if in_window(r, prior_start, prior_end)])
    # % change in INF rate vs the prior window; 0 when there is no prior baseline
    trend_pct = round(((last_r - prior_r) / prior_r * 100) if prior_r > 0 else 0, 0)
    # Heuristic confidence: base 70 + 1 point per 50 picks, capped at 98
    confidence = min(98, int(70 + min(28, len(recs) // 50)))
    priority_label, priority_class = _ai_priority(ir)
    meta = store_meta.get(sn, {'retailer': '', 'address': ''})
    # Prefer the recent-window rate for display; fall back to the overall rate
    # when the combo had no INF activity in the recent window
    display_r = last_r if last_r > 0 else ir
    trend_str = (f"spiked {abs(trend_pct):.0f}% vs prior period"
                 if abs(trend_pct) > 30 else f"at {display_r:.1f}%")
    scored_combos.append({
        'priority':      priority_label,
        'priorityClass': priority_class,
        'issue':         'NIL',
        'store':         f"{meta['retailer']} #{sn}",
        'location':      meta['address'],
        'description':   (f"NIL rate {trend_str} for {upc_name.get(upc, upc)}. "
                          f"Overall INF rate: {ir:.1f}%. "
                          f"Possible replenishment failure or shelf reset issue."),
        'action':        _ai_action(ir, trend_pct),
        'confidence':    confidence,
        # Private sort/diversification keys — stripped before output below
        '_ir':           ir,
        '_sn':           sn,
        '_upc':          upc,
    })

# Worst INF rate first
scored_combos.sort(key=lambda x: -x['_ir'])

# Pick top 4, diversifying across stores and SKUs
ai_recommendations = []
seen_stores, seen_skus = set(), set()
for item in scored_combos:
    if len(ai_recommendations) >= 4:
        break
    # Soft diversification: skip only when BOTH the store and the SKU were
    # already used by an earlier pick.
    if not (item['_sn'] in seen_stores and item['_upc'] in seen_skus):
        seen_stores.add(item['_sn'])
        seen_skus.add(item['_upc'])
        public = {k: v for k, v in item.items() if not k.startswith('_')}
        ai_recommendations.append(public)

# ─────────────────────────────────────────────────────────────────────────────
# Brand distribution — share of pick volume per brand (top 8)
# Used by the Share of Shelf / Brand Distribution chart.
# ─────────────────────────────────────────────────────────────────────────────

# Count pick volume per brand, then keep the eight largest.
brand_pick_vol = defaultdict(int)
for rec in records:
    brand_pick_vol[rec['BRAND']] += 1

top_brands_dist = sorted(brand_pick_vol.items(), key=lambda x: -x[1])[:8]
brand_distribution = []
for i, (brand, count) in enumerate(top_brands_dist):
    brand_distribution.append({
        'name':  brand,
        'value': round(count / len(records) * 100, 1),   # share of all picks, %
        'color': SERIES_COLORS[i % len(SERIES_COLORS)],  # cycle the palette
    })

# ─────────────────────────────────────────────────────────────────────────────
# Date-range snapshots — KPIs + retailer table for each selectable window
# Allows the UI to show accurate metrics for any date range without raw records.
# ─────────────────────────────────────────────────────────────────────────────

def compute_window_snapshot(window_days):
    """
    Compute the full dashboard payload for the most-recent `window_days` days.

    Comparison window = the equal-length window immediately before it, used
    for every WoW / trend delta. Returns a dict with KPI cards, the retailer /
    store / SKU priority tables, the retailer risk chart series, state map
    data, and Performance-tab slices — all scoped to the selected window.
    Returns only empty 'kpis'/'tableData' when the window has no records.
    """
    win_end   = max_date
    win_start = win_end   - timedelta(days=window_days - 1)
    pri_end   = win_start - timedelta(days=1)
    pri_start = pri_end   - timedelta(days=window_days - 1)

    recs_win  = [r for r in records if win_start  <= r['_date'] <= win_end]
    recs_pri  = [r for r in records if pri_start  <= r['_date'] <= pri_end]

    if not recs_win:
        return {'kpis': [], 'tableData': []}

    # ── KPI cards ──
    inf_win  = sum(1 for r in recs_win if r['_inf'])
    ir_win   = inf_rate_pct(recs_win)
    fr_win   = found_rate_pct(recs_win)
    ir_pri   = inf_rate_pct(recs_pri)
    fr_pri   = found_rate_pct(recs_pri)
    ir_d     = round(ir_win - ir_pri, 1)   # INF-rate delta; negative = improving
    wow      = round(fr_win - fr_pri, 1)   # found-rate delta; positive = improving
    all_st_w = {r['STORE_NUMBER'] for r in recs_win}
    inf_st_w = {r['STORE_NUMBER'] for r in recs_win if r['_inf']}
    st_pct_w = len(inf_st_w) / len(all_st_w) * 100 if all_st_w else 0

    kpis_w = [
        kpi_card('Not found rate', f'{ir_win:.1f}%',
                 f'{"↑" if ir_d > 0 else "↓"} {abs(ir_d):.1f}% vs prior {window_days}d', ir_d < 0),
        kpi_card('Found rate', f'{fr_win:.1f}%',
                 f'{"↑" if wow > 0 else "↓"} {abs(wow):.1f}% vs prior {window_days}d', wow >= 0),
        kpi_card('Stores impacted', f'{len(inf_st_w):,}',
                 f'{st_pct_w:.0f}% of {len(all_st_w)} stores', False),
        kpi_card('Est. lost sales', fmt_lost_sales(lost_sales_amt(recs_win)),
                 f'{inf_win:,} INF events' if HAS_PRICE_DATA else 'Price feed required', False),
    ]

    # ── Performance data for this window ──
    lyr_end_w   = win_end   - timedelta(days=365)
    lyr_start_w = win_start - timedelta(days=365)
    fr_lyear_w  = found_rate_pct([r for r in records if lyr_start_w <= r['_date'] <= lyr_end_w])
    yoy_w       = round(fr_win - fr_lyear_w, 1)
    perf_kpis_w = [
        {'label': 'Found Rate',          'value': f'{fr_win:.1f}%',  'trend': f'{wow:+.1f}% WoW',                           'trendUp': wow >= 0},
        {'label': 'IC total found rate', 'value': 'N/A',             'trend': 'Platform benchmark not in data',              'trendUp': True},
        {'label': 'Week over Week',      'value': f'{wow:+.1f}%',    'trend': f'vs prior {window_days} days',                'trendUp': wow >= 0},
        {'label': 'vs. Last Year',       'value': f'{yoy_w:+.1f}%', 'trend': f'same {window_days}-day window, prior year',  'trendUp': yoy_w >= 0},
    ]
    by_cat_w = defaultdict(list); by_cat_w_pri = defaultdict(list)
    for r in recs_win: by_cat_w[r['CATEGORY']].append(r)
    for r in recs_pri: by_cat_w_pri[r['CATEGORY']].append(r)
    cat_perf_w = sorted([
        {'name': cat, 'rate': round(found_rate_pct(recs), 1),
         'change': round(found_rate_pct(recs) - found_rate_pct(by_cat_w_pri.get(cat, [])), 1)}
        for cat, recs in by_cat_w.items()
    ], key=lambda x: -x['rate'])

    # ── Retailer priority table ──
    by_ret_w     = defaultdict(list)
    by_ret_w_pri = defaultdict(list)
    for r in recs_win:
        by_ret_w[r['WAREHOUSE_NAME']].append(r)
    for r in recs_pri:
        by_ret_w_pri[r['WAREHOUSE_NAME']].append(r)

    # `or 1` guards the all-zero case so compute_risk never divides by zero
    # (consistent with the store/SKU maxima below)
    max_ir_w  = max((inf_rate_pct(recs) for recs in by_ret_w.values()), default=1) or 1
    max_vol_w = max((len(recs) for recs in by_ret_w.values()), default=1) or 1

    rows_w = []
    for ret, recs in by_ret_w.items():
        inf_skus_w = {r['UPC'] for r in recs if r['_inf']}
        ir_r       = inf_rate_pct(recs)
        if ir_r >= 100.0:
            continue  # data artifact — 100% NFR
        if HAS_PRICE_DATA and total_sales_amt(recs) < MIN_RETAILER_SALES:
            continue  # dirty data — insufficient sales volume
        ir_r_pri   = inf_rate_pct(by_ret_w_pri[ret])
        ls_r_w     = lost_sales_amt(recs) or 0
        score_w    = compute_risk(ir_r, max_ir_w, ls_r_w, len(recs), max_vol_w, ir_r, ir_r_pri)
        t_dir_w, t_val_w = trend_dir_val(ir_r, ir_r_pri)
        all_st_r = {r['STORE_NUMBER'] for r in recs}
        inf_st_r = {r['STORE_NUMBER'] for r in recs if r['_inf']}
        rows_w.append({
            'retailer':       ret,
            'storesCount':    len(all_st_r),
            'estLostSales':   ls_r_w,
            'notFound':       round(ir_r, 1),
            'compliance':     round(100 - ir_r, 1),
            'storesAffected': len(inf_st_r),
            'skusAffected':   len(inf_skus_w),
            'trendDir':       t_dir_w,
            'trendVal':       t_val_w,
            'riskScore':      score_w,
            'riskTier':       risk_tier(score_w),
        })
    rows_w.sort(key=lambda x: -x['riskScore'])
    for i, row in enumerate(rows_w):
        row['rank'] = i + 1

    # ── Retailer risk distribution (bar chart) ──
    # Derived from rows_w so chart uses same composite riskScore as the priority table
    retailer_risk_w = {
        'categories':    [r['retailer']      for r in rows_w],
        'riskValues':    [r['riskScore']     for r in rows_w],
        'notFoundRates': [r['notFound']      for r in rows_w],
        'storesAffected':[r['storesAffected']for r in rows_w],
        'detected':      [sum(1 for rec in by_ret_w[r['retailer']] if rec['_inf']) for r in rows_w],
        'confirmed':     [
            sum(1 for rec in by_ret_w[r['retailer']] if rec['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund')
            for r in rows_w
        ],
    }

    # ── Store priority table (window-scoped, no recurrence) ──
    by_st_w     = defaultdict(list)
    by_st_w_pri = defaultdict(list)
    for r in recs_win: by_st_w[r['STORE_NUMBER']].append(r)
    for r in recs_pri: by_st_w_pri[r['STORE_NUMBER']].append(r)
    max_ir_st_w  = max((inf_rate_pct(v) for v in by_st_w.values()), default=1) or 1
    max_vol_st_w = max((len(v) for v in by_st_w.values()), default=1) or 1
    store_rows_w = []
    for sn, srecs in by_st_w.items():
        inf_s = {r['UPC'] for r in srecs if r['_inf']}
        ir_s  = inf_rate_pct(srecs)
        if ir_s >= 100.0: continue  # data artifact — 100% NFR
        ir_s_pri = inf_rate_pct(by_st_w_pri[sn])
        ls_s_w   = lost_sales_amt(srecs) or 0
        sc_s = compute_risk(ir_s, max_ir_st_w, ls_s_w,
                            len(srecs), max_vol_st_w, ir_s, ir_s_pri)
        t_d, t_v = trend_dir_val(ir_s, ir_s_pri)
        meta = store_meta.get(sn, {'retailer': '', 'address': ''})
        _t = risk_tier(sc_s)
        store_rows_w.append({
            'storeNumber': sn, 'storeName': f"{meta['retailer']} #{sn}",
            'retailer': meta['retailer'], 'address': meta['address'],
            'riskLevel': _t.capitalize(), 'riskTier': _t, 'riskScore': sc_s,
            'estLostSales': ls_s_w, 'notFoundRate': round(ir_s, 1), 'notFound': round(ir_s, 1),
            'pricingCompliance': round(100 - ir_s, 1), 'compliance': round(100 - ir_s, 1),
            'skusAffected': len(inf_s), 'trendDir': t_d, 'trendVal': t_v,
            'recurrent': False, 'chronicDays': 0,
        })
    store_rows_w.sort(key=lambda x: -x['riskScore'])
    for i, row in enumerate(store_rows_w): row['rank'] = i + 1

    # ── SKU priority table (window-scoped) ──
    by_upc_w     = defaultdict(list)
    by_upc_w_pri = defaultdict(list)
    for r in recs_win: by_upc_w[r['UPC']].append(r)
    for r in recs_pri: by_upc_w_pri[r['UPC']].append(r)
    max_ir_sku_w  = max((inf_rate_pct(v) for v in by_upc_w.values()), default=1) or 1
    max_vol_sku_w = max((len(v) for v in by_upc_w.values()), default=1) or 1
    sku_rows_w = []
    for upc, urecs in by_upc_w.items():
        if not any(r['_inf'] for r in urecs): continue  # only SKUs with INF events
        inf_st_u = {r['STORE_NUMBER'] for r in urecs if r['_inf']}
        ir_u = inf_rate_pct(urecs)
        if ir_u >= 100.0: continue  # data artifact — 100% NFR
        ir_u_pri = inf_rate_pct(by_upc_w_pri[upc])
        ls_u_w = lost_sales_amt(urecs) or 0
        sc_u = compute_risk(ir_u, max_ir_sku_w, ls_u_w, len(urecs), max_vol_sku_w, ir_u, ir_u_pri)
        t_d, t_v = trend_dir_val(ir_u, ir_u_pri)
        _t = risk_tier(sc_u)
        name = upc_name.get(upc, upc)
        sku_rows_w.append({
            'upc': name, 'upcCode': upc, 'sku': name,
            'riskLevel': _t.capitalize(), 'riskTier': _t, 'riskScore': sc_u,
            'estLostSales': ls_u_w, 'notFoundRate': round(ir_u, 1), 'notFound': round(ir_u, 1),
            'pricingCompliance': round(100 - ir_u, 1), 'compliance': round(100 - ir_u, 1),
            'retailersAffected': len({r['WAREHOUSE_NAME'] for r in urecs if r['_inf']}),
            'storesAffected': len(inf_st_u), 'trendDir': t_d, 'trendVal': t_v,
        })
    sku_rows_w.sort(key=lambda x: -x['riskScore'])
    for i, row in enumerate(sku_rows_w): row['rank'] = i + 1

    # ── State risks + state detail (window-scoped) ──
    by_state_w = defaultdict(list)
    for r in recs_win:
        by_state_w[r['STATE']].append(r)

    state_risks_w = sorted(
        [{'abbr': abbr, 'name': US_STATES.get(abbr, abbr), 'risk': round(inf_rate_pct(recs), 1)}
         for abbr, recs in by_state_w.items()],
        key=lambda x: -x['risk'],
    )

    # State prior-window records (for WoW delta per state)
    by_state_pri = defaultdict(list)
    for r in recs_pri:
        by_state_pri[r['STATE']].append(r)

    state_data_w = {}
    for abbr, srecs in by_state_w.items():
        by_ret_st = defaultdict(list)
        for r in srecs:
            by_ret_st[r['WAREHOUSE_NAME']].append(r)
        retailers_st = sorted(
            [{'name': ret, 'infRate': round(inf_rate_pct(recs), 1),
              'storeCount': len({r['STORE_NUMBER'] for r in recs})}
             for ret, recs in by_ret_st.items()],
            key=lambda x: -x['infRate'],
        )[:7]

        by_store_st = defaultdict(list)
        for r in srecs:
            by_store_st[r['STORE_NUMBER']].append(r)
        store_rows_st = []
        for sn, recs in by_store_st.items():
            ir_st = inf_rate_pct(recs)
            if ir_st >= 100.0:
                continue  # data artifact — 100% NFR
            meta_st = store_meta.get(sn, {'retailer': '', 'address': ''})
            city_st = recs[0].get('CITY', '') if recs else ''
            store_rows_st.append({
                'storeName': f"{meta_st['retailer']} #{sn}",
                'retailer':  meta_st['retailer'],
                'city':      city_st,
                'infRate':   round(ir_st, 1),
                'riskTier':  risk_tier(ir_st / 15 * 100),  # scale: 15%+ INF = critical
            })
        store_rows_st.sort(key=lambda x: -x['infRate'])

        by_brand_st = defaultdict(list)
        for r in srecs:
            by_brand_st[r['BRAND']].append(r)
        top_brands_st = sorted(
            [{'name': b, 'infRate': round(inf_rate_pct(recs), 1)} for b, recs in by_brand_st.items()],
            key=lambda x: -x['infRate'],
        )[:5]

        sp_st = by_state_pri.get(abbr, [])
        wow_st = round(found_rate_pct(srecs) - found_rate_pct(sp_st), 1) if srecs else 0.0
        all_stores_st = {r['STORE_NUMBER'] for r in srecs}
        inf_stores_st = {r['STORE_NUMBER'] for r in srecs if r['_inf']}

        state_data_w[abbr] = {
            'infRate':        round(inf_rate_pct(srecs), 1),
            'infEvents':      sum(1 for r in srecs if r['_inf']),
            'storesTotal':    len(all_stores_st),
            'storesImpacted': len(inf_stores_st),
            'wowDelta':       wow_st,
            'retailers':      retailers_st,
            'topStores':      store_rows_st[:10],
            'topBrands':      top_brands_st,
        }

    # ── Performance-tab slices scoped to this window ──
    ret_bench_w = sorted([
        {'retailer': ret, 'foundRate': round(found_rate_pct(rr), 1), 'avgIssueHrs': 0}
        for ret, rr in by_ret_w.items()
    ], key=lambda x: -x['foundRate'])
    by_week_snap = defaultdict(list)
    for r in recs_win:
        by_week_snap[r['_week']].append(r)
    found_rate_trend_snap = [
        {'week': fmt_week(wb), 'label': fmt_week(wb), 'rate': round(found_rate_pct(by_week_snap[wb]), 1)}
        for wb in sorted(by_week_snap.keys())
    ]
    to_refund_snap = sum(1 for r in recs_win if r['SYSTEM_ORDER_ITEM_STATUS'] == 'to_refund')
    replaced_snap  = sum(1 for r in recs_win if r['SYSTEM_ORDER_ITEM_STATUS'] == 'replaced')
    inf_tot_snap   = to_refund_snap + replaced_snap
    inf_breakdown_snap = {
        'toRefund':    to_refund_snap,
        'replaced':    replaced_snap,
        'total':       inf_tot_snap,
        'toRefundPct': round(to_refund_snap / inf_tot_snap * 100, 1) if inf_tot_snap else 0,
        'replacedPct': round(replaced_snap  / inf_tot_snap * 100, 1) if inf_tot_snap else 0,
    }
    _DAY_NAMES_SNAP = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    by_dow_snap = defaultdict(list)
    for r in recs_win:
        by_dow_snap[r['_date'].weekday()].append(r)
    dow_found_rate_snap = [
        {'day': _DAY_NAMES_SNAP[d], 'rate': round(found_rate_pct(by_dow_snap[d]), 1)}
        for d in range(7)
    ]
    performance_w = {
        'kpis':                perf_kpis_w,
        'foundRateTrend':      found_rate_trend_snap,
        'categoryPerformance': cat_perf_w,
        'retailerBenchmark':   ret_bench_w,
        'infBreakdown':        inf_breakdown_snap,
        'dowFoundRate':        dow_found_rate_snap,
    }

    return {'kpis': kpis_w, 'tableData': rows_w, 'retailerRisk': retailer_risk_w,
            'storeData': store_rows_w, 'skuData': sku_rows_w,
            'stateRisks': state_risks_w, 'stateData': state_data_w,
            'performance': performance_w}

# Selectable date windows (days). One pre-computed snapshot per window,
# keyed '7d' | '14d' | '30d' | '90d' for direct lookup by the UI.
DATE_WINDOWS = [7, 14, 30, 90]
snapshots_by_date_range = {}
for _window_days in DATE_WINDOWS:
    snapshots_by_date_range[f'{_window_days}d'] = compute_window_snapshot(_window_days)

# ─────────────────────────────────────────────────────────────────────────────
# Filter options (for populating UI dropdowns)
# ─────────────────────────────────────────────────────────────────────────────

# Collect distinct values for the UI dropdowns in a single pass over the
# records, then sort each group alphabetically.
_brand_vals, _sku_vals, _cat_vals, _supercat_vals = set(), set(), set(), set()
for r in records:
    _brand_vals.add(r['BRAND'])
    _sku_vals.add(r['UPC_DESCRIPTION'])
    _cat_vals.add(r['CATEGORY'])
    _supercat_vals.add(r['SUPER_CATEGORY'])
filter_options = {
    'retailers':       sorted(by_retailer_recs),
    'brands':          sorted(_brand_vals),
    'skus':            sorted(_sku_vals),
    'categories':      sorted(_cat_vals),
    'superCategories': sorted(_supercat_vals),
}

# ─────────────────────────────────────────────────────────────────────────────
# Filter index — pre-computed chart slices per filter dimension
# ─────────────────────────────────────────────────────────────────────────────

print('Building filter index...')

# Group all records by brand; also reused later for brand-level lookups.
by_brand_fi = defaultdict(list)
for r in records:
    by_brand_fi[r['BRAND']].append(r)

filter_index = {}

# Every retailer and every category gets a slice.
for ret, recs in by_retailer_recs.items():
    filter_index[f'retailer:{ret}'] = compute_slice(recs)

for cat, recs in by_cat_recs.items():
    filter_index[f'category:{cat}'] = compute_slice(recs)

# Top-30 brands by pick volume; only these are indexed (and shown in the UI).
top_brands_fi = sorted(by_brand_fi.keys(), key=lambda b: -len(by_brand_fi[b]))[:30]
for brand in top_brands_fi:
    filter_index[f'brand:{brand}'] = compute_slice(by_brand_fi[brand])

# Top-50 SKU (UPC_DESCRIPTION) slices — ranked by INF event count so the most-problematic
# SKUs are always in the index; only these SKUs appear in the UI dropdown.
by_sku_fi = defaultdict(list)
for r in records:
    by_sku_fi[r['UPC_DESCRIPTION']].append(r)

# Count INF events per SKU once up front. (Previously each SKU's records were
# scanned twice: an `any()` filter pass plus a `sum()` inside the sort key.)
sku_inf_counts = {sku: sum(1 for r in recs if r['_inf']) for sku, recs in by_sku_fi.items()}
top_skus_fi = sorted(
    (sku for sku, n in sku_inf_counts.items() if n),  # only SKUs with ≥1 INF event
    key=lambda s: -sku_inf_counts[s],
)[:50]
for sku in top_skus_fi:
    filter_index[f'sku:{sku}'] = compute_slice(by_sku_fi[sku])

# Retailer × category combos (7 × 7 = up to 49; key = "retailer:X|category:Y")
by_ret_cat = defaultdict(list)
for r in records:
    by_ret_cat[(r['WAREHOUSE_NAME'], r['CATEGORY'])].append(r)
ret_cat_count = 0
for (ret, cat), recs in by_ret_cat.items():
    if len(recs) >= 5:  # skip combos too thin to chart meaningfully
        filter_index[f'retailer:{ret}|category:{cat}'] = compute_slice(recs)
        ret_cat_count += 1

# Retailer × brand combos (top 30 brands × 7 retailers; key = "retailer:X|brand:Y")
by_ret_brand = defaultdict(list)
top_brands_set = set(top_brands_fi)
for r in records:
    if r['BRAND'] in top_brands_set:
        by_ret_brand[(r['WAREHOUSE_NAME'], r['BRAND'])].append(r)
ret_brand_count = 0
for (ret, brand), recs in by_ret_brand.items():
    if len(recs) >= 5:
        filter_index[f'retailer:{ret}|brand:{brand}'] = compute_slice(recs)
        ret_brand_count += 1

# Category × brand combos (key = "category:X|brand:Y")
by_cat_brand = defaultdict(list)
for r in records:
    if r['BRAND'] in top_brands_set:
        by_cat_brand[(r['CATEGORY'], r['BRAND'])].append(r)
cat_brand_count = 0
for (cat, brand), recs in by_cat_brand.items():
    if len(recs) >= 5:
        filter_index[f'category:{cat}|brand:{brand}'] = compute_slice(recs)
        cat_brand_count += 1

print(f'  {len(filter_index)} slices ({len(by_retailer_recs)} retailer, {len(by_cat_recs)} category, {len(top_brands_fi)} brand, {len(top_skus_fi)} sku, {ret_cat_count} ret×cat, {ret_brand_count} ret×brand, {cat_brand_count} cat×brand)')

# ─────────────────────────────────────────────────────────────────────────────
# Assemble and write output
# ─────────────────────────────────────────────────────────────────────────────

# Hoist membership sets out of the per-retailer comprehensions below.
# (Previously set(top_brands_fi) / set(top_skus_fi) were rebuilt once per
# retailer; top_brands_set already exists from the filter-index build.)
top_skus_set = set(top_skus_fi)

# Group the indexed SKUs by brand once, instead of scanning top_skus_fi per
# brand in both the dict-comprehension filter and its value expression.
_skus_by_brand = defaultdict(list)
for _sku in top_skus_fi:
    _skus_by_brand[by_sku_fi[_sku][0]['BRAND']].append(_sku)

def _dominant_category(recs):
    """Return the most frequent CATEGORY among recs.

    Ties break on first-seen (dict insertion) order, which is deterministic
    across runs — unlike max() over a set, whose iteration order varies.
    """
    counts = defaultdict(int)
    for r in recs:
        counts[r['CATEGORY']] += 1
    return max(counts.items(), key=lambda kv: kv[1])[0]

def _fmt_day(d):
    """Format a date as e.g. '6 Mar 2026'.

    Avoids strftime('%-d'), which is a glibc/BSD extension and raises
    ValueError on Windows; d.day produces the same unpadded day number.
    """
    return f"{d.day} {d.strftime('%b %Y')}"

output = {
    # Filter dropdown option lists — only include entries that have a filterIndex slice
    # so every dropdown selection is guaranteed to actually update the charts.
    'retailers':  filter_options['retailers'],   # all retailers (all are indexed)
    'brands':     sorted(top_brands_fi),         # top 30 brands by pick volume
    'skus':       sorted(top_skus_fi),           # top 50 SKUs by INF event count
    'categories': filter_options['categories'],  # all categories (all are indexed)

    # Brand → [SKU] mapping for the top-50 indexed SKUs so the UI can filter the
    # SKU dropdown when a brand is selected. Brands with no indexed SKUs are omitted.
    'skusByBrand': {
        brand: sorted(_skus_by_brand[brand])
        for brand in top_brands_fi
        if brand in _skus_by_brand
    },

    # SKU → brand / category lookups (for the top-50 indexed SKUs only).
    # Used to auto-select brand + category when a SKU is chosen in the filter bar.
    'skuToBrand':    {sku: by_sku_fi[sku][0]['BRAND']    for sku in top_skus_fi},
    'skuToCategory': {sku: by_sku_fi[sku][0]['CATEGORY'] for sku in top_skus_fi},

    # Brand → dominant category (most picks). Used to auto-select category when
    # a brand is chosen in the filter bar.
    'brandToCategory': {
        brand: _dominant_category(by_brand_fi[brand])
        for brand in top_brands_fi
    },

    # Retailer → [brands / categories / SKUs] — only indexed items.
    # Used to narrow the brand, category, and SKU dropdowns when a retailer is selected.
    'retailerToBrands': {
        ret: sorted(b for b in {r['BRAND'] for r in recs} if b in top_brands_set)
        for ret, recs in by_retailer_recs.items()
    },
    'retailerToCategories': {
        ret: sorted({r['CATEGORY'] for r in recs})
        for ret, recs in by_retailer_recs.items()
    },
    'retailerToSkus': {
        ret: sorted(s for s in {r['UPC_DESCRIPTION'] for r in recs} if s in top_skus_set)
        for ret, recs in by_retailer_recs.items()
    },

    # Priority tables — all three views
    'tableData': table_data,   # retailer view (existing prototype component)
    'storeData':  store_data,   # store view (field agent dispatch)
    'storeIndex': store_index,  # per-store trend/performance data (keyed by storeNumber string)
    'skuData':    sku_data,     # SKU view (brand manager)

    # Heat map + per-state drill-down
    'stateRisks': state_risks,
    'stateData':  state_detail,

    # KPI cards
    'kpis': {
        'mvp': kpis_mvp,
        'ltv': [],  # not computable from this data — falls back to mock
    },

    # NIL trend charts
    'nilTrend': {
        'byDay':      nil_by_day,
        'byHour':     nil_by_hour,
        'byRetailer': nil_by_retailer,
        'bySKU':      nil_by_sku,
        'byCategory': nil_by_category,
    },

    # Risk distribution by retailer (bar chart)
    'retailerRisk': retailer_risk,

    # Performance tab
    'performance': {
        'kpis':                performance_kpis,
        'foundRateTrend':      found_rate_trend,
        'categoryPerformance': category_performance,
        'retailerBenchmark':   retailer_benchmark,
        'substitutionData':    substitution_data,
        'infBreakdown':        inf_breakdown_global,
        'dowFoundRate':        dow_found_rate_global,
    },

    # AI Recommendations — top 4 store×SKU combos by INF rate, data-driven
    'aiRecommendations': ai_recommendations,

    # Brand Distribution — share of pick volume per brand (replaces mock shareOfShelf)
    'brandDistribution': brand_distribution,

    # Date-range snapshots — KPIs + retailer table per selectable window
    # Key = '7d' | '14d' | '30d' | '90d'; each value: { kpis, tableData }
    'snapshotsByDateRange': snapshots_by_date_range,

    # Pre-computed chart slices for client-side filtering
    'filterIndex': filter_index,

    # Filter option arrays — use these to replace hardcoded BRANDS/SKUS/CATEGORIES
    # in index.html, or read via window.__CPG_RUNTIME_DATA__ if wired up
    '_filterOptions': filter_options,

    # Date range options with labels derived from actual data dates.
    # One entry per DATE_WINDOWS value, e.g. "Last 7 Days (1 Mar 2026 – 6 Mar 2026)".
    'dateRangeOptions': [
        {
            'value': f'{days}d',
            'label': (
                f"Last {days} Days "
                f"({_fmt_day(max_date - timedelta(days=days - 1))} – {_fmt_day(max_date)})"
            ),
        }
        for days in DATE_WINDOWS
    ],

    # Metadata for debugging
    '_meta': {
        'generatedAt':    datetime.now().isoformat(),
        'totalRecords':   total,
        'dateRange':      [str(min_date), str(max_date)],
        'globalInfRate':  round(global_inf_r, 2),
        'globalFoundRate': round(global_fr, 2),
        'storesTotal':    len(all_stores),
        'storesImpacted': len(impacted_stores),
        'trendWindow': {
            'last':  [str(window_start), str(window_end)],
            'prior': [str(prior_start),  str(prior_end)],
            'lyear': [str(lyear_start),  str(lyear_end)],
        },
    },
}

# Serialize once, then write; `default=str` stringifies any non-JSON value
# (e.g. dates) instead of raising.
json_text = json.dumps(output, indent=2, default=str)
with open(OUTPUT_FILE, 'w', encoding='utf-8') as out_fh:
    out_fh.write(json_text)

# ─────────────────────────────────────────────────────────────────────────────
# Summary
# ─────────────────────────────────────────────────────────────────────────────

# Console summary of the generated dataset.
print(f'\n✓ Written → {OUTPUT_FILE}')
print(f'\n  Global INF rate:    {global_inf_r:.2f}%  ({inf_count:,} INF events)')
print(f'  Global found rate:  {global_fr:.2f}%')
print(f'  Stores impacted:    {len(impacted_stores):,} / {len(all_stores):,}  ({stores_pct:.0f}%)')
print(f'  WoW found rate:     {perf_wow:+.1f}%')
print(f'  YoY found rate:     {perf_yoy:+.1f}%')
chronic_stores = sum(1 for s in store_data if s['recurrent'])
print(f'\n  Priority tables:')
print(f'    Retailers:  {len(table_data)} rows')
print(f'    Stores:     {len(store_data)} rows  ({chronic_stores} chronic)')
print(f'    SKUs:       {len(sku_data)} rows with INF events')
print(f'\n  Risk tiers (retailer):')
# Tally retailer rows per tier in one pass; dict insertion order fixes the
# print order (critical → low).
tier_counts = {t: 0 for t in ('critical', 'high', 'medium', 'low')}
for row in table_data:
    if row['riskTier'] in tier_counts:
        tier_counts[row['riskTier']] += 1
for tier_name, tier_n in tier_counts.items():
    print(f'    {tier_name:<10} {tier_n}')
print(f'\nTo serve:')
print(f'  python3 -m http.server 8000')
print(f'  Open: http://localhost:8000/index.html?data=http://localhost:8000/data.json')