import { createHash } from 'crypto';

/**
 * Query normalization pipeline for cache key generation.
 * Strips metadata, normalizes team names, dates, and market terms
 * to produce stable cache keys for equivalent queries.
 */

// --- Telegram metadata stripping (ported from queries.ts cleanContent) ---

/**
 * Extracts the user-authored text from a raw chat payload.
 *
 * Returns `null` for payloads that carry no user query: empty input, system /
 * pre-compaction notices, lines that start with a bracketed weekday stamp,
 * media-attachment markers, and queued-message bundles in which no entry
 * yields text. Otherwise strips the "Conversation info" preamble, the
 * Telegram sender header, `[message_id: N]` and `[Replying to ...]` tags, and
 * joins the remaining non-blank lines with single spaces.
 *
 * @param text Raw message content.
 * @returns The cleaned text, or `null` when nothing usable remains.
 */
function stripTelegramMetadata(text: string): string | null {
  if (!text) return null;
  let body = text.trim();

  const isNonQuery =
    body.startsWith('System:') ||
    body.startsWith('Pre-compaction') ||
    /^\[(Mon|Tue|Wed|Thu|Fri|Sat|Sun) /.test(body) ||
    body.startsWith('[media attached');
  if (isNonQuery) return null;

  if (body.startsWith('[Queued messages')) {
    // The text before the first "Queued #N" separator is the bundle header; skip it
    // and return the first queued entry that cleans to something non-empty.
    const [, ...queued] = body.split(/---\s*\nQueued #\d+\s*\n/);
    for (const entry of queued) {
      const recovered = stripTelegramMetadata(entry.trim());
      if (recovered) return recovered;
    }
    return null;
  }

  if (body.startsWith('Conversation info')) {
    // Drop everything up to and including the last ``` fence, but only when at
    // least two fences are present.
    let fenceCount = 0;
    let lastFenceStart = -1;
    for (const fence of body.matchAll(/```/g)) {
      fenceCount += 1;
      lastFenceStart = fence.index ?? lastFenceStart;
    }
    if (fenceCount >= 2) {
      body = body.slice(lastFenceStart + 3).trim();
    }
    if (!body) return null;
  }

  // "[Telegram <chat> id:<id> ...] <sender> (<n>): <message>" -> "<message>"
  const header = /^\[Telegram \S+ id:\S+[^\]]*\]\s*.+?\(\d+\):\s*([\s\S]*)/.exec(body);
  if (header) body = header[1].trim();

  body = body
    .replace(/\[message_id:\s*\d+\]/g, '')
    .trim()
    .replace(/\[Replying to \S+ id:\d+\]\s*/g, '')
    .trim();

  const flattened = body
    .split('\n')
    .filter(line => line.trim() !== '')
    .join(' ');

  return flattened || null;
}

// --- Team name normalization (inline lookup maps) ---
// Covers NBA, NFL, NHL, MLB team aliases -> canonical 2- or 3-letter uppercase abbreviations

/**
 * Lookup table from a lowercase team alias (nickname, bare city, or
 * "city nickname") to the uppercase abbreviation substituted into the query.
 *
 * Abbreviations are shared across leagues: for example 'hawks', 'falcons' and
 * 'braves' all map to 'ATL'. Each alias key appears once, so a nickname used by
 * teams in two leagues resolves to a single abbreviation; the other team is
 * reachable only through its city-qualified keys (see the notes below).
 */
const TEAM_ALIASES: Record<string, string> = {
  // NBA
  'hawks': 'ATL', 'atlanta hawks': 'ATL', 'atlanta': 'ATL',
  'celtics': 'BOS', 'boston celtics': 'BOS', 'boston': 'BOS',
  'nets': 'BKN', 'brooklyn nets': 'BKN', 'brooklyn': 'BKN',
  'hornets': 'CHA', 'charlotte hornets': 'CHA', 'charlotte': 'CHA',
  'bulls': 'CHI', 'chicago bulls': 'CHI',
  'cavaliers': 'CLE', 'cavs': 'CLE', 'cleveland cavaliers': 'CLE', 'cleveland': 'CLE',
  'mavericks': 'DAL', 'mavs': 'DAL', 'dallas mavericks': 'DAL', 'dallas': 'DAL',
  'nuggets': 'DEN', 'denver nuggets': 'DEN', 'denver': 'DEN',
  'pistons': 'DET', 'detroit pistons': 'DET', 'detroit': 'DET',
  'warriors': 'GSW', 'golden state warriors': 'GSW', 'golden state': 'GSW', 'dubs': 'GSW',
  'rockets': 'HOU', 'houston rockets': 'HOU', 'houston': 'HOU',
  'pacers': 'IND', 'indiana pacers': 'IND', 'indiana': 'IND',
  'clippers': 'LAC', 'la clippers': 'LAC',
  'lakers': 'LAL', 'la lakers': 'LAL',
  'grizzlies': 'MEM', 'memphis grizzlies': 'MEM', 'memphis': 'MEM',
  'heat': 'MIA', 'miami heat': 'MIA', 'miami': 'MIA',
  'bucks': 'MIL', 'milwaukee bucks': 'MIL', 'milwaukee': 'MIL',
  'timberwolves': 'MIN', 'wolves': 'MIN', 'minnesota timberwolves': 'MIN', 'minnesota': 'MIN',
  'pelicans': 'NOP', 'new orleans pelicans': 'NOP', 'new orleans': 'NOP',
  'knicks': 'NYK', 'new york knicks': 'NYK',
  'thunder': 'OKC', 'oklahoma city thunder': 'OKC', 'oklahoma city': 'OKC',
  'magic': 'ORL', 'orlando magic': 'ORL', 'orlando': 'ORL',
  '76ers': 'PHI', 'sixers': 'PHI', 'philadelphia 76ers': 'PHI', 'philadelphia': 'PHI', 'philly': 'PHI',
  'suns': 'PHX', 'phoenix suns': 'PHX', 'phoenix': 'PHX',
  'trail blazers': 'POR', 'blazers': 'POR', 'portland trail blazers': 'POR', 'portland': 'POR',
  'kings': 'SAC', 'sacramento kings': 'SAC', 'sacramento': 'SAC',
  'spurs': 'SAS', 'san antonio spurs': 'SAS', 'san antonio': 'SAS',
  'raptors': 'TOR', 'toronto raptors': 'TOR', 'toronto': 'TOR',
  'jazz': 'UTA', 'utah jazz': 'UTA', 'utah': 'UTA',
  'wizards': 'WAS', 'washington wizards': 'WAS', 'washington': 'WAS',
  // NFL
  'cardinals': 'ARI', 'arizona cardinals': 'ARI',
  'falcons': 'ATL', 'atlanta falcons': 'ATL',
  'ravens': 'BAL', 'baltimore ravens': 'BAL', 'baltimore': 'BAL',
  'bills': 'BUF', 'buffalo bills': 'BUF', 'buffalo': 'BUF',
  'panthers': 'CAR', 'carolina panthers': 'CAR', 'carolina': 'CAR',
  'bears': 'CHI', 'chicago bears': 'CHI', 'chicago': 'CHI',
  'bengals': 'CIN', 'cincinnati bengals': 'CIN', 'cincinnati': 'CIN',
  'browns': 'CLE', 'cleveland browns': 'CLE',
  'cowboys': 'DAL', 'dallas cowboys': 'DAL',
  'broncos': 'DEN', 'denver broncos': 'DEN',
  'lions': 'DET', 'detroit lions': 'DET',
  'packers': 'GB', 'green bay packers': 'GB', 'green bay': 'GB',
  'texans': 'HOU', 'houston texans': 'HOU',
  'colts': 'IND', 'indianapolis colts': 'IND', 'indianapolis': 'IND',
  'jaguars': 'JAX', 'jags': 'JAX', 'jacksonville jaguars': 'JAX', 'jacksonville': 'JAX',
  'chiefs': 'KC', 'kansas city chiefs': 'KC', 'kansas city': 'KC',
  'raiders': 'LV', 'las vegas raiders': 'LV', 'las vegas': 'LV',
  'chargers': 'LAC', 'la chargers': 'LAC', 'los angeles chargers': 'LAC',
  'rams': 'LAR', 'la rams': 'LAR', 'los angeles rams': 'LAR',
  'dolphins': 'MIA', 'miami dolphins': 'MIA',
  'vikings': 'MIN', 'minnesota vikings': 'MIN',
  'patriots': 'NE', 'pats': 'NE', 'new england patriots': 'NE', 'new england': 'NE',
  'saints': 'NO', 'new orleans saints': 'NO',
  'giants': 'NYG', 'new york giants': 'NYG',
  'jets': 'NYJ', 'new york jets': 'NYJ',
  'eagles': 'PHI', 'philadelphia eagles': 'PHI',
  'steelers': 'PIT', 'pittsburgh steelers': 'PIT', 'pittsburgh': 'PIT',
  '49ers': 'SF', 'niners': 'SF', 'san francisco 49ers': 'SF', 'san francisco': 'SF',
  'seahawks': 'SEA', 'seattle seahawks': 'SEA', 'seattle': 'SEA',
  'buccaneers': 'TB', 'bucs': 'TB', 'tampa bay buccaneers': 'TB', 'tampa bay': 'TB', 'tampa': 'TB',
  'titans': 'TEN', 'tennessee titans': 'TEN', 'tennessee': 'TEN',
  'commanders': 'WAS', 'washington commanders': 'WAS',
  // NHL
  'bruins': 'BOS', 'boston bruins': 'BOS',
  'sabres': 'BUF', 'buffalo sabres': 'BUF',
  'flames': 'CGY', 'calgary flames': 'CGY', 'calgary': 'CGY',
  'hurricanes': 'CAR', 'carolina hurricanes': 'CAR', 'canes': 'CAR',
  'blackhawks': 'CHI', 'chicago blackhawks': 'CHI',
  'avalanche': 'COL', 'colorado avalanche': 'COL', 'colorado': 'COL', 'avs': 'COL',
  'blue jackets': 'CBJ', 'columbus blue jackets': 'CBJ', 'columbus': 'CBJ',
  'stars': 'DAL', 'dallas stars': 'DAL',
  'red wings': 'DET', 'detroit red wings': 'DET',
  'oilers': 'EDM', 'edmonton oilers': 'EDM', 'edmonton': 'EDM',
  // Bare 'panthers' is already mapped to CAR in the NFL section, so FLA is
  // reachable only through these city-qualified keys.
  'florida panthers': 'FLA', 'florida': 'FLA',
  'canadiens': 'MTL', 'habs': 'MTL', 'montreal canadiens': 'MTL', 'montreal': 'MTL',
  'predators': 'NSH', 'preds': 'NSH', 'nashville predators': 'NSH', 'nashville': 'NSH',
  'devils': 'NJD', 'new jersey devils': 'NJD', 'new jersey': 'NJD',
  'islanders': 'NYI', 'new york islanders': 'NYI',
  'rangers': 'NYR', 'new york rangers': 'NYR',
  'senators': 'OTT', 'sens': 'OTT', 'ottawa senators': 'OTT', 'ottawa': 'OTT',
  'flyers': 'PHI', 'philadelphia flyers': 'PHI',
  'penguins': 'PIT', 'pens': 'PIT', 'pittsburgh penguins': 'PIT',
  'sharks': 'SJS', 'san jose sharks': 'SJS', 'san jose': 'SJS',
  'kraken': 'SEA', 'seattle kraken': 'SEA',
  'blues': 'STL', 'st louis blues': 'STL', 'st. louis blues': 'STL', 'st louis': 'STL',
  'lightning': 'TBL', 'tampa bay lightning': 'TBL', 'bolts': 'TBL',
  'maple leafs': 'TOR', 'leafs': 'TOR', 'toronto maple leafs': 'TOR',
  'canucks': 'VAN', 'vancouver canucks': 'VAN', 'vancouver': 'VAN',
  'golden knights': 'VGK', 'vegas golden knights': 'VGK', 'vegas': 'VGK', 'knights': 'VGK',
  'capitals': 'WSH', 'caps': 'WSH', 'washington capitals': 'WSH',
  // Bare 'jets' is already mapped to NYJ in the NFL section, so WPG is
  // reachable only through these city-qualified keys.
  'winnipeg jets': 'WPG', 'winnipeg': 'WPG',
  'wild': 'MIN', 'minnesota wild': 'MIN',
  // MLB
  'diamondbacks': 'ARI', 'dbacks': 'ARI', 'arizona diamondbacks': 'ARI',
  'braves': 'ATL', 'atlanta braves': 'ATL',
  'orioles': 'BAL', 'baltimore orioles': 'BAL',
  'red sox': 'BOS', 'boston red sox': 'BOS',
  'cubs': 'CHC', 'chicago cubs': 'CHC',
  'white sox': 'CWS', 'chicago white sox': 'CWS', 'sox': 'CWS',
  'reds': 'CIN', 'cincinnati reds': 'CIN',
  'guardians': 'CLE', 'cleveland guardians': 'CLE',
  'rockies': 'COL', 'colorado rockies': 'COL',
  'tigers': 'DET', 'detroit tigers': 'DET',
  'astros': 'HOU', 'houston astros': 'HOU',
  'royals': 'KC', 'kansas city royals': 'KC',
  'angels': 'LAA', 'la angels': 'LAA', 'los angeles angels': 'LAA', 'anaheim': 'LAA',
  'dodgers': 'LAD', 'la dodgers': 'LAD', 'los angeles dodgers': 'LAD',
  'marlins': 'MIA', 'miami marlins': 'MIA',
  'brewers': 'MIL', 'milwaukee brewers': 'MIL',
  'twins': 'MIN', 'minnesota twins': 'MIN',
  'mets': 'NYM', 'new york mets': 'NYM',
  'yankees': 'NYY', 'new york yankees': 'NYY', 'yanks': 'NYY',
  'athletics': 'OAK', "a's": 'OAK', 'oakland athletics': 'OAK', 'oakland': 'OAK',
  'phillies': 'PHI', 'philadelphia phillies': 'PHI',
  'pirates': 'PIT', 'pittsburgh pirates': 'PIT',
  'padres': 'SD', 'san diego padres': 'SD', 'san diego': 'SD',
  'mariners': 'SEA', 'seattle mariners': 'SEA',
  'blue jays': 'TOR', 'jays': 'TOR', 'toronto blue jays': 'TOR',
  'nationals': 'WSH', 'nats': 'WSH', 'washington nationals': 'WSH',
  'rays': 'TB', 'tampa bay rays': 'TB',
  // Bare 'rangers' is already mapped to NYR in the NHL section, so TEX is
  // reachable only through these state-qualified keys.
  'texas rangers': 'TEX', 'texas': 'TEX',
};

// Alias keys ordered longest-first so multi-word aliases ("golden state warriors")
// are substituted before any shorter alias they contain ("warriors").
const TEAM_KEYS_SORTED = Object.keys(TEAM_ALIASES).sort((a, b) => b.length - a.length);

// One precompiled whole-word, case-insensitive matcher per alias, in
// TEAM_KEYS_SORTED order. Built once at module load instead of once per alias
// per call. The `g` flag is safe to share here because the matchers are only
// ever used with String.prototype.replace, which resets `lastIndex` itself.
const TEAM_ALIAS_MATCHERS = TEAM_KEYS_SORTED.map(alias => ({
  abbr: TEAM_ALIASES[alias],
  pattern: new RegExp(`\\b${alias.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'gi'),
}));

/**
 * Replaces every known team alias in `text` with its uppercase abbreviation.
 *
 * Aliases are applied one after another, longest first, each over the whole
 * string as rewritten by the previous aliases.
 *
 * @param text Query text to rewrite.
 * @returns `text` with aliases substituted, plus the distinct abbreviations
 *   that were inserted, in the order their aliases were applied (longest alias
 *   first, not order of appearance in the text).
 */
function normalizeTeamNames(text: string): { text: string; teams: string[] } {
  const teams: string[] = [];
  let result = text;

  for (const { abbr, pattern } of TEAM_ALIAS_MATCHERS) {
    let hits = 0;
    // A replacer function both performs the substitution and tells us whether
    // anything matched, without a separate stateful `.test()` on a global regex.
    result = result.replace(pattern, () => {
      hits += 1;
      return abbr;
    });
    if (hits > 0 && !teams.includes(abbr)) teams.push(abbr);
  }

  return { text: result, teams };
}

// --- Date normalization ---

/**
 * Returns the current calendar date in America/New_York as a `Date` set to
 * midnight in the host's local time zone.
 *
 * Only the local year/month/day fields of the result are meaningful; read them
 * with the local getters (`getFullYear`, `getMonth`, `getDate`), never via
 * `toISOString()`, which would shift the date on hosts east of UTC.
 */
function getETDate(): Date {
  // formatToParts avoids round-tripping through a locale-formatted string,
  // whose parsing by `new Date(string)` is implementation-defined.
  const parts = new Intl.DateTimeFormat('en-US', {
    timeZone: 'America/New_York',
    year: 'numeric',
    month: 'numeric',
    day: 'numeric',
  }).formatToParts(new Date());

  const fields: Record<string, number> = {};
  for (const part of parts) {
    if (part.type === 'year' || part.type === 'month' || part.type === 'day') {
      fields[part.type] = Number(part.value);
    }
  }
  return new Date(fields.year, fields.month - 1, fields.day);
}

/** Formats the local calendar fields of `date` as `YYYY-MM-DD`. */
function formatLocalDate(date: Date): string {
  const month = String(date.getMonth() + 1).padStart(2, '0');
  const day = String(date.getDate()).padStart(2, '0');
  return `${date.getFullYear()}-${month}-${day}`;
}

/**
 * Rewrites relative date words to `YYYY-MM-DD` (Eastern Time) and determines
 * the game date the query refers to.
 *
 * Game date precedence: a "tomorrow" word wins over a "today" word; a
 * "yesterday" word is used only when neither of those is present; otherwise the
 * first numeric `M/D` or `M-D` token with month 1-12 and day 1-31 is used with
 * the current ET year; otherwise today. Numeric tokens are not rewritten in the
 * returned text.
 *
 * @param text Query text (the relative-word patterns are case-insensitive).
 * @returns The rewritten text and the resolved game date. In this
 *   implementation `gameDate` is always a string.
 */
function normalizeDates(text: string): { text: string; gameDate: string | null } {
  const today = getETDate();
  // Day arithmetic and formatting both use local calendar fields, so the result
  // does not depend on the host's UTC offset or on DST transitions.
  const dayOffset = (days: number): string =>
    formatLocalDate(new Date(today.getFullYear(), today.getMonth(), today.getDate() + days));
  const todayStr = dayOffset(0);
  const tomorrowStr = dayOffset(1);
  const yesterdayStr = dayOffset(-1);

  let result = text;
  let gameDate: string | null = null;

  // Replace relative date references with canonical dates
  const todayPatterns = /\b(tonight|today|this evening|tonights|todays)\b/gi;
  const tomorrowPatterns = /\b(tomorrow|tomorrows|tmrw|tmw)\b/gi;
  const yesterdayPatterns = /\b(yesterday|yesterdays|last night)\b/gi;

  if (todayPatterns.test(result)) {
    result = result.replace(todayPatterns, todayStr);
    gameDate = todayStr;
  }
  if (tomorrowPatterns.test(result)) {
    result = result.replace(tomorrowPatterns, tomorrowStr);
    gameDate = tomorrowStr;
  }
  if (yesterdayPatterns.test(result)) {
    result = result.replace(yesterdayPatterns, yesterdayStr);
    gameDate = gameDate || yesterdayStr;
  }

  // Extract explicit numeric dates like "2/22" or "2-22" (month names are not recognized)
  const dateMatch = result.match(/\b(\d{1,2})[\/\-](\d{1,2})\b/);
  if (dateMatch && !gameDate) {
    const month = parseInt(dateMatch[1], 10);
    const day = parseInt(dateMatch[2], 10);
    if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
      gameDate = `${today.getFullYear()}-${String(month).padStart(2, '0')}-${String(day).padStart(2, '0')}`;
    }
  }

  if (!gameDate) gameDate = todayStr; // Default to today for sports queries

  return { text: result, gameDate };
}

// --- Market term canonicalization ---

/**
 * Betting-market and stat synonyms mapped to the canonical term used in
 * normalized query text. Some entries map a term to itself.
 */
const MARKET_TERMS: Record<string, string> = {
  'ats': 'spread', 'against the spread': 'spread', 'the line': 'spread', 'line': 'spread', 'point spread': 'spread',
  'o/u': 'total', 'over under': 'total', 'over/under': 'total', 'ou': 'total',
  'ml': 'moneyline', 'money line': 'moneyline',
  'pts': 'points', 'rebounds': 'rebounds', 'rebs': 'rebounds', 'reb': 'rebounds',
  'assists': 'assists', 'ast': 'assists', 'asts': 'assists',
  'threes': '3pm', '3 pointers': '3pm', 'three pointers': '3pm', '3pt': '3pm', '3s': '3pm',
  'steals': 'steals', 'stls': 'steals', 'stl': 'steals',
  'blocks': 'blocks', 'blks': 'blocks', 'blk': 'blocks',
  'tds': 'touchdowns', 'passing yards': 'pass_yards', 'rushing yards': 'rush_yards',
  'receiving yards': 'rec_yards', 'receptions': 'receptions', 'recs': 'receptions',
  'strikeouts': 'strikeouts', 'ks': 'strikeouts',
  'home runs': 'home_runs', 'hrs': 'home_runs', 'hr': 'home_runs',
  'hits': 'hits', 'rbis': 'rbi', 'rbi': 'rbi',
  'goals': 'goals', 'saves': 'saves', 'sog': 'shots_on_goal', 'shots on goal': 'shots_on_goal',
};

// Longest terms first so "money line" is rewritten before the bare "line" inside it.
const MARKET_KEYS_SORTED = Object.keys(MARKET_TERMS).sort((a, b) => b.length - a.length);

/**
 * Rewrites market/stat synonyms in `text` to their canonical term.
 *
 * Matching is whole-word and case-insensitive, except that a match written
 * entirely in uppercase is left untouched. In this pipeline the text has been
 * lowercased and then had uppercase team abbreviations inserted, so an
 * all-uppercase token is a team code, not a market term. Without this guard the
 * St. Louis abbreviation "STL" would be rewritten to "steals".
 *
 * @param text Query text, expected lowercased apart from team abbreviations.
 * @returns The text with market terms canonicalized.
 */
function canonicalizeMarketTerms(text: string): string {
  let result = text;
  for (const term of MARKET_KEYS_SORTED) {
    const canonical = MARKET_TERMS[term];
    const regex = new RegExp(`\\b${term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'gi');
    result = result.replace(regex, match => {
      // Has at least one cased letter and every cased letter is uppercase.
      const isUppercaseToken = match !== match.toLowerCase() && match === match.toUpperCase();
      return isUppercaseToken ? match : canonical;
    });
  }
  return result;
}

// --- Filler word removal ---

/**
 * Greetings, politeness phrases, question scaffolding and a few stop words that
 * do not change what a query is asking for. Applied in order; several patterns
 * are anchored to the start (`^`) or end (`$`) of the text.
 */
const FILLER_PATTERNS: readonly RegExp[] = [
  /^(hey|hi|hello|yo|sup)\b[,!]?\s*/i,
  /\b(can you|could you|would you|please|pls|plz)\b\s*/gi,
  /\b(tell me|show me|give me|get me|find me|look up|check)\b\s*/gi,
  /\b(what do you think about|what do you think of|what are your thoughts on)\b\s*/gi,
  /\b(i want to know|i need to know|i'm wondering|i was wondering)\b\s*/gi,
  /\b(do you have|do you know)\b\s*/gi,
  /\b(real quick|right now|rn|asap)\b\s*/gi,
  /^(so|ok|okay|alright|well)\b[,]?\s*/i,
  /\b(what are the|what is the|what's the|whats the|what are|what is|whats)\b\s*/gi,
  /\b(how about the|how about|how are the)\b\s*/gi,
  /\b(any|the|on)\b\s*/gi,
  /\b(for)\b\s*/gi,
  /\?+$/,  // trailing question marks
];

/**
 * Removes filler phrases from `text` and returns the remainder with whitespace
 * collapsed to single spaces and trimmed.
 *
 * Each match is replaced by a space so neighbouring words never fuse. The text
 * is re-collapsed and trimmed after every pattern: otherwise the space left by
 * an earlier removal sits at the start or end of the string and prevents the
 * later `^`- and `$`-anchored patterns from matching (e.g. the "so" in
 * "hey, so ..." or the "?" in "...? please").
 *
 * @param text Query text.
 * @returns The text without filler, single-spaced and trimmed.
 */
function removeFillerWords(text: string): string {
  const tidy = (s: string): string => s.replace(/\s+/g, ' ').trim();

  let result = tidy(text);
  for (const pattern of FILLER_PATTERNS) {
    result = tidy(result.replace(pattern, ' '));
  }
  return result;
}

// --- League detection ---

/**
 * Keyword patterns per league, tested in declaration order; the first match
 * wins. More specific leagues come first: the `nba` and `nfl` patterns match the
 * bare words "basketball" and "football", so they must be tried after `ncaab`
 * and `ncaaf`, whose phrases ("college basketball", "ncaa football") contain
 * those words.
 */
const LEAGUE_PATTERNS: Record<string, RegExp> = {
  'ncaab': /\b(ncaab|ncaa basketball|college basketball|march madness|cbb)\b/i,
  'ncaaf': /\b(ncaaf|ncaa football|college football|cfb)\b/i,
  'wnba': /\b(wnba)\b/i,
  'nba': /\b(nba|basketball)\b/i,
  'nfl': /\b(nfl|football)\b/i,
  'nhl': /\b(nhl|hockey)\b/i,
  'mlb': /\b(mlb|baseball)\b/i,
  'epl': /\b(epl|premier league|english premier)\b/i,
  'mls': /\b(mls|major league soccer)\b/i,
};

/**
 * Returns the key of the first league pattern that matches `text`.
 *
 * @param text Query text (patterns are case-insensitive).
 * @returns The league key, or `null` when no pattern matches.
 */
function detectLeague(text: string): string | null {
  for (const [league, pattern] of Object.entries(LEAGUE_PATTERNS)) {
    if (pattern.test(text)) return league;
  }
  return null;
}

// --- Intent classification ---

/**
 * Keyword patterns per intent category, tested in declaration order; the first
 * match wins, so a query mentioning both a prop stat and "spread" is `props`.
 */
const INTENT_PATTERNS: Record<string, RegExp> = {
  'props': /\b(prop|props|player prop|over|under|points|rebounds|assists|strikeouts|touchdowns|passing|rushing|receiving|3pm|receptions|goals|saves|shots)\b/i,
  'odds': /\b(odds|spread|moneyline|total|line|lines|money line|point spread|ats|o\/u|ml|juice|vig)\b/i,
  'scores': /\b(score|scores|final|result|results|won|lost|win|lose|beat|beating)\b/i,
  // `injur\w*` matches the whole word family (injury, injuries, injured). A bare
  // `injur` between `\b` anchors would only match the literal word "injur".
  'injury': /\b(injur\w*|out|questionable|doubtful|probable|gtd|game time decision|status|health|hurt)\b/i,
  'analysis': /\b(analysis|analyze|pick|picks|prediction|predictions|predict|best bet|best bets|value|fade|lean|like|lock|parlay|teaser|same game parlay|sgp)\b/i,
};

/**
 * Returns the first intent category whose pattern matches `text`.
 *
 * @param text Query text (patterns are case-insensitive).
 * @returns The category key, or `'general'` when no pattern matches.
 */
function classifyIntent(text: string): string {
  for (const [intent, pattern] of Object.entries(INTENT_PATTERNS)) {
    if (pattern.test(text)) return intent;
  }
  return 'general';
}

// --- Main normalization pipeline ---

/** Result of running a raw query through `normalizeQuery`. */
export interface NormalizedQuery {
  /** The raw query exactly as it was passed in. */
  original: string;
  /** Query text after metadata stripping, lowercasing, team/date/market rewriting and filler removal. */
  normalized: string;
  /** Hex SHA-256 of `agent:normalized:category:gameDate`, with `:gameState` appended when one is supplied. */
  cacheKey: string;
  /** Intent category from `classifyIntent`; `'general'` when no intent pattern matches. */
  category: string;
  /** League key detected in the lowercased, metadata-stripped query, or `null` if none matched. */
  league: string | null;
  /** Distinct team abbreviations substituted into the text by `normalizeTeamNames`. */
  teams: string[];
  /** Game date (`YYYY-MM-DD`) resolved by `normalizeDates`. */
  gameDate: string | null;
  /** First 16 hex characters of the MD5 of `gameDate:category:league`. */
  dataVersion: string | null;
}

/**
 * Runs a raw chat query through the full normalization pipeline and derives a
 * stable cache key for it.
 *
 * Pipeline: strip Telegram metadata -> lowercase and collapse whitespace ->
 * team aliases -> relative dates -> market terms -> filler words. The league is
 * detected on the lowercased, metadata-stripped query (before any rewriting);
 * the intent category is classified on the fully rewritten text.
 *
 * @param rawQuery Raw message content.
 * @param agent Agent identifier; the first component of the cache key.
 * @param gameState Optional game-state tag; appended to the cache key when truthy.
 * @returns The normalized query, or `null` when the message carries no user
 *   text or the normalized text is shorter than 3 characters.
 */
export function normalizeQuery(rawQuery: string, agent: string, gameState?: string): NormalizedQuery | null {
  const stripped = stripTelegramMetadata(rawQuery);
  if (!stripped) return null;

  const collapse = (s: string): string => s.replace(/\s+/g, ' ').trim();
  const lowered = stripped.toLowerCase();

  // Rewriting stages, each feeding the next.
  const { text: withTeams, teams } = normalizeTeamNames(collapse(lowered));
  const { text: withDates, gameDate } = normalizeDates(withTeams);
  const reduced = removeFillerWords(canonicalizeMarketTerms(withDates));

  // League keywords may be removed or rewritten by the stages above, so detect
  // on the untouched lowercase text; intent is read from the rewritten text.
  const league = detectLeague(lowered);
  const category = classifyIntent(reduced);

  const normalized = collapse(reduced);
  if (!normalized || normalized.length < 3) return null;

  // Cache key: category and game date are part of the key for scope isolation.
  const dateScope = gameDate || '';
  const keyParts = gameState
    ? [agent, normalized, category, dateScope, gameState]
    : [agent, normalized, category, dateScope];
  const cacheKey = createHash('sha256').update(keyParts.join(':')).digest('hex');

  // Data version: short hash of date + category + league for freshness tracking.
  const dataVersion = createHash('md5')
    .update([dateScope, category, league || ''].join(':'))
    .digest('hex')
    .slice(0, 16);

  return {
    original: rawQuery,
    normalized,
    cacheKey,
    category,
    league,
    teams,
    gameDate,
    dataVersion,
  };
}
