// CLE QA Validator — Scans user-facing strings for banned terminology.
// Usage: npx tsx scripts/cle-qa-check.ts
// Scans src/ and backend/src/ for: "betting line(s)", "bettor(s)", "sportsbook(s)", "betting"

import fs from 'fs';
import path from 'path';

// Parent of the directory that contains this script. Scan roots are joined onto
// it and reported file paths are made relative to it.
const ROOT = path.resolve(__dirname, '..');

// Glob-style patterns describing the frontend sources in scope.
// NOTE(review): nothing in the visible code reads this constant — the main
// section passes an empty pattern list to globFiles(), which walks whole
// directories. Also, 'src/lib/**/*.ts' is already covered by 'src/**/*.ts'.
const SCAN_PATTERNS = [
  'src/**/*.tsx',
  'src/**/*.ts',
  'src/lib/**/*.ts',
];

// Specific backend files listed as being in scope.
// NOTE(review): nothing in the visible code reads this constant — the main
// section scans every .ts/.tsx file under backend/src, not just these paths.
const BACKEND_PATTERNS = [
  'backend/src/services/grok.ts',
  'backend/src/services/blog-generator.ts',
  'backend/src/routes/forecast.ts',
  'backend/src/workers/daily-forecasts.ts',
  'backend/src/workers/backfill-odds.ts',
];

/** One occurrence of a banned or discouraged term found on a source line. */
interface Violation {
  /** Path of the offending file, relative to ROOT. */
  file: string;
  /** 1-based line number within the file. */
  line: number;
  /** Human-readable label of the rule that matched (e.g. "bettor(s)"). */
  term: string;
  /** The offending line, trimmed and truncated to 120 characters. */
  text: string;
  /** 'error' for BANNED_TERMS hits, 'warning' for WARN_TERMS hits. */
  severity: 'error' | 'warning';
  /** Replacement wording shown in the report. */
  suggestion: string;
}

// Terms that are always banned in user-facing strings (reported as errors).
//
// The patterns deliberately omit the `g` flag: they are only used for yes/no
// matching, and a global regex keeps a `lastIndex` cursor between `.test()`
// calls, which makes a match on one line cause a miss on the next.
const BANNED_TERMS: Array<{ pattern: RegExp; term: string; suggestion: string }> = [
  { pattern: /betting\s+lines?/i, term: 'betting line(s)', suggestion: 'current market(s)' },
  { pattern: /\bbettors?\b/i, term: 'bettor(s)', suggestion: 'market speculator(s)' },
];

// Terms that are flagged as warnings (contextual — may be acceptable in places).
//
// The patterns deliberately omit the `g` flag: they are only used for yes/no
// matching, and a global regex keeps a `lastIndex` cursor between `.test()`
// calls, which makes a match on one line cause a miss on the next.
const WARN_TERMS: Array<{ pattern: RegExp; term: string; suggestion: string }> = [
  { pattern: /\bsportsbooks?\b/i, term: 'sportsbook(s)', suggestion: 'market / market venues' },
  { pattern: /\bbetting\b/i, term: 'betting', suggestion: 'investing / market interest / positioning' },
];

// Lines matching these patterns are whitelisted (CLE instructions, comments about rules).
// A line that matches ANY entry is skipped entirely — no banned or warning
// term on it is reported.
// NOTE(review): the first entry is case-insensitive, so any line containing
// "avoid" in any casing is exempt; "AVOID or minimize" is already covered by
// the bare "AVOID" alternative. Confirm that breadth is intended.
const WHITELIST_PATTERNS = [
  /AVOID|NEVER USE|LANGUAGE RULE|AVOID or minimize|NEVER include/i,
  /profile\.\w+\s*=|answers\.\w+/,  // survey/user data fields
  /eslint-disable/,
  /\/\/ Sportsbook$/,  // survey section header comment (must be the end of the line)
  /\/\/ .* Betting Routine/,  // survey section header comment
  /bet_style|risk_tolerance|bankroll_band|'sportsbook'/,  // DB field names
  /queries\.push\(/,  // Twitter search queries (must use real terms to find tweets)
  /engaging with sports/,  // Twitter worker description
  /Searches for sports forecast/,  // Twitter engagement description
  /Seed today.*sportsbook/,  // Seed worker comment
  /instead of "betting/,  // CLE instruction examples
  /instead of "betting signals"/,  // CLE instruction examples (already matched by the entry above)
  /Always use ".*" for .* references/,  // CLE LANGUAGE RULE instructions
];

/**
 * Recursively collects every `.ts` / `.tsx` file beneath `baseDir`.
 *
 * Directories named `node_modules`, `.next` or `dist` are not descended into.
 * Entries are visited in name order so the resulting list (and therefore the
 * report built from it) is stable across platforms and runs.
 *
 * @param baseDir  Directory to walk. If it does not exist or is not a
 *                 directory, a warning is printed and an empty list is
 *                 returned instead of throwing, so one missing scan root does
 *                 not abort the whole check.
 * @param patterns Accepted for interface compatibility only; no glob filtering
 *                 is applied — every `.ts` / `.tsx` file found is returned.
 * @returns Paths of the matching files, each prefixed with `baseDir`.
 */
function globFiles(baseDir: string, patterns: string[]): string[] {
  const files: string[] = [];
  const skippedDirs = ['node_modules', '.next', 'dist'];

  if (!fs.existsSync(baseDir) || !fs.statSync(baseDir).isDirectory()) {
    console.warn(`CLE QA: skipping missing directory ${baseDir}`);
    return files;
  }

  function walkDir(dir: string) {
    const entries = fs
      .readdirSync(dir, { withFileTypes: true })
      .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (skippedDirs.includes(entry.name)) continue;
        walkDir(fullPath);
      } else if (entry.isFile() && (entry.name.endsWith('.ts') || entry.name.endsWith('.tsx'))) {
        files.push(fullPath);
      }
    }
  }

  walkDir(baseDir);
  return files;
}

/**
 * Tests `line` against `pattern` independently of any previous match.
 *
 * A regex carrying the `g` (or `y`) flag remembers where its last match ended
 * in `lastIndex` and resumes from there on the next `.test()` call, even when
 * given a different string. Resetting the cursor before and after keeps shared
 * rule patterns safe to reuse across lines.
 */
function lineMatches(pattern: RegExp, line: string): boolean {
  pattern.lastIndex = 0;
  const found = pattern.test(line);
  pattern.lastIndex = 0;
  return found;
}

/**
 * Scans one source file line by line for banned and discouraged terminology.
 *
 * - Files with a `dist` path segment (relative to ROOT) are skipped unread.
 * - Lines matching any WHITELIST_PATTERNS entry are skipped.
 * - Each BANNED_TERMS rule that matches a line yields one 'error' violation.
 * - Each WARN_TERMS rule that matches a line yields one 'warning' violation,
 *   except that the generic "betting" warning is suppressed when the line
 *   contains "betting line(s)" (reported as an error instead) or "moneyline".
 *
 * @param filePath Path of the file to read (UTF-8).
 * @returns Violations in line order; at most one per rule per line.
 * @throws Whatever `fs.readFileSync` throws if the file cannot be read.
 */
function scanFile(filePath: string): Violation[] {
  const violations: Violation[] = [];
  const relPath = path.relative(ROOT, filePath);

  // Skip dist/ files. Compare path segments rather than searching for
  // "/dist/" so the check also works where the separator is "\".
  if (relPath.split(path.sep).includes('dist')) return violations;

  const content = fs.readFileSync(filePath, 'utf-8');
  // Accept both LF and CRLF endings; a trailing "\r" would otherwise defeat
  // whitelist patterns anchored with "$".
  const lines = content.split(/\r?\n/);

  lines.forEach((line, idx) => {
    // Skip whitelisted lines
    if (WHITELIST_PATTERNS.some(wp => lineMatches(wp, line))) return;

    const lineNum = idx + 1;
    const text = line.trim().substring(0, 120);
    const hasMoneyline = /moneyline/i.test(line);
    const hasBettingLine = /betting\s+lines?/i.test(line);

    // Check banned terms (always error)
    for (const bt of BANNED_TERMS) {
      if (!lineMatches(bt.pattern, line)) continue;

      // "moneyline" is acceptable vocabulary: only flag "betting line" when it
      // still appears once every "moneyline" has been removed from the line.
      if (bt.term === 'betting line(s)' && hasMoneyline) {
        const stripped = line.replace(/moneyline/gi, '');
        if (!lineMatches(bt.pattern, stripped)) continue;
      }

      violations.push({
        file: relPath,
        line: lineNum,
        term: bt.term,
        text,
        severity: 'error',
        suggestion: bt.suggestion,
      });
    }

    // Check warning terms
    for (const wt of WARN_TERMS) {
      if (!lineMatches(wt.pattern, line)) continue;

      // The generic "betting" warning is redundant next to a "betting line(s)"
      // error, and is intentionally not raised on lines mentioning "moneyline".
      if (wt.term === 'betting' && (hasBettingLine || hasMoneyline)) continue;

      violations.push({
        file: relPath,
        line: lineNum,
        term: wt.term,
        text,
        severity: 'warning',
        suggestion: wt.suggestion,
      });
    }
  });

  return violations;
}

// Main
// Scan every .ts/.tsx file under src/ and backend/src/ (relative to ROOT).
const allFiles = globFiles(path.join(ROOT, 'src'), [])
  .concat(globFiles(path.join(ROOT, 'backend/src'), []));

const allViolations: Violation[] = [];
for (const file of allFiles) {
  allViolations.push(...scanFile(file));
}

// Partition once; the counts and the report below share these lists.
const errors = allViolations.filter(v => v.severity === 'error');
const warnings = allViolations.filter(v => v.severity === 'warning');
const totalErrors = errors.length;
const totalWarnings = warnings.length;

// Output
console.log('\n=== CLE QA Check ===\n');

if (allViolations.length === 0) {
  console.log('PASS: No CLE violations found.\n');
} else {
  if (errors.length > 0) {
    console.log(`ERRORS (${errors.length}):`);
    for (const v of errors) {
      console.log(`  ${v.file}:${v.line}  "${v.term}" → use "${v.suggestion}"`);
      console.log(`    ${v.text}`);
    }
    console.log();
  }

  if (warnings.length > 0) {
    console.log(`WARNINGS (${warnings.length}):`);
    for (const v of warnings) {
      console.log(`  ${v.file}:${v.line}  "${v.term}" → consider "${v.suggestion}"`);
      console.log(`    ${v.text}`);
    }
    console.log();
  }

  console.log(`Summary: ${totalErrors} error(s), ${totalWarnings} warning(s)`);
}

// Only errors fail the check; warnings are informational.
// Set the exit code instead of calling process.exit(): a forced exit can cut
// off report output that is still being flushed when stdout is a pipe (as in
// CI), whereas exitCode lets the process end naturally once writes complete.
process.exitCode = totalErrors > 0 ? 1 : 0;