
    ]i                     \   d Z ddlZddlZddlmZ ddlmZmZmZ  ed      Z	e	dz  dz  Z
e	dz  d	z  Zi d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9Zd:ed;efd<Zd;ee   fd=Zd;ee   fd>Zd?ed;ee   fd@ZdA ZedBk(  r e        yy)Ca  Build/merge betting/mlb_historical.json from Kaggle oddsDataMLB.csv.

Source:
  data/kaggle/oddsDataMLB.csv

Important:
- Kaggle file is team-perspective, not explicit home/away.
- To safely ingest without guessing venue, we create ONE canonical row per game:
  key = (date, min(team_code, opponent_code), max(team_code, opponent_code))
  and keep the Kaggle row where team_code == min(team_code, opponent_code).

This yields consistent, non-duplicated historical games suitable for most
odds-based strategies (favorite/underdog, spreads, totals), but "home" is
an arbitrary orientation, not true venue.

We merge into existing betting/mlb_historical.json by (date, homeTeam, awayTeam)
(normalized), filling missing odds fields; and we ADD new Kaggle historical games.
    N)Path)DictTupleOptionalz/var/www/html/eventheodds/datakagglezoddsDataMLB.csvbettingzmlb_historical.jsonARIzArizona DiamondbacksATLzAtlanta BravesBALzBaltimore OriolesBOSzBoston Red SoxCHCzChicago CubsCWSzChicago White SoxCINzCincinnati RedsCLEzCleveland GuardiansCOLzColorado RockiesDETzDetroit TigersHOUzHouston AstrosKCzKansas City RoyalsLAAzLos Angeles AngelsLADzLos Angeles DodgersMIAzMiami MarlinsMILzMilwaukee BrewersMINzMinnesota TwinszNew York MetszNew York YankeeszOakland AthleticszPhiladelphia PhillieszPittsburgh PirateszSan Diego PadreszSeattle MarinerszSan Francisco GiantszSt. Louis CardinalszTampa Bay RayszTexas RangerszToronto Blue JayszWashington Nationals)NYMNYYOAKPHIPITSDSEASFSTLTBTEXTORWSHsreturnc                     dj                  | xs dj                         j                         j                               S )N  )joinstriplowersplit)r'   s    E/var/www/html/eventheodds/scripts/build_mlb_historical_from_kaggle.pynormr1   @   s.    88QW"OO%++-33566    c                     | y t        |       j                         }|dk(  s|j                         dv ry 	 t        t	        |            S # t
        $ r Y y w xY wNr+   )nanan)strr-   r.   intfloat	Exceptionxr'   s     r0   to_intr=   D   sU    yAABw!'')|+58} s   A
 
	AAc                     | y t        |       j                         }|dk(  s|j                         dv ry 	 t        |      S # t        $ r Y y w xY wr4   )r7   r-   r.   r9   r:   r;   s     r0   to_floatr?   P   sP    yAABw!'')|+Qx s   
A 	AArowc                    | j                  d      xs dj                         }t        | j                  d            xs |rt        |d d       nd }| j                  d      xs dj                         }| j                  d      xs dj                         }|r|r|r|sy |}|}t        j                  ||      }t        j                  ||      }t        | j                  d            }	t        | j                  d            }
|	|
y t        | j                  d	            }t        | j                  d
            }t        | j                  d            }t        | j                  d            }t        | j                  d            }t        d |||||fD              }|	|
kD  rd}n
|
|	kD  rd}nd}d }||	|
z
  }||z   }|dkD  rd}n
|dk  rd}nd}d }||	|
z   }||kD  rd}n
||k  rd}nd}d| dt        |      j                  dd       dt        |      j                  dd       }|d d|||||	|
d|||||ddt        |      ||||	|
z
  |	|
z   dd d!id"S )#Ndater+   season   teamopponentrunsoppRuns	moneyLineoppMoneyLinerunLine
oppRunLinetotalc              3   $   K   | ]  }|d u 
 y wN ).0vs     r0   	<genexpr>z&build_game_from_row.<locals>.<genexpr>u   s     ^Q1D=^s   homeawaydrawr   pushoverunderzkaggle-mlb-neutral--r*   _z-at-mlb)	homeScore	awayScorekaggle_neutral)moneylineHomemoneylineAway
spreadHome
spreadAway	totalLinesource)winnerspreadCoveredtotalResultmargintotalPointsnotezVKaggle MLB oddsDataMLB ingested with neutral orientation; home/away is not true venue.)idbdl_game_idsportrB   rC   homeTeamawayTeamscoresoddshasRealOddsresultmeta)
getr-   r=   r8   CODE_TO_NAMEr?   anyr1   replacebool)r@   rB   rC   rE   opp	home_code	away_coderT   rU   shsaml_homeml_awayspread_homespread_awayrM   has_realrf   spread_coveredri   adjtotal_resulttpgids                           r0   build_game_from_rowr   \   s   GGFO!r((*DCGGH%&KD3tBQx=dFGGFO!r((*D77:$"
+
+
-Ct3fIIIy1DIy1D		 B		"	#B	zRZSWW[)*GSWW^,-G3779-.K377<01KSWWW%&E^7G[+W\*]^^H 
Bw	bNb{"7#N1W#N#NL"W:!L%Z"L!LvQtDz'9'9#c'B&C4T
HZHZ[^_bHcGd
eC  "4$$%%&
 H~+'2g7
 l
3 r2   c                     t         j                         st        dt                g } t        j                         r$	 t	        j
                  t        t        d            } i }| D ]c  }|j                  d      xs dt        |j                  dd            t        |j                  dd            f}|d   sS|d   sY|d	   s_|||<   e d}d}d}i }t        t         dd
      5 }t        j                  |      }	|	D ]  }
|
j                  d      xs dj                         }|
j                  d      xs dj                         }|
j                  d      xs dj                         }|r|r|sst        ||g      \  }}|||f}||k7  r|
||<    	 d d d        |j                         D ]I  \  \  }}}}
t        |
      }|s|j                  d      xs dt        |j                  dd            t        |j                  dd            f}||v r||   }|j                  d      xs i |j                  d      xs i }dD ]:  }j                  |      |j                  |      '|j                  |      |<   < j                  d      dv r|j                  dd      d<   |d<   t!        |j                  d      xs t#        fddD                    |d<   |dz  }n
|||<   |dz  }|dz  }L t%        |j'                               }|j)                  d        t        j*                  j-                  dd       t	        j.                  |t        t        d      d	       t1        d |D              }t3        dt                t3        dt5        |        d| d | d!|        t3        d"t5        |       d#| d$|r|t5        |      z  d%z  ndd&d'       y # t        $ r g } Y w xY w# 1 sw Y   cxY w)(NzMissing rrB   r+   ro   rp   r         zutf-8)newlineencodingrE   rF   rr   )r`   ra   rb   rc   rd   re   )Nestimated_DO_NOT_BACKTESTr_   rs   c              3   D   K   | ]  }j                  |      d u  y wrO   rv   )rQ   k2cur_oddss     r0   rS   zmain.<locals>.<genexpr>   s*       DgegHLLQSDT\`D`  Dgs    )r`   ra   rb   rd   c                 &    | j                  dd      S )NrB   r+   r   )r<   s    r0   <lambda>zmain.<locals>.<lambda>   s    155+ r2   )keyT)parentsexist_okw)indentc              3   D   K   | ]  }|j                  d       sd  yw)rs   r   Nr   )rQ   gs     r0   rS   zmain.<locals>.<genexpr>   s     6Q}!5q6s     zMerged MLB Kaggle into z
Existing: z | Kaggle canonical games: z
 | Added: z | Updated: zTotal: z | hasRealOdds: z (d   z.1fz%))SRCexists
SystemExitOUTjsonloadopenr:   rv   r1   csv
DictReaderr-   sorteditemsr   rz   rx   listvaluessortparentmkdirdumpsumprintlen)existingidxr   r   addedupdatedkeptseenfr   r@   rB   rE   r{   abkreccurnew_oddsr   outrealr   s                          @r0   mainr      s   ::<8C5)**H
zz|	yyc30H
 -/C uuV}"Dz")=$>QUU:VXEY@Z[q6c!fQCH
 EG D-/D	c2	0 ANN1 	CGGFO)r002DGGFO)r002D77:&,"335Ct34+&DAqq!AqyDG	 "ZZ\ q!c!#&wwv$"d377:b+A&BDQ[\^I_D`a#:c(Cwwv,"Hwwv,"H] 4<<#+R0@0L#+<<#3HRL4 ||H%)LL%-\\(;K%L""CK!%cggm&<  'g  Dg  lf  Dg  Ag  "hCqLGCHQJE	+. szz|
CHH+H,JJTD1IIc4S>!,6#66D	#C5
)*	Js8}o%@jQVPWWcdkcl
mn	GCH:-dV2StCH}S7HVWX[5\\^
_`A  	H	" s   #O# &B-O5#O21O25O?__main__)__doc__r   r   pathlibr   typingr   r   r   BASEr   r   rw   r7   r1   r8   r=   r9   r?   dictr   r   __name__rP   r2   r0   <module>r      s  &    ( (,-
Xo))
Y..	!	 
 
	
 
> 
 
 
  
 
 
 	
 
 
  
?  
!" 
#$ "! !=D7C 7C 7	# 		8E? 	VT Vhtn VrHaV zF r2   