
    :jiz-                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZ  ed      Z ej                         dz  dz  dz  Z	de
d	e
d
efdZde
fdZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zedk(  r e        yy)    N)datetime)Pathz)/var/www/html/eventheodds/data/kaggle/ufcz.cache	kagglehubdatasetsdataset_ownerdataset_namereturnc                 F   t         | z  |z  dz  }|j                         syt        |j                         D cg c]C  }|j	                         s|j
                  j                         s/t        |j
                        E c}d      }|r|t        |d         z  S dS c c}w )z+Find the latest version of a Kaggle datasetversionsNT)reverser   )	KAGGLE_CACHEexistssortediterdiris_dirnameisdigitintstr)r   r   basevr   s        9/var/www/html/eventheodds/scripts/normalize_kaggle_ufc.pylatest_version_pathr      s~    -',6CD;;=DLLN^qahhjQVV^^M]s166{^hlmH&.4#hqk""8D8 _s   BB)Bvalc                 .   | sy | j                         } dD ]n  }	 t        j                  | j                  d      d   j                  d      d   |j	                  dd      j	                  dd            }|j                         c S  y # t        $ r Y }w xY w)N)z%Y-%m-%dz%Y/%m/%dz%d/%m/%Yz%Y-%m-%d %H:%M:%Sz%Y-%m-%dT%H:%M:%S%zz%Y-%m-%dT%H:%M:%SZ+r   Zz%z )stripr   strptimesplitreplace	isoformat	Exception)r   fmtdts      r   
parse_dater'      s    
))+Cu 	""399S>!#4#:#:3#?#BCKKPTVXDYDaDabegiDjkB<<>!   		s   A)B	BBc                     | | dk(  ryt        | t        t        f      rt        |       S t        |       j	                  dd      }d| dS )Nr   NULL'z'')
isinstancer   floatr   r"   )r   ss     r   	sql_valuer.   $   sJ    
{cRi#U|$3xCd#Aqc8O    c                 J    	 t        t        |             S # t        $ r Y y w xY wN)r   r,   r$   r   s    r   to_intr3   -   s'    5: s    	""c                 8    	 t        |       S # t        $ r Y y w xY wr1   )r,   r$   r2   s    r   to_floatr5   4   s#    Sz s   
 	c                  d   t         j                  j                  dd      } | r| S d}t         j                  j	                  |      r_t        |dd      5 }|D ]A  }|j                  d      s|j                  dd	      d	   j                         c cd d d        S  	 d d d        yy# 1 sw Y   yxY w)
NSPORTS_DATABASE_URLr   z/var/www/html/eventheodds/.envrutf-8encodingzSPORTS_DATABASE_URL==   )	osenvirongetpathr   open
startswithr!   r   )db_urlenv_pathflines       r   load_env_db_urlrH   ;   s    ZZ^^126F/H	ww~~h(C'2 	9a 9??#9:::c1-a06688	9 	99	9 		9 s   B&,$B&B&&B/c                    t               }d|v r|j                  d      d   n|}|st        d      d}t        |dd      5 }|j	                  |        d d d        t        j                  d|d	|gt
        j                  t
        j                  d
d      }|j                  dk7  r/|j                  xs dj                         }t        d|d d        y # 1 sw Y   xY w)Nz?schema=r   zSPORTS_DATABASE_URL not setz/tmp/ufc_kaggle_batch.sqlwr9   r:   psqlz-fTi,  )stdoutstderrtexttimeoutr   zpsql failed:   )rH   r!   RuntimeErrorrB   write
subprocessrunPIPE
returncoderM   r   )sqlrD   tmp_pathrF   resulterrs         r   run_sqlr[   H   s    F,6&,@V\\*%a(fF899*H	hg	. !	^^	x(F A}}"))+]3t9+677  s   CCc           	          |sy dj                  fd|D              }dj                  d D              }d|  d| d| d| d		}t        |       t        d
t        |       d|         y )Nz,
c              3   `   K   | ]$  d dj                  fdD              z   dz    & yw)(, c              3   R   K   | ]  }t        j                  |               y wr1   )r.   r@   ).0colrows     r   	<genexpr>z)batch_insert.<locals>.<genexpr>.<genexpr>`   s     CC	#''#,/Cs   $')N)join)ra   rc   columnss    @r   rd   zbatch_insert.<locals>.<genexpr>_   s2       	diiC7CCCcIs   *.r_   c              3   (   K   | ]
  }d | d   yw)"N )ra   cs     r   rd   zbatch_insert.<locals>.<genexpr>c   s     3a1QCq3s   zINSERT INTO "z" (z	) VALUES

;z[ufc] Inserted z rows into )rf   r[   printlen)tablerg   
batch_rowsconflict_clause
values_sqlcols_sqlrW   s    `     r   batch_insertru   \   s}      J yy3733H%H:Z
|2oEVVW
XCCL	OC
O,Kw
?@r/   c                     t        dd      } | st        d       y | dz  dz  }|j                         st        d|        y t        d|        g }t        |dd	
      5 }t	        j
                  |      }|D ]v  }|j                  d      }|s|j                  ||j                  d      t        |j                  d            |j                  d      t        j                  |      d       x 	 d d d        t        dt        |              t        dt        |      d      D ]  }|||dz    }t        dg d|d        y # 1 sw Y   TxY w)N
aminealibi&ufc-fights-fighters-and-events-datasetz[ufc] Events dataset not founddataz
Events.csvz[ufc] Events.csv not found at z[ufc] events_csv=r8   r9   r:   Event_IdNameDateLocation)eventIdr   datelocationrawz[ufc] events rows=r   rP   UfcEventzON CONFLICT ("eventId") DO UPDATE SET name=EXCLUDED.name, date=EXCLUDED.date, location=EXCLUDED.location, raw=EXCLUDED.raw, "updatedAt"=NOW())r   rn   r   rB   csv
DictReaderr@   appendr'   jsondumpsro   rangeru   )	rA   csv_pathrowsrF   readerr8   event_idibatchs	            r   ingest_eventsr   i   sP   |-UVD./f}|+H??.xj9:	hZ
()D	hg	. !" 
	AuuZ(HKK#f"155=1EE*-zz!} 	
	 
s4yk
*+1c$i% 
Qqu: \		

 s    BEEc                     t        dd      } | st        d       y | dz  dz  }|j                         st        d|        y t        d|        g }t        |dd	
      5 }t	        j
                  |      }|D ]  }|j                  d      }|s|j                  dd      }d }|r[	 d|v rV|j                  dd      j                  d      }	t        |	d         dz  t        |	      dkD  r|	d   rt        |	d         ndz   }|j                  ||j                  d      |j                  d      dk7  r|j                  d      nd |t        |j                  d            t        |j                  d            |j                  d      t        |j                  d            t        |j                  d            t        |j                  d            |j                  d      dk(  r|j                  d      nd t        j                  |      d        	 d d d        t        dt        |              t!        dt        |      d       D ]  }
||
|
d z    }t#        d!g d|d"        y #  Y mxY w# 1 sw Y   \xY w)#Nrw   rx   z [ufc] Fighters dataset not foundry   zFighters.csvz [ufc] Fighters.csv not found at z[ufc] fighters_csv=r8   r9   r:   
Fighter_IdzHt.r   r*   ri   r      r=   z	Full NameNicknamezNo NicknamezWt.ReachStanceWLDBeltTrue)	fighterIdr   nicknameheightInweightLbreachInstancewinslossesdrawsbeltr   z[ufc] fighters rows=rP   
UfcFighteraA  ON CONFLICT ("fighterId") DO UPDATE SET name=EXCLUDED.name, nickname=EXCLUDED.nickname, heightIn=EXCLUDED."heightIn", weightLb=EXCLUDED."weightLb", reachIn=EXCLUDED."reachIn", stance=EXCLUDED.stance, wins=EXCLUDED.wins, losses=EXCLUDED.losses, draws=EXCLUDED.draws, belt=EXCLUDED.belt, raw=EXCLUDED.raw, "updatedAt"=NOW())r   rn   r   rB   r   r   r@   r"   r!   r   ro   r   r5   r3   r   r   r   ru   )rA   r   r   rF   r   r8   
fighter_id
height_str	height_inpartsr   r   s               r   ingest_fightersr      sV   |-UVD01f}~-H??0
;<	z
*+D	hg	. !" 	A|,Jub)JIj( * 2 23 ; A A# F$'aMB$63u:XY>^cde^f#eAh-lm$n	 KK'k*12z1Bm1SAEE*-Y]%$QUU5\2#AEE'N3%%/quuSz* s,c
+)*v&)@fdzz!} 	@ 
 T
,-1c$i% 
Qqu E P		

' s'    AI1'AI)DI1)I.+I11I:c                     t        dd      } | st        d       y | dz  dz  }|j                         st        d|        y t        d|        g }t        |dd	
      5 }t	        j
                  |      }|D ]  }|j                  d      }|s|j                  ||j                  d      |j                  d      |j                  d      |j                  d      |j                  d      |j                  d      |j                  d      t        |j                  d            |j                  d      |j                  d      |j                  d      |j                  d      |j                  d      t        j                  |      d        	 d d d        t        dt        |              t        dt        |      d      D ]  }|||dz    }t        dg d|d        y # 1 sw Y   TxY w)Nrw   rx   z[ufc] Fights dataset not foundry   z
Fights.csvz[ufc] Fights.csv not found at z[ufc] fights_csv=r8   r9   r:   Fight_Idrz   Fighter_Id_1Fighter_Id_2	Fighter_1	Fighter_2Weight_ClassMethodRound
Fight_TimeResult_1Result_2RefereezMethod Details)fightIdr~   
fighter1Id
fighter2Idfighter1Namefighter2NameweightClassmethodround	fightTimeresult1result2refereemethodDetailsr   z[ufc] fights rows=r   rP   UfcFighta  ON CONFLICT ("fightId") DO UPDATE SET "eventId"=EXCLUDED."eventId", "fighter1Id"=EXCLUDED."fighter1Id", "fighter2Id"=EXCLUDED."fighter2Id", "fighter1Name"=EXCLUDED."fighter1Name", "fighter2Name"=EXCLUDED."fighter2Name", "weightClass"=EXCLUDED."weightClass", method=EXCLUDED.method, round=EXCLUDED.round, "fightTime"=EXCLUDED."fightTime", result1=EXCLUDED.result1, result2=EXCLUDED.result2, referee=EXCLUDED.referee, "methodDetails"=EXCLUDED."methodDetails", raw=EXCLUDED.raw, "updatedAt"=NOW())r   rn   r   rB   r   r   r@   r   r3   r   r   ro   r   ru   )	rA   r   r   rF   r   r8   fight_idr   r   s	            r   ingest_fightsr      s   |-UVD./f}|+H??.xj9:	hZ
()D	hg	. !" 	AuuZ(HKK#55,eeN3eeN3 !k 2 !k 2 uu^4%%/g/UU<055,55,55+!"'7!8zz!} 		0 
s4yk
*+1c$i% 
Qqu G |		

3 s    D3G''G0c                     t        dd      } | st        d       y | dz  }|j                         st        d|        y t        d|        g }t        |dd	      5 }t	        j
                  |      }|D ]  }|j                  d
      }|j                  d      }t        |j                  d            }|j                  d      }	|j                  d      xs d}
|r|sk|j                  i d|j                  d      xs d d|d|dt        |j                  d            dt        |j                  d            dt        |j                  d            dt        |j                  d            dt        |j                  d            dt        |j                  d            d t        |j                  d!            d"t        |j                  d#            d$|d%t        |j                  d&            d|	d|
d't        j                  |              	 d d d        t        d(t        |              t        d)t        |      d*      D ]  }|||d*z    }t        d+g d,|d-        y # 1 sw Y   TxY w).Njerzyszocikzufc-betting-odds-daily-datasetz[ufc] Odds dataset not foundzUFC_betting_odds.csvz([ufc] UFC_betting_odds.csv not found at z[ufc] odds_csv=r8   r9   r:   	fighter_1	fighter_2
event_datesourceregionunknownfightUrl	fight_urlfighter1fighter2odds1odds_1odds2odds_2f1KoOdds
f1_ko_oddsf2KoOdds
f2_ko_odds	f1SubOddsf1_sub_odds	f2SubOddsf2_sub_odds	f1DecOddsf1_dec_odds	f2DecOddsf2_dec_odds	eventDate
addingDateadding_dater   z[ufc] odds rows=r   rP   UfcOdds)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   a  ON CONFLICT (fighter1, fighter2, "eventDate", source, region) DO UPDATE SET "fightUrl"=EXCLUDED."fightUrl", odds1=EXCLUDED.odds1, odds2=EXCLUDED.odds2, "f1KoOdds"=EXCLUDED."f1KoOdds", "f2KoOdds"=EXCLUDED."f2KoOdds", "f1SubOdds"=EXCLUDED."f1SubOdds", "f2SubOdds"=EXCLUDED."f2SubOdds", "f1DecOdds"=EXCLUDED."f1DecOdds", "f2DecOdds"=EXCLUDED."f2DecOdds", "addingDate"=EXCLUDED."addingDate", raw=EXCLUDED.raw, "updatedAt"=NOW())r   rn   r   rB   r   r   r@   r'   r   r5   r   r   ro   r   ru   )rA   r   r   rF   r   r8   r   r   r   r   r   r   r   s                r   ingest_oddsr      s   }.NOD,-,,H??8
CD	OH:
&'D	hg	. !" 	Auu[)Huu[)H#AEE,$78JUU8_FUU8_1	F8KK AEE+.6$H H !%%/2	
 !%%/2 HQUU<%89 HQUU<%89 XaeeM&:; XaeeM&:; XaeeM&:; XaeeM&:; Z j})=> & &  tzz!}! 	> 
SYK
()1c$i% 
Qqu E v		

A s   GI66I?c                      t        d       t                t                t                t	                t        d       y )Nz*[ufc] Starting UFC Kaggle normalization...z [ufc] UFC normalization complete)rn   r   r   r   r   rj   r/   r   mainr   *  s)    	
67OOM	
,-r/   __main__)r   r   r>   rS   r   pathlibr   BASE_DIRhomer   r   r   r'   r.   r3   r5   rH   r[   ru   r   r   r   r   r   __name__rj   r/   r   <module>r      s    
  	    ;<tyy{X%3j@9s 9# 9$ 9
C 

8(
A"
J4
n,
^3
l. zF r/   