
    qiG                         d Z ddlZddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlZddlZddlZddlmZ e G d d	             Ze G d
 d             Z G d d      Z G d d      Z	 	 	 	 ddededededef
dZy)zI
Data loader for historical OHLCV and tick data from CSV and JSON files.
    N)	dataclass)datetime)Path)ListOptionalIterator	Generator)DATE_FORMATc                   z    e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   edefd	       Zedefd
       Z	y)OHLCVzSingle OHLCV candle data.	timestampopenhighlowclosevolumereturnc                     | j                   S )z+Simulated bid price (close - small offset).r   selfs    -/var/www/html/crpytotradingbot/data_loader.pybidz	OHLCV.bid   s     zz    c                 >    | j                   dz  }| j                   |z   S )zCSimulated ask price (close + small offset based on typical spread).-C6?r   )r   spreads     r   askz	OHLCV.ask!   s!     f$zzF""r   N)
__name__
__module____qualname____doc__r   __annotations__floatpropertyr   r    r   r   r   r      sX    #
K
K	JLMU   #U # #r   r   c                   x    e Zd ZU dZeed<   eed<   eed<   dZeed<   dZeed<   e	defd	       Z
e	defd
       Zy)TickDataz&Tick data for live trading simulation.r   r   r           
bid_volume
ask_volumer   c                 :    | j                   | j                  z   dz  S )zMid price between bid and ask.   )r   r   r   s    r   midzTickData.mid1   s     488#q((r   c                 4    | j                   | j                  z
  S )zSpread between bid and ask.)r   r   r   s    r   r   zTickData.spread6   s     xx$((""r   N)r   r    r!   r"   r   r#   r$   r*   r+   r%   r.   r   r&   r   r   r(   r(   (   s]    0	J	JJJ)U ) ) # # #r   r(   c                      e Zd ZdZddefdZ	 	 	 	 	 	 	 ddedee   deded	ed
edededd fdZdede	fdZ
	 	 ddedee   deeddf   fdZ	 d dedeee   ddf   fdZdedee   fdZdefdZdefdZdefdZdefdZdedefdZy)!TickDataLoaderzs
    Load and process tick data from CSV files.

    Optimized for large files with streaming/chunked loading.
    symbolc                 <    || _         d| _        d| _        d| _        y)ze
        Initialize tick data loader.

        Args:
            symbol: Trading pair symbol
        Nr   )r2   	file_path
total_rows_dfr   r2   s     r   __init__zTickDataLoader.__init__C   s      (, +/r   Nr4   max_rows	skip_rowstimestamp_colbid_colask_colbid_vol_colask_vol_colr   c	                    || _         ||dkD  rt        d|dz         nd|d}	|	j                         D 
ci c]  \  }
}|	|
| }	}
}t        d| d       t	        j
                  di |	}|| _        || _        || _        || _	        || _
        ||   j                  | j                        |d<   || _        t        |      | _        t        d| j                  d	d
       | S c c}}
w )a  
        Load tick data from CSV file.

        Args:
            file_path: Path to CSV file
            max_rows: Maximum rows to load (None for all)
            skip_rows: Number of rows to skip from start
            timestamp_col: Timestamp column name
            bid_col: Bid price column name
            ask_col: Ask price column name
            bid_vol_col: Bid volume column name
            ask_vol_col: Ask volume column name

        Returns:
            Self for chaining
        r      N)filepath_or_bufferskiprowsnrowszLoading tick data from z...parsed_timestampzLoaded ,z ticksr&   )r4   rangeitemsprintpdread_csv_timestamp_col_bid_col_ask_col_bid_vol_col_ask_vol_colapply_parse_timestampr6   lenr5   )r   r4   r9   r:   r;   r<   r=   r>   r?   read_paramskvdfs                r   load_csvzTickDataLoader.load_csvO   s    6 # #,3<q=aQ/d
 )4(9(9(;M1q}q!tMM'	{#67[[';' ,'' "$M!2!8!89N9N!Ob'*&12) Ns   
C# C#ts_strc           
      @   	 t        j                  dt        |            }|r|j                  d      }t	        |j                  d            }t	        |j                  d            }t	        |j                  d            }t	        |j                  d            }t	        |dd       }t	        |dd       }	t	        |dd	       }
t        ||	|
||||d
z        S t        j                  |      j                         S # t        $ r t        j                         cY S w xY w)z:Parse timestamp in format 'YYYYMMDD HH:MM:SS:milliseconds'z'(\d{8})\s+(\d{2}):(\d{2}):(\d{2}):(\d+)rA   r-            N        )rematchstrgroupintr   rJ   to_datetimeto_pydatetime	Exceptionnow)r   rY   rb   	date_parthourminutesecondmsyearmonthdays              r   rR   zTickDataLoader._parse_timestamp   s    	"HHGVUE!KKN	5;;q>*U[[^,U[[^,Q(9Ra=)IaN+)Aa.)eS$T	RR ~~f-;;== 	"<<>!	"s   CC= "C= =DD	start_idxend_idxc              #   P  K   | j                   y||nt        | j                         }t        |t        |t        | j                                     D ]  }| j                   j                  |   }t        |d   t        || j                           t        || j                           | j                  | j                   j                  v rt        || j                           nd| j                  | j                   j                  v rt        || j                           nd        yw)z
        Iterate over ticks.

        Args:
            start_idx: Starting index
            end_idx: Ending index (None for all)

        Yields:
            TickData objects
        NrE   r)   r   r   r   r*   r+   )r6   rS   rG   minilocr(   r$   rM   rN   rO   columnsrP   )r   rr   rs   endidxrows         r   iterate_tickszTickDataLoader.iterate_ticks   s      88 ,g#dhh-CS]$;< 	C((--$C01#dmm,-#dmm,-<@<M<MQUQYQYQaQa<a5T%6%6!78gj<@<M<MQUQYQYQaQa<a5T%6%6!78gj 	s   D$D&
chunk_sizec              #     K   | j                   yt        dt        | j                         |      D ]  }t        ||z   t        | j                               }g }t        ||      D ]  }| j                   j                  |   }|j                  t        |d   t        || j                           t        || j                           | j                  | j                   j                  v rt        || j                           nd| j                  | j                   j                  v rt        || j                           nd              |  yw)z
        Iterate over ticks in chunks for memory efficiency.

        Args:
            chunk_size: Number of ticks per chunk

        Yields:
            Lists of TickData objects
        Nr   rE   r)   ru   )r6   rG   rS   rv   rw   appendr(   r$   rM   rN   rO   rx   rP   )r   r}   startry   chunkrz   r{   s          r   iterate_ticks_chunkedz$TickDataLoader.iterate_ticks_chunked   s      881c$((mZ8 	Eej(#dhh-8CEUC( hhmmC(X!"45c$--01c$--01@D@Q@QUYU]U]UeUe@euS):):%;<kn@D@Q@QUYU]U]UeUe@euS):):%;<kn  K	s   E
Eindexc           
         | j                   |t        | j                         k\  ry| j                   j                  |   }t        |d   t	        || j
                           t	        || j                           | j                  | j                   j                  v rt	        || j                           nd| j                  | j                   j                  v rt	        || j                                 S d      S )zGet single tick by index.NrE   r)   ru   )
r6   rS   rw   r(   r$   rM   rN   rO   rx   rP   )r   r   r{   s      r   get_tickzTickDataLoader.get_tick   s    88uDHH5hhmmE",-c$--()c$--()8<8I8ITXXM]M]8]uS!2!234cf8<8I8ITXXM]M]8]uS!2!234
 	

 dg
 	
r   c                    | j                   yt        | j                   | j                     j                         | j                   | j                     j                               }t	        | j                   | j                     j	                         | j                   | j                     j	                               }t        |      t        |      fS )#Get min and max prices in the data.r)   r)   )r6   rv   rM   rN   maxr$   r   	min_price	max_prices      r   get_price_rangezTickDataLoader.get_price_range   s    88/335txx7N7R7R7TU	/335txx7N7R7R7TU	i %	"233r   c                     | j                   t        | j                         dk(  ry| j                   d   j                  d   | j                   d   j                  d   fS )$Get start and end dates of the data.r   NNrE   )r6   rS   rw   r   s    r   get_date_rangezTickDataLoader.get_date_range   sZ    88s488}1 HH'(--a0HH'(--b1
 	
r   c                 J   | j                   i S | j                   | j                     | j                   | j                     z
  }t        |j	                               t        |j                               t        |j                               t        |j                               dS )zGet spread statistics.)
min_spread
max_spread
avg_spreadmedian_spread)r6   rN   rM   r$   rv   r   meanmedian)r   spreadss     r   get_spread_statszTickDataLoader.get_spread_stats   sx    88I((4==)DHHT]],CC../"7>>#34	
 	
r   c                     | j                   S )zGet number of ticks.)r5   r   s    r   __len__zTickDataLoader.__len__  s    r   c                 J    | j                  |      }|t        d| d      |S )zGet tick by index.zIndex z out of range)r   
IndexError)r   r   ticks      r   __getitem__zTickDataLoader.__getitem__  s.    }}U#<veWM:;;r   )BTCUSD)Nr   	Timestampz	Bid pricez	Ask pricez
Bid volumez
Ask volume)r   N)i'  )r   r    r!   r"   rc   r8   r   re   rX   r   rR   r	   r(   r|   r   r   r   tupler   r   dictr   r   r   r&   r   r   r1   r1   <   sa   
0s 
0 #'(""''99 3-9 	9
 9 9 9 9 9 
9v"s "x "2 !% # 
8T4'	(	@   
4>4-	.@
c 
hx&8 
4 4
 

$ 
   r   r1   c                   X   e Zd ZdZddefdZ	 	 	 	 	 	 	 d dedededed	ed
ededee   dd fdZ	 d!dedee   dd fdZde	j                  dededed	ed
ededee   fdZde	j                  fdZdee   fdZd"dedee   fdZdededee   fdZdefdZdefdZdefdZdedefdZy)#
DataLoaderz'Load and process historical OHLCV data.r2   c                 .    || _         g | _        d| _        y)z`
        Initialize data loader.

        Args:
            symbol: Trading pair symbol
        N)r2   datar6   r7   s     r   r8   zDataLoader.__init__  s     !#	+/r   Nr4   r;   open_colhigh_collow_col	close_col
volume_coldate_formatr   c	           	         t        j                  |      }	|rt        j                  |	|   |      |	|<   n	 t        j                  |	|         |	|<   |	j                  |      j                  d      }	|	| _
        | j                  |	||||||      | _        | S # t        t        f$ rc |	|   j                  t              }
|
j                         dkD  rt        j                  |
d      |	|<   nt        j                  |
d      |	|<   Y w xY w)a  
        Load OHLCV data from CSV file.

        Args:
            file_path: Path to CSV file
            timestamp_col: Name of timestamp column
            open_col: Name of open price column
            high_col: Name of high price column
            low_col: Name of low price column
            close_col: Name of close price column
            volume_col: Name of volume column
            date_format: Date format string (auto-detect if None)

        Returns:
            Self for chaining
        )format   mBrn   unitsTdrop)rJ   rK   rf   
ValueError	TypeErrorastyper$   r   sort_valuesreset_indexr6   _dataframe_to_ohlcvr   )r   r4   r;   r   r   r   r   r   r   rW   	ts_valuess              r   rX   zDataLoader.load_csv'  s   6 [[#  "r-/@ UB}L$&NN2m3D$E=! ^^M*66D6A,,x7Iz
	  	* L}-44U;	==?T)(*yt(LB}%(*ys(KB}%Ls   B A/D
	D
data_keyc           	          t        |d      5 }t        j                  |      }ddd       |r|   }nt        t              r	d|v r|d   }|s	g | _        | S |d   }t        |t              rt        j                  |g d      }nt        j                  |      }d}	 t        j                  ||         ||<   |j                  |      j!                  d      }|| _        | j%                  |dddddd      | _        | S # 1 sw Y   xY w# t        t        f$ rc ||   j                  t              }|j                         dkD  rt        j                  |d	
      ||<   nt        j                  |d
      ||<   Y w xY w)zLoad OHLCV data from JSON file.rNr   r   r   r   r   r   r   r   )rx   r   r   rn   r   r   Tr   r   r   r   r   r   )r   jsonload
isinstancer   r   listrJ   	DataFramerf   r   r   r   r$   r   r   r   r6   r   )	r   r4   r   fraw_data
first_itemrW   ts_colr   s	            r   	load_jsonzDataLoader.load_jsonZ  ss    )S! 	$Qyy|H	$ )H$'Fh,>'HDIKa[
j$'OB
 h'B	A6
3BvJ ^^F#//T/:,,VVUGX
	 O	$ 	$2 I& 	A6
))%0I}}%^^IDA6
^^IC@6
	As   C?D ?DA/E=<E=rW   c                 `   g }|j                         D ]  \  }	}
t        t        |
|   d      r|
|   j                         n|
|   t	        |
|         t	        |
|         t	        |
|         t	        |
|         ||j
                  v rt	        |
|         nd      }|j                  |        |S )z+Convert DataFrame to list of OHLCV objects.rg   r)   r   )iterrowsr   hasattrrg   r$   rx   r   )r   rW   r;   r   r   r   r   r   r   _r{   candles               r   r   zDataLoader._dataframe_to_ohlcv  s     kkm 		 FAs@GMHZ\k@l#m,::<ru  wD  sE3x=)3x=)#g,'C	N+1;rzz1IuS_-sF KK		  r   c                 x   | j                   | j                   j                         S | j                  st        j                         S t        j                  | j                  D cg c]G  }|j
                  |j                  |j                  |j                  |j                  |j                  dI c}      S c c}w )zGet data as pandas DataFrame.r   )r6   copyr   rJ   r   r   r   r   r   r   r   )r   cs     r   to_dataframezDataLoader.to_dataframe  s    8888==?"yy<<>!|| YY

  [[uu((

 
 
	 

s   $AB7c              #   6   K   | j                   D ]  }|  yw)z Iterate over candles one by one.Nr   )r   r   s     r   iterate_candleszDataLoader.iterate_candles  s     ii 	FL	s   ticks_per_candlec              #      K   | j                   D ]M  }| j                  ||      }t        |      D ]+  \  }}|j                  }|dz  }t	        ||||z         }| - O yw)z*Generate simulated tick data from candles.r   )r   r   r   N)r   _interpolate_candle	enumerater   r(   )	r   r   r   pricesiprice	tick_timer   r   s	            r   r|   zDataLoader.iterate_ticks  sv     ii 	F--f6FGF%f- 5",,	'
 
	s   AA r   	num_ticksc                    |dk  r1|j                   |j                  |j                  |j                  gd| S |j                   g}|j                  |j                   k\  }|r|dz
  }|dz  }d|z  dz  }t	        d|dz
        D ]  }||k  r>||z  }	|j                  |j                   |	|j                  |j                   z
  z  z          F||k  rD||z
  ||z
  z  }	|j                  |j                  |	|j                  |j                  z
  z  z          ||z
  ||z
  z  }	|j                  |j                  |	|j                  |j                  z
  z  z           n|dz
  }|dz  }d|z  dz  }t	        d|dz
        D ]  }||k  r>||z  }	|j                  |j                   |	|j                  |j                   z
  z  z          F||k  rD||z
  ||z
  z  }	|j                  |j                  |	|j                  |j                  z
  z  z          ||z
  ||z
  z  }	|j                  |j                  |	|j                  |j                  z
  z  z           |j                  |j                         |S )z#Interpolate prices within a candle.r\   Nr-   r[   rA   )r   r   r   r   rG   r   )
r   r   r   r   
is_bullish
mid_pointslow_idxhigh_idxr   ratios
             r   r   zDataLoader._interpolate_candle  sD   >KKfjj&,,G
SS++\\V[[0
"QJ AoG:~*H1i!m, 	V<KEMM&++fkk9Q0R"RS(][X-?@EMM&**ufjj8P/Q"QR\j8.CDEMM&++9S0T"TU	V #QJ!QH*n)G1i!m, 	T=LEMM&++v{{9R0S"ST'\\g.@AEMM&++fkk9Q0R"RS[Z'-ABEMM&**uvzz8Q/R"RS	T 	fll#r   c                     | j                   syt        d | j                   D              }t        d | j                   D              }||fS )r   r   c              3   4   K   | ]  }|j                     y wN)r   .0r   s     r   	<genexpr>z-DataLoader.get_price_range.<locals>.<genexpr>  s     1!1   c              3   4   K   | ]  }|j                     y wr   )r   r   s     r   r   z-DataLoader.get_price_range.<locals>.<genexpr>  s     212r   )r   rv   r   r   s      r   r   zDataLoader.get_price_range  s?    yy1tyy11	2		22	9%%r   c                     | j                   sy| j                   d   j                  | j                   d   j                  fS )r   r   r   r   )r   r   r   s    r   r   zDataLoader.get_date_range  s3    yy		!&&		"(?(?@@r   c                 ,    t        | j                        S )zGet number of candles.)rS   r   r   s    r   r   zDataLoader.__len__  s    499~r   r   c                      | j                   |   S )zGet candle by index.r   )r   r   s     r   r   zDataLoader.__getitem__  s    yyr   )BTCUSDT)r   r   r   r   r   r   Nr   )r\   )r   r    r!   r"   rc   r8   r   rX   r   rJ   r   r   r   r   r   r   r   re   r(   r|   r$   r   r   r   r   r   r   r&   r   r   r   r     s   1	0s 	0 ) "%)11 1 	1
 1 1 1 1 c]1 
1l #'-- 3-- 
	-^LL  	
     
e0bll (% 
c (8:L (% (C (DK (T& &A A      r   r   r2   num_candlesstart_price
volatilityr   c           	         t         j                  j                  d       t        |       }g }|}t	        ddd      }t        |      D ]w  }t         j                  j                  d|      }	|}
|
d|	z   z  }t        |	      |t         j                  j                         z  z   }||
kD  rO|d|t         j                  j                         z  z   z  }|
d|t         j                  j                         z  z
  z  }nN|
d|t         j                  j                         z  z   z  }|d|t         j                  j                         z  z
  z  }t        ||
|      }t        ||
|      }t         j                  j                         dz  dz   }t        ||
||||      }|j                  |       |}|t        j                  d      z   }z ||_        |S )	a  
    Create sample OHLCV data for testing.

    Args:
        symbol: Trading pair symbol
        num_candles: Number of candles to generate
        start_price: Starting price
        volatility: Price volatility factor

    Returns:
        DataLoader with generated data
    *   i  rA   r   r`   d   r   )hours)nprandomseedr   r   rG   normalabsr   rv   r   r   rJ   	Timedeltar   )r2   r   r   r   loaderr   current_pricecurrent_timer   change
open_priceclose_pricemax_move
high_price	low_pricer   r   s                    r   create_sample_datar
    s   $ IINN2FDMD!Q'L; <!!!Z0"
 AJ/v;bii.>.>.@!@@#$Hryy7G7G7I,I(IJJ"a(RYY5E5E5G*G&GHI#q8bii6F6F6H+H'HIJ#q8bii6F6F6H+H'HIIZ=
	:{;	!!#d*S0"
 	F##bll&;;;<> FKMr   )r   r`   g     j@g{Gz?)r"   r   dataclassesr   r   pathlibr   typingr   r   r   r	   ra   pandasrJ   numpyr   configr
   r   r(   r1   r   rc   re   r$   r
  r&   r   r   <module>r     s     !   6 6 	    # # #* # # #&Z Zzp  p h  	999 9 	9
 9r   