
    9i                     b   d dl Z d dlZd dlmZ d Zd Zd Zd Zd Z	d Z
dd	Z ej                  ej                  j                         ej                  d
      k\  r&d Zej                  j                   j"                  ZeZneZeZee
eeeee	ej                  j                   j&                  ej                  j                   j(                  ej                  j                   j*                  ej                  j                   j*                  ej                  j                   j,                  dZd Zy)    N)versionc           
          t        j                  |       } ddt         j                  j                  | t        j                  t        j
                  d      | j                        z        z   z  }| |z  S )av  
    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see
    https://arxiv.org/abs/1606.08415
          ?      ?       @)tfconvert_to_tensormatherfcastsqrtdtype)xcdfs     e/var/www/html/backtest/airagagent/rag_env/lib/python3.12/site-packages/transformers/activations_tf.py_gelur      sU     	QA
rww{{1rwwrwws|QWW'E#EFF
GCs7N    c                 n   t        j                  |       } t        j                  t        j                  | j
                        }t        j                  d| j
                        }ddt        j                  t        j                  d|z        | |t        j                  | d      z  z   z        z   z  }| |z  S )z
    Gaussian Error Linear Unit. This is a smoother version of the GELU. Original paper: https://arxiv.org/abs/1606.0841

    Args:
        x: float Tensor to perform activation

    Returns:
        `x` with the GELU activation applied.
    Hm?r   r   r      )	r   r	   r   r
   pir   tanhr   pow)r   r   coeffr   s       r   	_gelu_newr   "   s     	QA	!''	"BGGHagg&E
rwwrwwsRx0Aq!8L4LMNN
OCs7Nr   c                     t        j                  |       } | t        j                  t         j                  j	                  |             z  S )N)r   r	   r   r
   softplusr   s    r   mishr   4   s5    
QArwwrww''*+++r   c                    t        j                  |       } t        j                  d| j                        }t        j                  d| j                        }d| z  dt        j                  | |z  d|| z  | z  z   z        z   z  S )Nr   g3E?r   r   )r   r	   r   r   r   )r   coeff1coeff2s      r   	gelu_fastr#   :   sm    
QAWWXqww'FWW\177+F7cBGGAJ#
Q2F$GHHIIr   c                     t        j                  |       } t        j                  d| j                        }| t         j                  j                  || z        z  S )NgZd;?)r   r	   r   r   r
   sigmoid)r   r   s     r   
quick_gelur&   B   sB    
QAGGE177#Erwwuqy)))r   c                 B    t        j                  t        |       dd      S )a  
    Clip the range of possible GeLU outputs between [-10, 10]. This is especially useful for quantization purpose, as
    it allows mapping 2 negatives values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created. For information: OpenAI GPT's gelu is slightly different (and gives slightly different results):
    0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) Also see
    https://arxiv.org/abs/1606.08415 :param x: :return:
    i
   )r   clip_by_valuer   r   s    r   gelu_10r*   H   s     E!Hc2..r   c                 |    t        j                  | d|      \  }}|t         j                  j                  |      z  S )a  
    Gated Linear Unit. Implementation as defined in the original paper (see https://arxiv.org/abs/1612.08083), where
    the input `x` is split in two halves across a dimension (`axis`), A and B, returning A * sigmoid(B).

    Args:
        `x`: float Tensor to perform activation
        `axis`: dimension across which `x` be split in half

    Returns:
        `x` with the GLU activation applied (with its size halved across the dimension `axis`).
       )axis)r   splitr
   r%   )r   r-   abs       r   glur1   V   s2     88Aqt$DAqrwwq!!!r   z2.4c                 X    t         j                  j                  j                  | d      S )NT)approximate)r   kerasactivationsgelur   s    r   approximate_gelu_wrapr7   h   s"    xx##(((==r   )r6   r*   r#   gelu_newr1   r   r&   relur%   siluswishr   c           	      |    | t         v r	t         |    S t        d|  dt        t         j                                      )Nz	function z not found in ACT2FN mapping )ACT2FNKeyErrorlistkeys)activation_strings    r   get_tf_activationrB      sB    F"'((#4"55RSWX^XcXcXeSfRghiir   ))r
   
tensorflowr   	packagingr   r   r   r   r#   r&   r*   r1   parseVERSIONr7   r4   r5   r6   r8   r9   r%   r;   r   r=   rB    r   r   <module>rI      s     
$,J*/"  7==##$e(<<> 88$$D$HDH HH  %%xx##++HH  &&XX!!''HH  %%
 jr   