
    k1i15                     >   d Z ddlZddlZddlZddlmZ ddlmZmZmZ ddl	m
Z
mZmZmZmZmZmZmZ  ej$                  dd      Z ej$                  dd	      Zd
ddg ddZdee   fdZdedee   fdZdee   dee   fdZdedefdZdededefdZde
de
fdZy)zX
Link Discovery Node
Discovers white-hat link building opportunities for human approval
    N)datetime)AnyDictList)SEOCampaignStateCampaignPhasetransition_phase	add_erroradd_warningLinkOpportunityLinkOpportunityStatusOpportunityTypeNEXT_API_URLzhttp://127.0.0.1:3000MCP_URLzhttp://127.0.0.1:3001(   i  g?)zwrite-for-uszguest-post-guidelineszsponsored-postzlink-exchangez	buy-linksz	paid-postzadvertise-with-us)min_domain_authoritymin_monthly_trafficmin_relevance_scoreblacklist_patternsreturnc                     K   g S w)z
    Discover HARO/Connectively opportunities

    In production, this would integrate with HARO API or monitoring service.
    For now, returns placeholder structure.
     r       </var/www/html/langgraph-service/nodes/link_discovery_node.pydiscover_haro_opportunitiesr   -   s      Is   site_domainc                 H  K   g }	 t        j                  d      4 d{   }|j                  t         dddddd	| d
dd       d{   }|j                  dk(  rn|j                         j                  di       }|j                  dg       }|r:ddl}|j                  |d   j                  dd            }|j                  dg       }ddd      d{    |S 7 7 7 
# 1 d{  7  sw Y   |S xY w# t        $ r}t        d|        Y d}~|S d}~ww xY ww)zo
    Find unlinked brand mentions across the web

    Uses MCP tool if available, otherwise returns empty.
          N@timeoutN/rpc2.0   
tools/calllink_opportunity_scorerunlinked_mentions)typedomainname	argumentsjsonrpcidmethodparamsjson   resultcontentr   text{}mentionsz-[LinkDiscoveryNode] Unlinked mentions error: )
httpxAsyncClientpostr   status_coder2   getloads	Exceptionprint)	r   r8   clientresponser4   r5   r2   dataes	            r   discover_unlinked_mentionsrE   8   s?     HC$$T2 	8 	8f#[[)4 $* 9$7&1&	 )  H ##s*!,,Xr: **Y3::gajnnVT&BCD#xx
B7H-	8 	82 O3	8	8 	8 	8 	82 O  C=aSABBOCs   D"C= C!C= )C'C#BC'C= C%C= D"!C= #C'%C= 'C:-C0.C:5C= 8D":C= =	DDD"DD"competitorsc                   K   g }	 t        j                  d      4 d{   }| dd D ]  }|j                  t         ddddd	d
|j	                  dd      ddd       d{   }|j
                  dk(  sP|j                         j	                  di       }|j	                  dg       }|sddl}|j                  |d   j	                  dd            }|j                  |j	                  d
g               ddd      d{    |S 7 7 7 
# 1 d{  7  sw Y   |S xY w# t        $ r}	t        d|	        Y d}	~	|S d}	~	ww xY ww)z
    Find broken link opportunities from competitor backlinks

    Looks for 404 pages that competitors link to, which we could recreate.
    r   r   N   r!   r"   r#   r$   r%   broken_linksr(    )r'   competitor_domainr)   r,   r1   r3   r4   r5   r   r6   r7   z([LinkDiscoveryNode] Broken links error: )r9   r:   r;   r   r=   r<   r2   r>   extendr?   r@   )
rF   rI   rA   comprB   r4   r5   r2   rC   rD   s
             r   discover_broken_linksrN   \   ss     L>$$T2 	J 	Jf#BQ J!'it$#(".$=(659XXh5K*#	 "- "  ''3.%]]_002>F$jjB7G##zz'!*..*FG$++DHH^R,HI-J	J 	J4 5	J	J 	J 	J 	J4   >8<==>s   ED( DD( AD%D&D:4D/AD:D( DD( 
ED( DD( D%DD% D( #E%D( (	E
1E?EE

Eoppc                 &   | j                  dd      }| j                  dd      }| j                  dd      }| j                  dd      j                         }|t        d   k  ry|t        d	   k  ry|t        d
   k  ryt        d   D ]  }||v s y y)z0Check if an opportunity meets quality thresholdsdomain_authorityr   monthly_trafficrelevance_scoreurlrJ   r   Fr   r   r   T)r=   lowerLINK_QUALITY_THRESHOLDS)rO   datraffic	relevancerT   patterns         r   is_quality_opportunityr[      s    	#Q	'Bgg'+G)1-I
''%

"
"
$C 
#$:;; ()>?? *+@AA ++?@ c> r   	site_infoc                 f  K   d|j                  dd       d|j                  dd       d|j                  dd	       d
}	 t        j                  d      4 d{   }|j                  t         ddddddd| j                  dd       d| j                  dd       d|j                  dd       d|j                  dd       d| j                  dd       d id!d"#       d{   }|j
                  d$k(  ra|j                         j                  d%i       }|j                  d&g       }|r-|d'   j                  d(      r|d'   d(   cddd      d{    S ddd      d{    |S 7 7 7 7 # 1 d{  7  sw Y   |S xY w# t        $ r}t        d)|        Y d}~|S d}~ww xY ww)*zy
    Generate a personalized pitch for a link opportunity

    Uses Grok via MCP for research-based personalization.
    zHi,

I noticed you mentioned topicsports betting analyticszS on your site. I wanted to reach out because we have some complementary content at r(   EventheOddsz) that your readers might find valuable.


value_propzHWe provide free sports analytics tools and data-driven betting insights.zI

Would you be interested in checking it out?

Best,
The EventheOdds Teamg      >@r   Nr!   r"   r#   r$   ask_grokpromptzLWrite a brief, personalized outreach email for link building.

Target site: source_domainUnknownz
Their content topic: content_topicsportsz
Our site: z
Our value: z!Sports analytics and betting dataz
Opportunity type: opportunity_typementionz

Requirements:
- Be conversational and genuine, NOT template-y
- Reference something specific about their content
- Clearly explain mutual benefit
- Keep it under 150 words
- Do NOT include subject line

Return ONLY the email body.r)   r,   r1   r3   r4   r5   r   r6   z,[LinkDiscoveryNode] Pitch generation error: )	r=   r9   r:   r;   r   r<   r2   r?   r@   )rO   r\   default_pitchrA   rB   r4   r5   rD   s           r   generate_pitchrk      s+    "w0JKL  M`  aj  an  an  ow  yF  aG  `H H
|gh i j		M%B$$T2 "	. "	.f#[[)4 $* *$ +ggoy12 3ggox89 :===1
2 3MM,(KLM N77-y9: 	;'&	 )  H: ##s*!,,Xr: **Y3wqz~~f5"1:f-E"	. "	. "	. "	. "	.L M"	."	. "	. "	. "	. "	.L   B<QC@AABs   =F1 F E-F BE6&E0'A"E6	F E2F F1F &E4'F +F1-F 0E62F 4F 6F	<E?=F	F F1	F 	F.F)#F1)F..F1statec                 D  K   t        d| d           | d   }t        j                         j                         }g }dddd}	 	 t	                d{   }|D ]B  }t        |      st        ||       d{   }|j                  t        d;i dt        t        j                               d	|j                  d
d      d|j                  dd      d|j                  dd      d|j                  d      d|j                  dd      dt        j                  j                  dt         j"                  j                  d|ddd|j                  d      d|j                  d      ddddddddddddd |d!d"|j                  d#d              E 	 	 t'        |d          d{   }	|	D ]E  }
t        |
      st        |
|       d{   }|j                  t        d;i dt        t        j                               d	|
j                  d
d      d|
j                  dd      d|
j                  dd      d|
j                  d      d|
j                  dd      dt        j(                  j                  dt         j"                  j                  d|ddd|
j                  d      d|
j                  d      ddddddddddddd |d!d%|
j                  d&d      dd'         H 	 	 t+        | j                  d)g              d{   }|D ]B  }t        |      st        ||       d{   }|j                  t        d;i dt        t        j                               d	|j                  d*d      d|j                  dd      d|j                  dd      d|j                  d      d|j                  dd      dt        j,                  j                  dt         j"                  j                  d|ddd|j                  d      d|j                  d      ddddddddddddd |d!d+|j                  d,d              E 	 t        d.t/        |       d/       || d3<   || d4<   |r d5| d6<   t3        | d1d7t/        |       d8      } nt3        | d1d9      } t5        | t6        j8                        } || d:<   | S 7 7 m# t$        $ r}t        d$|        Y d}~ed}~ww xY w7 [7 8# t$        $ r}t        d(|        Y d}~-d}~ww xY w7 7 # t$        $ r}t        d-|        Y d}~d}~ww xY w# t$        $ r2}t        d0|        t1        | d1d2t        |             } Y d}~d}~ww xY ww)<a4  
    Link Discovery phase node

    Discovers white-hat link building opportunities:
    - HARO/Connectively monitoring
    - Unlinked brand mentions
    - Broken link building (DA 40+ only)
    - Guest post prospects (strict filtering)

    ALL opportunities require human approval before any outreach.
    z5[LinkDiscoveryNode] Starting link discovery for site site_idzeventheodds.air_   zAFree sports analytics tools with data from 50,000+ games analyzed)r(   r^   ra   Nr.   
target_urlrT   rJ   rd   r(   rQ   r   rR   rS   g        rh   statuspitch_draftpitch_approvedcontact_emailcontact_nameapproved_byapproved_atrejection_reasonexecuted_atlink_acquiredFacquired_urldiscovered_atnoteszHARO query: queryz+[LinkDiscoveryNode] HARO discovery failed: zMention context: contextr3   z8[LinkDiscoveryNode] Unlinked mentions discovery failed: competitor_datalinking_page_urlzBroken URL: 
broken_urlz3[LinkDiscoveryNode] Broken links discovery failed: z[LinkDiscoveryNode] Discovered z quality opportunitiesz1[LinkDiscoveryNode] Error during link discovery: link_discoveryzLink discovery failed: link_opportunitiespending_approvalsTrequires_human_reviewzFound z: link opportunities - would require approval in productionzBNo quality link opportunities found - external APIs not configured
updated_atr   )r@   r   utcnow	isoformatr   r[   rk   appendr   struuiduuid4r=   r   HAROvaluer   PENDINGr?   rE   UNLINKED_MENTIONrN   BROKEN_LINKlenr
   r   r	   r   REPORT)rl   rn   nowopportunitiesr\   	haro_oppsrO   pitchrD   r8   ri   brokenbrks                r   link_discovery_phaser      s     
A%	BRAS
TUIG
//

%
%
'C+-M"+YIbW	E9;;I  )#."0i"@@E!(( *tzz|,*#&775"#5* '*ggh&;* *-1CQ)G	*
 ),0A(B* ),0A3(G* *9)=)=)C)C*  5<<BB* %** (,* '*ggo&>* &)WW^%<* %)* %)* *.*  %)!*" ',#*$ &*%*& '*'*( !-SWWWb-A,BC)* :	R7	(8KLLH# )'2"0)"DDE!(( *tzz|,*#*;;ub#9* '.kk(B&?* *15G)K	*
 )04E(F* )04Es(K* *9)I)I)O)O*  5<<BB* %** (,* '.kk/&B* &-[[%@* %)* %)* *.*  %)!*" ',#*$ &*%*& '*'*( !2'++i2LTc2R1ST)* :	M0;Lb1QRRF )#."0i"@@E!(( *tzz|,*#&77+=r#B* '*ggh&;* *-1CQ)G	*
 ),0A(B* ),0A3(G* *9)D)D)J)J*  5<<BB* %** (,* '*ggo&>* &)WW^%<* %)* %)* *.*  %)!*" ',#*$ &*%*& '*'*( !-SWW\2-F,GH)* 8 	/M0B/CCYZ[ #0E
!.E
 )-%&E#3S'((bce E#3PR UM$8$89EE,Lg < A.  	E?sCDD	E
 M E.  	RLQCPQQ	R
 S A.  	MGsKLL	M
  WA!EF%!15LSQRVH3UVWs*  AV S( S"S( )S( 8S%9D&S( U" !T 2T3T 	T TD)T U" T> "T8#T> 9T> T;	D&T> /U" AV "S( %S( (	T
1T?U" T

U" T T 	T5T0*U" 0T55U" 8T> ;T> >	UUU" UU" "	V+'VV VV )__doc__osr   r9   r   typingr   r   r   state.campaign_stater   r   r	   r
   r   r   r   r   getenvr   r   rV   dictr   r   rE   rN   boolr[   rk   r   r   r   r   <module>r      s   
 
    " "	 	 	 ryy)@A
"))I6
7
 	  4: !# !$t* !H"T$Z "DJ "J  68d 8t 8 8vN&6 N;K Nr   