
    ڵ1iy                         d dl mZ d dlmZ d dlZd dlZd dlmZ ddlmZ ddl	m
Z
 d dlmZmZmZmZ d dlZ e         ej"                  ej$                  	        ej&                  e      Z G d
 d      Zy)    )
OpenSearch)bulkN)load_dotenv   )mongo_service)Embedder)ListDictAnyOptional)levelc            	       P   e Zd Zd Zd defdZd dededefdZd Zd	ed
e	e
eef      fdZdee
   dee
   d
efdZdee
   d
efdZdee
   d
efdZdee
   d
efdZdee
   d
efdZdee
   d
efdZd	ed
efdZd!dede	e   d
e
eef   fdZd	ed
efdZd	ed
efdZd	edefdZy)"OpenSearchServicec                    t        j                  d      }t        t        j                  dd            }t        j                  d      }t        j                  d      }t        j                  dd      j                         dk(  }t	        |||g      st        d      t        ||d	g||f|
      | _        t        j                  dd      | _        t        | _	        t               | _        y )NOPENSEARCH_HOSTOPENSEARCH_PORT443OPENSEARCH_USEROPENSEARCH_PASSWORDOPENSEARCH_USE_SSLtruez>OpenSearch configuration not complete in environment variables)hostport)hosts	http_authuse_sslOPENSEARCH_INDEXresumes)osgetenvintlowerall
ValueErrorr   clientindexr   r   embedder)selfopensearch_hostopensearch_portopensearch_useropensearch_passwordopensearch_use_ssls         ?/var/www/html/drjob-dev/drjob-ai/services/opensearch_service.py__init__zOpenSearchService.__init__   s    ))$56bii(95AB))$56 ii(=>YY';VDJJLPVVO_6IJK]^^  ,+ '(;<&	
 YY19=
* 
    textc                     |d|g ddid}| j                   j                  | j                  |      }|d   d   }|D cg c]  }|d   d   |d	   d
 c}S c c}w )z&Search resumes using multi-match querymulti_match)emailnamephone	languagesdate_of_birthnationalityprofile_summaryskillsexperience_texteducation_textcourses_textprojects_textcertifications_textcountry_namecountry_header_code)queryfields)sizerC   r&   bodyhits_source	resume_id_scorerJ   keyword_score)r%   searchr&   )r(   r1   top_krC   responserH   hits          r.   rN   zOpenSearchService.search-   s     ! 
4 ;;%%DJJU%C' 
 i.5HV
 	
 
s    ArC   rB   rO   c           	      P   	 |j                         st        j                  d       g S |s't        j                  d       | j                  ||      S |ddd|g dddd	id
d|j	                         iigiidgd}| j
                  j                  | j                  |      }|d   d   }|D cg c]  }|d   d   |d   d }}t        j                  dt        |       d|        |S c c}w # t        $ r-}	t        j                  dt        |	              g cY d}	~	S d}	~	ww xY w)a  
        Search resumes in OpenSearch with mandatory country filtering for screening type 2
        
        Args:
            query: Search query text
            country_header_code: Country header code that must match (e.g., 'ae', 'bd', 'in')
            top_k: Maximum number of results to return
            
        Returns:
            List of search results with resume_id and keyword_score, filtered by country
        z1Empty query provided to OpenSearch country searchz@No country_header_code provided - falling back to regular searchboolmustr3   )r4   r5   r6   r7   r8   r9   zprofile_summary^2zskills^2zexperience_text^1.5r=   r>   r?   r@   rA   best_fieldsAUTO)rC   rD   type	fuzzinesstermrB   rJ   )rE   rC   rI   rF   rH   rI   rK   rL   z OpenSearch country search found z results for country: z$Error in OpenSearch country search: N)striploggerwarningrN   r"   r%   r&   infolen	Exceptionerrorstr)
r(   rC   rB   rO   search_bodyrP   rH   rQ   resultses
             r.   search_with_country_filterz,OpenSearchService.search_with_country_filterQ   se   >	;;=RS	&ab{{5%00  --2/&  -:17'0"0 !'$9;N;T;T;V)"1!!D (=I%KN {{))

)MHF#F+D   ")n[9CPXMZG 
 KK:3w<.H^_r^stuN  	LL?AxHII	s;   &C/ (C/ AC/ -C*'C/ *C/ /	D%8"D D% D%c                 ~   ddl m} ddlm} t        ||      rt	        |      S t        ||      r|j                         S t        |t              r3|j                         D ci c]  \  }}|| j                  |       c}}S t        |t              r|D cg c]  }| j                  |       c}S |S c c}}w c c}w )zJConvert MongoDB ObjectIds and datetime objects to JSON serializable formatr   )ObjectId)datetime)
bsonrg   rh   
isinstancera   	isoformatdictitems_serialize_documentlist)r(   objrg   rh   kvitems          r.   rn   z%OpenSearchService._serialize_document   s    !%c8$s8OX&==?"T"?Byy{Ktq!At//22KKT"?BCtD,,T2CCJ	 LCs   #B4B:jobseeker_idreturnc                    	 | j                   j                  |      }|st        j                  d|        y|j	                  di       xs i }|j	                  dg       xs g }|j	                  dg       xs g }|j	                  dg       xs g }|j	                  dg       xs g }|j	                  dg       xs g }|j	                  d	d
      xs d
}	|j	                  dd
      xs d
}
i dt        |      d|j	                  d      xs |j	                  dd
      d|j	                  d      xs |j	                  dd
      d|j	                  d      xs |j	                  dd
      d|j	                  d      xs% |j	                  d      xs |j	                  dd
      d|j	                  d      xs |j	                  dd
      d|j	                  d      xs |j	                  dd
      d|j	                  d      xs% |j	                  d      xs |j	                  dd
      d|j	                  d      xs |j	                  dd
      d|j	                  d      xs |j	                  dd
      d	|	d|
d|j	                  d      xs | j                  ||      d|d|d|d||| j                  |      | j                  |      | j                  |      | j                  |      | j                  |      ||j	                  d       |j	                  d!      |j	                  d"      |||||||	|
d#d$}| j                  |      }|S # t        $ r.}t        j                  d%| d&t        |              Y d}~yd}~ww xY w)'z[Get jobseeker data with all related tables joined using optimized mongo_service aggregationzJobseeker not found: Nbasic_detailscourse_detailseducation_detailsemployment_detailsproject_detailscertification_detailsrA    rB   rJ   r4   secondary_email
first_name	last_namer6   mobile_numbersecondary_mobile_numberknown_languagesr7   r8   
birth_dater9   r:   r;   
key_skills
experience	educationcoursesprojectsstatusindexed
country_id)rt   r   r   r   rw   rx   ry   rz   r{   r|   rA   rB   )certificationsr<   r=   r>   r?   r@   jobseeker_dataz!Error getting jobseeker data for : )r   get_jobseeker_complete_datar[   r\   getra   _extract_skills_format_experience_format_education_format_courses_format_projects_format_certificationsrn   r_   r`   )r(   rt   complete_datarw   rx   ry   rz   r{   r|   rA   rB   documentserialized_documentrd   s                 r.   get_jobseeker_data_with_joinsz/OpenSearchService.get_jobseeker_data_with_joins   sI   D	 ..JJ<XM !!6|nEF *--orBHbM*../?DJN - 1 12Er J Pb!.!2!23G!L!RPR+//0A2FL"O$1$5$56Mr$R$XVX!(,,^R@FBL"/"3"34I2"N"TRT(S.(**73_}7H7HIZ\^7_( m//=dARARS_acAd( ]..{;a}?P?PQ\^`?a	(
 **?;  N}?P?PQj?k  No|  pA  pA  BI  KM  pN( "=#4#45F#G#s=K\K\]nprKs( ]..{;a}?P?PQ\^`?a(  !2!2<!@  "NMDUDUVbDc  "Ngtgxgx  zI  KM  hN( }00?g=CTCTUbdfCg( "=#4#45F#G#s=K\K\]nprKs( ( &':( -++L9vT=Q=QRdfu=v( 0(  .!(" >#($ O%(& #8#'#:#:;M#N"&"8"89J"K $ 4 4^ D!%!6!6!G'+'B'BCX'Y$0+//9,00;"/"3"3L"A%2&4):*<'6-B$0+>#5(HV #'":":8"D&& 	LL<\N"SQRVHUV	s   5M LM 	N$M>>Nrz   r{   c                    g }|D ]A  }|j                  d      s|j                  t        |d   t              r|d   n|d   g       C |D ]  }|j                  d      r-|j                  t        |d   t              r|d   n|d   g       |j                  d      sS|j                  t        |d   t              r|d   n|d   g        dj	                  t        |            S )z2Extract skills from employment and project detailsr;   technologies )r   extendrj   ro   joinset)r(   rz   r{   r;   empprojs         r.   r   z!OpenSearchService._extract_skills   s     & 	eCwwx z#h-/Nc(mUXYaUbTcd	e
 $ 	zDxx!
4>40Pd8nW[\dWeVfgxx'jnAUW[6\d>2cghvcwbxy		z xxF$$r0   c                 ^   g }|D ]  }g }|j                  d      r|j                  |d          |j                  d      r|j                  |d          |j                  d      r|j                  |d          |sw|j                  dj                  |              dj                  |      S )z'Format employment experience for searchdesignationcompany_namejob_descriptionr   r   appendr   )r(   rz   r<   r   	exp_partss        r.   r   z$OpenSearchService._format_experience  s    % 		<CIww}%  ]!34ww~&  ^!45ww()  %6!78&&sxx	':;		< xx((r0   ry   c                 ^   g }|D ]  }g }|j                  d      r|j                  |d          |j                  d      r|j                  |d          |j                  d      r|j                  |d          |sw|j                  dj                  |              dj                  |      S )z#Format education details for searchdegreeinstitutionfield_of_studyr   r   )r(   ry   r=   edu	edu_partss        r.   r   z#OpenSearchService._format_education  s    $ 		;CIwwx   X/ww}%  ]!34ww'(  %5!67%%chhy&9:		; xx''r0   rx   c                 ^   g }|D ]  }g }|j                  d      r|j                  |d          |j                  d      r|j                  |d          |j                  d      r|j                  |d          |sw|j                  dj                  |              dj                  |      S )z Format course details for searchcourse_namer   descriptionr   r   )r(   rx   course_textcoursecourse_partss        r.   r   z!OpenSearchService._format_courses'  s    $ 		;FLzz-(##F=$9:zz-(##F=$9:zz-(##F=$9:""388L#9:		; xx$$r0   c                 ^   g }|D ]  }g }|j                  d      r|j                  |d          |j                  d      r|j                  |d          |j                  d      r|j                  |d          |sw|j                  dj                  |              dj                  |      S )z!Format project details for searchproject_namer   roler   r   )r(   r{   project_textproject
proj_partss        r.   r   z"OpenSearchService._format_projects7  s    & 		:GJ{{>*!!'."9:{{=)!!'-"89{{6"!!'&/2##CHHZ$89		: xx%%r0   r|   c                 ^   g }|D ]  }g }|j                  d      r|j                  |d          |j                  d      r|j                  |d          |j                  d      r|j                  |d          |sw|j                  dj                  |              dj                  |      S )z'Format certification details for searchcertification_nameissuing_organizationr   r   r   )r(   r|   	cert_textcert
cert_partss        r.   r   z(OpenSearchService._format_certificationsG  s    	) 		7DJxx,-!!$';"<=xx./!!$'=">?xx&!!$}"56  *!56		7 xx	""r0   c           	      >   	 | j                  |      }|st        j                  d|        y| j                  j	                  | j                  ||      }| j                  ||       	 t        |      }| j                  j                  d      j                  d|iddd	ii       t        j                  d
|        y# t        $ r t        j                  d|        Y yw xY w# t        $ r.}t        j                  d| dt        |              Y d}~yd}~ww xY w)zGAdd a single resume to the index using jobseeker_id and store in Qdrantz"Could not get data for jobseeker: F)r&   idrG   Invalid jobseeker_id format: 
jobseekersr   $setr   r   z Successfully indexed jobseeker: TzError indexing jobseeker r   N)r   r[   r`   r%   r&   _store_in_qdrantr!   r$   r   get_collection
update_oner]   r_   ra   )r(   rt   r   rP   jobseeker_id_intrd   s         r.   
add_resumezOpenSearchService.add_resumeW  s   	99,GHA,PQ {{((tzzlQY(ZH !!,9#&|#4 
 --l;FF'()Q(
 KK:<.IJ  <\NKL  	LL4\N"SVHMN	sA   +C% :C% )B> 4A	C% >!C"C% !C""C% %	D.$DDN
batch_size	max_totalc                 B   	 | j                   j                         st        j                  d       dddddS t        j	                  d| d|        d}d}d}d}	 |||k\  rt        j	                  d|        nx|||z
  }t        ||      }n|}| j                   j                  ||      }	|	st        j	                  d	       n-g }
g }|	D ]{  }|||k\  r nrt        |d
         }| j                  |      }|rE| j                  ||d}|
j                  |       |j                  |       | j                  ||       n|dz  }|dz  }} |
r~	 t        | j                  |
      \  }}||z  }|r5| j                   j                  |      }|st        j                  d|        |r&|t!        |      z  }t        j                  d|        |t!        |	      z  }t        j	                  d| d       |||d}t        j	                  d|        |S # t"        $ r9}|t!        |
      z  }t        j                  dt        |              Y d}~d}~ww xY w# t"        $ r;}t        j                  dt        |              dddt        |      dcY d}~S d}~ww xY w)a  Bulk index jobseekers with status=1 and indexed=0 using optimized mongo_service methods
        
        Args:
            batch_size: Number of jobseekers to process in each batch
            max_total: Maximum total number of jobseekers to index (None = index all)
        zMongoDB not connectedr   zDatabase not connected)totalr   failedr`   z'Starting bulk indexing with batch_size=z, max_total=NzReached max_total limit of z"No more unindexed jobseekers foundr   )_index_idrI   r   z6Failed to update indexed status for batch starting at zBulk index errors: zBulk index failed: z
Processed z jobseekers so far...)r   r   r   zBulk indexing completed: zError in bulk indexing: )r   is_connectedr[   r`   r]   minget_unindexed_jobseekers_batchra   r   r&   r   r   r   r%    update_jobseekers_indexed_statusr\   r^   r_   )r(   r   r   indexed_countfailed_countprocessed_countskip	remainingcurrent_batch_sizer   actionsjobseeker_ids_to_update	jobseekerrt   r   actionsuccess_counterrorsupdate_successrd   results                        r.   bulk_index_jobseekersz'OpenSearchService.bulk_index_jobseekersx  s   `	L%%22445!"qAH`aa KKA*\ZcYdefMLO D(_	-IKK"=i[ IJ ( )O ;I),Z)C&)3& "//NNtUgh
!KK DE*,'!+ )I ,I1M#&y#7L#AA,OH&*jj#/'/"
  v./66|D --lHE %)#q(O/)4 E04T[['0J-v%6 3-1-?-?-`-`ax-yN#1 &1ghlgm/n o!(CK7L"LL+>vh)GH J'j(99NOPK P )(&F KK3F8<=M % E$G4':3q6(%CDDE   	LLL3CF8<=1CFKK	LsO   5I DI A=H AI 	I/II II 	J#0JJJc           	      b   	 	 t        |      }| j                  j                  d      j                  d|idddii       | j                  |      S # t        $ r t        j                  d|        Y yw xY w# t        $ r.}t        j                  d| d	t        |              Y d
}~yd
}~ww xY w)z;Reindex a specific jobseeker (set indexed=0 and then index)r   Fr   r   r   r   r   zError reindexing jobseeker r   N)
r!   r$   r[   r`   r   r   r   r   r_   ra   )r(   rt   r   rd   s       r.   reindex_jobseekerz#OpenSearchService.reindex_jobseeker  s    	#&|#4  --l;FF'()Q( ??<00  <\NKL  	LL6|nBs1vhOP	s5   A AA7 !A41A7 3A44A7 7	B. $B))B.c           	         	 	 t        |      }	 | j                  j                  | j                  |      }t        j                  d|        | j                  j                  d      j                  d|idddii       y# t        $ r t        j                  d|        Y yw xY w# t        $ ru}dt        |      v sdt        |      j                         v rt        j                  d| d       n)t        j                  d	| d
t        |              Y d}~yY d}~d}~ww xY w# t        $ r.}t        j                  d	| dt        |              Y d}~yd}~ww xY w)zDelete a resume from the indexr   F)r&   r   z+Successfully deleted jobseeker from index: 404	not_foundz
Jobseeker z4 not found in index, but will update database statuszError deleting jobseeker z from index: Nr   r   r   r   r   Tr   )r!   r$   r[   r`   r%   deleter&   r]   r_   ra   r"   r\   r   r   r   )r(   rt   r   rP   rd   s        r.   delete_resumezOpenSearchService.delete_resume  sP   	#&|#4 
	!;;--DJJ<-PI,XY --l;FF'()Q(
 +  <\NKL  !CF?kSV\\^&CNNZ~=q#rsLL#<\N-X[\]X^W_!`a  t!   	LL4\N"SVHMN	s_   B  ?B' 1D(  !B$!D( #B$$D( '	D%0A&D D( D(  D%%D( (	E1$EEr   c                 "   	 g }|j                  d      r*|j                  d|d    d|j                  dd              |j                  d      r|j                  d|d           |j                  d      }|rQt        |t              rd	j	                  |      }nt        |      }|j                         r|j                  d
|        dD ]0  \  }}|j                  |      s|j                  | d||           2 |s|j                  dg       }t        |t              r|r~g }	|D ]R  }
t        |
t              s|
j                  dd      }|
j                  dd      }|s|s=|	j                  | d|        T |	r#|j                  ddj	                  |	              |j                  dg       }t        |t              r|r~g }|D ]R  }t        |t              s|j                  dd      }|j                  dd      }|s|s=|j                  | d|        T |r#|j                  ddj	                  |              |j                  d      }|rXt        |t              rd	j	                  d |D              }nt        |      }|j                         r|j                  d|        dj	                  |      }|j                  dd      xs d|j                  dd      xs d|j                  dd      xs d|j                  dd      xs d|j                  dd      xs d|j                  dd      xs dg dddg d}|j                  d      }|rt        |t              rE|D cg c]5  }t        |      j                         st        |      j                         7 c}|d<   nQt        |t
              rA|j                  d       D cg c]#  }|j                         s|j                         % c}|d<   |j                  dg       }t        |t              r5|r3|D ].  }
t        |
t              s|
j                  d      s&|
d   |d!<    n |j                  dg       }t        |t              r5|r3|D ].  }t        |t              s|j                  d      s&|d   |d"<    n |j                  d      }|rt        |t              rE|D cg c]5  }t        |      j                         st        |      j                         7 c}|d<   nQt        |t
              rA|j                  d       D cg c]#  }|j                         s|j                         % c}|d<   |j                         r| j                  j                  ||d#|      S t        j                  d$| d%       d&| }|j                  d      r|d'|d    z  }|j                  d(      r|d)|d(    z  }|j                  d      r|d*|d    z  }|j                  d      r|d+|d    z  }| j                  j                  ||d#|      S c c}w c c}w c c}w c c}w # t        $ r.}t        j                  d,| d-t        |              Y d#}~y.d#}~ww xY w)/z<Helper method to store resume data in Qdrant vector databaser   zName: r   r   r}   r:   z	Profile: r;   z, zSkills: ))r<   
Experience)r=   	Education)r>   Courses)r?   Projects)r@   Certificationsr   r   titlecompanyz at zExperience: z; r   r   r   z from zEducation: r7   c              3   2   K   | ]  }t        |        y w)N)ra   ).0langs     r.   	<genexpr>z5OpenSearchService._store_in_qdrant.<locals>.<genexpr>\  s     -NDc$i-Ns   zLanguages: rA   rB   r4   r   )rA   rB   r4   r   r   r:   r;   experience_yearshighest_educationcurrent_designationr7   ,r   r   Nz$No text content found for jobseeker z to store in QdrantzResume ID: z	, Email: r9   z, Nationality: z, Country: z, Country Code: zError storing jobseeker z in Qdrant: F)r   r   rj   ro   r   ra   rZ   rl   splitr'   store_in_qdrant_with_detailsr[   r\   r_   r`   )r(   rt   r   
text_partsr;   skills_textfieldlabelr   	exp_textsexpr   r   r   	edu_textsr   r   r   r7   	lang_textcombined_textpayload_dataskillr   fallback_textrd   s                             r.   r   z"OpenSearchService._store_in_qdrant  sC   M	J ||L)!!F8L+A*B!HLLQ\^`DaCb"cd ||-.!!Ih7H.I-J"KL \\(+Ffd+"&))F"3K"%f+K$$&%%&>?! Eu <<&%%r(5/1B&CDE %\\,;
j$/J "I) J%c40$'GGGR$8E&)ggi&<G$ ) 0 0E7$wi1H IJ !"))L99M8N*OP %LLb9	i.9 "I( Q%c40%(WWXr%:F*-''-*DK% ) 0 0F86+1O PQ !"))K		)8L7M*NO %LL5	!)T2$(II-NI-N$N	$'	N	 ("))K	{*CDHHZ0M !)^R @ FB'/||4I2'N'TRT!gr28b&ll<<B%\\+r:@b#+<<0A2#F#L"$%%'')L \\(+Ffd+NT-kUX[\aXbXhXhXjc%j.>.>.@-kL*,IOVYIZ-l^c^i^i^kekkm-lL* "lB7J*d+
% C!#t,1A>A'l%:; !["5I)T*y$ C!#t,1B<?M%89 ![1Ii.OX0nt\_`d\e\k\k\mT1B0nL-	3/JS//Z]J^0o$bfblblbn0oL-""$}}AA,P]_ceqrr!El^Sfgh"-l^ <<<(!y'1B0C%DDM<<.!x7N6O%PPM<</!{8N3K2L%MMM<< 56!'7AV8W7X%YYM}}AA,P]_ceqrr[ .l-l. 1o0o(  	LL3L>cRSfXVW	s   C"Y %AY ?(Y (A8Y !(Y 
E&Y 0YY**Y Y*Y<?Y <Y AY Y %2Y Y6Y*Y ;YY#3Y B+Y Y 	Z $Z		Z)2   )   N)__name__
__module____qualname__r/   ra   rN   r!   re   rn   r   r
   r   r   r	   r   r   r   r   r   r   rS   r   r   r   r   rl   r    r0   r.   r   r      s|   #2"
3 "
HJ J# JVY JX F# F(4PSUXPX>BZ FP%$t* %tTXz %^a %$)T$Z )C ) (4: (# ( %d4j %S % &T
 &s & #DJ #3 # s t BgL gLXc] gL^bcfhkck^l gLRc d .# $ BOS OD Or0   r   )opensearchpyr   opensearchpy.helpersr   uuidr   dotenvr   r   
embeddingsr   typingr	   r
   r   r   loggingbasicConfigINFO	getLoggerr  r[   r   r  r0   r.   <module>r     s^    # %  	  (   , ,     ',, '			8	$U
 U
r0   