Ë
    Úµ1i<  ã                   óp   — d dl Z d dlmZmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ  G d„ d	«      Zy)
é    N)ÚListÚDictÚAny)Ú	PdfReader)ÚDocument)ÚBytesIO)Ú	AIService)ÚCVStorageServicec                   ó¶   — e Zd Zd„ Zdedefd„Zdedefd„Zdededefd„Zdede	ee
f   fd	„Zddee   dededee	ee
f      fd„Zdee   dee	ee
f      fd„Zy
)ÚResumeParserServicec                 ó>   — t        «       | _        d| _        d| _        y)z0Initialize resume parser service with AI serviceNaº  
You are a precise resume parser assistant. Extract ONLY the information that is EXPLICITLY stated in the given resume. Your primary goal is accuracy, not completeness.

Extract the following fields:
- Name
- Email
- Phone number
- Profile summary
- Languages
- Skills
- Experience
- Education
- Date of birth
- Nationality

CRITICAL INSTRUCTIONS:
1. If a piece of information is not explicitly stated in the resume, set that field to null in the JSON output.
2. Do NOT invent, assume, or generate ANY information.
3. Do NOT use placeholder or dummy data under any circumstances.
4. Accuracy is paramount. It's mandatory to use null for any missing information.
5. For lists (like skills or languages), use an empty list [] if no items are found.
6. For nested objects (like experience or education), include them only if information is available.

Resume: {}

Provide the extracted information in the following JSON format:

{{
    "name": null,
    "email": null,
    "phone": null,
    "profile_summary": null,
    "languages": [],
    "skills": [],
    "date_of_birth": null,
    "nationality": null,
    "experience": [
        {{
            "company": null,
            "role": null,
            "start_date": null,
            "end_date": null,
            "responsibilities": null
        }}
    ],
    "education": [
        {{
            "institution": null,
            "degree": null,
            "start_date": null,
            "end_date": null
        }}
    ]
}}

FINAL CHECKLIST:
- Have you used null for ALL missing information?
- Are ALL fields either filled with accurate data from the resume or set to null?
- Have you refrained from adding ANY assumed or generated information?
- Are lists empty [] if no items are found?
- Are nested objects only included if information is available?

Remember: Your task is solely to extract and report information that is explicitly present in the resume. Do not attempt to fill gaps or make the output look complete.
)r	   Ú
ai_serviceÚ
cv_storageÚprompt_template)Úselfs    ú:/var/www/html/drjob-dev/drjob-ai/services/resume_parser.pyÚ__init__zResumeParserService.__init__
   s   € ä#›+ˆŒØˆŒð? ˆÕó    Úfile_contentÚreturnc                 óò   — 	 t        |«      }t        |«      }d}|j                  D ]  }||j                  «       dz   z  }Œ |j	                  «       S # t
        $ r}t        dt        |«      › «      ‚d}~ww xY w)zExtract text from PDF fileÚ Ú
zError reading PDF: N)r   r   ÚpagesÚextract_textÚstripÚ	ExceptionÚstr)r   r   Úpdf_fileÚreaderÚtextÚpageÚes          r   Úextract_text_from_pdfz)ResumeParserService.extract_text_from_pdfO   s~   € ð	<Ü˜|Ó,ˆHÜ˜xÓ(ˆFØˆDØŸ™ò 3Ø˜×)Ñ)Ó+¨dÑ2Ñ2‘ð3à—:‘:“<ÐøÜò 	<ÜÐ1´#°a³&°Ð:Ó;Ð;ûð	<ús   ‚AA Á	A6ÁA1Á1A6c                 óê   — 	 t        |«      }t        |«      }d}|j                  D ]  }||j                  dz   z  }Œ |j	                  «       S # t
        $ r}t        dt        |«      › «      ‚d}~ww xY w)zExtract text from Word documentr   r   zError reading Word document: N)r   r   Ú
paragraphsr!   r   r   r   )r   r   Údoc_fileÚdocr!   Ú	paragraphr#   s          r   Úextract_text_from_docxz*ResumeParserService.extract_text_from_docx[   s|   € ð	FÜ˜|Ó,ˆHÜ˜8Ó$ˆCØˆDØ Ÿ^™^ò .	Ø˜	Ÿ™¨Ñ-Ñ-‘ð.à—:‘:“<ÐøÜò 	FÜÐ;¼CÀ»F¸8ÐDÓEÐEûð	Fús   ‚A
A Á	A2ÁA-Á-A2Úfilenamec                 óØ   — |j                  «       j                  d«      r| j                  |«      S |j                  «       j                  d«      r| j                  |«      S t	        d«      ‚)zExtract text based on file typez.pdf)z.docxz.doczAUnsupported file type. Only PDF and Word documents are supported.)ÚlowerÚendswithr$   r*   r   )r   r   r+   s      r   Úextract_text_from_filez*ResumeParserService.extract_text_from_fileg   s[   € à>‰>Ó×$Ñ$ VÔ,Ø×-Ñ-¨lÓ;Ð;Ø^‰^Ó×&Ñ&Ð'8Ô9Ø×.Ñ.¨|Ó<Ð<äÐ_Ó`Ð`r   Úresume_textc                 óØ  — 	 | j                   j                  |«      }| j                  j                  |«      }	 |j	                  d«      }|j                  d«      dz   }|dk7  r!|dk7  r||| }t        j                  |«      }|S t        d«      ‚# t        j                  t        f$ r}dt        |«      › |dœcY d	}~S d	}~ww xY w# t        $ r}d
dt        |«      › icY d	}~S d	}~ww xY w)z5Parse resume text using AI and return structured dataú{ú}é   éÿÿÿÿr   zNo valid JSON found in responsez%Failed to parse AI response as JSON: )ÚerrorÚraw_responseNr6   zError parsing resume: )r   Úformatr   Úai_responseÚfindÚrfindÚjsonÚloadsÚ
ValueErrorÚJSONDecodeErrorr   r   )	r   r0   Úpromptr9   Ú	start_idxÚend_idxÚjson_strÚparsed_datar#   s	            r   Úparse_resumez ResumeParserService.parse_resumep   sø   € ð	à×)Ñ)×0Ñ0°Ó=ˆFð Ÿ/™/×5Ñ5°fÓ=ˆKðà'×,Ñ,¨SÓ1	Ø%×+Ñ+¨CÓ0°1Ñ4Ø ’? w°!¢|Ø*¨9°WÐ=HÜ"&§*¡*¨XÓ"6KØ&Ð&ä$Ð%FÓGÐGøÜ×(Ñ(¬*Ð5ò àDÄSÈÃVÀHÐMØ$/ñõ ûðûô ò 	àÐ1´#°a³&°Ð:ðõ ûð	úsM   ‚6C ¹A
B ÂB ÂCÂ(B>Â8CÂ9C Â>CÃC Ã	C)ÃC$ÃC)Ã$C)NÚ
files_dataÚstore_in_dbÚ
job_emp_idc                 ó®  — g }|D ]š  \  }}	 | j                  ||«      }| j                  |«      }|t        |«      |dœ}	|rO|j                  d«      s>| j                  €t        «       | _        | j                  j                  ||||«      }
|
|	d<   |j                  |	«       Œœ |S # t        $ r'}|j                  |t        |«      dœ«       Y d}~ŒÊd}~ww xY w)z<Parse multiple resume files and optionally store in database)r+   Úextracted_text_lengthrD   r6   NÚstorage_result)r+   r6   )
r/   rE   ÚlenÚgetr   r
   Ústore_complete_cv_dataÚappendr   r   )r   rF   rG   rH   Úresultsr   r+   r0   rD   ÚresultrK   r#   s               r   Ú parse_and_store_multiple_resumesz4ResumeParserService.parse_and_store_multiple_resumes   sñ   € àˆà&0ò 	Ñ"ˆL˜(ðà"×9Ñ9¸,ÈÓQð #×/Ñ/°Ó<ð !)Ü-0°Ó-=Ø#.ññ  {§¡°wÔ'?Ø—‘Ð.Ü*:Ó*<˜œð &*§_¡_×%KÑ%KØ  +¨{¸Jó&Nð 0>FÐ+Ñ,à—‘˜vÕ&ð3	ðB ˆøô ò Ø—‘Ø (Ü  ›Vñ ÷ ñ ûðús   ŒBB$Â$	CÂ-CÃCc                 ó*   — | j                  |dd¬«      S )zFParse multiple resume files (legacy method for backward compatibility)FN)rG   rH   )rR   )r   rF   s     r   Úparse_multiple_resumesz*ResumeParserService.parse_multiple_resumes¶   s   € à×4Ñ4°ZÈUÐ_cÐ4ÓdÐdr   )TN)Ú__name__Ú
__module__Ú__qualname__r   Úbytesr   r$   r*   r/   r   r   rE   r   ÚtupleÚboolrR   rT   © r   r   r   r   	   sê   „ òCðJ
<°%ð 
<¸Có 
<ð
F°5ð 
F¸Só 
Fða°5ð aÀCð aÈCó að¨ð °°S¸#°X±ó ñ>%¸4À¹;ð %ÐUYð %Ðnqð %ð  ~Bð  CGð  HKð  MPð  HPñ  CQñ  ~Ró %ðNe°°e±ð eÀÀdÈ3ÐPSÈ8ÁnÑAUô er   r   )r<   Útypingr   r   r   ÚPyPDF2r   Údocxr   Úior   Úservices.ai_servicer	   Úservices.cv_storager
   r   r[   r   r   ú<module>rb      s+   ðÛ ß "Ñ "Ý Ý Ý Ý )Ý 0÷oeò oer   