
    ڵ1i<                     p    d dl Z d dlmZmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ  G d d	      Zy)
    N)ListDictAny)	PdfReader)Document)BytesIO)	AIService)CVStorageServicec                       e Zd Zd ZdedefdZdedefdZdededefdZdede	ee
f   fd	Zddee   dededee	ee
f      fdZdee   dee	ee
f      fdZy
)ResumeParserServicec                 >    t               | _        d| _        d| _        y)z0Initialize resume parser service with AI serviceNa  
You are a precise resume parser assistant. Extract ONLY the information that is EXPLICITLY stated in the given resume. Your primary goal is accuracy, not completeness.

Extract the following fields:
- Name
- Email
- Phone number
- Profile summary
- Languages
- Skills
- Experience
- Education
- Date of birth
- Nationality

CRITICAL INSTRUCTIONS:
1. If a piece of information is not explicitly stated in the resume, set that field to null in the JSON output.
2. Do NOT invent, assume, or generate ANY information.
3. Do NOT use placeholder or dummy data under any circumstances.
4. Accuracy is paramount. It's mandatory to use null for any missing information.
5. For lists (like skills or languages), use an empty list [] if no items are found.
6. For nested objects (like experience or education), include them only if information is available.

Resume: {}

Provide the extracted information in the following JSON format:

{{
    "name": null,
    "email": null,
    "phone": null,
    "profile_summary": null,
    "languages": [],
    "skills": [],
    "date_of_birth": null,
    "nationality": null,
    "experience": [
        {{
            "company": null,
            "role": null,
            "start_date": null,
            "end_date": null,
            "responsibilities": null
        }}
    ],
    "education": [
        {{
            "institution": null,
            "degree": null,
            "start_date": null,
            "end_date": null
        }}
    ]
}}

FINAL CHECKLIST:
- Have you used null for ALL missing information?
- Are ALL fields either filled with accurate data from the resume or set to null?
- Have you refrained from adding ANY assumed or generated information?
- Are lists empty [] if no items are found?
- Are nested objects only included if information is available?

Remember: Your task is solely to extract and report information that is explicitly present in the resume. Do not attempt to fill gaps or make the output look complete.
)r	   
ai_service
cv_storageprompt_template)selfs    :/var/www/html/drjob-dev/drjob-ai/services/resume_parser.py__init__zResumeParserService.__init__
   s    #+?     file_contentreturnc                     	 t        |      }t        |      }d}|j                  D ]  }||j                         dz   z  } |j	                         S # t
        $ r}t        dt        |             d}~ww xY w)zExtract text from PDF file 
zError reading PDF: N)r   r   pagesextract_textstrip	Exceptionstr)r   r   pdf_filereadertextpagees          r   extract_text_from_pdfz)ResumeParserService.extract_text_from_pdfO   s~    	<|,Hx(FD 3))+d223::< 	<1#a&:;;	<s   AA 	A6A11A6c                     	 t        |      }t        |      }d}|j                  D ]  }||j                  dz   z  } |j	                         S # t
        $ r}t        dt        |             d}~ww xY w)zExtract text from Word documentr   r   zError reading Word document: N)r   r   
paragraphsr!   r   r   r   )r   r   doc_filedocr!   	paragraphr#   s          r   extract_text_from_docxz*ResumeParserService.extract_text_from_docx[   s|    	F|,H8$CD ^^ .		--.::< 	F;CF8DEE	Fs   A
A 	A2A--A2filenamec                     |j                         j                  d      r| j                  |      S |j                         j                  d      r| j                  |      S t	        d      )zExtract text based on file typez.pdf)z.docxz.doczAUnsupported file type. Only PDF and Word documents are supported.)lowerendswithr$   r*   r   )r   r   r+   s      r   extract_text_from_filez*ResumeParserService.extract_text_from_fileg   s[    >>$$V,--l;;^^&&'89..|<<_``r   resume_textc                    	 | j                   j                  |      }| j                  j                  |      }	 |j	                  d      }|j                  d      dz   }|dk7  r!|dk7  r||| }t        j                  |      }|S t        d      # t        j                  t        f$ r}dt        |       |dcY d	}~S d	}~ww xY w# t        $ r}d
dt        |       icY d	}~S d	}~ww xY w)z5Parse resume text using AI and return structured data{}   r   zNo valid JSON found in responsez%Failed to parse AI response as JSON: )errorraw_responseNr6   zError parsing resume: )r   formatr   ai_responsefindrfindjsonloads
ValueErrorJSONDecodeErrorr   r   )	r   r0   promptr9   	start_idxend_idxjson_strparsed_datar#   s	            r   parse_resumez ResumeParserService.parse_resumep   s    	))00=F //55f=K',,S1	%++C014?w!|*9W=H"&**X"6K&&$%FGG((*5 DSVHM$/   	1#a&: 	sM   6C A
B B C(B>8C9C >CC 	C)C$C)$C)N
files_datastore_in_db
job_emp_idc                    g }|D ]  \  }}	 | j                  ||      }| j                  |      }|t        |      |d}	|rO|j                  d      s>| j                  t               | _        | j                  j                  ||||      }
|
|	d<   |j                  |	        |S # t        $ r'}|j                  |t        |      d       Y d}~d}~ww xY w)z<Parse multiple resume files and optionally store in database)r+   extracted_text_lengthrD   r6   Nstorage_result)r+   r6   )
r/   rE   lengetr   r
   store_complete_cv_dataappendr   r   )r   rF   rG   rH   resultsr   r+   r0   rD   resultrK   r#   s               r    parse_and_store_multiple_resumesz4ResumeParserService.parse_and_store_multiple_resumes   s    &0 	"L("99,Q #//< !)-0-=#. {w'?.*:*< &*__%K%K +{J&N 0>F+,v&3	B    ( V   s   BB$$	C-CCc                 *    | j                  |dd      S )zFParse multiple resume files (legacy method for backward compatibility)FN)rG   rH   )rR   )r   rF   s     r   parse_multiple_resumesz*ResumeParserService.parse_multiple_resumes   s    44ZU_c4ddr   )TN)__name__
__module____qualname__r   bytesr   r$   r*   r/   r   r   rE   r   tupleboolrR   rT    r   r   r   r   	   s    CJ
<% 
<C 
<
F5 
FS 
Fa5 aC aC a S#X >%4; %UY %nq %  ~B  CG  HK  MP  HP  CQ  ~R %Nee ed3PS8nAU er   r   )r<   typingr   r   r   PyPDF2r   docxr   ior   services.ai_servicer	   services.cv_storager
   r   r[   r   r   <module>rb      s+     " "    ) 0oe oer   