import json
from typing import List, Dict, Any
from PyPDF2 import PdfReader
from docx import Document
from io import BytesIO
from services.ai_service import AIService
from services.cv_storage import CVStorageService

class ResumeParserService:
    """Extract text from resume files and turn it into structured data via AI.

    The service reads PDF / Word documents, sends the extracted text to the
    AI service together with a strict extraction prompt, parses the JSON the
    model returns and can optionally hand the result to the CV storage
    service.
    """

    def __init__(self):
        """Initialize resume parser service with AI service"""
        self.ai_service = AIService()
        # Created lazily in parse_and_store_multiple_resumes, the first time
        # a result actually has to be stored.
        self.cv_storage = None
        # Filled in with str.format(): the single "{}" receives the resume
        # text, while "{{" / "}}" are escaped literal braces of the JSON
        # example shown to the model.
        self.prompt_template = """
You are a precise resume parser assistant. Extract ONLY the information that is EXPLICITLY stated in the given resume. Your primary goal is accuracy, not completeness.

Extract the following fields:
- Name
- Email
- Phone number
- Profile summary
- Languages
- Skills
- Experience
- Education
- Date of birth
- Nationality

CRITICAL INSTRUCTIONS:
1. If a piece of information is not explicitly stated in the resume, set that field to null in the JSON output.
2. Do NOT invent, assume, or generate ANY information.
3. Do NOT use placeholder or dummy data under any circumstances.
4. Accuracy is paramount. It's mandatory to use null for any missing information.
5. For lists (like skills or languages), use an empty list [] if no items are found.
6. For nested objects (like experience or education), include them only if information is available.

Resume: {}

Provide the extracted information in the following JSON format:

{{
    "name": null,
    "email": null,
    "phone": null,
    "profile_summary": null,
    "languages": [],
    "skills": [],
    "date_of_birth": null,
    "nationality": null,
    "experience": [
        {{
            "company": null,
            "role": null,
            "start_date": null,
            "end_date": null,
            "responsibilities": null
        }}
    ],
    "education": [
        {{
            "institution": null,
            "degree": null,
            "start_date": null,
            "end_date": null
        }}
    ]
}}

FINAL CHECKLIST:
- Have you used null for ALL missing information?
- Are ALL fields either filled with accurate data from the resume or set to null?
- Have you refrained from adding ANY assumed or generated information?
- Are lists empty [] if no items are found?
- Are nested objects only included if information is available?

Remember: Your task is solely to extract and report information that is explicitly present in the resume. Do not attempt to fill gaps or make the output look complete.
"""

    def extract_text_from_pdf(self, file_content: bytes) -> str:
        """Extract text from PDF file.

        Args:
            file_content: Raw bytes of the PDF document.

        Returns:
            The text of all pages joined by newlines, with surrounding
            whitespace stripped.

        Raises:
            Exception: If the PDF cannot be read; the original error is
                chained as the cause.
        """
        try:
            reader = PdfReader(BytesIO(file_content))
            # A page without extractable text may yield None; treat it as
            # empty instead of failing on `None + "\n"`.
            page_texts = [page.extract_text() or "" for page in reader.pages]
            return "\n".join(page_texts).strip()
        except Exception as e:
            raise Exception(f"Error reading PDF: {str(e)}") from e

    def extract_text_from_docx(self, file_content: bytes) -> str:
        """Extract text from Word document.

        Only the document's paragraphs are read.

        Args:
            file_content: Raw bytes of the Word document.

        Returns:
            The paragraph texts joined by newlines, with surrounding
            whitespace stripped.

        Raises:
            Exception: If the document cannot be read; the original error is
                chained as the cause.
        """
        try:
            doc = Document(BytesIO(file_content))
            return "\n".join(paragraph.text for paragraph in doc.paragraphs).strip()
        except Exception as e:
            raise Exception(f"Error reading Word document: {str(e)}") from e

    def extract_text_from_file(self, file_content: bytes, filename: str) -> str:
        """Extract text based on file type.

        The file type is decided from the (case-insensitive) filename
        extension only; the content is not inspected.

        Args:
            file_content: Raw bytes of the uploaded file.
            filename: Name of the file, used to pick the extractor.

        Returns:
            The extracted text.

        Raises:
            Exception: If the extension is not .pdf, .docx or .doc, or if the
                selected extractor fails.
        """
        lowered_name = filename.lower()
        if lowered_name.endswith('.pdf'):
            return self.extract_text_from_pdf(file_content)
        # NOTE(review): legacy binary .doc files are routed to the same reader
        # as .docx -- confirm that the reader actually accepts them.
        if lowered_name.endswith(('.docx', '.doc')):
            return self.extract_text_from_docx(file_content)
        raise Exception("Unsupported file type. Only PDF and Word documents are supported.")

    @staticmethod
    def _extract_json_object(ai_response: Any) -> Dict[str, Any]:
        """Return the first JSON object embedded in the AI response.

        Decoding starts at the first "{" and stops at the end of that JSON
        object, so explanatory text before or after the object (even text
        that itself contains braces) does not break parsing.

        Raises:
            ValueError: If the response is not a string, contains no "{", or
                the text starting at the first "{" is not valid JSON
                (json.JSONDecodeError is a ValueError subclass).
        """
        if not isinstance(ai_response, str):
            raise ValueError("No valid JSON found in response")
        start_idx = ai_response.find('{')
        if start_idx == -1:
            raise ValueError("No valid JSON found in response")
        parsed_data, _ = json.JSONDecoder().raw_decode(ai_response[start_idx:])
        return parsed_data

    def parse_resume(self, resume_text: str) -> Dict[str, Any]:
        """Parse resume text using AI and return structured data.

        This method does not raise; every failure is reported through an
        "error" key in the returned dictionary.

        Args:
            resume_text: Plain text extracted from a resume.

        Returns:
            The dictionary decoded from the AI response on success.
            Otherwise a dictionary with an "error" message; when the AI
            answered but its response could not be decoded, the dictionary
            also carries the untouched answer under "raw_response".
        """
        # Nothing to extract from (e.g. an image-only PDF): skip the AI call
        # rather than asking the model to parse an empty resume.
        if not resume_text or not resume_text.strip():
            return {
                "error": "Error parsing resume: No text could be extracted from the resume"
            }

        try:
            # Create prompt with resume text
            prompt = self.prompt_template.format(resume_text)

            # Get AI response
            ai_response = self.ai_service.ai_response(prompt)

            # Try to parse JSON from AI response (there may be extra text
            # around the JSON object)
            try:
                return self._extract_json_object(ai_response)
            except ValueError as e:
                return {
                    "error": f"Failed to parse AI response as JSON: {str(e)}",
                    "raw_response": ai_response
                }

        except Exception as e:
            return {
                "error": f"Error parsing resume: {str(e)}"
            }

    def parse_and_store_multiple_resumes(self, files_data: List[tuple], store_in_db: bool = True, job_emp_id: str = None) -> List[Dict[str, Any]]:
        """Parse multiple resume files and optionally store in database.

        Files are processed independently: a failure on one file is recorded
        in its result entry and does not stop the remaining files.

        Args:
            files_data: Sequence of (file_content, filename) pairs.
            store_in_db: When true, successfully parsed resumes are passed to
                the CV storage service.
            job_emp_id: Identifier forwarded unchanged to the storage call.

        Returns:
            One dictionary per input file, in input order. A processed file
            yields "filename", "extracted_text_length" and "parsed_data"
            (plus "storage_result" when it was stored). If text extraction,
            or storage raised, the entry holds only "filename" and "error".
        """
        results = []

        for file_content, filename in files_data:
            try:
                # Extract text from file
                resume_text = self.extract_text_from_file(file_content, filename)

                # Parse resume
                parsed_data = self.parse_resume(resume_text)

                result = {
                    "filename": filename,
                    "extracted_text_length": len(resume_text),
                    "parsed_data": parsed_data
                }

                # Store in database if requested and parsing was successful
                if store_in_db and not parsed_data.get("error"):
                    # The storage service is only constructed once something
                    # really has to be stored.
                    if self.cv_storage is None:
                        self.cv_storage = CVStorageService()

                    # Pass job_emp_id to storage method
                    result["storage_result"] = self.cv_storage.store_complete_cv_data(
                        filename, resume_text, parsed_data, job_emp_id
                    )

                results.append(result)

            except Exception as e:
                results.append({
                    "filename": filename,
                    "error": str(e)
                })

        return results

    def parse_multiple_resumes(self, files_data: List[tuple]) -> List[Dict[str, Any]]:
        """Parse multiple resume files (legacy method for backward compatibility).

        Equivalent to parse_and_store_multiple_resumes with storage disabled.
        """
        return self.parse_and_store_multiple_resumes(files_data, store_in_db=False, job_emp_id=None)