from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk
import os
from dotenv import load_dotenv
from .mongo_service import mongo_service
from .embeddings import Embedder
from typing import List, Dict, Any, Optional
import logging

# Load environment variables
load_dotenv()

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
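
# Expected environment variables (values below are illustrative; set them for
# your own deployment):
#   OPENSEARCH_HOST=search.example.com   # required
#   OPENSEARCH_PORT=443                  # optional, defaults to 443
#   OPENSEARCH_USER=admin                # required
#   OPENSEARCH_PASSWORD=<secret>         # required
#   OPENSEARCH_USE_SSL=true              # optional, defaults to true
#   OPENSEARCH_INDEX=resumes             # optional, defaults to "resumes"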


class OpenSearchService:
    def __init__(self):
        # Get OpenSearch configuration from environment variables
        opensearch_host = os.getenv("OPENSEARCH_HOST")
        opensearch_port = int(os.getenv("OPENSEARCH_PORT", "443"))
        opensearch_user = os.getenv("OPENSEARCH_USER")
        opensearch_password = os.getenv("OPENSEARCH_PASSWORD")
        opensearch_use_ssl = os.getenv("OPENSEARCH_USE_SSL", "true").lower() == "true"
        
        if not all([opensearch_host, opensearch_user, opensearch_password]):
            raise ValueError("OpenSearch configuration not complete in environment variables")
        
        self.client = OpenSearch(
            hosts=[
                {
                    "host": opensearch_host,
                    "port": opensearch_port,
                }
            ],
            http_auth=(opensearch_user, opensearch_password),
            use_ssl=opensearch_use_ssl,
        )
        self.index = os.getenv("OPENSEARCH_INDEX", "resumes")
        self.mongo_service = mongo_service
        self.embedder = Embedder()  # Initialize embedder for Qdrant integration

    def search(self, text: str, top_k: int = 50):
        """Search resumes with a multi-match query across the indexed text fields"""
        query = {
            "size": top_k,
            "query": {
                "multi_match": {
                    "query": text,
                    "fields": [
                        "email",
                        "name", 
                        "phone",
                        "languages",
                        "date_of_birth",
                        "nationality",
                        "profile_summary",
                        "skills",
                        "experience_text",      # Use text versions for search
                        "education_text",
                        "courses_text",
                        "projects_text",
                        "certifications_text",
                        "country_name",
                        "country_header_code"
                    ],
                }
            },
        }

        response = self.client.search(index=self.index, body=query)
        hits = response["hits"]["hits"]

        return [
            {"resume_id": hit["_source"]["resume_id"], "keyword_score": hit["_score"]}
            for hit in hits
        ]
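
    # Illustrative usage (assumes the index already contains documents):
    #   svc = OpenSearchService()
    #   hits = svc.search("senior python developer", top_k=10)
    #   # hits -> [{"resume_id": "...", "keyword_score": <float>}, ...]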

    def search_with_country_filter(self, query: str, country_header_code: str, top_k: int = 50):
        """
        Search resumes in OpenSearch with mandatory country filtering for screening type 2
        
        Args:
            query: Search query text
            country_header_code: Country header code that must match (e.g., 'ae', 'bd', 'in')
            top_k: Maximum number of results to return
            
        Returns:
            List of search results with resume_id and keyword_score, filtered by country
        """
        try:
            if not query.strip():
                logger.warning("Empty query provided to OpenSearch country search")
                return []
                
            if not country_header_code:
                logger.warning("No country_header_code provided - falling back to regular search")
                return self.search(query, top_k)
            
            # Build query with mandatory country filter
            search_body = {
                "size": top_k,
                "query": {
                    "bool": {
                        "must": [
                            {
                                "multi_match": {
                                    "query": query,
                                    "fields": [
                                        "email",
                                        "name", 
                                        "phone",
                                        "languages",
                                        "date_of_birth",
                                        "nationality",
                                        "profile_summary^2",  # Higher weight for profile summary
                                        "skills^2",           # Higher weight for skills
                                        "experience_text^1.5", # Higher weight for experience
                                        "education_text",
                                        "courses_text",
                                        "projects_text",
                                        "certifications_text",
                                        "country_name"
                                    ],
                                    "type": "best_fields",
                                    "fuzziness": "AUTO"
                                }
                            },
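                            # Exact-match clause: assumes country_header_code is mapped as a
                            # keyword field, since term queries are not analyzed.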
                            {
                                "term": {
                                    "country_header_code": country_header_code.lower()
                                }
                            }
                        ]
                    }
                },
                "_source": ["resume_id"]
            }
            
            response = self.client.search(index=self.index, body=search_body)
            hits = response["hits"]["hits"]
            
            results = [
                {"resume_id": hit["_source"]["resume_id"], "keyword_score": hit["_score"]}
                for hit in hits
            ]
            
            logger.info(f"OpenSearch country search found {len(results)} results for country: {country_header_code}")
            return results
            
        except Exception as e:
            logger.error(f"Error in OpenSearch country search: {str(e)}")
            return []

    def _serialize_document(self, obj):
        """Convert MongoDB ObjectIds and datetime objects to JSON serializable format"""
        from bson import ObjectId
        from datetime import datetime
        
        if isinstance(obj, ObjectId):
            return str(obj)
        elif isinstance(obj, datetime):
            return obj.isoformat()
        elif isinstance(obj, dict):
            return {k: self._serialize_document(v) for k, v in obj.items()}
        elif isinstance(obj, list):
            return [self._serialize_document(item) for item in obj]
        else:
            return obj

    def get_jobseeker_data_with_joins(self, jobseeker_id: str) -> Optional[Dict[str, Any]]:
        """Get jobseeker data with all related tables joined using optimized mongo_service aggregation"""
        try:
            # Use optimized aggregation method from mongo_service
            complete_data = self.mongo_service.get_jobseeker_complete_data(jobseeker_id)
            if not complete_data:
                logger.warning(f"Jobseeker not found: {jobseeker_id}")
                return None
            
            # Extract data from aggregation result
            basic_details = complete_data.get("basic_details", {}) or {}
            course_details = complete_data.get("course_details", []) or []
            education_details = complete_data.get("education_details", []) or []
            employment_details = complete_data.get("employment_details", []) or []
            project_details = complete_data.get("project_details", []) or []
            certification_details = complete_data.get("certification_details", []) or []
            country_name = complete_data.get("country_name", "") or ""
            country_header_code = complete_data.get("country_header_code", "") or ""

            # Build the complete document for indexing
            document = {
                "resume_id": str(jobseeker_id),
                "email": complete_data.get("email") or complete_data.get("secondary_email", ""),
                "first_name": basic_details.get("first_name") or complete_data.get("first_name", ""),
                "last_name": basic_details.get("last_name") or complete_data.get("last_name", ""),
                "phone": complete_data.get("mobile_number") or complete_data.get("secondary_mobile_number") or basic_details.get("phone", ""),
                "known_languages": basic_details.get("known_languages") or complete_data.get("known_languages", ""),
                "languages": basic_details.get("languages") or complete_data.get("languages", ""),
                "date_of_birth": basic_details.get("birth_date") or complete_data.get("birth_date") or basic_details.get("date_of_birth", ""),
                "nationality": basic_details.get("nationality") or complete_data.get("nationality", ""),
                "profile_summary": basic_details.get("profile_summary") or complete_data.get("profile_summary", ""),
                "country_name": country_name,
                "country_header_code": country_header_code,
                "skills": basic_details.get("key_skills") or self._extract_skills(employment_details, project_details),
                #"skills": self._extract_skills(employment_details, project_details),
                "experience": employment_details,  # Send as array of objects instead of string
                "education": education_details,    # Send as array of objects instead of string
                "courses": course_details,         # Send as array of objects instead of string
                "projects": project_details,       # Send as array of objects instead of string
                "certifications": certification_details,  # Send as array of objects instead of string
                # Add searchable text versions for search
                "experience_text": self._format_experience(employment_details),
                "education_text": self._format_education(education_details),
                "courses_text": self._format_courses(course_details),
                "projects_text": self._format_projects(project_details),
                "certifications_text": self._format_certifications(certification_details),
                "jobseeker_data": {
                    "jobseeker_id": jobseeker_id,
                    "status": complete_data.get("status"),
                    "indexed": complete_data.get("indexed"),
                    "country_id": complete_data.get("country_id"),
                    "basic_details": basic_details,
                    "course_details": course_details,
                    "education_details": education_details,
                    "employment_details": employment_details,
                    "project_details": project_details,
                    "certification_details": certification_details,
                    "country_name": country_name,
                    "country_header_code": country_header_code
                }
            }

            # Serialize ObjectIds and datetime objects for OpenSearch compatibility
            serialized_document = self._serialize_document(document)
            return serialized_document

        except Exception as e:
            logger.error(f"Error getting jobseeker data for {jobseeker_id}: {str(e)}")
            return None

    def _extract_skills(self, employment_details: List[Dict], project_details: List[Dict]) -> str:
        """Extract skills from employment and project details"""
        skills = []
        
        # Extract from employment
        for emp in employment_details:
            if emp.get('skills'):
                skills.extend(emp['skills'] if isinstance(emp['skills'], list) else [emp['skills']])
        
        # Extract from projects
        for proj in project_details:
            if proj.get('skills'):
                skills.extend(proj['skills'] if isinstance(proj['skills'], list) else [proj['skills']])
            if proj.get('technologies'):
                skills.extend(proj['technologies'] if isinstance(proj['technologies'], list) else [proj['technologies']])

        return " ".join(set(skills))

    def _format_experience(self, employment_details: List[Dict]) -> str:
        """Format employment experience for search"""
        experience_text = []
        for emp in employment_details:
            exp_parts = []
            if emp.get('designation'):
                exp_parts.append(emp['designation'])
            if emp.get('company_name'):
                exp_parts.append(emp['company_name'])
            if emp.get('job_description'):
                exp_parts.append(emp['job_description'])
            if exp_parts:
                experience_text.append(" ".join(exp_parts))
        
        return " ".join(experience_text)

    def _format_education(self, education_details: List[Dict]) -> str:
        """Format education details for search"""
        education_text = []
        for edu in education_details:
            edu_parts = []
            if edu.get('degree'):
                edu_parts.append(edu['degree'])
            if edu.get('institution'):
                edu_parts.append(edu['institution'])
            if edu.get('field_of_study'):
                edu_parts.append(edu['field_of_study'])
            if edu_parts:
                education_text.append(" ".join(edu_parts))
        
        return " ".join(education_text)

    def _format_courses(self, course_details: List[Dict]) -> str:
        """Format course details for search"""
        course_text = []
        for course in course_details:
            course_parts = []
            if course.get('course_name'):
                course_parts.append(course['course_name'])
            if course.get('institution'):
                course_parts.append(course['institution'])
            if course.get('description'):
                course_parts.append(course['description'])
            if course_parts:
                course_text.append(" ".join(course_parts))
        
        return " ".join(course_text)

    def _format_projects(self, project_details: List[Dict]) -> str:
        """Format project details for search"""
        project_text = []
        for project in project_details:
            proj_parts = []
            if project.get('project_name'):
                proj_parts.append(project['project_name'])
            if project.get('description'):
                proj_parts.append(project['description'])
            if project.get('role'):
                proj_parts.append(project['role'])
            if proj_parts:
                project_text.append(" ".join(proj_parts))
        
        return " ".join(project_text)

    def _format_certifications(self, certification_details: List[Dict]) -> str:
        """Format certification details for search"""
        cert_text = []
        for cert in certification_details:
            cert_parts = []
            if cert.get('certification_name'):
                cert_parts.append(cert['certification_name'])
            if cert.get('issuing_organization'):
                cert_parts.append(cert['issuing_organization'])
            if cert.get('description'):
                cert_parts.append(cert['description'])
            if cert_parts:
                cert_text.append(" ".join(cert_parts))
        
        return " ".join(cert_text)

    def add_resume(self, jobseeker_id: str) -> bool:
        """Add a single resume to the index using jobseeker_id and store in Qdrant"""
        try:
            document = self.get_jobseeker_data_with_joins(jobseeker_id)
            if not document:
                logger.error(f"Could not get data for jobseeker: {jobseeker_id}")
                return False

            # Index in OpenSearch
            response = self.client.index(index=self.index, id=jobseeker_id, body=document)
            
            # Store in Qdrant vector database
            self._store_in_qdrant(jobseeker_id, document)
            
            # Update indexed status using mongo_service
            try:
                jobseeker_id_int = int(jobseeker_id)
            except ValueError:
                logger.error(f"Invalid jobseeker_id format: {jobseeker_id}")
                return False
                
            self.mongo_service.get_collection('jobseekers').update_one(
                {"id": jobseeker_id_int},  # Use 'id' field and integer value
                {"$set": {"indexed": 1}}
            )
            
            logger.info(f"Successfully indexed jobseeker: {jobseeker_id}")
            return True

        except Exception as e:
            logger.error(f"Error indexing jobseeker {jobseeker_id}: {str(e)}")
            return False

    def bulk_index_jobseekers(self, batch_size: int = 25, max_total: Optional[int] = None) -> Dict[str, int]:
        """Bulk index jobseekers with status=1 and indexed=0 using optimized mongo_service methods
        
        Args:
            batch_size: Number of jobseekers to process in each batch
            max_total: Maximum total number of jobseekers to index (None = index all)
        """
        try:
            if not self.mongo_service.is_connected():
                logger.error("MongoDB not connected")
                return {"total": 0, "indexed": 0, "failed": 0, "error": "Database not connected"}

            # Skip slow count operation and start processing directly
            logger.info(f"Starting bulk indexing with batch_size={batch_size}, max_total={max_total}")
            
            indexed_count = 0
            failed_count = 0
            processed_count = 0
            
            # Process in batches until we reach max_total or no more data
            skip = 0
            while True:
                # Stop if we've reached max_total
                if max_total is not None and processed_count >= max_total:
                    logger.info(f"Reached max_total limit of {max_total}")
                    break
                
                # Calculate batch size for this iteration
                if max_total is not None:
                    remaining = max_total - processed_count
                    current_batch_size = min(batch_size, remaining)
                else:
                    current_batch_size = batch_size
                
                # Use optimized batch retrieval
                jobseekers = self.mongo_service.get_unindexed_jobseekers_batch(skip, current_batch_size)
                
                if not jobseekers:
                    logger.info("No more unindexed jobseekers found")
                    break

                actions = []
                jobseeker_ids_to_update = []
                
                for jobseeker in jobseekers:
                    # Stop if we've reached the max_total limit
                    if max_total is not None and processed_count >= max_total:
                        break
                        
                    jobseeker_id = str(jobseeker['id'])  # Use 'id' field instead of '_id'
                    document = self.get_jobseeker_data_with_joins(jobseeker_id)
                    
                    if document:
                        action = {
                            "_index": self.index,
                            "_id": jobseeker_id,
                            "_source": document
                        }
                        actions.append(action)
                        jobseeker_ids_to_update.append(jobseeker_id)
                        
                        # Store in Qdrant vector database
                        self._store_in_qdrant(jobseeker_id, document)
                        
                    else:
                        failed_count += 1
                    
                    processed_count += 1  # Increment processed count

                # Bulk index to OpenSearch
                if actions:
                    try:
                        # raise_on_error=False so partial failures are returned in `errors`
                        # (and handled below) instead of raising for the whole batch.
                        success_count, errors = bulk(self.client, actions, raise_on_error=False)
                        indexed_count += success_count
                        
                        # Update indexed status using optimized method
                        if jobseeker_ids_to_update:
                            update_success = self.mongo_service.update_jobseekers_indexed_status(jobseeker_ids_to_update)
                            if not update_success:
                                logger.warning(f"Failed to update indexed status for batch starting at {skip}")
                        
                        if errors:
                            failed_count += len(errors)
                            logger.error(f"Bulk index errors: {errors}")
                        
                    except Exception as e:
                        failed_count += len(actions)
                        logger.error(f"Bulk index failed: {str(e)}")

                skip += len(jobseekers)  # Use actual batch size returned
                logger.info(f"Processed {processed_count} jobseekers so far...")

            result = {
                "total": processed_count,
                "indexed": indexed_count,
                "failed": failed_count
            }
            
            logger.info(f"Bulk indexing completed: {result}")
            return result

        except Exception as e:
            logger.error(f"Error in bulk indexing: {str(e)}")
            return {"total": 0, "indexed": 0, "failed": 0, "error": str(e)}

    def reindex_jobseeker(self, jobseeker_id: str) -> bool:
        """Reindex a specific jobseeker (set indexed=0 and then index)"""
        try:
            # Convert to int for database query
            try:
                jobseeker_id_int = int(jobseeker_id)
            except ValueError:
                logger.error(f"Invalid jobseeker_id format: {jobseeker_id}")
                return False
            
            # Reset indexed status using mongo_service
            self.mongo_service.get_collection('jobseekers').update_one(
                {"id": jobseeker_id_int},  # Use 'id' field and integer value
                {"$set": {"indexed": 0}}
            )
            
            # Index the jobseeker
            return self.add_resume(jobseeker_id)
            
        except Exception as e:
            logger.error(f"Error reindexing jobseeker {jobseeker_id}: {str(e)}")
            return False

    def delete_resume(self, jobseeker_id: str) -> bool:
        """Delete a resume from the index"""
        try:
            # Convert to int for database query first
            try:
                jobseeker_id_int = int(jobseeker_id)
            except ValueError:
                logger.error(f"Invalid jobseeker_id format: {jobseeker_id}")
                return False
            
            try:
                response = self.client.delete(index=self.index, id=jobseeker_id)
                logger.info(f"Successfully deleted jobseeker from index: {jobseeker_id}")
            except Exception as e:
                # Check if it's a 404 error (document not found)
                if "404" in str(e) or "not_found" in str(e).lower():
                    logger.warning(f"Jobseeker {jobseeker_id} not found in index, but will update database status")
                else:
                    logger.error(f"Error deleting jobseeker {jobseeker_id} from index: {str(e)}")
                    return False
            
            # Update indexed status using mongo_service regardless of delete result
            self.mongo_service.get_collection('jobseekers').update_one(
                {"id": jobseeker_id_int},  # Use 'id' field and integer value
                {"$set": {"indexed": 0}}
            )
            
            return True
            
        except Exception as e:
            logger.error(f"Error deleting jobseeker {jobseeker_id}: {str(e)}")
            return False

    def _store_in_qdrant(self, jobseeker_id: str, document: dict):
        """Helper method to store resume data in Qdrant vector database"""
        try:
            # Create a combined text from various document fields for embedding
            text_parts = []
            
            # Add personal info
            if document.get('first_name'):
                text_parts.append(f"Name: {document['first_name']} {document.get('last_name', '')}")
            
            # Add profile summary
            if document.get('profile_summary'):
                text_parts.append(f"Profile: {document['profile_summary']}")
                
            # Add skills
            skills = document.get('skills')
            if skills:
                if isinstance(skills, list):
                    skills_text = ', '.join(skills)
                else:
                    skills_text = str(skills)
                if skills_text.strip():
                    text_parts.append(f"Skills: {skills_text}")
            
            # Add structured data sections
            for field, label in [
                ('experience_text', 'Experience'),
                ('education_text', 'Education'), 
                ('courses_text', 'Courses'),
                ('projects_text', 'Projects'),
                ('certifications_text', 'Certifications')
            ]:
                if document.get(field):
                    text_parts.append(f"{label}: {document[field]}")
            
            # Add structured arrays as fallback if text fields are empty
            if not text_parts:
                # Try experience array
                experience = document.get('experience', [])
                if isinstance(experience, list) and experience:
                    exp_texts = []
                    for exp in experience:
                        if isinstance(exp, dict):
                            # Employment records use 'designation'/'company_name' elsewhere; fall back to 'title'/'company'.
                            title = exp.get('designation') or exp.get('title', '')
                            company = exp.get('company_name') or exp.get('company', '')
                            if title or company:
                                exp_texts.append(f"{title} at {company}")
                    if exp_texts:
                        text_parts.append(f"Experience: {'; '.join(exp_texts)}")
                
                # Try education array
                education = document.get('education', [])
                if isinstance(education, list) and education:
                    edu_texts = []
                    for edu in education:
                        if isinstance(edu, dict):
                            degree = edu.get('degree', '')
                            institution = edu.get('institution', '')
                            if degree or institution:
                                edu_texts.append(f"{degree} from {institution}")
                    if edu_texts:
                        text_parts.append(f"Education: {'; '.join(edu_texts)}")
                        
                # Try languages
                languages = document.get('languages')
                if languages:
                    if isinstance(languages, list):
                        lang_text = ', '.join(str(lang) for lang in languages)
                    else:
                        lang_text = str(languages)
                    if lang_text.strip():
                        text_parts.append(f"Languages: {lang_text}")
            
            combined_text = " ".join(text_parts)
            
            # Extract and prepare detailed payload information
            payload_data = {
                'country_name': document.get('country_name', '') or '',
                'country_header_code': document.get('country_header_code', '') or '',
                'email': document.get('email', '') or '',
                'first_name': document.get('first_name', '') or '',
                'last_name': document.get('last_name', '') or '',
                'profile_summary': document.get('profile_summary', '') or '',
                'skills': [],
                'experience_years': 0,
                'highest_education': '',
                'current_designation': '',
                'languages': []
            }
            
            # Parse skills
            skills = document.get('skills')
            if skills:
                if isinstance(skills, list):
                    payload_data['skills'] = [str(skill).strip() for skill in skills if str(skill).strip()]
                elif isinstance(skills, str):
                    payload_data['skills'] = [skill.strip() for skill in skills.split(',') if skill.strip()]
            
            # Extract experience years and current designation from experience
            experience = document.get('experience', [])
            if isinstance(experience, list) and experience:
                # Get current/latest designation
                for exp in experience:
                    if isinstance(exp, dict):
                        # Prefer 'designation' (used elsewhere in this service), fall back to 'title'.
                        designation = exp.get('designation') or exp.get('title')
                        if designation:
                            payload_data['current_designation'] = designation
                            break
            
            # Extract highest education
            education = document.get('education', [])
            if isinstance(education, list) and education:
                for edu in education:
                    if isinstance(edu, dict) and edu.get('degree'):
                        payload_data['highest_education'] = edu['degree']
                        break
            
            # Parse languages
            languages = document.get('languages')
            if languages:
                if isinstance(languages, list):
                    payload_data['languages'] = [str(lang).strip() for lang in languages if str(lang).strip()]
                elif isinstance(languages, str):
                    payload_data['languages'] = [lang.strip() for lang in languages.split(',') if lang.strip()]
            
            if combined_text.strip():
                # Store in Qdrant using embedder with detailed payload
                return self.embedder.store_in_qdrant_with_details(jobseeker_id, combined_text, None, payload_data)
            else:
                logger.warning(f"No text content found for jobseeker {jobseeker_id} to store in Qdrant")
                # Store minimal info as fallback
                fallback_text = f"Resume ID: {jobseeker_id}"
                if document.get('email'):
                    fallback_text += f", Email: {document['email']}"
                if document.get('nationality'):
                    fallback_text += f", Nationality: {document['nationality']}"
                if document.get('country_name'):
                    fallback_text += f", Country: {document['country_name']}"
                if document.get('country_header_code'):
                    fallback_text += f", Country Code: {document['country_header_code']}"
                    
                return self.embedder.store_in_qdrant_with_details(jobseeker_id, fallback_text, None, payload_data)
                
        except Exception as e:
            logger.error(f"Error storing jobseeker {jobseeker_id} in Qdrant: {str(e)}")
            return False
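

# Minimal usage sketch (assumes OpenSearch, MongoDB, and Qdrant are reachable and
# the environment variables documented near the top of this file are set; the query
# strings and limits below are illustrative only):
if __name__ == "__main__":
    service = OpenSearchService()

    # Index up to 100 unindexed jobseekers in batches of 25.
    stats = service.bulk_index_jobseekers(batch_size=25, max_total=100)
    logger.info(f"Bulk indexing stats: {stats}")

    # Plain keyword search across all countries.
    results = service.search("senior python developer", top_k=10)
    logger.info(f"Top keyword results: {results}")

    # Keyword search restricted to a single country header code (e.g. 'ae').
    ae_results = service.search_with_country_filter("senior python developer", "ae", top_k=10)
    logger.info(f"Country-filtered results: {ae_results}")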
