# download_embeddings.py
"""
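This script connects to the FranAI backend, downloads the latest InsightFace
embeddings for all registered users, and saves them into the local
`insightface_embeddings` directory. Users that already have a local embedding
file are skipped; only users without one are written.

When executed directly, the script acts as its own scheduler: it runs a sync,
sleeps for `EMBEDDING_SYNC_INTERVAL_MINUTES` (default 15), and repeats until
interrupted. Start it once (e.g., at boot via cron or Windows Task Scheduler)
rather than on a repeating schedule, to keep the standalone application's
local data in sync with the server.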
"""

import os
import requests
import pickle
import numpy as np
from dotenv import load_dotenv
from collections import defaultdict
import datetime

# --- Configuration ---
# Every path below is anchored to the folder that contains this script, so the
# .env file and the embeddings cache resolve the same way no matter which
# working directory the script is launched from.
script_dir = os.path.split(os.path.abspath(__file__))[0]
ENV_FILE_PATH, EMBEDDINGS_DIR = (
    os.path.join(script_dir, leaf)
    for leaf in (".env", "insightface_embeddings")
)

# Naming scheme of the per-user cache files: <prefix><user_id><suffix>.
_EMBEDDING_FILE_PREFIX = "insightface_embeddings_"
_EMBEDDING_FILE_SUFFIX = ".pkl"


def _scan_existing_user_ids(directory):
    """Return the user IDs (as strings) that already have a cache file in *directory*."""
    # Strip the fixed prefix/suffix instead of splitting on '_' / '.', so IDs
    # that themselves contain those characters are recovered intact.
    return {
        filename[len(_EMBEDDING_FILE_PREFIX):-len(_EMBEDDING_FILE_SUFFIX)]
        for filename in os.listdir(directory)
        if filename.startswith(_EMBEDDING_FILE_PREFIX)
        and filename.endswith(_EMBEDDING_FILE_SUFFIX)
    }


def _fetch_embeddings_payload(embeddings_url):
    """
    Download and validate the embeddings payload from *embeddings_url*.

    Returns an ``(ids, embeddings)`` tuple of equally long lists, or ``None``
    when the request, the JSON decoding or the payload validation fails (the
    reason is printed before returning).
    """
    try:
        # NOTE(security): verify=False disables TLS certificate validation so
        # that self-signed certificates work. It also allows the response to be
        # tampered with in transit; prefer passing a CA bundle path instead.
        response = requests.get(embeddings_url, timeout=20, verify=False)
        response.raise_for_status()  # Raises an HTTPError for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        print(f"❌ NETWORK ERROR: Could not connect to the backend server: {e}")
        print("Please ensure the server is running and accessible.")
        return None

    # Decode separately from the request so a malformed body is reported as a
    # JSON problem rather than a network problem. Every JSON decode error that
    # requests can raise is a ValueError subclass.
    try:
        data = response.json()
    except ValueError:
        print("❌ ERROR: Failed to parse the server's response. It was not valid JSON.")
        return None

    embeddings = data.get('embeddings') if isinstance(data, dict) else None
    ids = data.get('ids') if isinstance(data, dict) else None
    payload_ok = (
        isinstance(data, dict)
        and bool(data.get('success'))
        and isinstance(embeddings, list)
        and isinstance(ids, list)
        # A length mismatch would make zip() silently drop entries, and the
        # resulting incomplete file would never be refreshed afterwards.
        and len(ids) == len(embeddings)
    )
    if not payload_ok:
        print("❌ ERROR: The server responded successfully, but the data format is incorrect.")
        print(f"Server response: {data}")
        return None

    return ids, embeddings


def _save_user_embeddings(user_id, embs):
    """
    Pickle one user's embeddings into EMBEDDINGS_DIR and return the file path.

    The data is written to a temporary ``.tmp`` file and then moved into
    place, so an interrupted write never leaves a truncated ``.pkl`` behind
    that later runs would mistake for a complete cache entry.

    Raises ValueError if *user_id* would produce a path outside the cache
    directory, and OSError / pickle errors if writing fails.
    """
    filename = f"{_EMBEDDING_FILE_PREFIX}{user_id}{_EMBEDDING_FILE_SUFFIX}"
    # The ID comes from the network; refuse anything containing a path separator.
    if os.path.basename(filename) != filename:
        raise ValueError(f"user id {user_id!r} is not safe to use in a filename")

    output_filename = os.path.join(EMBEDDINGS_DIR, filename)
    temp_filename = output_filename + ".tmp"
    embedding_data = {
        'user_id': user_id,
        'embeddings': embs,
        'created_at': datetime.datetime.now().isoformat(),
        'image_count': len(embs)
    }

    try:
        with open(temp_filename, 'wb') as f:
            pickle.dump(embedding_data, f)
        os.replace(temp_filename, output_filename)
    finally:
        # After a successful replace the temp file is gone; this only fires
        # when the write or the move failed part-way.
        if os.path.exists(temp_filename):
            try:
                os.remove(temp_filename)
            except OSError:
                pass  # Best-effort cleanup; the original error still propagates.

    return output_filename


def download_and_cache_embeddings():
    """
    Connects to the backend and saves the latest embeddings locally.

    Steps:
      1. Load the .env file at ENV_FILE_PATH and read BACKEND_URL_PRODUCTION.
      2. Collect the user IDs that already have a file in EMBEDDINGS_DIR.
      3. GET ``<backend>/insightface/embeddings`` and validate the JSON payload
         (expects ``success``, ``embeddings`` and ``ids``).
      4. Group the embeddings by user ID as numpy arrays.
      5. Write one pickle per user that has no local file yet; users with an
         existing file are skipped, not updated.

    Returns None. Configuration, network and payload problems are printed and
    end the run early; a failure to save one user is printed and the remaining
    users are still processed.
    """
    print("--- Starting Embedding Synchronization ---")

    # 1. Load configuration from .env file
    print(f"Attempting to load configuration from: {ENV_FILE_PATH}...")
    was_loaded = load_dotenv(dotenv_path=ENV_FILE_PATH)

    if was_loaded:
        print("✅ .env file loaded successfully.")
    else:
        print("⚠️ WARNING: .env file not found or is empty. Using default values.")

    # Use the production backend URL for syncing
    backend_url = os.getenv("BACKEND_URL_PRODUCTION")
    if not backend_url:
        print("❌ ERROR: BACKEND_URL_PRODUCTION not found in .env file. Cannot proceed.")
        return

    print(f"Target backend: {backend_url}")

    # 2. Scan for existing local embeddings
    os.makedirs(EMBEDDINGS_DIR, exist_ok=True)
    print("Scanning for existing local embeddings...")
    existing_user_ids = _scan_existing_user_ids(EMBEDDINGS_DIR)
    print(f"Found {len(existing_user_ids)} existing local user embedding files.")

    # 3. Fetch embeddings from the server
    # rstrip avoids a double slash when the configured URL ends with '/'.
    embeddings_url = f"{backend_url.rstrip('/')}/insightface/embeddings"
    print(f"Fetching latest embeddings from {embeddings_url}...")

    payload = _fetch_embeddings_payload(embeddings_url)
    if payload is None:
        return
    ids, embeddings = payload

    print(f"✅ Successfully downloaded {len(embeddings)} total embeddings for {len(set(ids))} users.")

    # 4. Group embeddings by user ID
    user_embeddings = defaultdict(list)
    for user_id, embedding in zip(ids, embeddings):
        user_embeddings[user_id].append(np.array(embedding))

    # 5. Save embeddings for NEW users only
    saved_count = 0
    skipped_count = 0
    for user_id, embs in user_embeddings.items():
        # File-derived IDs are always strings, so compare in string form.
        if str(user_id) in existing_user_ids:
            skipped_count += 1
            continue  # Skip if the user's embedding file already exists

        try:
            output_filename = _save_user_embeddings(user_id, embs)
        except Exception as e:
            # Deliberately broad: one user's failure must not abort the rest.
            print(f"  -> ⚠️ FAILED to save embeddings for user {user_id}: {e}")
        else:
            print(f"  -> Saved {len(embs)} embeddings for NEW user {user_id} to {output_filename}")
            saved_count += 1

    print(f"✅ Synchronization complete. Saved data for {saved_count} new users. Skipped {skipped_count} existing users.")
    print("--- Embedding Synchronization Finished ---")


if __name__ == "__main__":
    # Scheduler entry point: run a sync, sleep for the configured interval,
    # and repeat until the user presses Ctrl+C.
    import time

    DEFAULT_INTERVAL_MINUTES = 15

    # --- Enhanced Debugging for .env loading ---
    print("--- Initializing Scheduler ---")
    print(f"1. Looking for .env file at: {ENV_FILE_PATH}")

    if os.path.exists(ENV_FILE_PATH):
        print("2. SUCCESS: .env file found.")
        load_dotenv(dotenv_path=ENV_FILE_PATH)
    else:
        print("2. FAILURE: .env file NOT found at the specified path.")

    # --- Interval Loading with Debugging ---
    interval_minutes_raw = os.getenv("EMBEDDING_SYNC_INTERVAL_MINUTES")
    print(f"3. Raw value from getenv('EMBEDDING_SYNC_INTERVAL_MINUTES'): {interval_minutes_raw} (Type: {type(interval_minutes_raw)})")

    try:
        # Fall back to the default when the variable is unset or empty.
        interval_minutes = int(interval_minutes_raw or DEFAULT_INTERVAL_MINUTES)
        # Zero would busy-loop against the server and a negative value makes
        # time.sleep() raise, so treat both as invalid configuration.
        if interval_minutes <= 0:
            raise ValueError("interval must be a positive number of minutes")
    except ValueError:
        print("⚠️ WARNING: Invalid value for interval. Using default of 15 minutes.")
        interval_minutes = DEFAULT_INTERVAL_MINUTES

    print(f"4. Final interval calculated: {interval_minutes} minutes.")
    interval_seconds = interval_minutes * 60
    # --- End of Debugging ---

    try:
        while True:
            try:
                download_and_cache_embeddings()
            except Exception as e:
                # Top-level boundary: report the failure and keep the
                # scheduler alive so the next cycle can retry.
                print(f"❌ UNEXPECTED ERROR during sync: {e}")
            print(f"--- Sync complete. Waiting for {interval_minutes} minutes before the next sync... ---")
            print("(Press Ctrl+C to stop)")
            time.sleep(interval_seconds)
    except KeyboardInterrupt:
        print("\n--- Scheduler stopped by user. Goodbye! ---")
