import os
import sys
import json
import numpy as np
import pickle
from datetime import datetime

# Add the project root to the Python path to resolve module imports
# This must be done before importing from the 'franai' module
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from franai.models.registered_user_model import RegisteredUser

def _scan_existing_pkl_ids(embeddings_dir):
    """Return the user-ID strings that already have an embedding file in *embeddings_dir*."""
    prefix = "insightface_embeddings_"
    suffix = ".pkl"
    found_ids = set()
    for filename in os.listdir(embeddings_dir):
        if filename.startswith(prefix) and filename.endswith(suffix):
            # Slice instead of str.replace so only the leading prefix and the
            # trailing suffix are stripped:
            # "insightface_embeddings_420.pkl" -> "420"
            found_ids.add(filename[len(prefix):-len(suffix)])
    return found_ids


def _load_user_data(user_data_path):
    """Load the user mapping stored at *user_data_path*, or return {} if the file is absent.

    Raises:
        ValueError: if the file does not contain a JSON object. Failing here,
            before anything is written, avoids clobbering an unexpected file.
    """
    if not os.path.exists(user_data_path):
        return {}
    with open(user_data_path, 'r', encoding='utf-8') as f:
        user_data = json.load(f)
    if not isinstance(user_data, dict):
        raise ValueError(
            f"Expected a JSON object in {user_data_path}, "
            f"got {type(user_data).__name__}"
        )
    return user_data


def generate_dummy_users():
    """
    Generate up to 1000 dummy users next to this script.

    For every new user this writes an individual embedding file
    (``insightface_embeddings/insightface_embeddings_<id>.pkl``), inserts the
    user into the database through ``RegisteredUser.add_or_update`` and records
    the user in ``user_data.json``.

    The script is safe to run multiple times: candidate IDs start at 10000 and
    any ID that already exists in the database, in ``user_data.json`` or as a
    .pkl file is skipped, so existing data is never overwritten.

    ``user_data.json`` is written even when the run is interrupted by an
    exception, so users that were already inserted are not lost from the JSON.
    """
    num_users_to_generate = 1000
    output_dir = os.path.dirname(os.path.abspath(__file__))
    user_data_path = os.path.join(output_dir, "user_data.json")
    embeddings_dir = os.path.join(output_dir, "insightface_embeddings")

    # Create embeddings directory if it doesn't exist
    if not os.path.exists(embeddings_dir):
        os.makedirs(embeddings_dir)
        print(f"Created directory: {embeddings_dir}")

    # --- Load existing data and check for conflicts ---

    # 1. Scan for existing .pkl files to avoid overwriting
    print("Scanning for existing .pkl embedding files...")
    existing_pkl_ids = _scan_existing_pkl_ids(embeddings_dir)
    print(f"Found {len(existing_pkl_ids)} existing .pkl files.")

    # 2. Load existing user_data.json if it exists
    user_data = _load_user_data(user_data_path)

    # 3. Check database for existing users
    RegisteredUser.create_table_if_not_exists()
    print("Fetching existing users from the database...")
    existing_users_list = RegisteredUser.get_all()
    # Normalise every pid to str so the conflict check works whether the
    # database hands back ints or strings (pid type is not visible here).
    existing_pids = {str(user['pid']) for user in existing_users_list or []}
    print(f"Found {len(existing_pids)} existing users in the database.")

    # --- Generate new users ---

    start_id = 10000
    # Budget of real insert attempts. Skipped (already existing) IDs do not
    # count, otherwise a well-populated ID range would trip the safety break
    # before any meaningful number of users could be added.
    max_attempts = num_users_to_generate * 3
    users_added = 0
    attempts = 0
    candidate = start_id

    print(f"\nAttempting to generate {num_users_to_generate} new users...")
    try:
        while users_added < num_users_to_generate:
            # Safety break to prevent an endless loop when inserts keep failing
            if attempts >= max_attempts:
                print("⚠️ Safety break: Exiting loop after too many failed insert attempts.")
                break

            user_id_int = candidate
            user_id = str(user_id_int)
            candidate += 1

            # THE CRITICAL CHECK: Skip if ID exists in DB, JSON, or as a .pkl file
            if user_id in existing_pkl_ids or user_id in existing_pids or user_id in user_data:
                continue

            attempts += 1
            now = datetime.now()

            # Generate and save individual dummy embedding in the correct format
            embedding_array = np.random.rand(512).astype(np.float32)
            embedding_data = {
                'user_id': user_id, 'embeddings': [embedding_array],
                'created_at': now.strftime("%Y-%m-%d %H:%M:%S"), 'image_count': 1
            }
            embedding_path = os.path.join(embeddings_dir, f"insightface_embeddings_{user_id}.pkl")

            inserted = False
            try:
                with open(embedding_path, 'wb') as f:
                    pickle.dump(embedding_data, f)
                # Insert into database
                inserted = bool(RegisteredUser.add_or_update(user_id_int))
            finally:
                # Roll back the embedding file on a failed insert or on any
                # exception, so no orphaned .pkl blocks this ID on later runs.
                if not inserted and os.path.exists(embedding_path):
                    os.remove(embedding_path)

            if not inserted:
                print(f"⚠️ Failed to add user {user_id} to the database. Rolling back.")
                continue

            # Record the user only once the database insert has succeeded
            user_data[user_id] = {
                "name": f"Dummy User {user_id}",
                "email": f"dummy.user.{user_id}@example.com",
                "department": "Technology",
                "status": "hired",
                "last_updated": now.isoformat(),
            }
            users_added += 1
            print(f"Added new user {user_id} ({users_added}/{num_users_to_generate})")
    finally:
        # --- Save updated data ---
        # Write to a temporary file first and swap it in, so an interrupted
        # write cannot leave a truncated user_data.json behind.
        tmp_path = user_data_path + ".tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(user_data, f, indent=2)
        os.replace(tmp_path, user_data_path)
        print(f"\n✅ Successfully updated {user_data_path}. Total users: {len(user_data)}.")

    print("\nScript finished.")
    print(f"Successfully added {users_added} new users and created their individual .pkl files.")


# Generate the dummy users only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    generate_dummy_users()