import os
import requests
import time
import json
import hashlib
import hmac
import base64
from datetime import datetime, timedelta
from urllib.parse import quote

# Azure configuration.
# NOTE(review): the key/account/container values below are placeholder strings
# equal to their own names -- presumably replaced with real values at
# install/deploy time; confirm against the deployment tooling.
AZURE_REGION = "westus"  # region used to build the Speech service endpoint hostnames
AZURE_SUBSCRIPTION_KEY = "AZURE_SUBSCRIPTION_KEY"  # sent as Ocp-Apim-Subscription-Key when requesting a token
AZURE_STORAGE_ACCOUNT = "AZURE_STORAGE_ACCOUNT"  # must be configured: storage account that receives the uploaded audio
AZURE_STORAGE_KEY = "AZURE_STORAGE_KEY"  # must be configured: base64-encoded account key used to sign storage requests
AZURE_STORAGE_CONTAINER = "AZURE_STORAGE_CONTAINER"  # blob container for uploads (created on demand if missing)

# Progress milestones on the 0-100 scale reported to the caller's progress callback.
PROGRESS_UPLOADED = 40  # upload progress (0-100) is scaled into the 0..40 range
PROGRESS_START_PROCESSING = 45  # reported when transcription starts; polling progress is mapped above this value

def get_info():
    """Describe the capabilities of this speech-to-text backend.

    Returns:
        dict: ``"auto-lang"`` is always ``False`` (presumably meaning that
        automatic language detection is not offered -- confirm with the
        caller) and ``"languages"`` is the list of locale codes this
        module advertises.
    """
    # Kept as one whitespace-separated block so locales are easy to scan and
    # diff; the order matches the advertised list exactly.
    supported_locales = """
        af-ZA am-ET ar-AE ar-BH ar-DZ ar-EG ar-IQ ar-JO
        ar-KW ar-LB ar-LY ar-MA ar-OM ar-QA ar-SA ar-SY
        ar-TN ar-YE as-IN az-AZ bg-BG bn-BD bn-IN bs-BA
        ca-ES cs-CZ cy-GB da-DK de-AT de-CH de-DE el-GR
        en-AU en-CA en-GB en-HK en-IE en-IN en-KE en-NG
        en-NZ en-PH en-SG en-TZ en-US en-ZA es-AR es-BO
        es-CL es-CO es-CR es-CU es-DO es-EC es-ES es-GQ
        es-GT es-HN es-MX es-NI es-PA es-PE es-PR es-PY
        es-SV es-US es-UY es-VE et-EE eu-ES fa-IR fi-FI
        fil-PH fr-BE fr-CA fr-CH fr-FR ga-IE gl-ES gu-IN
        he-IL hi-IN hr-HR hu-HU hy-AM id-ID is-IS it-IT
        ja-JP jv-ID ka-GE kk-KZ km-KH kn-IN ko-KR lo-LA
        lt-LT lv-LV mk-MK ml-IN mn-MN mr-IN ms-MY mt-MT
        my-MM nb-NO ne-NP nl-BE nl-NL or-IN pa-IN pl-PL
        ps-AF pt-BR pt-PT ro-RO ru-RU si-LK sk-SK sl-SI
        so-SO sq-AL sr-RS su-ID sv-SE sw-KE sw-TZ ta-IN
        ta-LK ta-MY ta-SG te-IN th-TH tr-TR uk-UA ur-IN
        ur-PK uz-UZ vi-VN zh-CN zh-HK zh-TW zu-ZA
    """.split()

    info = {"auto-lang": False}
    info["languages"] = supported_locales
    return info

def speech_to_text(audio_path, language_code, progress_callback=None):
    """Transcribe an audio file with Azure batch transcription.

    The file is uploaded to Azure Blob Storage, a batch transcription with
    word-level timestamps is created for it, the job is polled until it
    finishes, and the result is converted by ``process_transcription_results``.
    The uploaded blob and the server-side transcription are removed again
    whether the run succeeds, is cancelled, or fails.

    Args:
        audio_path: Path of the local audio file to transcribe.
        language_code: Locale sent as the transcription ``locale``.
        progress_callback: Optional callable ``(percent, message)``; a falsy
            return value cancels the operation.

    Returns:
        The transcript dict produced by ``process_transcription_results``, or
        ``None`` when the upload or transcription was cancelled.

    Raises:
        Exception: If storage is not configured or any Azure request fails.
    """
    def upload_progress_handler(upload_progress, message=""):
        # Scale the upload's own 0-100 range into the 0..PROGRESS_UPLOADED slice.
        if progress_callback:
            return progress_callback(int(upload_progress * PROGRESS_UPLOADED / 100), message)
        return True

    # Check if storage is configured
    if not AZURE_STORAGE_ACCOUNT or not AZURE_STORAGE_KEY:
        raise Exception("Azure Storage Account and Key are required for word-level timestamps. Please set AZURE_STORAGE_ACCOUNT and AZURE_STORAGE_KEY.")

    # Upload file to Azure Blob Storage
    content_url, blob_info = upload_to_azure(audio_path, upload_progress_handler)
    if not content_url:
        return None

    auth_token = None
    transcription_url = None  # set once the job exists so `finally` can delete it
    try:
        # NOTE(review): tokens issued by the issueToken endpoint are
        # short-lived; very long transcriptions may outlive the token --
        # confirm against the Azure Speech authentication documentation.
        auth_token = get_azure_token()

        if progress_callback and not progress_callback(PROGRESS_START_PROCESSING, "Starting transcription..."):
            return None

        # Create transcription
        transcription_endpoint = f"https://{AZURE_REGION}.api.cognitive.microsoft.com/speechtotext/v3.2/transcriptions"

        headers = {
            "Authorization": f"Bearer {auth_token}",
            "Content-Type": "application/json"
        }

        payload = {
            "contentUrls": [content_url],
            "locale": language_code,
            "displayName": f"Transcription_{int(time.time())}",
            "properties": {
                "wordLevelTimestampsEnabled": True,
                "punctuationMode": "DictatedAndAutomatic",
                "profanityFilterMode": "None"
            }
        }

        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.post(transcription_endpoint, headers=headers, json=payload, timeout=60)

        if response.status_code not in (200, 201):
            handle_response_error(response, "Failed to start batch transcription")

        transcription_data = response.json()
        transcription_url = transcription_data["self"]

        def transcription_progress_handler(server_progress, message=""):
            # Map polling progress (0-100) onto the range above PROGRESS_START_PROCESSING.
            if progress_callback:
                return progress_callback(
                    PROGRESS_START_PROCESSING + int(server_progress * (1 - PROGRESS_START_PROCESSING/100)),
                    message
                )
            return True

        # Poll for completion
        result_data = poll_transcription(transcription_url, auth_token, transcription_progress_handler)
        if not result_data:
            return None

        # Process results
        transcript = process_transcription_results(result_data)

        if progress_callback:
            progress_callback(100, "Complete")

        return transcript

    finally:
        # Best-effort cleanup on every exit path; previously a failure while
        # polling or processing left the transcription on the server.
        if transcription_url:
            delete_transcription(transcription_url, auth_token)
        cleanup_blob(blob_info)

def upload_to_azure(file_path, progress_callback=None):
    """Upload a local file to Azure Blob Storage using the REST API.

    Args:
        file_path: Path of the file to upload.
        progress_callback: Optional callable ``(percent, message)``; a falsy
            return value cancels the upload.

    Returns:
        tuple: ``(sas_url, blob_info)`` where ``sas_url`` is a read-only SAS
        URL for the blob and ``blob_info`` holds what ``cleanup_blob`` needs
        to delete it, or ``(None, None)`` when the callback cancelled.

    Raises:
        Exception: If the file is empty, the container cannot be created, or
            the upload request fails. The error is logged before re-raising.
    """
    def update_progress(percent, message=""):
        if progress_callback and not progress_callback(percent, message):
            return False
        return True

    if not update_progress(0, "Preparing upload..."):
        return None, None

    blob_info = None  # populated only once the blob exists remotely
    try:
        # Generate unique blob name
        timestamp = int(time.time())
        file_extension = os.path.splitext(os.path.basename(file_path))[1]
        blob_name = f"{timestamp}_{os.urandom(8).hex()}{file_extension}"

        # Ensure container exists
        ensure_container_exists(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_KEY, AZURE_STORAGE_CONTAINER)

        if not update_progress(5, "Uploading to Azure..."):
            return None, None

        # Upload blob
        blob_url = f"https://{AZURE_STORAGE_ACCOUNT}.blob.core.windows.net/{AZURE_STORAGE_CONTAINER}/{quote(blob_name)}"

        with open(file_path, 'rb') as f:
            file_data = f.read()

        file_size = len(file_data)
        if file_size == 0:
            # An empty file cannot be transcribed, and a zero Content-Length
            # would be signed incorrectly; fail early with a clear message.
            raise Exception(f"Audio file is empty: {file_path}")

        now = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')

        headers = {
            'x-ms-date': now,
            'x-ms-version': '2021-08-06',
            'x-ms-blob-type': 'BlockBlob',
            'Content-Length': str(file_size),
            'Content-Type': 'application/octet-stream'
        }

        auth_header = create_blob_authorization(
            AZURE_STORAGE_ACCOUNT,
            AZURE_STORAGE_KEY,
            'PUT',
            AZURE_STORAGE_CONTAINER,
            blob_name,
            headers
        )
        headers['Authorization'] = auth_header

        response = requests.put(blob_url, headers=headers, data=file_data, timeout=300)

        if response.status_code not in (200, 201):
            raise Exception(f"Upload failed: {response.status_code} - {response.text}")

        blob_info = {
            "account": AZURE_STORAGE_ACCOUNT,
            "container": AZURE_STORAGE_CONTAINER,
            "blob_name": blob_name,
            "storage_key": AZURE_STORAGE_KEY
        }

        if not update_progress(100, "Upload complete"):
            # The caller gets no blob_info on cancel, so remove the blob here
            # instead of leaking it in the container.
            cleanup_blob(blob_info)
            return None, None

        # Generate SAS URL
        sas_token = generate_sas_token(
            AZURE_STORAGE_ACCOUNT,
            AZURE_STORAGE_KEY,
            AZURE_STORAGE_CONTAINER,
            blob_name,
            hours=24
        )

        sas_url = f"{blob_url}?{sas_token}"

        return sas_url, blob_info

    except Exception as e:
        log_message(f"Upload failed: {str(e)}")
        if blob_info:
            # The blob was uploaded but a later step failed: do not leave it behind.
            cleanup_blob(blob_info)
        raise

def ensure_container_exists(account_name, account_key, container_name):
    """Create the blob container if it does not exist yet.

    Issues a HEAD request for the container and, on 404, a PUT to create it.

    Args:
        account_name: Storage account name.
        account_key: Base64-encoded storage account key used for signing.
        container_name: Name of the container to check or create.

    Raises:
        Exception: If the create request returns anything other than
            201 (created) or 409 (already exists).
    """
    container_url = f"https://{account_name}.blob.core.windows.net/{container_name}?restype=container"

    def signed_headers(method):
        # Each request gets a fresh date and a signature for its own verb.
        headers = {
            'x-ms-date': datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
            'x-ms-version': '2021-08-06'
        }
        headers['Authorization'] = create_container_authorization(
            account_name, account_key, method, container_name, headers
        )
        return headers

    # Timeouts keep a stalled connection from blocking the upload forever.
    response = requests.head(container_url, headers=signed_headers('HEAD'), timeout=30)

    if response.status_code == 404:
        response = requests.put(container_url, headers=signed_headers('PUT'), timeout=30)
        if response.status_code not in (201, 409):
            raise Exception(f"Failed to create container: {response.status_code} - {response.text}")
    elif response.status_code != 200:
        # Not fatal here (the upload itself will fail if access is really
        # broken), but surface it instead of ignoring it silently.
        log_message(f"Unexpected status {response.status_code} while checking container '{container_name}'")

def create_blob_authorization(account_name, account_key, method, container, blob_name, headers):
    """Create the Shared Key ``Authorization`` header value for a blob request.

    The canonicalized header section is built from the ``x-ms-*`` headers
    actually present in ``headers``. Signing a header that is not sent (for
    example ``x-ms-blob-type`` on a DELETE) makes the service reject the
    request, so nothing is hard-coded here.

    Args:
        account_name: Storage account name.
        account_key: Base64-encoded storage account key.
        method: HTTP verb of the request (e.g. ``'PUT'``, ``'DELETE'``).
        container: Container holding the blob.
        blob_name: Name of the blob.
        headers: Request headers that will be sent; ``Content-Length``,
            ``Content-Type`` and every ``x-ms-*`` header are signed.

    Returns:
        str: ``"SharedKey <account>:<signature>"``.
    """
    # The service expects an empty Content-Length field when the length is zero.
    content_length = str(headers.get('Content-Length', ''))
    if content_length == '0':
        content_length = ''

    # Canonicalized headers: every x-ms-* header, lower-cased, sorted by name.
    ms_headers = sorted(
        (name.lower(), str(value).strip())
        for name, value in headers.items()
        if name.lower().startswith('x-ms-')
    )
    canonicalized_headers = ''.join(f"{name}:{value}\n" for name, value in ms_headers)

    string_to_sign = (
        f"{method}\n"
        "\n"                      # Content-Encoding
        "\n"                      # Content-Language
        f"{content_length}\n"     # Content-Length
        "\n"                      # Content-MD5
        f"{headers.get('Content-Type', '')}\n"
        "\n\n\n\n\n\n"            # Date, If-Modified-Since, If-Match, If-None-Match, If-Unmodified-Since, Range
        f"{canonicalized_headers}"
        f"/{account_name}/{container}/{blob_name}"
    )

    signature = base64.b64encode(
        hmac.new(base64.b64decode(account_key), string_to_sign.encode('utf-8'), hashlib.sha256).digest()
    ).decode('utf-8')

    return f"SharedKey {account_name}:{signature}"

def create_container_authorization(account_name, account_key, method, container, headers):
    """Build the Shared Key ``Authorization`` header value for a container request.

    Signs the verb, the ``x-ms-date`` and ``x-ms-version`` values taken from
    ``headers`` (empty when absent), and the container resource with the
    ``restype:container`` query parameter.

    Args:
        account_name: Storage account name.
        account_key: Base64-encoded storage account key.
        method: HTTP verb of the request.
        container: Container name.
        headers: Request headers supplying ``x-ms-date`` and ``x-ms-version``.

    Returns:
        str: ``"SharedKey <account>:<signature>"``.
    """
    ms_date = headers.get('x-ms-date', '')
    ms_version = headers.get('x-ms-version', '')

    pieces = [
        method + "\n" * 12,  # the verb followed by the empty standard-header slots
        "x-ms-date:" + ms_date + "\n",
        "x-ms-version:" + ms_version + "\n",
        "/" + account_name + "/" + container + "\n",
        "restype:container",
    ]
    payload = "".join(pieces).encode('utf-8')

    secret = base64.b64decode(account_key)
    digest = hmac.new(secret, payload, hashlib.sha256).digest()
    encoded_signature = base64.b64encode(digest).decode('utf-8')

    return "SharedKey " + account_name + ":" + encoded_signature

def generate_sas_token(account_name, account_key, container, blob_name, hours=24):
    """Generate a read-only service SAS token for a single blob.

    The string-to-sign follows the Blob service layout for signed version
    2020-12-06 and later: 16 newline-separated fields, a canonicalized
    resource prefixed with ``/blob``, and the signed resource type (``b``)
    included in the signature. Signing any other layout yields a token the
    service rejects.

    Args:
        account_name: Storage account name.
        account_key: Base64-encoded storage account key.
        container: Container holding the blob.
        blob_name: Blob name (not URL-encoded).
        hours: Validity of the token from now, in hours.

    Returns:
        str: Query string (without leading ``?``) carrying ``sv``, ``st``,
        ``se``, ``sr``, ``sp`` and ``sig``.
    """
    now = datetime.utcnow()
    # Back-date the start so small clock differences between this machine
    # and the service do not make the token "not yet valid".
    start = (now - timedelta(minutes=15)).strftime('%Y-%m-%dT%H:%M:%SZ')
    expiry = (now + timedelta(hours=hours)).strftime('%Y-%m-%dT%H:%M:%SZ')

    signed_permissions = 'r'
    signed_resource = 'b'
    signed_version = '2021-08-06'
    canonicalized_resource = f"/blob/{account_name}/{container}/{blob_name}"

    string_to_sign = "\n".join([
        signed_permissions,
        start,
        expiry,
        canonicalized_resource,
        "",                 # signed identifier
        "",                 # signed IP
        "",                 # signed protocol
        signed_version,
        signed_resource,
        "",                 # signed snapshot time
        "",                 # signed encryption scope
        "",                 # rscc (Cache-Control)
        "",                 # rscd (Content-Disposition)
        "",                 # rsce (Content-Encoding)
        "",                 # rscl (Content-Language)
        "",                 # rsct (Content-Type)
    ])

    signature = base64.b64encode(
        hmac.new(base64.b64decode(account_key), string_to_sign.encode('utf-8'), hashlib.sha256).digest()
    ).decode('utf-8')

    # safe='' so that '/', '+' and '=' in the base64 signature are all escaped.
    return (
        f"sv={signed_version}"
        f"&st={quote(start, safe='')}"
        f"&se={quote(expiry, safe='')}"
        f"&sr={signed_resource}"
        f"&sp={signed_permissions}"
        f"&sig={quote(signature, safe='')}"
    )

def cleanup_blob(blob_info):
    """Delete an uploaded blob (best effort).

    Never raises: this runs from cleanup paths where a secondary failure must
    not mask the primary result. Failures are logged so that leaked blobs do
    not go unnoticed.

    Args:
        blob_info: Dict with ``account``, ``container``, ``blob_name`` and
            ``storage_key``, or a falsy value to do nothing.
    """
    if not blob_info:
        return
    try:
        blob_url = f"https://{blob_info['account']}.blob.core.windows.net/{blob_info['container']}/{quote(blob_info['blob_name'])}"
        now = datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT')
        headers = {'x-ms-date': now, 'x-ms-version': '2021-08-06'}
        auth_header = create_blob_authorization(blob_info['account'], blob_info['storage_key'], 'DELETE', blob_info['container'], blob_info['blob_name'], headers)
        headers['Authorization'] = auth_header
        response = requests.delete(blob_url, headers=headers, timeout=30)
        # 404 means the blob is already gone, which is the desired end state.
        if response.status_code >= 400 and response.status_code != 404:
            log_message(f"Failed to delete blob: {response.status_code} - {response.text}")
    except Exception as e:
        # Deliberately best-effort, but report the problem instead of hiding it.
        log_message(f"Failed to delete blob: {e}")

def poll_transcription(transcription_url, auth_token, progress_callback=None):
    """Poll a batch transcription until it finishes.

    Args:
        transcription_url: The transcription's ``self`` URL.
        auth_token: Bearer token for the Speech service.
        progress_callback: Optional callable ``(progress, message)``; a falsy
            return value cancels polling and deletes the transcription.

    Returns:
        The parsed result JSON from ``get_transcription_results`` once the
        status is ``"Succeeded"``, or ``None`` when cancelled.

    Raises:
        Exception: If the status is ``"Failed"`` or a status request returns
            an unexpected HTTP status.
    """
    headers = {"Authorization": f"Bearer {auth_token}"}
    poll_interval = 5
    progress = 0

    while True:
        # Without a timeout a stalled connection would block polling (and
        # therefore cancellation) indefinitely.
        response = requests.get(transcription_url, headers=headers, timeout=30)
        if response.status_code == 429:
            # Throttled: back off, doubling the interval up to one minute.
            poll_interval = min(2 * poll_interval, 60)
            time.sleep(poll_interval)
            continue
        elif response.status_code != 200:
            handle_response_error(response, "Failed to get transcription status")

        data = response.json()
        status = data.get("status")

        if status == "Succeeded":
            files_url = data["links"]["files"]
            return get_transcription_results(files_url, auth_token)
        elif status == "Failed":
            error_msg = data.get("properties", {}).get("error", {}).get("message", "Transcription failed")
            raise Exception(error_msg)
        elif status == "Running":
            # The status carries no completion percentage here, so progress
            # is advanced synthetically and capped below 100.
            progress = min(progress + 5, 95)

        if progress_callback:
            if not progress_callback(progress, f"Status: {status}"):
                delete_transcription(transcription_url, auth_token)
                return None

        time.sleep(poll_interval)

def get_transcription_results(files_url, auth_token):
    """Download the result document of a finished transcription.

    Lists the transcription's files, picks the first entry whose ``kind`` is
    ``"Transcription"`` and downloads its ``contentUrl``.

    Args:
        files_url: The transcription's ``links.files`` URL.
        auth_token: Bearer token for the Speech service.

    Returns:
        The parsed JSON result document.

    Raises:
        Exception: If a request fails or no transcription file is listed.
    """
    headers = {"Authorization": f"Bearer {auth_token}"}
    response = requests.get(files_url, headers=headers, timeout=30)
    if response.status_code != 200:
        handle_response_error(response, "Failed to get transcription files")

    files = response.json()["values"]
    # .get(): an entry without "kind" should be skipped, not raise KeyError.
    result_file = next((f for f in files if f.get("kind") == "Transcription"), None)

    if not result_file:
        raise Exception("Transcription result file not found")

    # The content URL is requested without the bearer header, as before;
    # the result document can be large, hence the longer timeout.
    content_url = result_file["links"]["contentUrl"]
    response = requests.get(content_url, timeout=120)
    if response.status_code != 200:
        handle_response_error(response, "Failed to download results")

    return response.json()

def process_transcription_results(result_data):
    """Convert an Azure batch transcription result into the transcript dict.

    Args:
        result_data: Parsed result JSON. Reads ``durationInTicks``,
            ``combinedRecognizedPhrases`` and ``recognizedPhrases``.

    Returns:
        dict: ``{"duration": <ms>, "segments": [...]}`` where each segment has
        ``start``/``end`` in milliseconds, ``text`` (the first ``nBest``
        entry's ``display``) and ``words`` (``text``/``start``/``end``).
    """
    transcript = {"duration": 0, "segments": []}

    # Prefer the document-level duration. The entries of
    # combinedRecognizedPhrases normally carry no tick fields, so relying on
    # them alone reports a duration of 0; they remain as a fallback.
    duration_ticks = result_data.get("durationInTicks")
    if duration_ticks is None:
        combined_phrases = result_data.get("combinedRecognizedPhrases") or []
        if combined_phrases:
            last_phrase = combined_phrases[-1]
            duration_ticks = last_phrase.get("offsetInTicks", 0) + last_phrase.get("durationInTicks", 0)
    transcript["duration"] = ticks_to_ms(duration_ticks or 0)

    for phrase in result_data.get("recognizedPhrases") or []:
        # "nBest" may be missing or an empty list; fall back to an empty candidate.
        candidates = phrase.get("nBest") or [{}]
        best = candidates[0]

        phrase_start = phrase.get("offsetInTicks", 0)
        phrase_end = phrase_start + phrase.get("durationInTicks", 0)

        words = []
        for word_data in best.get("words") or []:
            word_start = word_data.get("offsetInTicks", 0)
            word_end = word_start + word_data.get("durationInTicks", 0)
            words.append({
                "text": word_data.get("word", ""),
                "start": ticks_to_ms(word_start),
                "end": ticks_to_ms(word_end)
            })

        transcript["segments"].append({
            "start": ticks_to_ms(phrase_start),
            "end": ticks_to_ms(phrase_end),
            "text": best.get("display", ""),
            "words": words
        })

    return transcript

def delete_transcription(transcription_url, auth_token):
    """Delete a transcription from the Speech service (best effort).

    Never raises, because it is used on cleanup and cancellation paths.
    Failures are logged instead of being silently dropped.

    Args:
        transcription_url: The transcription's ``self`` URL.
        auth_token: Bearer token for the Speech service.
    """
    try:
        headers = {"Authorization": f"Bearer {auth_token}"}
        response = requests.delete(transcription_url, headers=headers, timeout=30)
        # 404 means it is already gone, which is the desired end state.
        if response.status_code >= 400 and response.status_code != 404:
            log_message(f"Failed to delete transcription: {response.status_code} - {response.text}")
    except Exception as e:
        log_message(f"Failed to delete transcription: {e}")

def get_azure_token():
    """Request a bearer token from the regional token endpoint.

    Posts ``AZURE_SUBSCRIPTION_KEY`` to the ``issuetoken`` endpoint of
    ``AZURE_REGION``.

    Returns:
        str: The response body, used by callers as the bearer token.

    Raises:
        Exception: If the endpoint does not answer with HTTP 200.
    """
    endpoint = f"https://{AZURE_REGION}.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
    headers = {"Ocp-Apim-Subscription-Key": AZURE_SUBSCRIPTION_KEY}
    # A timeout keeps an unreachable endpoint from hanging the caller forever.
    response = requests.post(endpoint, headers=headers, timeout=30)
    if response.status_code != 200:
        handle_response_error(response, "Error fetching token")
    return response.text

def ticks_to_ms(ticks):
    """Convert a tick count to whole milliseconds.

    Divides by 10000 ticks per millisecond and truncates toward zero.
    """
    ticks_per_ms = 10000
    milliseconds = ticks / ticks_per_ms
    return int(milliseconds)

def handle_response_error(response, log_msg):
    """Log a failed HTTP response and raise an ``Exception`` describing it.

    The message is taken from the JSON body's ``error.message`` or, failing
    that, its top-level ``message``. When neither can be read, the exception
    carries the status code and raw response text instead.

    Args:
        response: Response object exposing ``json()``, ``status_code`` and ``text``.
        log_msg: Context message logged first; skipped when falsy.

    Raises:
        Exception: Always.
    """
    if log_msg:
        log_message(log_msg)

    detail = ""
    try:
        body = response.json()
        detail = body.get("error", {}).get("message", "") or body.get("message", "")
    except Exception:
        # Body is not JSON or not shaped as expected; use the raw text below.
        pass

    if not detail:
        raise Exception(f"Error {response.status_code} - {response.text}")

    log_message(f"Error {response.status_code} - {detail}")
    raise Exception(detail)

def log_message(message):
    """Send ``message`` to the host application's logger, or print it.

    Uses the ``Log`` attribute of a module-level ``app`` object when one
    exists; otherwise falls back to ``print``.
    """
    host_app = globals().get('app')
    emit = getattr(host_app, 'Log', print)
    emit(message)

def raise_error(message):
    """Raise a plain ``Exception`` carrying ``message``."""
    error = Exception(message)
    raise error