MoneyMap/MoneyMap/Services/AIVisionClient.cs
AJ Isaacs 29d26b4771 Refactor: Consolidate AI endpoint config to AI:ModelsEndpoint
- Simplify model dropdown to single flat list with local models first
- Show loaded/unloaded status with bullet indicators
- Remove separate Ollama:BaseUrl and LlamaCpp:BaseUrl configs
- All AI vision clients now use AI:ModelsEndpoint (default: athena.lan:11434)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 23:41:05 -05:00
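The local-model clients in this file resolve their base URL from this single key. A minimal sketch of the lookup they perform (the in-memory configuration source here is hypothetical; the app itself reads appsettings.json):

    using Microsoft.Extensions.Configuration;

    var config = new ConfigurationBuilder()
        .AddInMemoryCollection(new Dictionary<string, string?>
        {
            // One shared endpoint for llama.cpp and Ollama alike.
            ["AI:ModelsEndpoint"] = "http://athena.lan:11434"
        })
        .Build();

    // Same fallback default the clients below apply when the key is absent.
    var baseUrl = config["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";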

using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
namespace MoneyMap.Services
{
    /// <summary>
    /// Result of an AI vision API call.
    /// </summary>
    public class VisionApiResult
    {
        public bool IsSuccess { get; init; }
        public string? Content { get; init; }
        public string? ErrorMessage { get; init; }

        public static VisionApiResult Success(string content) =>
            new() { IsSuccess = true, Content = content };

        public static VisionApiResult Failure(string error) =>
            new() { IsSuccess = false, ErrorMessage = error };
    }
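
    // Typical consumption pattern (illustrative sketch; the caller shown is hypothetical):
    //
    //   var result = await client.AnalyzeImageAsync(base64, "image/png", prompt, model);
    //   if (!result.IsSuccess)
    //       _logger.LogWarning("Vision call failed: {Error}", result.ErrorMessage);
    //   else
    //       ProcessJson(result.Content); // cleaned JSON text on success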
    /// <summary>
    /// Client for making vision API calls to AI providers.
    /// </summary>
    public interface IAIVisionClient
    {
        Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
    }
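
    // Registration sketch (hypothetical; the real wiring lives in the app's startup code):
    //
    //   builder.Services.AddHttpClient<OpenAIVisionClient>();
    //   builder.Services.AddHttpClient<ClaudeVisionClient>();
    //   builder.Services.AddHttpClient<LlamaCppVisionClient>();
    //   builder.Services.AddHttpClient<OllamaVisionClient>();
    //
    // AddHttpClient<T> hands each client a factory-managed HttpClient, which is why the
    // constructors below take one as a dependency instead of constructing their own.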
    /// <summary>
    /// OpenAI Vision API client.
    /// </summary>
    public class OpenAIVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OpenAIVisionClient> _logger;

        public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
                ?? _configuration["OpenAI:ApiKey"];
            if (string.IsNullOrWhiteSpace(apiKey))
                return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new { type = "text", text = prompt },
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            }
                        }
                    }
                },
                max_tokens = 2000,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                // Send auth as a per-request header rather than mutating DefaultRequestHeaders,
                // which is not safe if the HttpClient instance is ever shared across callers.
                using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/chat/completions")
                {
                    Content = new StringContent(json, Encoding.UTF8, "application/json")
                };
                request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
                var response = await _httpClient.SendAsync(request);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
            }
        }

        /// <summary>
        /// Strips the Markdown ```json fences that models sometimes wrap around JSON output.
        /// </summary>
        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
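
    // Example call (file name and model id are illustrative):
    //
    //   var bytes = await File.ReadAllBytesAsync("receipt.png");
    //   var result = await openAiClient.AnalyzeImageAsync(
    //       Convert.ToBase64String(bytes), "image/png",
    //       "Extract the transactions from this receipt as JSON.", "gpt-4o");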
    /// <summary>
    /// Anthropic Claude Vision API client.
    /// </summary>
    public class ClaudeVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<ClaudeVisionClient> _logger;

        public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
                ?? _configuration["Anthropic:ApiKey"];
            if (string.IsNullOrWhiteSpace(apiKey))
                return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                max_tokens = 2000,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image",
                                source = new
                                {
                                    type = "base64",
                                    media_type = mediaType,
                                    data = base64Image
                                }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                // Per-request headers avoid mutating shared HttpClient state.
                using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.anthropic.com/v1/messages")
                {
                    Content = new StringContent(json, Encoding.UTF8, "application/json")
                };
                request.Headers.Add("x-api-key", apiKey);
                request.Headers.Add("anthropic-version", "2023-06-01");
                var response = await _httpClient.SendAsync(request);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("content")[0]
                    .GetProperty("text")
                    .GetString();
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
    /// <summary>
    /// llama.cpp server client using OpenAI-compatible vision API for local LLM inference.
    /// </summary>
    public class LlamaCppVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<LlamaCppVisionClient> _logger;

        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        /// <summary>
        /// Get available models from the llama.cpp server.
        /// </summary>
        public async Task<List<LlamaCppModel>> GetAvailableModelsAsync()
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            try
            {
                var response = await _httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/models");
                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Failed to fetch models: {StatusCode}", response.StatusCode);
                    return new List<LlamaCppModel>();
                }
                var json = await response.Content.ReadAsStringAsync();
                var modelsResponse = JsonSerializer.Deserialize<LlamaCppModelsResponse>(json);
                return modelsResponse?.Data?
                    .Where(m => !m.Id.StartsWith("mmproj-")) // Filter out multimodal projectors
                    .Select(m => new LlamaCppModel
                    {
                        Id = m.Id,
                        IsLoaded = m.Status?.Value == "loaded"
                    })
                    .OrderByDescending(m => m.IsLoaded)
                    .ThenBy(m => m.Id)
                    .ToList() ?? new List<LlamaCppModel>();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error fetching models from llama.cpp");
                return new List<LlamaCppModel>();
            }
        }
        /// <summary>
        /// Send a text-only prompt to the LLM (no image).
        /// </summary>
        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
            if (llamaModel.StartsWith("llamacpp:"))
                llamaModel = llamaModel[9..];
            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = prompt
                    }
                },
                max_tokens = 1024,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();
                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }
        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            // Strip "llamacpp:" prefix if present
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;
            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, llamaModel, base64Image.Length);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                },
                max_tokens = 4096,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();
                _logger.LogInformation("LlamaCpp: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out after 5 minutes");
                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
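
    // Note the protocol split between the two local backends: LlamaCppVisionClient (above) speaks
    // the OpenAI-compatible /v1/chat/completions route with data-URL images, while OllamaVisionClient
    // (below) posts to Ollama's native /api/generate endpoint with raw base64 strings in an "images"
    // array. Both read the same AI:ModelsEndpoint key, so swapping backends is purely a config change.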
    /// <summary>
    /// Ollama Vision API client for local LLM inference.
    /// </summary>
    public class OllamaVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OllamaVisionClient> _logger;

        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            // Strip "ollama:" prefix if present
            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;
            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, ollamaModel, base64Image.Length);

            var requestBody = new
            {
                model = ollamaModel,
                prompt = prompt,
                images = new[] { base64Image },
                stream = false,
                options = new
                {
                    temperature = 0.1
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj.GetProperty("response").GetString();
                _logger.LogInformation("Ollama: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("Ollama request timed out after 5 minutes");
                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
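
    // The parser above reads Ollama's non-streaming generate response, which has roughly this
    // shape (field values are illustrative):
    //
    //   { "model": "llava", "response": "{ ...model output... }", "done": true }
    //
    // Only the "response" field is consumed; the rest of the payload is ignored.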
    // Models for llama.cpp /v1/models endpoint
    public class LlamaCppModel
    {
        public string Id { get; set; } = "";
        public bool IsLoaded { get; set; }
    }

    public class LlamaCppModelsResponse
    {
        [JsonPropertyName("data")]
        public List<LlamaCppModelData>? Data { get; set; }
    }

    public class LlamaCppModelData
    {
        [JsonPropertyName("id")]
        public string Id { get; set; } = "";

        [JsonPropertyName("status")]
        public LlamaCppModelStatus? Status { get; set; }
    }

    public class LlamaCppModelStatus
    {
        [JsonPropertyName("value")]
        public string? Value { get; set; }
    }
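
    // These DTOs map a /v1/models payload of roughly this shape (the "status" object is a
    // llama.cpp-server extension to the OpenAI schema; ids and values are illustrative):
    //
    //   {
    //     "data": [
    //       { "id": "GLM-4.6V-...",        "status": { "value": "loaded" } },
    //       { "id": "mmproj-GLM-4.6V-...", "status": { "value": "unloaded" } }
    //     ]
    //   }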
}