using MoneyMap.Services.AITools;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace MoneyMap.Services
{
    /// <summary>
    /// Result of an AI vision API call.
    /// </summary>
    public class VisionApiResult
    {
        public bool IsSuccess { get; init; }
        public string? Content { get; init; }
        public string? ErrorMessage { get; init; }

        public static VisionApiResult Success(string content) => new() { IsSuccess = true, Content = content };
        public static VisionApiResult Failure(string error) => new() { IsSuccess = false, ErrorMessage = error };
    }

    /// <summary>
    /// Client for making vision API calls to AI providers.
    /// </summary>
    public interface IAIVisionClient
    {
        Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
    }

    /// <summary>
    /// Extended interface for vision clients that support tool use / function calling.
    /// </summary>
    public interface IAIToolAwareVisionClient : IAIVisionClient
    {
        bool SupportsToolUse { get; }

        Task<VisionApiResult> AnalyzeImageWithToolsAsync(
            string base64Image,
            string mediaType,
            string prompt,
            string model,
            List<AIToolDefinition> tools,
            Func<AIToolCall, Task<AIToolResult>> toolExecutor,
            int maxToolRounds = 5);
    }

    /// <summary>
    /// Shared helper for the OpenAI-compatible tool-use wire format.
    /// Used by both OpenAIVisionClient and LlamaCppVisionClient since they share /v1/chat/completions.
    /// </summary>
    public static class OpenAIToolUseHelper
    {
        /// <summary>
        /// Convert AIToolDefinitions to the OpenAI "tools" array format
        /// (each entry is { type: "function", function: { name, description, parameters } }).
        /// </summary>
        public static List<object> BuildToolsArray(List<AIToolDefinition> tools)
        {
            return tools.Select(t => (object)new
            {
                type = "function",
                function = new
                {
                    name = t.Name,
                    description = t.Description,
                    parameters = new
                    {
                        type = "object",
                        properties = t.Parameters.ToDictionary(
                            p => p.Name,
                            p => (object)new { type = p.Type, description = p.Description }),
                        required = t.Parameters.Where(p => p.Required).Select(p => p.Name).ToArray()
                    }
                }
            }).ToList();
        }

        /// <summary>
        /// Execute the tool-use loop for OpenAI-compatible /v1/chat/completions endpoints.
        /// Repeatedly posts the conversation, executes any requested tool calls via
        /// <paramref name="toolExecutor"/>, appends the results, and resends — until the
        /// model produces a final text answer or <paramref name="maxToolRounds"/> is exceeded.
        /// </summary>
        public static async Task<VisionApiResult> ExecuteWithToolsAsync(
            HttpClient httpClient,
            string apiUrl,
            Action<HttpClient> configureHeaders,
            string model,
            List<object> initialMessages,
            List<object> toolsArray,
            Func<AIToolCall, Task<AIToolResult>> toolExecutor,
            int maxToolRounds,
            int maxTokens,
            ILogger logger)
        {
            // Build mutable message list so tool results can be appended between rounds.
            var messages = new List<object>(initialMessages);

            for (int round = 0; round <= maxToolRounds; round++)
            {
                var requestBody = new Dictionary<string, object>
                {
                    ["model"] = model,
                    ["messages"] = messages,
                    ["max_tokens"] = maxTokens,
                    ["temperature"] = 0.1
                };

                // Only include tools if we haven't exhausted rounds; the final round is
                // forced to produce a plain answer by omitting the tools array.
                if (round < maxToolRounds && toolsArray.Count > 0)
                {
                    requestBody["tools"] = toolsArray;
                    requestBody["tool_choice"] = "auto";
                }

                configureHeaders(httpClient);
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await httpClient.PostAsync(apiUrl, content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    logger.LogError("API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var choice = responseObj.GetProperty("choices")[0];
                var message = choice.GetProperty("message");
                var finishReason = choice.GetProperty("finish_reason").GetString();

                // Check for tool calls
                var hasToolCalls = message.TryGetProperty("tool_calls", out var toolCallsElement)
                    && toolCallsElement.ValueKind == JsonValueKind.Array
                    && toolCallsElement.GetArrayLength() > 0;

                if (hasToolCalls || finishReason == "tool_calls")
                {
                    if (!hasToolCalls)
                    {
                        // finish_reason says tool_calls but no tool_calls array - treat as final response
                        var fallbackContent = message.TryGetProperty("content", out var fc) ? fc.GetString() : null;
                        return VisionApiResult.Success(CleanJsonResponse(fallbackContent));
                    }

                    logger.LogInformation("Tool-use round {Round}: model requested {Count} tool calls",
                        round + 1, toolCallsElement.GetArrayLength());

                    // Add the assistant message (with tool_calls) to conversation
                    messages.Add(JsonSerializer.Deserialize<object>(message.GetRawText())!);

                    // Execute each tool call and add results
                    foreach (var tc in toolCallsElement.EnumerateArray())
                    {
                        var toolCall = new AIToolCall
                        {
                            Id = tc.GetProperty("id").GetString() ?? "",
                            Name = tc.GetProperty("function").GetProperty("name").GetString() ?? "",
                            Arguments = ParseArguments(tc.GetProperty("function").GetProperty("arguments").GetString())
                        };

                        logger.LogInformation("Executing tool: {ToolName}", toolCall.Name);
                        var result = await toolExecutor(toolCall);

                        messages.Add(new
                        {
                            role = "tool",
                            tool_call_id = toolCall.Id,
                            content = result.Content
                        });
                    }

                    continue; // Send another request with tool results
                }

                // No tool calls - extract final content
                var messageContent = message.TryGetProperty("content", out var contentElement)
                    ? contentElement.GetString()
                    : null;
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }

            return VisionApiResult.Failure("Exceeded maximum tool-use rounds without getting a final response.");
        }

        /// <summary>
        /// Parse the JSON "arguments" string of a tool call into a flat string dictionary.
        /// Non-string scalars are kept as their raw JSON text; malformed input yields an empty map
        /// (best-effort by design — a bad tool call should not abort the whole conversation).
        /// </summary>
        private static Dictionary<string, string?> ParseArguments(string? argsJson)
        {
            if (string.IsNullOrWhiteSpace(argsJson)) return new();
            try
            {
                var element = JsonSerializer.Deserialize<JsonElement>(argsJson);
                var dict = new Dictionary<string, string?>();
                foreach (var prop in element.EnumerateObject())
                {
                    dict[prop.Name] = prop.Value.ValueKind switch
                    {
                        JsonValueKind.String => prop.Value.GetString(),
                        JsonValueKind.Number => prop.Value.GetRawText(),
                        JsonValueKind.True => "true",
                        JsonValueKind.False => "false",
                        JsonValueKind.Null => null,
                        _ => prop.Value.GetRawText()
                    };
                }
                return dict;
            }
            catch
            {
                return new();
            }
        }

        /// <summary>
        /// Normalize a model response down to (ideally) a bare JSON object:
        /// strips reasoning blocks, markdown code fences, and any non-JSON wrapping.
        /// </summary>
        public static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";

            // Strip <think>...</think> blocks from reasoning models (e.g. Qwen3-VL-Thinking)
            trimmed = System.Text.RegularExpressions.Regex.Replace(
                trimmed, @"<think>[\s\S]*?</think>", "",
                System.Text.RegularExpressions.RegexOptions.IgnoreCase).Trim();

            // Strip markdown code fences
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            else if (trimmed.StartsWith("```"))
            {
                trimmed = trimmed.Replace("```", "").Trim();
            }

            // If the response doesn't start with '{', try to extract the JSON object.
            // This handles HTML error pages, XML-wrapped responses, or other non-JSON wrapping.
            if (!trimmed.StartsWith("{"))
            {
                var firstBrace = trimmed.IndexOf('{');
                var lastBrace = trimmed.LastIndexOf('}');
                if (firstBrace >= 0 && lastBrace > firstBrace)
                {
                    trimmed = trimmed[firstBrace..(lastBrace + 1)];
                }
            }

            return trimmed;
        }
    }

    /// <summary>
    /// OpenAI Vision API client with tool-use support.
    /// </summary>
    public class OpenAIVisionClient : IAIToolAwareVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OpenAIVisionClient> _logger;

        public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public bool SupportsToolUse => true;

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = GetApiKey();
            if (apiKey == null)
                return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new { type = "text", text = prompt },
                            new { type = "image_url", image_url = new { url = $"data:{mediaType};base64,{base64Image}" } }
                        }
                    }
                },
                max_tokens = 2000,
                temperature = 0.1
            };

            try
            {
                // NOTE(review): mutating DefaultRequestHeaders per call is not thread-safe if this
                // HttpClient instance is shared across concurrent requests — confirm lifetime/registration.
                _httpClient.DefaultRequestHeaders.Clear();
                _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");

                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                return VisionApiResult.Success(OpenAIToolUseHelper.CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageWithToolsAsync(
            string base64Image,
            string mediaType,
            string prompt,
            string model,
            List<AIToolDefinition> tools,
            Func<AIToolCall, Task<AIToolResult>> toolExecutor,
            int maxToolRounds = 5)
        {
            var apiKey = GetApiKey();
            if (apiKey == null)
                return VisionApiResult.Failure("OpenAI API key not configured.");

            var initialMessages = new List<object>
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new { type = "text", text = prompt },
                        new { type = "image_url", image_url = new { url = $"data:{mediaType};base64,{base64Image}" } }
                    }
                }
            };

            try
            {
                return await OpenAIToolUseHelper.ExecuteWithToolsAsync(
                    _httpClient,
                    "https://api.openai.com/v1/chat/completions",
                    client =>
                    {
                        client.DefaultRequestHeaders.Clear();
                        client.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
                    },
                    model,
                    initialMessages,
                    OpenAIToolUseHelper.BuildToolsArray(tools),
                    toolExecutor,
                    maxToolRounds,
                    maxTokens: 4096,
                    _logger);
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "OpenAI tool-use call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
            }
        }

        private string? GetApiKey() =>
            Environment.GetEnvironmentVariable("OPENAI_API_KEY") ?? _configuration["OpenAI:ApiKey"];
    }

    /// <summary>
    /// Anthropic Claude Vision API client with tool-use support.
    /// </summary>
    public class ClaudeVisionClient : IAIToolAwareVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<ClaudeVisionClient> _logger;

        public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public bool SupportsToolUse => true;

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = GetApiKey();
            if (apiKey == null)
                return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                max_tokens = 2000,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new { type = "image", source = new { type = "base64", media_type = mediaType, data = base64Image } },
                            new { type = "text", text = prompt }
                        }
                    }
                }
            };

            try
            {
                ConfigureHeaders();
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("content")[0]
                    .GetProperty("text")
                    .GetString();

                return VisionApiResult.Success(OpenAIToolUseHelper.CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageWithToolsAsync(
            string base64Image,
            string mediaType,
            string prompt,
            string model,
            List<AIToolDefinition> tools,
            Func<AIToolCall, Task<AIToolResult>> toolExecutor,
            int maxToolRounds = 5)
        {
            var apiKey = GetApiKey();
            if (apiKey == null)
                return VisionApiResult.Failure("Anthropic API key not configured.");

            // Build Anthropic-format tools array (name/description/input_schema, not the OpenAI shape)
            var anthropicTools = tools.Select(t => new
            {
                name = t.Name,
                description = t.Description,
                input_schema = new
                {
                    type = "object",
                    properties = t.Parameters.ToDictionary(
                        p => p.Name,
                        p => (object)new { type = p.Type, description = p.Description }),
                    required = t.Parameters.Where(p => p.Required).Select(p => p.Name).ToArray()
                }
            }).ToList();

            // Initial message with image
            var messages = new List<object>
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new { type = "image", source = new { type = "base64", media_type = mediaType, data = base64Image } },
                        new { type = "text", text = prompt }
                    }
                }
            };

            try
            {
                for (int round = 0; round <= maxToolRounds; round++)
                {
                    var requestBody = new Dictionary<string, object>
                    {
                        ["model"] = model,
                        ["max_tokens"] = 4096,
                        ["messages"] = messages
                    };

                    // Final round omits tools to force a plain text answer.
                    if (round < maxToolRounds && anthropicTools.Count > 0)
                        requestBody["tools"] = anthropicTools;

                    ConfigureHeaders();
                    var json = JsonSerializer.Serialize(requestBody);
                    var content = new StringContent(json, Encoding.UTF8, "application/json");
                    var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);

                    if (!response.IsSuccessStatusCode)
                    {
                        var errorContent = await response.Content.ReadAsStringAsync();
                        _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                        return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
                    }

                    var responseJson = await response.Content.ReadAsStringAsync();
                    var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                    var stopReason = responseObj.GetProperty("stop_reason").GetString();
                    var contentBlocks = responseObj.GetProperty("content");

                    // Check for tool_use blocks
                    var toolUseBlocks = contentBlocks.EnumerateArray()
                        .Where(b => b.GetProperty("type").GetString() == "tool_use")
                        .ToList();

                    if (stopReason == "tool_use" && toolUseBlocks.Count > 0)
                    {
                        _logger.LogInformation("Claude tool-use round {Round}: {Count} tool calls",
                            round + 1, toolUseBlocks.Count);

                        // Add assistant response to messages (contains tool_use blocks)
                        var assistantContent = JsonSerializer.Deserialize<object>(contentBlocks.GetRawText())!;
                        messages.Add(new { role = "assistant", content = assistantContent });

                        // Build tool_result blocks
                        var toolResults = new List<object>();
                        foreach (var block in toolUseBlocks)
                        {
                            var toolCall = new AIToolCall
                            {
                                Id = block.GetProperty("id").GetString() ?? "",
                                Name = block.GetProperty("name").GetString() ?? "",
                                Arguments = ParseAnthropicInput(block.GetProperty("input"))
                            };

                            _logger.LogInformation("Executing tool: {ToolName}", toolCall.Name);
                            var result = await toolExecutor(toolCall);

                            toolResults.Add(new
                            {
                                type = "tool_result",
                                tool_use_id = toolCall.Id,
                                content = result.Content,
                                is_error = result.IsError
                            });
                        }

                        messages.Add(new { role = "user", content = toolResults });
                        continue;
                    }

                    // Extract final text content
                    var textBlock = contentBlocks.EnumerateArray()
                        .FirstOrDefault(b => b.GetProperty("type").GetString() == "text");
                    var text = textBlock.ValueKind != JsonValueKind.Undefined
                        ? textBlock.GetProperty("text").GetString()
                        : null;
                    return VisionApiResult.Success(OpenAIToolUseHelper.CleanJsonResponse(text));
                }

                return VisionApiResult.Failure("Exceeded maximum tool-use rounds.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Claude tool-use call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
            }
        }

        /// <summary>
        /// Flatten Claude's tool "input" JSON object into a string dictionary
        /// (same scalar conventions as OpenAIToolUseHelper.ParseArguments).
        /// </summary>
        private static Dictionary<string, string?> ParseAnthropicInput(JsonElement input)
        {
            var dict = new Dictionary<string, string?>();
            if (input.ValueKind == JsonValueKind.Object)
            {
                foreach (var prop in input.EnumerateObject())
                {
                    dict[prop.Name] = prop.Value.ValueKind switch
                    {
                        JsonValueKind.String => prop.Value.GetString(),
                        JsonValueKind.Number => prop.Value.GetRawText(),
                        JsonValueKind.True => "true",
                        JsonValueKind.False => "false",
                        JsonValueKind.Null => null,
                        _ => prop.Value.GetRawText()
                    };
                }
            }
            return dict;
        }

        private void ConfigureHeaders()
        {
            // Only called after GetApiKey() has been null-checked by the caller.
            var apiKey = GetApiKey()!;
            _httpClient.DefaultRequestHeaders.Clear();
            _httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey);
            _httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01");
        }

        private string? GetApiKey() =>
            Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY") ?? _configuration["Anthropic:ApiKey"];
    }

    /// <summary>
    /// llama.cpp server client using OpenAI-compatible vision API with tool-use support.
    /// </summary>
    public class LlamaCppVisionClient : IAIToolAwareVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<LlamaCppVisionClient> _logger;

        // Model families whose Jinja chat templates support the OpenAI tool role format.
        private static readonly string[] _toolCapableModelPrefixes = new[]
        {
            "qwen3", "qwen2.5", "hermes", "functionary", "mistral"
        };

        private string? _currentModel;

        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
        {
            _httpClient = httpClient;
            // Local inference can be slow; allow long-running requests.
            _httpClient.Timeout = TimeSpan.FromMinutes(5);
            _configuration = configuration;
            _logger = logger;
        }

        /// <summary>
        /// Whether the current model supports OpenAI-style tool/function calling.
        /// Only certain model families (Qwen3, Hermes, etc.) have chat templates that handle the tool role.
        /// </summary>
        public bool SupportsToolUse =>
            _currentModel != null &&
            _toolCapableModelPrefixes.Any(p => _currentModel.StartsWith(p, StringComparison.OrdinalIgnoreCase));

        /// <summary>
        /// Set the model name so SupportsToolUse can be evaluated per-model.
        /// Called by AIReceiptParser before the tool-use check.
        /// </summary>
        public void SetCurrentModel(string model)
        {
            // "llamacpp:" prefix is 9 characters.
            _currentModel = model.StartsWith("llamacpp:") ? model[9..] : model;
        }

        /// <summary>
        /// Get available models from the llama.cpp server. Returns an empty list on any failure.
        /// </summary>
        public async Task<List<LlamaCppModel>> GetAvailableModelsAsync()
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            try
            {
                var response = await _httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/models");
                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Failed to fetch models: {StatusCode}", response.StatusCode);
                    return new List<LlamaCppModel>();
                }

                var json = await response.Content.ReadAsStringAsync();
                var modelsResponse = JsonSerializer.Deserialize<LlamaCppModelsResponse>(json);

                // "mmproj-*" entries are multimodal projector files, not selectable models.
                return modelsResponse?.Data?
                    .Where(m => !m.Id.StartsWith("mmproj-"))
                    .Select(m => new LlamaCppModel
                    {
                        Id = m.Id,
                        IsLoaded = m.Status?.Value == "loaded"
                    })
                    .OrderByDescending(m => m.IsLoaded)
                    .ThenBy(m => m.Id)
                    .ToList() ?? new List<LlamaCppModel>();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error fetching models from llama.cpp");
                return new List<LlamaCppModel>();
            }
        }

        /// <summary>
        /// Send a text-only prompt to the LLM (no image).
        /// </summary>
        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            var llamaModel = model ?? _configuration["AI:ReceiptParsingModel"] ?? "Qwen3-8B-Q6_K";
            if (llamaModel.StartsWith("llamacpp:")) llamaModel = llamaModel[9..];

            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new { role = "user", content = prompt }
                },
                max_tokens = 1024,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
                return VisionApiResult.Success(OpenAIToolUseHelper.CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;

            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, llamaModel, base64Image.Length);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new { type = "image_url", image_url = new { url = $"data:{mediaType};base64,{base64Image}" } },
                            new { type = "text", text = prompt }
                        }
                    }
                },
                max_tokens = 16384,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Successfully parsed response");
                return VisionApiResult.Success(OpenAIToolUseHelper.CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out after 5 minutes");
                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageWithToolsAsync(
            string base64Image,
            string mediaType,
            string prompt,
            string model,
            List<AIToolDefinition> tools,
            Func<AIToolCall, Task<AIToolResult>> toolExecutor,
            int maxToolRounds = 5)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;

            _logger.LogInformation("LlamaCpp: Starting tool-use request with model {Model}", llamaModel);

            var initialMessages = new List<object>
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new { type = "image_url", image_url = new { url = $"data:{mediaType};base64,{base64Image}" } },
                        new { type = "text", text = prompt }
                    }
                }
            };

            try
            {
                return await OpenAIToolUseHelper.ExecuteWithToolsAsync(
                    _httpClient,
                    $"{baseUrl.TrimEnd('/')}/v1/chat/completions",
                    _ => { }, // No auth headers needed for local llama.cpp
                    llamaModel,
                    initialMessages,
                    OpenAIToolUseHelper.BuildToolsArray(tools),
                    toolExecutor,
                    maxToolRounds,
                    maxTokens: 16384,
                    _logger);
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp tool-use request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp tool-use call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }
    }

    /// <summary>
    /// Ollama Vision API client for local LLM inference.
    /// Does NOT support tool use (uses /api/generate endpoint).
    /// Falls back to enriched prompt with pre-fetched context.
    /// </summary>
    public class OllamaVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OllamaVisionClient> _logger;

        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
        {
            _httpClient = httpClient;
            // Local inference can be slow; allow long-running requests.
            _httpClient.Timeout = TimeSpan.FromMinutes(5);
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            // "ollama:" prefix is 7 characters.
            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;

            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, ollamaModel, base64Image.Length);

            var requestBody = new
            {
                model = ollamaModel,
                prompt = prompt,
                images = new[] { base64Image },
                stream = false,
                options = new { temperature = 0.1 }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj.GetProperty("response").GetString();

                _logger.LogInformation("Ollama: Successfully parsed response");
                return VisionApiResult.Success(OpenAIToolUseHelper.CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("Ollama request timed out after 5 minutes");
                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
            }
        }
    }

    // Models for llama.cpp /v1/models endpoint
    public class LlamaCppModel
    {
        public string Id { get; set; } = "";
        public bool IsLoaded { get; set; }
    }

    public class LlamaCppModelsResponse
    {
        [JsonPropertyName("data")]
        public List<LlamaCppModelData>? Data { get; set; }
    }

    public class LlamaCppModelData
    {
        [JsonPropertyName("id")]
        public string Id { get; set; } = "";

        [JsonPropertyName("status")]
        public LlamaCppModelStatus? Status { get; set; }
    }

    public class LlamaCppModelStatus
    {
        [JsonPropertyName("value")]
        public string? Value { get; set; }
    }
}