MoneyMap/MoneyMap/Services/AIVisionClient.cs
AJ Isaacs 29d26b4771 Refactor: Consolidate AI endpoint config to AI:ModelsEndpoint
- Simplify model dropdown to single flat list with local models first
- Show loaded/unloaded status with bullet indicators
- Remove separate Ollama:BaseUrl and LlamaCpp:BaseUrl configs
- All AI vision clients now use AI:ModelsEndpoint (default: athena.lan:11434)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 23:41:05 -05:00
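The local-model clients in this file resolve their base URL from this single key. A minimal sketch of the lookup they perform (the in-memory configuration source here is hypothetical; the app itself reads appsettings.json):

    using Microsoft.Extensions.Configuration;

    var config = new ConfigurationBuilder()
        .AddInMemoryCollection(new Dictionary<string, string?>
        {
            // One shared endpoint for llama.cpp and Ollama alike.
            ["AI:ModelsEndpoint"] = "http://athena.lan:11434"
        })
        .Build();

    // Same fallback default the clients below apply when the key is absent.
    var baseUrl = config["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";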

using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
namespace MoneyMap.Services
{
    /// <summary>
    /// Result of an AI vision API call.
    /// </summary>
    public class VisionApiResult
    {
        public bool IsSuccess { get; init; }
        public string? Content { get; init; }
        public string? ErrorMessage { get; init; }

        public static VisionApiResult Success(string content) =>
            new() { IsSuccess = true, Content = content };

        public static VisionApiResult Failure(string error) =>
            new() { IsSuccess = false, ErrorMessage = error };
    }
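
    // Typical consumption pattern (illustrative sketch; the caller shown is hypothetical):
    //
    //   var result = await client.AnalyzeImageAsync(base64, "image/png", prompt, model);
    //   if (!result.IsSuccess)
    //       _logger.LogWarning("Vision call failed: {Error}", result.ErrorMessage);
    //   else
    //       ProcessJson(result.Content); // cleaned JSON text on success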
    /// <summary>
    /// Client for making vision API calls to AI providers.
    /// </summary>
    public interface IAIVisionClient
    {
        Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
    }
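
    // Registration sketch (hypothetical; the real wiring lives in the app's startup code):
    //
    //   builder.Services.AddHttpClient<OpenAIVisionClient>();
    //   builder.Services.AddHttpClient<ClaudeVisionClient>();
    //   builder.Services.AddHttpClient<LlamaCppVisionClient>();
    //   builder.Services.AddHttpClient<OllamaVisionClient>();
    //
    // AddHttpClient<T> hands each client a factory-managed HttpClient, which is why the
    // constructors below take one as a dependency instead of constructing their own.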
    /// <summary>
    /// OpenAI Vision API client.
    /// </summary>
    public class OpenAIVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OpenAIVisionClient> _logger;

        public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
                ?? _configuration["OpenAI:ApiKey"];
            if (string.IsNullOrWhiteSpace(apiKey))
                return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new { type = "text", text = prompt },
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            }
                        }
                    }
                },
                max_tokens = 2000,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                // Send auth as a per-request header rather than mutating DefaultRequestHeaders,
                // which is not safe if the HttpClient instance is ever shared across callers.
                using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/chat/completions")
                {
                    Content = new StringContent(json, Encoding.UTF8, "application/json")
                };
                request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", apiKey);
                var response = await _httpClient.SendAsync(request);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
            }
        }

        /// <summary>
        /// Strips the Markdown ```json fences that models sometimes wrap around JSON output.
        /// </summary>
        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
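
    // Example call (file name and model id are illustrative):
    //
    //   var bytes = await File.ReadAllBytesAsync("receipt.png");
    //   var result = await openAiClient.AnalyzeImageAsync(
    //       Convert.ToBase64String(bytes), "image/png",
    //       "Extract the transactions from this receipt as JSON.", "gpt-4o");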
    /// <summary>
    /// Anthropic Claude Vision API client.
    /// </summary>
    public class ClaudeVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<ClaudeVisionClient> _logger;

        public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
                ?? _configuration["Anthropic:ApiKey"];
            if (string.IsNullOrWhiteSpace(apiKey))
                return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                max_tokens = 2000,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image",
                                source = new
                                {
                                    type = "base64",
                                    media_type = mediaType,
                                    data = base64Image
                                }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                // Per-request headers avoid mutating shared HttpClient state.
                using var request = new HttpRequestMessage(HttpMethod.Post, "https://api.anthropic.com/v1/messages")
                {
                    Content = new StringContent(json, Encoding.UTF8, "application/json")
                };
                request.Headers.Add("x-api-key", apiKey);
                request.Headers.Add("anthropic-version", "2023-06-01");
                var response = await _httpClient.SendAsync(request);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("content")[0]
                    .GetProperty("text")
                    .GetString();
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
    /// <summary>
    /// llama.cpp server client using OpenAI-compatible vision API for local LLM inference.
    /// </summary>
    public class LlamaCppVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<LlamaCppVisionClient> _logger;

        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        /// <summary>
        /// Get available models from the llama.cpp server.
        /// </summary>
        public async Task<List<LlamaCppModel>> GetAvailableModelsAsync()
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            try
            {
                var response = await _httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/models");
                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Failed to fetch models: {StatusCode}", response.StatusCode);
                    return new List<LlamaCppModel>();
                }
                var json = await response.Content.ReadAsStringAsync();
                var modelsResponse = JsonSerializer.Deserialize<LlamaCppModelsResponse>(json);
                return modelsResponse?.Data?
                    .Where(m => !m.Id.StartsWith("mmproj-")) // Filter out multimodal projectors
                    .Select(m => new LlamaCppModel
                    {
                        Id = m.Id,
                        IsLoaded = m.Status?.Value == "loaded"
                    })
                    .OrderByDescending(m => m.IsLoaded)
                    .ThenBy(m => m.Id)
                    .ToList() ?? new List<LlamaCppModel>();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error fetching models from llama.cpp");
                return new List<LlamaCppModel>();
            }
        }
        /// <summary>
        /// Send a text-only prompt to the LLM (no image).
        /// </summary>
        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
            if (llamaModel.StartsWith("llamacpp:"))
                llamaModel = llamaModel[9..];
            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = prompt
                    }
                },
                max_tokens = 1024,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();
                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }
        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            // Strip "llamacpp:" prefix if present
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;
            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, llamaModel, base64Image.Length);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                },
                max_tokens = 4096,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();
                _logger.LogInformation("LlamaCpp: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out after 5 minutes");
                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
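
    // Note the protocol split between the two local backends: LlamaCppVisionClient (above) speaks
    // the OpenAI-compatible /v1/chat/completions route with data-URL images, while OllamaVisionClient
    // (below) posts to Ollama's native /api/generate endpoint with raw base64 strings in an "images"
    // array. Both read the same AI:ModelsEndpoint key, so swapping backends is purely a config change.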
    /// <summary>
    /// Ollama Vision API client for local LLM inference.
    /// </summary>
    public class OllamaVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OllamaVisionClient> _logger;

        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            // Strip "ollama:" prefix if present
            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;
            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, ollamaModel, base64Image.Length);

            var requestBody = new
            {
                model = ollamaModel,
                prompt = prompt,
                images = new[] { base64Image },
                stream = false,
                options = new
                {
                    temperature = 0.1
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");
                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);
                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
                }
                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
                var messageContent = responseObj.GetProperty("response").GetString();
                _logger.LogInformation("Ollama: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("Ollama request timed out after 5 minutes");
                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
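
    // The parser above reads Ollama's non-streaming generate response, which has roughly this
    // shape (field values are illustrative):
    //
    //   { "model": "llava", "response": "{ ...model output... }", "done": true }
    //
    // Only the "response" field is consumed; the rest of the payload is ignored.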
    // Models for llama.cpp /v1/models endpoint
    public class LlamaCppModel
    {
        public string Id { get; set; } = "";
        public bool IsLoaded { get; set; }
    }

    public class LlamaCppModelsResponse
    {
        [JsonPropertyName("data")]
        public List<LlamaCppModelData>? Data { get; set; }
    }

    public class LlamaCppModelData
    {
        [JsonPropertyName("id")]
        public string Id { get; set; } = "";

        [JsonPropertyName("status")]
        public LlamaCppModelStatus? Status { get; set; }
    }

    public class LlamaCppModelStatus
    {
        [JsonPropertyName("value")]
        public string? Value { get; set; }
    }
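
    // These DTOs map a /v1/models payload of roughly this shape (the "status" object is a
    // llama.cpp-server extension to the OpenAI schema; ids and values are illustrative):
    //
    //   {
    //     "data": [
    //       { "id": "GLM-4.6V-...",        "status": { "value": "loaded" } },
    //       { "id": "mmproj-GLM-4.6V-...", "status": { "value": "unloaded" } }
    //     ]
    //   }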
}