- Simplify model dropdown to single flat list with local models first
- Show loaded/unloaded status with bullet indicators
- Remove separate Ollama:BaseUrl and LlamaCpp:BaseUrl configs
- All AI vision clients now use AI:ModelsEndpoint (default: athena.lan:11434)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization;
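
// Local model clients (LlamaCppVisionClient, OllamaVisionClient) resolve their base URL from the
// single AI:ModelsEndpoint configuration value, falling back to http://athena.lan:11434.
// A minimal appsettings.json sketch (shape assumed, not taken from this repository):
//
//   "AI": { "ModelsEndpoint": "http://athena.lan:11434" }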

namespace MoneyMap.Services
{
    /// <summary>
    /// Result of an AI vision API call.
    /// </summary>
    public class VisionApiResult
    {
        public bool IsSuccess { get; init; }
        public string? Content { get; init; }
        public string? ErrorMessage { get; init; }

        public static VisionApiResult Success(string content) =>
            new() { IsSuccess = true, Content = content };

        public static VisionApiResult Failure(string error) =>
            new() { IsSuccess = false, ErrorMessage = error };
    }

    /// <summary>
    /// Client for making vision API calls to AI providers.
    /// </summary>
    public interface IAIVisionClient
    {
        Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model);
    }
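
    // Wiring sketch (assumed, not taken from this repository): each client is typically registered
    // as a typed HttpClient in Program.cs, and callers pick a provider plus model string, e.g.
    //
    //   builder.Services.AddHttpClient<OpenAIVisionClient>();
    //   builder.Services.AddHttpClient<LlamaCppVisionClient>();
    //
    //   var result = await visionClient.AnalyzeImageAsync(base64Png, "image/png",
    //       "Extract the receipt as JSON.", "llamacpp:some-vision-model");
    //   if (result.IsSuccess) { /* parse result.Content */ }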

    /// <summary>
    /// OpenAI Vision API client.
    /// </summary>
    public class OpenAIVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OpenAIVisionClient> _logger;

        public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
                ?? _configuration["OpenAI:ApiKey"];

            if (string.IsNullOrWhiteSpace(apiKey))
                return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new { type = "text", text = prompt },
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            }
                        }
                    }
                },
                max_tokens = 2000,
                temperature = 0.1
            };

            try
            {
                _httpClient.DefaultRequestHeaders.Clear();
                _httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");

                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
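
    // Note: the Claude client below sends the image as an Anthropic content block
    // ({ "type": "image", "source": { "type": "base64", ... } }) and authenticates with the
    // x-api-key and anthropic-version headers rather than a Bearer token.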

    /// <summary>
    /// Anthropic Claude Vision API client.
    /// </summary>
    public class ClaudeVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<ClaudeVisionClient> _logger;

        public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
        {
            _httpClient = httpClient;
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
                ?? _configuration["Anthropic:ApiKey"];

            if (string.IsNullOrWhiteSpace(apiKey))
                return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");

            var requestBody = new
            {
                model = model,
                max_tokens = 2000,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image",
                                source = new
                                {
                                    type = "base64",
                                    media_type = mediaType,
                                    data = base64Image
                                }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                }
            };

            try
            {
                _httpClient.DefaultRequestHeaders.Clear();
                _httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey);
                _httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01");

                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("content")[0]
                    .GetProperty("text")
                    .GetString();

                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }

    /// <summary>
    /// llama.cpp server client using OpenAI-compatible vision API for local LLM inference.
    /// </summary>
    public class LlamaCppVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<LlamaCppVisionClient> _logger;

        public LlamaCppVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<LlamaCppVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        /// <summary>
        /// Get available models from the llama.cpp server.
        /// </summary>
        public async Task<List<LlamaCppModel>> GetAvailableModelsAsync()
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";

            try
            {
                var response = await _httpClient.GetAsync($"{baseUrl.TrimEnd('/')}/v1/models");

                if (!response.IsSuccessStatusCode)
                {
                    _logger.LogWarning("Failed to fetch models: {StatusCode}", response.StatusCode);
                    return new List<LlamaCppModel>();
                }

                var json = await response.Content.ReadAsStringAsync();
                var modelsResponse = JsonSerializer.Deserialize<LlamaCppModelsResponse>(json);

                return modelsResponse?.Data?
                    .Where(m => !m.Id.StartsWith("mmproj-")) // Filter out multimodal projectors
                    .Select(m => new LlamaCppModel
                    {
                        Id = m.Id,
                        IsLoaded = m.Status?.Value == "loaded"
                    })
                    .OrderByDescending(m => m.IsLoaded)
                    .ThenBy(m => m.Id)
                    .ToList() ?? new List<LlamaCppModel>();
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Error fetching models from llama.cpp");
                return new List<LlamaCppModel>();
            }
        }
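
        // Usage sketch (assumed caller, not part of this file): the flat model dropdown can be
        // rendered directly from this list, loaded models first, e.g.
        //   foreach (var m in await llamaCppClient.GetAvailableModelsAsync())
        //       items.Add($"{(m.IsLoaded ? "●" : "○")} {m.Id}");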

        /// <summary>
        /// Send a text-only prompt to the LLM (no image).
        /// </summary>
        public async Task<VisionApiResult> SendTextPromptAsync(string prompt, string? model = null)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";
            var llamaModel = model ?? "GLM-4.6V-UD-Q4_K_XL-00001-of-00002";
            if (llamaModel.StartsWith("llamacpp:"))
                llamaModel = llamaModel[9..];

            _logger.LogInformation("LlamaCpp: Sending text prompt to {BaseUrl} with model {Model}", baseUrl, llamaModel);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = prompt
                    }
                },
                max_tokens = 1024,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Text prompt completed successfully");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out");
                return VisionApiResult.Failure("llama.cpp request timed out.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";

            // Strip "llamacpp:" prefix if present
            var llamaModel = model.StartsWith("llamacpp:") ? model[9..] : model;

            _logger.LogInformation("LlamaCpp: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, llamaModel, base64Image.Length);

            var requestBody = new
            {
                model = llamaModel,
                messages = new[]
                {
                    new
                    {
                        role = "user",
                        content = new object[]
                        {
                            new
                            {
                                type = "image_url",
                                image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                            },
                            new { type = "text", text = prompt }
                        }
                    }
                },
                max_tokens = 4096,
                temperature = 0.1
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("LlamaCpp: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/v1/chat/completions", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("llama.cpp API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"llama.cpp API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("LlamaCpp: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj
                    .GetProperty("choices")[0]
                    .GetProperty("message")
                    .GetProperty("content")
                    .GetString();

                _logger.LogInformation("LlamaCpp: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("llama.cpp request timed out after 5 minutes");
                return VisionApiResult.Failure("llama.cpp request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "llama.cpp Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"llama.cpp API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }
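
    // Note: unlike the OpenAI-compatible clients above, the Ollama client below calls Ollama's
    // native /api/generate endpoint (flat prompt plus an "images" array, non-streaming), and the
    // generated text comes back in a top-level "response" field.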

    /// <summary>
    /// Ollama Vision API client for local LLM inference.
    /// </summary>
    public class OllamaVisionClient : IAIVisionClient
    {
        private readonly HttpClient _httpClient;
        private readonly IConfiguration _configuration;
        private readonly ILogger<OllamaVisionClient> _logger;

        public OllamaVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OllamaVisionClient> logger)
        {
            _httpClient = httpClient;
            _httpClient.Timeout = TimeSpan.FromMinutes(5); // Local models can be slow
            _configuration = configuration;
            _logger = logger;
        }

        public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
        {
            var baseUrl = _configuration["AI:ModelsEndpoint"] ?? "http://athena.lan:11434";

            // Strip "ollama:" prefix if present
            var ollamaModel = model.StartsWith("ollama:") ? model[7..] : model;

            _logger.LogInformation("Ollama: Sending request to {BaseUrl} with model {Model}, image size: {Size} bytes",
                baseUrl, ollamaModel, base64Image.Length);

            var requestBody = new
            {
                model = ollamaModel,
                prompt = prompt,
                images = new[] { base64Image },
                stream = false,
                options = new
                {
                    temperature = 0.1
                }
            };

            try
            {
                var json = JsonSerializer.Serialize(requestBody);
                _logger.LogDebug("Ollama: Request payload size: {Size} bytes", json.Length);
                var content = new StringContent(json, Encoding.UTF8, "application/json");

                var response = await _httpClient.PostAsync($"{baseUrl.TrimEnd('/')}/api/generate", content);

                if (!response.IsSuccessStatusCode)
                {
                    var errorContent = await response.Content.ReadAsStringAsync();
                    _logger.LogError("Ollama API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                    return VisionApiResult.Failure($"Ollama API error ({response.StatusCode}): {errorContent}");
                }

                var responseJson = await response.Content.ReadAsStringAsync();
                _logger.LogDebug("Ollama: Response received, size: {Size} bytes", responseJson.Length);
                var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

                var messageContent = responseObj.GetProperty("response").GetString();

                _logger.LogInformation("Ollama: Successfully parsed response");
                return VisionApiResult.Success(CleanJsonResponse(messageContent));
            }
            catch (TaskCanceledException ex) when (ex.InnerException is TimeoutException)
            {
                _logger.LogError("Ollama request timed out after 5 minutes");
                return VisionApiResult.Failure("Ollama request timed out. The model may be too slow or not loaded.");
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Ollama Vision API call failed: {Message}", ex.Message);
                return VisionApiResult.Failure($"Ollama API error: {ex.Message}");
            }
        }

        private static string CleanJsonResponse(string? content)
        {
            var trimmed = content?.Trim() ?? "";
            if (trimmed.StartsWith("```json"))
            {
                trimmed = trimmed.Replace("```json", "").Replace("```", "").Trim();
            }
            return trimmed;
        }
    }

    // Models for llama.cpp /v1/models endpoint
    public class LlamaCppModel
    {
        public string Id { get; set; } = "";
        public bool IsLoaded { get; set; }
    }

    public class LlamaCppModelsResponse
    {
        [JsonPropertyName("data")]
        public List<LlamaCppModelData>? Data { get; set; }
    }

    public class LlamaCppModelData
    {
        [JsonPropertyName("id")]
        public string Id { get; set; } = "";

        [JsonPropertyName("status")]
        public LlamaCppModelStatus? Status { get; set; }
    }

    public class LlamaCppModelStatus
    {
        [JsonPropertyName("value")]
        public string? Value { get; set; }
    }
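
    // Sketch of the /v1/models payload the DTOs above are written against. Field names follow the
    // JsonPropertyName attributes; the "status" object is a server-side extension and its exact
    // shape here is an assumption:
    //
    //   { "data": [ { "id": "some-vision-model", "status": { "value": "loaded" } },
    //               { "id": "mmproj-some-model", "status": { "value": "unloaded" } } ] }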
}