Refactor: Extract AI vision clients from AIReceiptParser
Extract for better separation of concerns: - Services/PdfToImageConverter.cs - PDF to image conversion using ImageMagick - Services/AIVisionClient.cs - OpenAI and Claude vision API clients - IAIVisionClient interface - OpenAIVisionClient, ClaudeVisionClient implementations AIReceiptParser now orchestrates using injected services. Adds proper logging for auto-mapping operations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,6 @@
|
||||
using ImageMagick;
|
||||
using Microsoft.EntityFrameworkCore;
|
||||
using MoneyMap.Data;
|
||||
using MoneyMap.Models;
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace MoneyMap.Services
|
||||
@@ -16,26 +14,32 @@ namespace MoneyMap.Services
|
||||
{
|
||||
private readonly MoneyMapContext _db;
|
||||
private readonly IWebHostEnvironment _environment;
|
||||
private readonly IConfiguration _configuration;
|
||||
private readonly HttpClient _httpClient;
|
||||
private readonly IPdfToImageConverter _pdfConverter;
|
||||
private readonly OpenAIVisionClient _openAIClient;
|
||||
private readonly ClaudeVisionClient _claudeClient;
|
||||
private readonly IMerchantService _merchantService;
|
||||
private readonly IServiceProvider _serviceProvider;
|
||||
private readonly ILogger<AIReceiptParser> _logger;
|
||||
private string? _promptTemplate;
|
||||
|
||||
public AIReceiptParser(
|
||||
MoneyMapContext db,
|
||||
IWebHostEnvironment environment,
|
||||
IConfiguration configuration,
|
||||
HttpClient httpClient,
|
||||
IPdfToImageConverter pdfConverter,
|
||||
OpenAIVisionClient openAIClient,
|
||||
ClaudeVisionClient claudeClient,
|
||||
IMerchantService merchantService,
|
||||
IServiceProvider serviceProvider)
|
||||
IServiceProvider serviceProvider,
|
||||
ILogger<AIReceiptParser> logger)
|
||||
{
|
||||
_db = db;
|
||||
_environment = environment;
|
||||
_configuration = configuration;
|
||||
_httpClient = httpClient;
|
||||
_pdfConverter = pdfConverter;
|
||||
_openAIClient = openAIClient;
|
||||
_claudeClient = claudeClient;
|
||||
_merchantService = merchantService;
|
||||
_serviceProvider = serviceProvider;
|
||||
_logger = logger;
|
||||
}
|
||||
|
||||
public async Task<ReceiptParseResult> ParseReceiptAsync(long receiptId, string? model = null)
|
||||
@@ -47,30 +51,10 @@ namespace MoneyMap.Services
|
||||
if (receipt == null)
|
||||
return ReceiptParseResult.Failure("Receipt not found.");
|
||||
|
||||
// Default to gpt-4o-mini if no model specified
|
||||
var selectedModel = model ?? "gpt-4o-mini";
|
||||
|
||||
// Determine provider based on model name
|
||||
var isClaude = selectedModel.StartsWith("claude-");
|
||||
var provider = isClaude ? "Anthropic" : "OpenAI";
|
||||
|
||||
// Get appropriate API key
|
||||
string? apiKey;
|
||||
if (isClaude)
|
||||
{
|
||||
apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
|
||||
?? _configuration["Anthropic:ApiKey"];
|
||||
if (string.IsNullOrWhiteSpace(apiKey))
|
||||
return ReceiptParseResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");
|
||||
}
|
||||
else
|
||||
{
|
||||
apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
|
||||
?? _configuration["OpenAI:ApiKey"];
|
||||
if (string.IsNullOrWhiteSpace(apiKey))
|
||||
return ReceiptParseResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");
|
||||
}
|
||||
|
||||
var filePath = Path.Combine(_environment.WebRootPath, receipt.StoragePath.Replace("/", Path.DirectorySeparatorChar.ToString()));
|
||||
|
||||
if (!File.Exists(filePath))
|
||||
@@ -92,23 +76,45 @@ namespace MoneyMap.Services
|
||||
|
||||
if (receipt.ContentType == "application/pdf")
|
||||
{
|
||||
// Convert PDF to image using ImageMagick
|
||||
base64Data = await ConvertPdfToBase64ImageAsync(filePath);
|
||||
base64Data = await _pdfConverter.ConvertFirstPageToBase64Async(filePath);
|
||||
mediaType = "image/png";
|
||||
}
|
||||
else
|
||||
{
|
||||
// For images, use directly
|
||||
var fileBytes = await File.ReadAllBytesAsync(filePath);
|
||||
base64Data = Convert.ToBase64String(fileBytes);
|
||||
mediaType = receipt.ContentType;
|
||||
}
|
||||
|
||||
// Call Vision API with transaction name context
|
||||
// Build prompt
|
||||
var promptText = await LoadPromptTemplateAsync();
|
||||
var transactionName = receipt.Transaction?.Name;
|
||||
var parseData = isClaude
|
||||
? await CallClaudeVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName)
|
||||
: await CallOpenAIVisionAsync(apiKey, base64Data, mediaType, selectedModel, transactionName);
|
||||
if (!string.IsNullOrWhiteSpace(transactionName))
|
||||
{
|
||||
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
|
||||
}
|
||||
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
|
||||
|
||||
// Call appropriate vision API
|
||||
var client = isClaude ? (IAIVisionClient)_claudeClient : _openAIClient;
|
||||
var visionResult = await client.AnalyzeImageAsync(base64Data, mediaType, promptText, selectedModel);
|
||||
|
||||
if (!visionResult.IsSuccess)
|
||||
{
|
||||
parseLog.Error = visionResult.ErrorMessage;
|
||||
parseLog.CompletedAtUtc = DateTime.UtcNow;
|
||||
_db.ReceiptParseLogs.Add(parseLog);
|
||||
await _db.SaveChangesAsync();
|
||||
return ReceiptParseResult.Failure(visionResult.ErrorMessage!);
|
||||
}
|
||||
|
||||
// Parse the JSON response
|
||||
var parseData = string.IsNullOrWhiteSpace(visionResult.Content)
|
||||
? new ParsedReceiptData()
|
||||
: JsonSerializer.Deserialize<ParsedReceiptData>(visionResult.Content, new JsonSerializerOptions
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
}) ?? new ParsedReceiptData();
|
||||
|
||||
// Update receipt with parsed data
|
||||
receipt.Merchant = parseData.Merchant;
|
||||
@@ -118,7 +124,7 @@ namespace MoneyMap.Services
|
||||
receipt.ReceiptDate = parseData.ReceiptDate;
|
||||
receipt.DueDate = parseData.DueDate;
|
||||
|
||||
// Update transaction merchant if we extracted one and transaction doesn't have one yet
|
||||
// Update transaction merchant if extracted and transaction doesn't have one
|
||||
if (receipt.Transaction != null &&
|
||||
!string.IsNullOrWhiteSpace(parseData.Merchant) &&
|
||||
receipt.Transaction.MerchantId == null)
|
||||
@@ -156,19 +162,19 @@ namespace MoneyMap.Services
|
||||
_db.ReceiptParseLogs.Add(parseLog);
|
||||
await _db.SaveChangesAsync();
|
||||
|
||||
// Attempt auto-mapping after successful parse (only if receipt is not already mapped)
|
||||
// Attempt auto-mapping after successful parse
|
||||
if (!receipt.TransactionId.HasValue)
|
||||
{
|
||||
try
|
||||
{
|
||||
// Use service locator pattern to avoid circular dependency
|
||||
using var scope = _serviceProvider.CreateScope();
|
||||
var autoMapper = scope.ServiceProvider.GetRequiredService<IReceiptAutoMapper>();
|
||||
await autoMapper.AutoMapReceiptAsync(receiptId);
|
||||
_logger.LogInformation("Auto-mapping completed for receipt {ReceiptId}", receiptId);
|
||||
}
|
||||
catch
|
||||
catch (Exception ex)
|
||||
{
|
||||
// Ignore auto-mapping errors - parsing was successful
|
||||
_logger.LogWarning(ex, "Auto-mapping failed for receipt {ReceiptId}: {Message}", receiptId, ex.Message);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,44 +187,11 @@ namespace MoneyMap.Services
|
||||
_db.ReceiptParseLogs.Add(parseLog);
|
||||
await _db.SaveChangesAsync();
|
||||
|
||||
_logger.LogError(ex, "Error parsing receipt {ReceiptId}: {Message}", receiptId, ex.Message);
|
||||
return ReceiptParseResult.Failure($"Error parsing receipt: {ex.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
private async Task<string> ConvertPdfToBase64ImageAsync(string pdfPath)
|
||||
{
|
||||
return await Task.Run(() =>
|
||||
{
|
||||
var pdfBytes = File.ReadAllBytes(pdfPath);
|
||||
|
||||
// Render settings: 220 DPI for good quality
|
||||
var settings = new MagickReadSettings
|
||||
{
|
||||
Density = new Density(220),
|
||||
BackgroundColor = MagickColors.White,
|
||||
ColorSpace = ColorSpace.sRGB
|
||||
};
|
||||
|
||||
using var pages = new MagickImageCollection();
|
||||
pages.Read(pdfBytes, settings);
|
||||
|
||||
// Use first page only
|
||||
if (pages.Count == 0)
|
||||
throw new Exception("PDF has no pages");
|
||||
|
||||
using var img = (MagickImage)pages[0].Clone();
|
||||
|
||||
// Ensure we have a clean 8-bit RGB canvas
|
||||
img.ColorType = ColorType.TrueColor;
|
||||
img.Alpha(AlphaOption.Remove); // flatten onto white
|
||||
img.ResetPage();
|
||||
|
||||
// Convert to PNG bytes
|
||||
var imageBytes = img.ToByteArray(MagickFormat.Png);
|
||||
return Convert.ToBase64String(imageBytes);
|
||||
});
|
||||
}
|
||||
|
||||
private async Task<string> LoadPromptTemplateAsync()
|
||||
{
|
||||
if (_promptTemplate != null)
|
||||
@@ -232,179 +205,6 @@ namespace MoneyMap.Services
|
||||
_promptTemplate = await File.ReadAllTextAsync(promptPath);
|
||||
return _promptTemplate;
|
||||
}
|
||||
|
||||
private async Task<ParsedReceiptData> CallOpenAIVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null)
|
||||
{
|
||||
// Load the prompt template from file
|
||||
var promptText = await LoadPromptTemplateAsync();
|
||||
|
||||
// Add transaction context if available
|
||||
if (!string.IsNullOrWhiteSpace(transactionName))
|
||||
{
|
||||
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
|
||||
}
|
||||
|
||||
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
|
||||
|
||||
var requestBody = new
|
||||
{
|
||||
model = model,
|
||||
messages = new[]
|
||||
{
|
||||
new
|
||||
{
|
||||
role = "user",
|
||||
content = new object[]
|
||||
{
|
||||
new
|
||||
{
|
||||
type = "text",
|
||||
text = promptText
|
||||
},
|
||||
new
|
||||
{
|
||||
type = "image_url",
|
||||
image_url = new
|
||||
{
|
||||
url = $"data:{mediaType};base64,{base64Image}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
max_tokens = 2000,
|
||||
temperature = 0.1
|
||||
};
|
||||
|
||||
_httpClient.DefaultRequestHeaders.Clear();
|
||||
_httpClient.DefaultRequestHeaders.Add("Authorization", $"Bearer {apiKey}");
|
||||
|
||||
var json = JsonSerializer.Serialize(requestBody);
|
||||
var content = new StringContent(json, Encoding.UTF8, "application/json");
|
||||
|
||||
var response = await _httpClient.PostAsync("https://api.openai.com/v1/chat/completions", content);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
var errorContent = await response.Content.ReadAsStringAsync();
|
||||
throw new Exception($"OpenAI API error ({response.StatusCode}): {errorContent}");
|
||||
}
|
||||
|
||||
var responseJson = await response.Content.ReadAsStringAsync();
|
||||
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
|
||||
|
||||
var messageContent = responseObj
|
||||
.GetProperty("choices")[0]
|
||||
.GetProperty("message")
|
||||
.GetProperty("content")
|
||||
.GetString();
|
||||
|
||||
// Clean up the response - remove markdown code blocks if present
|
||||
messageContent = messageContent?.Trim();
|
||||
if (messageContent?.StartsWith("```json") == true)
|
||||
{
|
||||
messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim();
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(messageContent))
|
||||
{
|
||||
return new ParsedReceiptData();
|
||||
}
|
||||
|
||||
var parsedData = JsonSerializer.Deserialize<ParsedReceiptData>(messageContent, new JsonSerializerOptions
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
});
|
||||
|
||||
return parsedData ?? new ParsedReceiptData();
|
||||
}
|
||||
|
||||
private async Task<ParsedReceiptData> CallClaudeVisionAsync(string apiKey, string base64Image, string mediaType, string model, string? transactionName = null)
|
||||
{
|
||||
// Load the prompt template from file
|
||||
var promptText = await LoadPromptTemplateAsync();
|
||||
|
||||
// Add transaction context if available
|
||||
if (!string.IsNullOrWhiteSpace(transactionName))
|
||||
{
|
||||
promptText += $"\n\nNote: This transaction was recorded as \"{transactionName}\" in the bank statement, which may help identify the merchant if the receipt is unclear.";
|
||||
}
|
||||
|
||||
promptText += "\n\nRespond ONLY with valid JSON, no other text.";
|
||||
|
||||
var requestBody = new
|
||||
{
|
||||
model = model,
|
||||
max_tokens = 2000,
|
||||
messages = new[]
|
||||
{
|
||||
new
|
||||
{
|
||||
role = "user",
|
||||
content = new object[]
|
||||
{
|
||||
new
|
||||
{
|
||||
type = "image",
|
||||
source = new
|
||||
{
|
||||
type = "base64",
|
||||
media_type = mediaType,
|
||||
data = base64Image
|
||||
}
|
||||
},
|
||||
new
|
||||
{
|
||||
type = "text",
|
||||
text = promptText
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
_httpClient.DefaultRequestHeaders.Clear();
|
||||
_httpClient.DefaultRequestHeaders.Add("x-api-key", apiKey);
|
||||
_httpClient.DefaultRequestHeaders.Add("anthropic-version", "2023-06-01");
|
||||
|
||||
var json = JsonSerializer.Serialize(requestBody);
|
||||
var content = new StringContent(json, Encoding.UTF8, "application/json");
|
||||
|
||||
var response = await _httpClient.PostAsync("https://api.anthropic.com/v1/messages", content);
|
||||
|
||||
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
var errorContent = await response.Content.ReadAsStringAsync();
|
||||
throw new Exception($"Anthropic API error ({response.StatusCode}): {errorContent}");
|
||||
}
|
||||
|
||||
var responseJson = await response.Content.ReadAsStringAsync();
|
||||
var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);
|
||||
|
||||
var messageContent = responseObj
|
||||
.GetProperty("content")[0]
|
||||
.GetProperty("text")
|
||||
.GetString();
|
||||
|
||||
// Clean up the response - remove markdown code blocks if present
|
||||
messageContent = messageContent?.Trim();
|
||||
if (messageContent?.StartsWith("```json") == true)
|
||||
{
|
||||
messageContent = messageContent.Replace("```json", "").Replace("```", "").Trim();
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(messageContent))
|
||||
{
|
||||
return new ParsedReceiptData();
|
||||
}
|
||||
|
||||
var parsedData = JsonSerializer.Deserialize<ParsedReceiptData>(messageContent, new JsonSerializerOptions
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
});
|
||||
|
||||
return parsedData ?? new ParsedReceiptData();
|
||||
}
|
||||
}
|
||||
|
||||
public class ParsedReceiptData
|
||||
@@ -440,4 +240,4 @@ namespace MoneyMap.Services
|
||||
public static ReceiptParseResult Failure(string message) =>
|
||||
new() { IsSuccess = false, Message = message };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
219
MoneyMap/Services/AIVisionClient.cs
Normal file
219
MoneyMap/Services/AIVisionClient.cs
Normal file
@@ -0,0 +1,219 @@
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
|
||||
namespace MoneyMap.Services
|
||||
{
|
||||
/// <summary>
/// Outcome of a single AI vision API call: either the returned text content
/// or an error message describing why the call failed.
/// </summary>
public class VisionApiResult
{
    /// <summary>True when the call produced content; false when it failed.</summary>
    public bool IsSuccess { get; init; }

    /// <summary>Text returned by the provider. Set by <see cref="Success"/>.</summary>
    public string? Content { get; init; }

    /// <summary>Description of the failure. Set by <see cref="Failure"/>.</summary>
    public string? ErrorMessage { get; init; }

    /// <summary>Builds a successful result carrying <paramref name="content"/>.</summary>
    /// <param name="content">Text content returned by the provider.</param>
    /// <returns>A result with <see cref="IsSuccess"/> set to true.</returns>
    public static VisionApiResult Success(string content)
    {
        return new VisionApiResult
        {
            Content = content,
            IsSuccess = true
        };
    }

    /// <summary>Builds a failed result carrying <paramref name="error"/>.</summary>
    /// <param name="error">Human-readable description of the failure.</param>
    /// <returns>A result with <see cref="IsSuccess"/> set to false.</returns>
    public static VisionApiResult Failure(string error)
    {
        return new VisionApiResult
        {
            ErrorMessage = error,
            IsSuccess = false
        };
    }
}
|
||||
|
||||
/// <summary>
/// Abstraction over an AI provider's vision endpoint: sends one image plus a
/// text prompt and returns the provider's text answer.
/// </summary>
public interface IAIVisionClient
{
    /// <summary>
    /// Submits an image and prompt to the provider for analysis.
    /// </summary>
    /// <param name="base64Image">Image bytes encoded as base64.</param>
    /// <param name="mediaType">MIME type of the image (for example <c>image/png</c>).</param>
    /// <param name="prompt">Instruction text sent alongside the image.</param>
    /// <param name="model">Provider-specific model identifier.</param>
    /// <returns>A <see cref="VisionApiResult"/> describing success or failure.</returns>
    Task<VisionApiResult> AnalyzeImageAsync(
        string base64Image,
        string mediaType,
        string prompt,
        string model);
}
|
||||
|
||||
/// <summary>
/// OpenAI Vision API client. Posts a prompt and a base64 image to the chat
/// completions endpoint and returns the first choice's message content.
/// </summary>
public class OpenAIVisionClient : IAIVisionClient
{
    private const string Endpoint = "https://api.openai.com/v1/chat/completions";

    private readonly HttpClient _httpClient;
    private readonly IConfiguration _configuration;
    private readonly ILogger<OpenAIVisionClient> _logger;

    public OpenAIVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<OpenAIVisionClient> logger)
    {
        _httpClient = httpClient;
        _configuration = configuration;
        _logger = logger;
    }

    /// <summary>
    /// Sends the image and prompt to OpenAI and returns the model's text answer.
    /// </summary>
    /// <param name="base64Image">Image bytes encoded as base64.</param>
    /// <param name="mediaType">MIME type used in the data URL (for example <c>image/png</c>).</param>
    /// <param name="prompt">Instruction text sent as the first content part.</param>
    /// <param name="model">OpenAI model identifier.</param>
    /// <returns>
    /// A successful result with the cleaned message content, or a failure result when the
    /// API key is missing, the API returns a non-success status, or any exception occurs.
    /// This method does not throw for those cases.
    /// </returns>
    public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
    {
        // The environment variable takes precedence over configuration.
        var apiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY")
            ?? _configuration["OpenAI:ApiKey"];

        if (string.IsNullOrWhiteSpace(apiKey))
        {
            return VisionApiResult.Failure("OpenAI API key not configured. Set OPENAI_API_KEY environment variable or OpenAI:ApiKey in appsettings.json");
        }

        var requestBody = new
        {
            model = model,
            messages = new[]
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new { type = "text", text = prompt },
                        new
                        {
                            type = "image_url",
                            image_url = new { url = $"data:{mediaType};base64,{base64Image}" }
                        }
                    }
                }
            },
            max_tokens = 2000,
            temperature = 0.1
        };

        try
        {
            // Headers go on the request message, not on HttpClient.DefaultRequestHeaders:
            // the default-header collection is shared state and is not safe to mutate
            // while other requests may be in flight on the same client.
            using var request = new HttpRequestMessage(HttpMethod.Post, Endpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
            };
            request.Headers.Add("Authorization", $"Bearer {apiKey}");

            using var response = await _httpClient.SendAsync(request);

            if (!response.IsSuccessStatusCode)
            {
                var errorContent = await response.Content.ReadAsStringAsync();
                _logger.LogError("OpenAI API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                return VisionApiResult.Failure($"OpenAI API error ({response.StatusCode}): {errorContent}");
            }

            var responseJson = await response.Content.ReadAsStringAsync();
            var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

            // A missing property or empty "choices" array throws here and is
            // reported as a failure by the catch block below.
            var messageContent = responseObj
                .GetProperty("choices")[0]
                .GetProperty("message")
                .GetProperty("content")
                .GetString();

            return VisionApiResult.Success(CleanJsonResponse(messageContent));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "OpenAI Vision API call failed: {Message}", ex.Message);
            return VisionApiResult.Failure($"OpenAI API error: {ex.Message}");
        }
    }

    /// <summary>
    /// Trims the model output and, when it is wrapped in a markdown code fence
    /// (with or without a <c>json</c> language tag), returns only the fenced payload.
    /// Backticks inside the payload are left untouched.
    /// </summary>
    /// <param name="content">Raw message content; may be null.</param>
    /// <returns>The unwrapped, trimmed text; an empty string when <paramref name="content"/> is null.</returns>
    private static string CleanJsonResponse(string? content)
    {
        const string Fence = "```";

        var trimmed = content?.Trim() ?? "";
        if (!trimmed.StartsWith(Fence, StringComparison.Ordinal))
        {
            return trimmed;
        }

        // Drop the opening fence and an optional "json" language tag.
        var body = trimmed[Fence.Length..];
        if (body.StartsWith("json", StringComparison.OrdinalIgnoreCase))
        {
            body = body["json".Length..];
        }

        // Cut at the last closing fence so any trailing commentary is discarded.
        var closing = body.LastIndexOf(Fence, StringComparison.Ordinal);
        if (closing >= 0)
        {
            body = body[..closing];
        }

        return body.Trim();
    }
}
|
||||
|
||||
/// <summary>
/// Anthropic Claude Vision API client. Posts a base64 image and a prompt to the
/// messages endpoint and returns the text of the first content item.
/// </summary>
public class ClaudeVisionClient : IAIVisionClient
{
    private const string Endpoint = "https://api.anthropic.com/v1/messages";
    private const string AnthropicVersion = "2023-06-01";

    private readonly HttpClient _httpClient;
    private readonly IConfiguration _configuration;
    private readonly ILogger<ClaudeVisionClient> _logger;

    public ClaudeVisionClient(HttpClient httpClient, IConfiguration configuration, ILogger<ClaudeVisionClient> logger)
    {
        _httpClient = httpClient;
        _configuration = configuration;
        _logger = logger;
    }

    /// <summary>
    /// Sends the image and prompt to Anthropic and returns the model's text answer.
    /// </summary>
    /// <param name="base64Image">Image bytes encoded as base64.</param>
    /// <param name="mediaType">MIME type of the image (for example <c>image/png</c>).</param>
    /// <param name="prompt">Instruction text sent after the image content part.</param>
    /// <param name="model">Anthropic model identifier.</param>
    /// <returns>
    /// A successful result with the cleaned text content, or a failure result when the
    /// API key is missing, the API returns a non-success status, or any exception occurs.
    /// This method does not throw for those cases.
    /// </returns>
    public async Task<VisionApiResult> AnalyzeImageAsync(string base64Image, string mediaType, string prompt, string model)
    {
        // The environment variable takes precedence over configuration.
        var apiKey = Environment.GetEnvironmentVariable("ANTHROPIC_API_KEY")
            ?? _configuration["Anthropic:ApiKey"];

        if (string.IsNullOrWhiteSpace(apiKey))
        {
            return VisionApiResult.Failure("Anthropic API key not configured. Set ANTHROPIC_API_KEY environment variable or Anthropic:ApiKey in appsettings.json");
        }

        var requestBody = new
        {
            model = model,
            max_tokens = 2000,
            messages = new[]
            {
                new
                {
                    role = "user",
                    content = new object[]
                    {
                        new
                        {
                            type = "image",
                            source = new
                            {
                                type = "base64",
                                media_type = mediaType,
                                data = base64Image
                            }
                        },
                        new { type = "text", text = prompt }
                    }
                }
            }
        };

        try
        {
            // Headers go on the request message, not on HttpClient.DefaultRequestHeaders:
            // the default-header collection is shared state and is not safe to mutate
            // while other requests may be in flight on the same client.
            using var request = new HttpRequestMessage(HttpMethod.Post, Endpoint)
            {
                Content = new StringContent(JsonSerializer.Serialize(requestBody), Encoding.UTF8, "application/json")
            };
            request.Headers.Add("x-api-key", apiKey);
            request.Headers.Add("anthropic-version", AnthropicVersion);

            using var response = await _httpClient.SendAsync(request);

            if (!response.IsSuccessStatusCode)
            {
                var errorContent = await response.Content.ReadAsStringAsync();
                _logger.LogError("Anthropic API error ({StatusCode}): {Error}", response.StatusCode, errorContent);
                return VisionApiResult.Failure($"Anthropic API error ({response.StatusCode}): {errorContent}");
            }

            var responseJson = await response.Content.ReadAsStringAsync();
            var responseObj = JsonSerializer.Deserialize<JsonElement>(responseJson);

            // A missing property or empty "content" array throws here and is
            // reported as a failure by the catch block below.
            var messageContent = responseObj
                .GetProperty("content")[0]
                .GetProperty("text")
                .GetString();

            return VisionApiResult.Success(CleanJsonResponse(messageContent));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Claude Vision API call failed: {Message}", ex.Message);
            return VisionApiResult.Failure($"Anthropic API error: {ex.Message}");
        }
    }

    /// <summary>
    /// Trims the model output and, when it is wrapped in a markdown code fence
    /// (with or without a <c>json</c> language tag), returns only the fenced payload.
    /// Backticks inside the payload are left untouched.
    /// </summary>
    /// <param name="content">Raw text content; may be null.</param>
    /// <returns>The unwrapped, trimmed text; an empty string when <paramref name="content"/> is null.</returns>
    private static string CleanJsonResponse(string? content)
    {
        const string Fence = "```";

        var trimmed = content?.Trim() ?? "";
        if (!trimmed.StartsWith(Fence, StringComparison.Ordinal))
        {
            return trimmed;
        }

        // Drop the opening fence and an optional "json" language tag.
        var body = trimmed[Fence.Length..];
        if (body.StartsWith("json", StringComparison.OrdinalIgnoreCase))
        {
            body = body["json".Length..];
        }

        // Cut at the last closing fence so any trailing commentary is discarded.
        var closing = body.LastIndexOf(Fence, StringComparison.Ordinal);
        if (closing >= 0)
        {
            body = body[..closing];
        }

        return body.Trim();
    }
}
|
||||
}
|
||||
61
MoneyMap/Services/PdfToImageConverter.cs
Normal file
61
MoneyMap/Services/PdfToImageConverter.cs
Normal file
@@ -0,0 +1,61 @@
|
||||
using ImageMagick;
|
||||
|
||||
namespace MoneyMap.Services
|
||||
{
|
||||
/// <summary>
/// Converts PDF documents into base64-encoded images suitable for AI processing.
/// </summary>
public interface IPdfToImageConverter
{
    /// <summary>
    /// Reads the PDF at <paramref name="pdfPath"/> and renders its first page
    /// as a base64-encoded PNG image.
    /// </summary>
    /// <param name="pdfPath">File system path of the PDF to convert.</param>
    /// <returns>The first page as a base64-encoded PNG.</returns>
    Task<string> ConvertFirstPageToBase64Async(
        string pdfPath);

    /// <summary>
    /// Renders the first page of an in-memory PDF as a base64-encoded PNG image.
    /// </summary>
    /// <param name="pdfBytes">Raw bytes of the PDF document.</param>
    /// <returns>The first page as a base64-encoded PNG.</returns>
    Task<string> ConvertFirstPageToBase64Async(
        byte[] pdfBytes);
}
|
||||
|
||||
/// <summary>
/// ImageMagick-backed <see cref="IPdfToImageConverter"/> that rasterizes the
/// first page of a PDF at <see cref="DefaultDpi"/> and returns it as a base64 PNG.
/// </summary>
public class PdfToImageConverter : IPdfToImageConverter
{
    private const int DefaultDpi = 220;

    /// <summary>
    /// Reads the PDF at <paramref name="pdfPath"/> and renders its first page
    /// as a base64-encoded PNG image.
    /// </summary>
    /// <param name="pdfPath">File system path of the PDF to convert.</param>
    /// <returns>The first page as a base64-encoded PNG.</returns>
    public async Task<string> ConvertFirstPageToBase64Async(string pdfPath)
    {
        // Read asynchronously so the caller's thread is not blocked on disk I/O.
        // File errors now surface through the returned task instead of being
        // thrown synchronously.
        var pdfBytes = await File.ReadAllBytesAsync(pdfPath);
        return await ConvertFirstPageToBase64Async(pdfBytes);
    }

    /// <summary>
    /// Renders the first page of an in-memory PDF as a base64-encoded PNG image.
    /// </summary>
    /// <param name="pdfBytes">Raw bytes of the PDF document.</param>
    /// <returns>The first page as a base64-encoded PNG.</returns>
    /// <exception cref="InvalidOperationException">The PDF yielded no pages.</exception>
    public Task<string> ConvertFirstPageToBase64Async(byte[] pdfBytes)
    {
        // Rendering is CPU-bound, so it is offloaded to the thread pool.
        return Task.Run(() =>
        {
            var settings = new MagickReadSettings
            {
                Density = new Density(DefaultDpi),
                BackgroundColor = MagickColors.White,
                ColorSpace = ColorSpace.sRGB,
                // Only the first page is used, so do not rasterize the rest.
                FrameIndex = 0,
                FrameCount = 1
            };

            using var pages = new MagickImageCollection();
            pages.Read(pdfBytes, settings);

            if (pages.Count == 0)
            {
                throw new InvalidOperationException("PDF has no pages");
            }

            using var img = (MagickImage)pages[0].Clone();

            // Ensure we have a clean 8-bit RGB canvas
            img.ColorType = ColorType.TrueColor;
            img.Alpha(AlphaOption.Remove); // flatten onto white
            img.ResetPage();

            // Convert to PNG bytes
            var imageBytes = img.ToByteArray(MagickFormat.Png);
            return Convert.ToBase64String(imageBytes);
        });
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user