feat(console): improve training data collection and best-fit persistence

- Add verbose per-file and per-sheet-size console output during collection
- Skip already-processed parts at the sheet-size level instead of all-or-nothing
- Precompute best-fits once per part and reuse across all sheet sizes
- Clear best-fit cache after each part to prevent memory growth
- Save best-fits in separate bestfits/ zip entries instead of embedding in nest.json
- Filter to Keep=true results only and scope to plate sizes in the nest
- Set nest name to match filename (includes sheet size and part count)
- Add TrainingDatabase with per-run skip logic and SQLite schema

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-14 12:39:24 -04:00
parent 3133228fc9
commit d6ffa77f35
8 changed files with 497 additions and 15 deletions

View File

@@ -299,11 +299,6 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
Color.FromArgb(215, 130, 130),
};
var dxfFiles = Directory.GetFiles(dir, "*.dxf", SearchOption.AllDirectories);
Console.WriteLine($"Found {dxfFiles.Length} DXF files. Initializing SQLite database at: {dbPath}");
using var db = new TrainingDatabase(dbPath);
var sheetSuite = new[]
{
new Size(96, 48), new Size(120, 48), new Size(144, 48),
@@ -312,17 +307,48 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
new Size(48, 24), new Size(120, 10)
};
var dxfFiles = Directory.GetFiles(dir, "*.dxf", SearchOption.AllDirectories);
Console.WriteLine($"Found {dxfFiles.Length} DXF files");
Console.WriteLine($"Database: {Path.GetFullPath(dbPath)}");
Console.WriteLine($"Sheet sizes: {sheetSuite.Length} configurations");
Console.WriteLine($"Spacing: {s:F2}");
if (saveDir != null) Console.WriteLine($"Saving nests to: {saveDir}");
Console.WriteLine("---");
using var db = new TrainingDatabase(dbPath);
var importer = new DxfImporter();
var colorIndex = 0;
var processed = 0;
var skippedGeometry = 0;
var skippedFeatures = 0;
var skippedExisting = 0;
var totalRuns = 0;
var totalSw = Stopwatch.StartNew();
foreach (var file in dxfFiles)
{
var fileNum = processed + skippedGeometry + skippedFeatures + skippedExisting + 1;
var partNo = Path.GetFileNameWithoutExtension(file);
Console.Write($"[{fileNum}/{dxfFiles.Length}] {partNo}");
try
{
if (!importer.GetGeometry(file, out var entities)) continue;
var existingRuns = db.RunCount(Path.GetFileName(file));
if (existingRuns >= sheetSuite.Length)
{
Console.WriteLine(" - SKIP (all sizes done)");
skippedExisting++;
continue;
}
if (!importer.GetGeometry(file, out var entities))
{
Console.WriteLine(" - SKIP (no geometry)");
skippedGeometry++;
continue;
}
var partNo = Path.GetFileNameWithoutExtension(file);
var drawing = new Drawing(Path.GetFileName(file));
drawing.Program = OpenNest.Converters.ConvertGeometry.ToProgram(entities);
drawing.UpdateArea();
@@ -330,14 +356,38 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
colorIndex++;
var features = FeatureExtractor.Extract(drawing);
if (features == null) continue;
if (features == null)
{
Console.WriteLine(" - SKIP (feature extraction failed)");
skippedFeatures++;
continue;
}
Console.WriteLine($" (area={features.Area:F1}, verts={features.VertexCount})");
// Precompute best-fits once for all sheet sizes.
var sizes = sheetSuite.Select(sz => (sz.Width, sz.Length)).ToList();
var bfSw = Stopwatch.StartNew();
BestFitCache.ComputeForSizes(drawing, s, sizes);
bfSw.Stop();
Console.WriteLine($" Best-fits computed in {bfSw.ElapsedMilliseconds}ms");
using var txn = db.BeginTransaction();
var partId = db.GetOrAddPart(Path.GetFileName(file), features, drawing.Program.ToString());
var partSw = Stopwatch.StartNew();
var runsThisPart = 0;
var bestUtil = 0.0;
var bestCount = 0;
foreach (var size in sheetSuite)
{
if (db.HasRun(Path.GetFileName(file), size.Width, size.Length, s))
{
Console.WriteLine($" {size.Length}x{size.Width} - skip (exists)");
continue;
}
Plate runPlate;
if (templateNest != null)
{
@@ -350,8 +400,23 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
runPlate = new Plate { Size = size, PartSpacing = s };
}
var sizeSw = Stopwatch.StartNew();
var result = BruteForceRunner.Run(drawing, runPlate);
if (result == null) continue;
sizeSw.Stop();
if (result == null)
{
Console.WriteLine($" {size.Length}x{size.Width} - no fit");
continue;
}
if (result.Utilization > bestUtil)
{
bestUtil = result.Utilization;
bestCount = result.PartCount;
}
Console.WriteLine($" {size.Length}x{size.Width} - {result.PartCount}pcs, {result.Utilization:P1}, {sizeSw.ElapsedMilliseconds}ms");
string savedFilePath = null;
if (saveDir != null)
@@ -364,14 +429,15 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
var partDir = Path.Combine(saveDir, bucket, partNo);
Directory.CreateDirectory(partDir);
var fileName = $"{partNo}-{size.Length}x{size.Width}-{result.PartCount}pcs.zip";
var nestName = $"{partNo}-{size.Length}x{size.Width}-{result.PartCount}pcs";
var fileName = nestName + ".zip";
savedFilePath = Path.Combine(partDir, fileName);
// Create nest from template or from scratch
Nest nestObj;
if (templateNest != null)
{
nestObj = new Nest(partNo)
nestObj = new Nest(nestName)
{
Units = templateNest.Units,
DateCreated = DateTime.Now
@@ -380,7 +446,7 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
}
else
{
nestObj = new Nest(partNo) { Units = Units.Inches, DateCreated = DateTime.Now };
nestObj = new Nest(nestName) { Units = Units.Inches, DateCreated = DateTime.Now };
}
nestObj.Drawings.Add(drawing);
@@ -394,19 +460,29 @@ int RunDataCollection(string dir, string dbPath, string saveDir, double s, strin
}
db.AddRun(partId, size.Width, size.Length, s, result, savedFilePath);
runsThisPart++;
totalRuns++;
}
txn.Commit();
BestFitCache.Invalidate(drawing);
partSw.Stop();
processed++;
if (processed % 10 == 0) Console.WriteLine($"Processed {processed}/{dxfFiles.Length} parts across all sheet sizes...");
Console.WriteLine($" Total: {runsThisPart} runs, best={bestCount}pcs @ {bestUtil:P1}, {partSw.ElapsedMilliseconds}ms");
}
catch (Exception ex)
{
Console.Error.WriteLine($"Error processing {file}: {ex.Message}");
Console.WriteLine();
Console.Error.WriteLine($" ERROR: {ex.Message}");
}
}
Console.WriteLine($"Done! Brute-force data for {processed} parts saved to {dbPath}");
totalSw.Stop();
Console.WriteLine("---");
Console.WriteLine($"Processed: {processed} parts, {totalRuns} total runs");
Console.WriteLine($"Skipped: {skippedExisting} (existing) + {skippedGeometry} (no geometry) + {skippedFeatures} (no features)");
Console.WriteLine($"Time: {totalSw.Elapsed:h\\:mm\\:ss}");
Console.WriteLine($"Database: {Path.GetFullPath(dbPath)}");
return 0;
}

View File

@@ -0,0 +1,145 @@
using System;
using Microsoft.Data.Sqlite;
using OpenNest.Engine.ML;
namespace OpenNest.Console
{
/// <summary>
/// SQLite-backed store for training data: one row per part in <c>Parts</c>
/// (feature values plus geometry text) and one row per nesting run in
/// <c>Runs</c> (sheet size, spacing and the result of that run).
/// </summary>
/// <remarks>
/// The instance owns a single open connection for its whole lifetime and
/// closes it in <see cref="Dispose"/>.
/// </remarks>
public class TrainingDatabase : IDisposable
{
    private readonly SqliteConnection _connection;

    /// <summary>
    /// Opens the database file at <paramref name="dbPath"/> in read/write/create
    /// mode and makes sure the tables and indexes exist.
    /// </summary>
    /// <param name="dbPath">Path of the SQLite database file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dbPath"/> is null.</exception>
    public TrainingDatabase(string dbPath)
    {
        if (dbPath == null) throw new ArgumentNullException(nameof(dbPath));

        var connectionString = new SqliteConnectionStringBuilder
        {
            DataSource = dbPath,
            Mode = SqliteOpenMode.ReadWriteCreate
        }.ToString();

        _connection = new SqliteConnection(connectionString);
        try
        {
            _connection.Open();
            InitializeSchema();
        }
        catch
        {
            // The constructor is about to fail, so nobody will ever call
            // Dispose on this instance; release the connection here.
            _connection.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Creates the <c>Parts</c> and <c>Runs</c> tables and their lookup indexes
    /// when they do not exist yet. Safe to run against an existing database
    /// because every statement uses <c>IF NOT EXISTS</c>.
    /// </summary>
    private void InitializeSchema()
    {
        using var cmd = _connection.CreateCommand();
        cmd.CommandText = @"
CREATE TABLE IF NOT EXISTS Parts (
Id INTEGER PRIMARY KEY AUTOINCREMENT,
FileName TEXT,
Area REAL,
Convexity REAL,
AspectRatio REAL,
BBFill REAL,
Circularity REAL,
VertexCount INTEGER,
Bitmask BLOB,
GeometryData TEXT
);
CREATE TABLE IF NOT EXISTS Runs (
Id INTEGER PRIMARY KEY AUTOINCREMENT,
PartId INTEGER,
SheetWidth REAL,
SheetHeight REAL,
Spacing REAL,
PartCount INTEGER,
Utilization REAL,
TimeMs INTEGER,
LayoutData TEXT,
FilePath TEXT,
FOREIGN KEY(PartId) REFERENCES Parts(Id)
);
CREATE INDEX IF NOT EXISTS idx_parts_filename ON Parts(FileName);
CREATE INDEX IF NOT EXISTS idx_runs_partid ON Runs(PartId);
";
        cmd.ExecuteNonQuery();
    }

    /// <summary>
    /// Returns the id of the <c>Parts</c> row whose <c>FileName</c> equals
    /// <paramref name="fileName"/>, inserting a new row first when none exists.
    /// </summary>
    /// <param name="fileName">File name used as the lookup key.</param>
    /// <param name="features">
    /// Feature values stored with a newly inserted part. Only read when the
    /// part is not in the database yet.
    /// </param>
    /// <param name="geometryData">Geometry text stored with a newly inserted part; null is stored as SQL NULL.</param>
    /// <returns>The row id of the existing or newly inserted part.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="fileName"/> is null, or the part must be inserted and
    /// <paramref name="features"/> is null.
    /// </exception>
    public long GetOrAddPart(string fileName, PartFeatures features, string geometryData)
    {
        if (fileName == null) throw new ArgumentNullException(nameof(fileName));

        // Reuse the existing row when this file name has been stored before.
        using (var checkCmd = _connection.CreateCommand())
        {
            checkCmd.CommandText = "SELECT Id FROM Parts WHERE FileName = @name";
            checkCmd.Parameters.AddWithValue("@name", fileName);
            if (checkCmd.ExecuteScalar() is long existingId) return existingId;
        }

        // Features are only needed for the insert, so validate them here and
        // keep plain lookups of existing parts working without them.
        if (features == null) throw new ArgumentNullException(nameof(features));

        using (var insertCmd = _connection.CreateCommand())
        {
            insertCmd.CommandText = @"
INSERT INTO Parts (FileName, Area, Convexity, AspectRatio, BBFill, Circularity, VertexCount, Bitmask, GeometryData)
VALUES (@name, @area, @conv, @asp, @fill, @circ, @vert, @mask, @geo);
SELECT last_insert_rowid();";
            insertCmd.Parameters.AddWithValue("@name", fileName);
            insertCmd.Parameters.AddWithValue("@area", features.Area);
            insertCmd.Parameters.AddWithValue("@conv", features.Convexity);
            insertCmd.Parameters.AddWithValue("@asp", features.AspectRatio);
            insertCmd.Parameters.AddWithValue("@fill", features.BoundingBoxFill);
            insertCmd.Parameters.AddWithValue("@circ", features.Circularity);
            insertCmd.Parameters.AddWithValue("@vert", features.VertexCount);
            // A CLR null parameter value is rejected by the provider when the
            // command executes, so optional values are sent as DBNull instead.
            insertCmd.Parameters.AddWithValue("@mask", OrDbNull(features.Bitmask));
            insertCmd.Parameters.AddWithValue("@geo", OrDbNull(geometryData));
            return (long)insertCmd.ExecuteScalar();
        }
    }

    /// <summary>
    /// Tells whether a run is already stored for the part with the given file
    /// name at exactly this sheet width, sheet height and spacing.
    /// </summary>
    /// <remarks>The numeric columns are compared with SQL equality, not a tolerance.</remarks>
    public bool HasRun(string fileName, double sheetWidth, double sheetHeight, double spacing)
    {
        using var cmd = _connection.CreateCommand();
        cmd.CommandText = @"SELECT COUNT(*) FROM Runs r JOIN Parts p ON r.PartId = p.Id
WHERE p.FileName = @name AND r.SheetWidth = @w AND r.SheetHeight = @h AND r.Spacing = @s";
        cmd.Parameters.AddWithValue("@name", fileName);
        cmd.Parameters.AddWithValue("@w", sheetWidth);
        cmd.Parameters.AddWithValue("@h", sheetHeight);
        cmd.Parameters.AddWithValue("@s", spacing);
        return (long)cmd.ExecuteScalar() > 0;
    }

    /// <summary>
    /// Counts every stored run for the part with the given file name.
    /// </summary>
    /// <remarks>
    /// The count is not filtered by sheet size or spacing, so runs recorded
    /// with a different spacing are included.
    /// </remarks>
    public int RunCount(string fileName)
    {
        using var cmd = _connection.CreateCommand();
        cmd.CommandText = "SELECT COUNT(*) FROM Runs r JOIN Parts p ON r.PartId = p.Id WHERE p.FileName = @name";
        cmd.Parameters.AddWithValue("@name", fileName);
        return (int)(long)cmd.ExecuteScalar();
    }

    /// <summary>
    /// Inserts one <c>Runs</c> row for the given part.
    /// </summary>
    /// <param name="partId">Id returned by <see cref="GetOrAddPart"/>.</param>
    /// <param name="w">Sheet width stored in <c>SheetWidth</c>.</param>
    /// <param name="h">Sheet height stored in <c>SheetHeight</c>.</param>
    /// <param name="s">Part spacing stored in <c>Spacing</c>.</param>
    /// <param name="result">Run result supplying part count, utilization, time and layout data.</param>
    /// <param name="filePath">Path of the saved nest file; null is stored as an empty string.</param>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
    public void AddRun(long partId, double w, double h, double s, BruteForceResult result, string filePath)
    {
        if (result == null) throw new ArgumentNullException(nameof(result));

        using var cmd = _connection.CreateCommand();
        cmd.CommandText = @"
INSERT INTO Runs (PartId, SheetWidth, SheetHeight, Spacing, PartCount, Utilization, TimeMs, LayoutData, FilePath)
VALUES (@pid, @w, @h, @s, @cnt, @util, @time, @layout, @path)";
        cmd.Parameters.AddWithValue("@pid", partId);
        cmd.Parameters.AddWithValue("@w", w);
        cmd.Parameters.AddWithValue("@h", h);
        cmd.Parameters.AddWithValue("@s", s);
        cmd.Parameters.AddWithValue("@cnt", result.PartCount);
        cmd.Parameters.AddWithValue("@util", result.Utilization);
        cmd.Parameters.AddWithValue("@time", result.TimeMs);
        cmd.Parameters.AddWithValue("@layout", result.LayoutData ?? "");
        cmd.Parameters.AddWithValue("@path", filePath ?? "");
        cmd.ExecuteNonQuery();
    }

    /// <summary>
    /// Begins a transaction on the underlying connection. The caller owns the
    /// returned transaction and is responsible for committing or disposing it.
    /// </summary>
    public SqliteTransaction BeginTransaction()
    {
        return _connection.BeginTransaction();
    }

    /// <summary>Disposes the underlying connection.</summary>
    public void Dispose()
    {
        _connection?.Dispose();
    }

    // Maps a missing optional value to DBNull so it is stored as SQL NULL.
    private static object OrDbNull(object value)
    {
        return value ?? DBNull.Value;
    }
}
}

View File

@@ -54,6 +54,93 @@ namespace OpenNest.Engine.BestFit
}
}
/// <summary>
/// Computes best-fit results for <paramref name="drawing"/> once and caches a
/// separately filtered copy for every plate size that has no cache entry yet.
/// </summary>
/// <param name="drawing">Drawing the best-fits are computed for.</param>
/// <param name="spacing">Spacing passed to the finder and stored in the cache key.</param>
/// <param name="plateSizes">Plate sizes to make sure are cached.</param>
public static void ComputeForSizes(
    Drawing drawing, double spacing,
    IEnumerable<(double Width, double Height)> plateSizes)
{
    // Gather the sizes that still need an entry and, in the same pass, the
    // largest width and height among them. The finder runs against that
    // largest extent so its own filtering stays as permissive as possible;
    // each size is re-filtered against its real dimensions further down.
    var pending = new List<(double Width, double Height)>();
    var largestWidth = 0.0;
    var largestHeight = 0.0;

    foreach (var plateSize in plateSizes)
    {
        if (_cache.ContainsKey(new CacheKey(drawing, plateSize.Width, plateSize.Height, spacing)))
            continue;

        pending.Add(plateSize);

        if (plateSize.Width > largestWidth) largestWidth = plateSize.Width;
        if (plateSize.Height > largestHeight) largestHeight = plateSize.Height;
    }

    if (pending.Count == 0)
        return;

    // Shallow copy of one result; Candidate and HullAngles are shared references.
    BestFitResult Duplicate(BestFitResult source)
    {
        return new BestFitResult
        {
            Candidate = source.Candidate,
            RotatedArea = source.RotatedArea,
            BoundingWidth = source.BoundingWidth,
            BoundingHeight = source.BoundingHeight,
            OptimalRotation = source.OptimalRotation,
            TrueArea = source.TrueArea,
            HullAngles = source.HullAngles,
            Keep = source.Keep,
            Reason = source.Reason
        };
    }

    IPairEvaluator pairEvaluator = null;

    try
    {
        // Both factories are optional and best-effort: when one is missing or
        // throws, null is handed to the finder instead.
        if (CreateEvaluator != null)
        {
            try { pairEvaluator = CreateEvaluator(drawing, spacing); }
            catch { /* fall back to default evaluator */ }
        }

        ISlideComputer slides = null;

        if (CreateSlideComputer != null)
        {
            try { slides = CreateSlideComputer(); }
            catch { /* fall back to CPU slide computation */ }
        }

        // One expensive pass over the largest extent.
        var finder = new BestFitFinder(largestWidth, largestHeight, pairEvaluator, slides);
        var sharedResults = finder.FindBestFits(drawing, spacing, StepSize);

        // Every pending size gets its own copies so that filtering one size
        // cannot touch the result objects cached for another.
        foreach (var plateSize in pending)
        {
            var sizeFilter = new BestFitFilter
            {
                MaxPlateWidth = plateSize.Width,
                MaxPlateHeight = plateSize.Height
            };

            var perSize = new List<BestFitResult>(sharedResults.Count);

            foreach (var shared in sharedResults)
                perSize.Add(Duplicate(shared));

            sizeFilter.Apply(perSize);

            _cache.TryAdd(new CacheKey(drawing, plateSize.Width, plateSize.Height, spacing), perSize);
        }
    }
    finally
    {
        // Only the evaluator is disposed here; the slide computer is left alone.
        if (pairEvaluator is IDisposable disposableEvaluator)
            disposableEvaluator.Dispose();
    }
}
public static void Invalidate(Drawing drawing)
{
foreach (var key in _cache.Keys)
@@ -63,6 +150,25 @@ namespace OpenNest.Engine.BestFit
}
}
/// <summary>
/// Seeds the cache with precomputed <paramref name="results"/> for the given
/// drawing, plate size and spacing. The entry is added with <c>TryAdd</c>, so
/// an entry already cached under the same key is not replaced.
/// </summary>
public static void Populate(Drawing drawing, double plateWidth, double plateHeight,
    double spacing, List<BestFitResult> results)
    => _cache.TryAdd(new CacheKey(drawing, plateWidth, plateHeight, spacing), results);
/// <summary>
/// Returns every cached result list that belongs to <paramref name="drawing"/>,
/// keyed by plate width, plate height and spacing. Drawings are matched by
/// reference, and the returned lists are the cached instances, not copies.
/// </summary>
public static Dictionary<(double PlateWidth, double PlateHeight, double Spacing), List<BestFitResult>>
    GetAllForDrawing(Drawing drawing)
{
    var matches = new Dictionary<(double, double, double), List<BestFitResult>>();

    foreach (var entry in _cache)
    {
        var cacheKey = entry.Key;

        if (!ReferenceEquals(cacheKey.Drawing, drawing))
            continue;

        var lookup = (cacheKey.PlateWidth, cacheKey.PlateHeight, cacheKey.Spacing);
        matches[lookup] = entry.Value;
    }

    return matches;
}
public static void Clear()
{
_cache.Clear();

View File

@@ -122,5 +122,32 @@ namespace OpenNest.IO
public double X { get; init; }
public double Y { get; init; }
}
/// <summary>
/// Serialization shape for one cached best-fit set: the plate width, plate
/// height and spacing the set was cached under, plus its results.
/// </summary>
public record BestFitSetDto
{
/// <summary>Plate width component of the cache key.</summary>
public double PlateWidth { get; init; }
/// <summary>Plate height component of the cache key.</summary>
public double PlateHeight { get; init; }
/// <summary>Spacing component of the cache key.</summary>
public double Spacing { get; init; }
/// <summary>Results belonging to this set; defaults to an empty list.</summary>
public List<BestFitResultDto> Results { get; init; } = new();
}
/// <summary>
/// Serialization shape for a single best-fit result. The pair-candidate
/// values (rotations, second-part offset, strategy, test number, spacing)
/// are flattened into this record next to the result's own measurements.
/// </summary>
public record BestFitResultDto
{
/// <summary>Candidate's rotation of the first part.</summary>
public double Part1Rotation { get; init; }
/// <summary>Candidate's rotation of the second part.</summary>
public double Part2Rotation { get; init; }
/// <summary>X component of the candidate's second-part offset.</summary>
public double Part2OffsetX { get; init; }
/// <summary>Y component of the candidate's second-part offset.</summary>
public double Part2OffsetY { get; init; }
/// <summary>Candidate's strategy type, stored as its integer value.</summary>
public int StrategyType { get; init; }
/// <summary>Candidate's test number.</summary>
public int TestNumber { get; init; }
/// <summary>Spacing recorded on the candidate itself (distinct from the set-level spacing).</summary>
public double CandidateSpacing { get; init; }
/// <summary>Result's rotated area value.</summary>
public double RotatedArea { get; init; }
/// <summary>Result's bounding width value.</summary>
public double BoundingWidth { get; init; }
/// <summary>Result's bounding height value.</summary>
public double BoundingHeight { get; init; }
/// <summary>Result's optimal rotation value.</summary>
public double OptimalRotation { get; init; }
/// <summary>Whether the result was marked to be kept.</summary>
public bool Keep { get; init; }
/// <summary>Reason text attached to the result; defaults to an empty string.</summary>
public string Reason { get; init; } = "";
/// <summary>Result's true area value.</summary>
public double TrueArea { get; init; }
/// <summary>Result's hull angles; defaults to an empty list.</summary>
public List<double> HullAngles { get; init; } = new();
}
}
}

View File

@@ -6,6 +6,7 @@ using System.IO.Compression;
using System.Linq;
using System.Text.Json;
using OpenNest.CNC;
using OpenNest.Engine.BestFit;
using OpenNest.Geometry;
using static OpenNest.IO.NestFormat;
@@ -35,6 +36,7 @@ namespace OpenNest.IO
var programs = ReadPrograms(dto.Drawings.Count);
var drawingMap = BuildDrawings(dto, programs);
ReadBestFits(drawingMap);
var nest = BuildNest(dto, drawingMap);
zipArchive.Dispose();
@@ -97,6 +99,54 @@ namespace OpenNest.IO
return map;
}
/// <summary>
/// Loads the optional <c>bestfits/bestfit-{key}</c> archive entry for each
/// drawing in <paramref name="drawingMap"/> and feeds the deserialized sets to
/// <see cref="PopulateBestFitSets"/>. Drawings without an entry, or whose
/// entry deserializes to null, are skipped.
/// </summary>
private void ReadBestFits(Dictionary<int, Drawing> drawingMap)
{
    foreach (var pair in drawingMap)
    {
        var archiveEntry = zipArchive.GetEntry($"bestfits/bestfit-{pair.Key}");

        if (archiveEntry == null)
            continue;

        // Pull the whole entry into memory as text before parsing it.
        string payload;

        using (var input = archiveEntry.Open())
        using (var textReader = new StreamReader(input))
        {
            payload = textReader.ReadToEnd();
        }

        var bestFitSets = JsonSerializer.Deserialize<List<BestFitSetDto>>(payload, JsonOptions);

        if (bestFitSets != null)
            PopulateBestFitSets(pair.Value, bestFitSets);
    }
}
/// <summary>
/// Rebuilds <see cref="BestFitResult"/> objects from the stored sets and hands
/// each set to <see cref="BestFitCache.Populate"/> under its plate width,
/// plate height and spacing. Every rebuilt candidate points at
/// <paramref name="drawing"/>.
/// </summary>
private void PopulateBestFitSets(Drawing drawing, List<BestFitSetDto> sets)
{
    // Turns one stored record back into a result bound to this drawing.
    BestFitResult Restore(BestFitResultDto stored)
    {
        var candidate = new PairCandidate
        {
            Drawing = drawing,
            Part1Rotation = stored.Part1Rotation,
            Part2Rotation = stored.Part2Rotation,
            Part2Offset = new Vector(stored.Part2OffsetX, stored.Part2OffsetY),
            StrategyType = stored.StrategyType,
            TestNumber = stored.TestNumber,
            Spacing = stored.CandidateSpacing
        };

        return new BestFitResult
        {
            Candidate = candidate,
            RotatedArea = stored.RotatedArea,
            BoundingWidth = stored.BoundingWidth,
            BoundingHeight = stored.BoundingHeight,
            OptimalRotation = stored.OptimalRotation,
            Keep = stored.Keep,
            Reason = stored.Reason,
            TrueArea = stored.TrueArea,
            HullAngles = stored.HullAngles
        };
    }

    foreach (var storedSet in sets)
    {
        var restored = storedSet.Results.Select(stored => Restore(stored)).ToList();

        BestFitCache.Populate(
            drawing, storedSet.PlateWidth, storedSet.PlateHeight, storedSet.Spacing, restored);
    }
}
private Nest BuildNest(NestDto dto, Dictionary<int, Drawing> drawingMap)
{
var nest = new Nest();

View File

@@ -6,6 +6,8 @@ using System.Linq;
using System.Text;
using System.Text.Json;
using OpenNest.CNC;
using OpenNest.Engine.BestFit;
using OpenNest.Geometry;
using OpenNest.Math;
using static OpenNest.IO.NestFormat;
@@ -35,6 +37,7 @@ namespace OpenNest.IO
WriteNestJson(zipArchive);
WritePrograms(zipArchive);
WriteBestFits(zipArchive);
return true;
}
@@ -185,6 +188,70 @@ namespace OpenNest.IO
return list;
}
/// <summary>
/// Builds the serializable best-fit sets for <paramref name="drawing"/>.
/// Only cache entries whose plate width, plate height and spacing match a
/// plate in the nest are included, and within each entry only results with
/// <c>Keep</c> set are written.
/// </summary>
/// <returns>One set per matching cache entry; empty when nothing matches.</returns>
private List<BestFitSetDto> BuildBestFitDtos(Drawing drawing)
{
    var cachedSets = BestFitCache.GetAllForDrawing(drawing);
    var output = new List<BestFitSetDto>();

    // Width / length / spacing of every plate in this nest, used as the
    // whitelist for which cached sets get saved.
    var usedPlateKeys = new HashSet<(double, double, double)>();

    foreach (var plate in nest.Plates)
    {
        usedPlateKeys.Add((plate.Size.Width, plate.Size.Length, plate.PartSpacing));
    }

    // Flattens one result and its candidate into the stored record.
    BestFitResultDto ToDto(BestFitResult fit)
    {
        return new BestFitResultDto
        {
            Part1Rotation = fit.Candidate.Part1Rotation,
            Part2Rotation = fit.Candidate.Part2Rotation,
            Part2OffsetX = fit.Candidate.Part2Offset.X,
            Part2OffsetY = fit.Candidate.Part2Offset.Y,
            StrategyType = fit.Candidate.StrategyType,
            TestNumber = fit.Candidate.TestNumber,
            CandidateSpacing = fit.Candidate.Spacing,
            RotatedArea = fit.RotatedArea,
            BoundingWidth = fit.BoundingWidth,
            BoundingHeight = fit.BoundingHeight,
            OptimalRotation = fit.OptimalRotation,
            Keep = fit.Keep,
            Reason = fit.Reason ?? "",
            TrueArea = fit.TrueArea,
            HullAngles = fit.HullAngles ?? new List<double>()
        };
    }

    foreach (var cached in cachedSets)
    {
        var (plateWidth, plateHeight, spacing) = cached.Key;

        if (!usedPlateKeys.Contains((plateWidth, plateHeight, spacing)))
            continue;

        var kept = cached.Value
            .Where(fit => fit.Keep)
            .Select(fit => ToDto(fit))
            .ToList();

        output.Add(new BestFitSetDto
        {
            PlateWidth = plateWidth,
            PlateHeight = plateHeight,
            Spacing = spacing,
            Results = kept
        });
    }

    return output;
}
/// <summary>
/// Writes one <c>bestfits/bestfit-{key}</c> entry per drawing, in ascending
/// key order, containing that drawing's best-fit sets as JSON. Drawings with
/// no sets to save get no entry.
/// </summary>
private void WriteBestFits(ZipArchive zipArchive)
{
    foreach (var pair in drawingDict.OrderBy(d => d.Key))
    {
        var bestFitSets = BuildBestFitDtos(pair.Value);

        if (bestFitSets.Count == 0)
            continue;

        var payload = JsonSerializer.Serialize(bestFitSets, JsonOptions);
        var archiveEntry = zipArchive.CreateEntry($"bestfits/bestfit-{pair.Key}");

        // Close the entry before moving on to the next drawing.
        using (var output = archiveEntry.Open())
        using (var textWriter = new StreamWriter(output, Encoding.UTF8))
        {
            textWriter.Write(payload);
        }
    }
}
private void WritePrograms(ZipArchive zipArchive)
{
foreach (var kvp in drawingDict.OrderBy(k => k.Key))

View File

@@ -6,6 +6,7 @@
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\OpenNest.Core\OpenNest.Core.csproj" />
<ProjectReference Include="..\OpenNest.Engine\OpenNest.Engine.csproj" />
<PackageReference Include="ACadSharp" Version="3.1.32" />
</ItemGroup>
</Project>

10
collect-training-data.ps1 Normal file
View File

@@ -0,0 +1,10 @@
<#
.SYNOPSIS
Runs OpenNest.Console in training-data collection mode over a directory of DXF files.

.PARAMETER DxfDir
Directory passed to --collect.

.PARAMETER DbPath
Database file passed to --db. Defaults to test-training.db next to this script.

.PARAMETER SaveDir
Directory passed to --save-nests. Defaults to X:\.

.PARAMETER Template
Template nest file passed to --template. Defaults to X:\Template.nstdot.
#>
param(
    [Parameter(Mandatory, Position = 0)]
    [string]$DxfDir,

    # The remaining values used to be hard-coded; the defaults keep the
    # original behavior when the script is called with only the DXF directory.
    [string]$DbPath = (Join-Path $PSScriptRoot 'test-training.db'),

    [string]$SaveDir = 'X:\',

    [string]$Template = 'X:\Template.nstdot'
)

dotnet run --project (Join-Path $PSScriptRoot 'OpenNest.Console') -- --collect $DxfDir --db $DbPath --save-nests $SaveDir --template $Template