diff --git a/OpenNest.Engine/Models/.gitkeep b/OpenNest.Engine/Models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/OpenNest.Training/notebooks/requirements.txt b/OpenNest.Training/notebooks/requirements.txt new file mode 100644 index 0000000..840594e --- /dev/null +++ b/OpenNest.Training/notebooks/requirements.txt @@ -0,0 +1,7 @@ +pandas>=2.0 +scikit-learn>=1.3 +xgboost>=2.0 +onnxmltools>=1.12 +skl2onnx>=1.16 +matplotlib>=3.7 +jupyter>=1.0 diff --git a/OpenNest.Training/notebooks/train_angle_model.ipynb b/OpenNest.Training/notebooks/train_angle_model.ipynb new file mode 100644 index 0000000..75c1b7c --- /dev/null +++ b/OpenNest.Training/notebooks/train_angle_model.ipynb @@ -0,0 +1,264 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.0" + } + }, + "cells": [ + { + "cell_type": "markdown", + "id": "a1b2c3d4-0001-0000-0000-000000000001", + "metadata": {}, + "source": [ + "# Angle Prediction Model Training\n", + "Trains an XGBoost multi-label classifier to predict which rotation angles are competitive for a given part geometry and sheet size.\n", + "\n", + "**Input:** SQLite database from OpenNest.Training data collection runs\n", + "**Output:** `angle_predictor.onnx` model file for `OpenNest.Engine/Models/`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4-0002-0000-0000-000000000002", + "metadata": {}, + "outputs": [], + "source": [ + "import sqlite3\n", + "import pandas as pd\n", + "import numpy as np\n", + "from pathlib import Path\n", + "\n", + "DB_PATH = \"../OpenNestTraining.db\" # Adjust to your database location\n", + "OUTPUT_PATH = \"../../OpenNest.Engine/Models/angle_predictor.onnx\"\n", + "COMPETITIVE_THRESHOLD = 0.95 # Angle is \"competitive\" if >= 95% of best" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"a1b2c3d4-0003-0000-0000-000000000003", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract training data from SQLite\n", + "conn = sqlite3.connect(DB_PATH)\n", + "\n", + "query = \"\"\"\n", + "SELECT\n", + " p.Area, p.Convexity, p.AspectRatio, p.BBFill, p.Circularity,\n", + " p.PerimeterToAreaRatio, p.VertexCount,\n", + " r.SheetWidth, r.SheetHeight, r.Id as RunId,\n", + " a.AngleDeg, a.Direction, a.PartCount\n", + "FROM AngleResults a\n", + "JOIN Runs r ON a.RunId = r.Id\n", + "JOIN Parts p ON r.PartId = p.Id\n", + "WHERE a.PartCount > 0\n", + "\"\"\"\n", + "\n", + "df = pd.read_sql_query(query, conn)\n", + "conn.close()\n", + "\n", + "print(f\"Loaded {len(df)} angle result rows\")\n", + "print(f\"Unique runs: {df['RunId'].nunique()}\")\n", + "print(f\"Angle range: {df['AngleDeg'].min()}-{df['AngleDeg'].max()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4-0004-0000-0000-000000000004", + "metadata": {}, + "outputs": [], + "source": [ + "# For each run, find best PartCount (max of H and V per angle),\n", + "# then label angles within 95% of best as positive.\n", + "\n", + "# Best count per angle per run (max of H and V)\n", + "angle_best = df.groupby(['RunId', 'AngleDeg'])['PartCount'].max().reset_index()\n", + "angle_best.columns = ['RunId', 'AngleDeg', 'BestCount']\n", + "\n", + "# Best count per run (overall best angle)\n", + "run_best = angle_best.groupby('RunId')['BestCount'].max().reset_index()\n", + "run_best.columns = ['RunId', 'RunBest']\n", + "\n", + "# Merge and compute labels\n", + "labels = angle_best.merge(run_best, on='RunId')\n", + "labels['IsCompetitive'] = (labels['BestCount'] >= labels['RunBest'] * COMPETITIVE_THRESHOLD).astype(int)\n", + "\n", + "# Pivot to 36-column binary label matrix\n", + "label_matrix = labels.pivot_table(\n", + " index='RunId', columns='AngleDeg', values='IsCompetitive', fill_value=0\n", + ")\n", + "\n", + "# Ensure all 36 angle columns exist (0, 5, 10, ..., 175)\n", + 
"all_angles = [i * 5 for i in range(36)]\n", + "for a in all_angles:\n", + " if a not in label_matrix.columns:\n", + " label_matrix[a] = 0\n", + "label_matrix = label_matrix[all_angles]\n", + "\n", + "print(f\"Label matrix: {label_matrix.shape}\")\n", + "print(f\"Average competitive angles per run: {label_matrix.sum(axis=1).mean():.1f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4-0005-0000-0000-000000000005", + "metadata": {}, + "outputs": [], + "source": [ + "# Build feature matrix - one row per run\n", + "features_query = \"\"\"\n", + "SELECT DISTINCT\n", + " r.Id as RunId, p.FileName,\n", + " p.Area, p.Convexity, p.AspectRatio, p.BBFill, p.Circularity,\n", + " p.PerimeterToAreaRatio, p.VertexCount,\n", + " r.SheetWidth, r.SheetHeight\n", + "FROM Runs r\n", + "JOIN Parts p ON r.PartId = p.Id\n", + "WHERE r.Id IN ({})\n", + "\"\"\".format(','.join(str(x) for x in label_matrix.index))\n", + "\n", + "conn = sqlite3.connect(DB_PATH)\n", + "features_df = pd.read_sql_query(features_query, conn)\n", + "conn.close()\n", + "\n", + "features_df = features_df.set_index('RunId')\n", + "\n", + "# Derived features\n", + "features_df['SheetAspectRatio'] = features_df['SheetWidth'] / features_df['SheetHeight']\n", + "features_df['PartToSheetAreaRatio'] = features_df['Area'] / (features_df['SheetWidth'] * features_df['SheetHeight'])\n", + "\n", + "# Filter outliers (title blocks, etc.)\n", + "mask = (features_df['BBFill'] >= 0.01) & (features_df['Area'] > 0.1)\n", + "print(f\"Filtering: {(~mask).sum()} outlier runs removed\")\n", + "features_df = features_df[mask]\n", + "label_matrix = label_matrix.loc[features_df.index]\n", + "\n", + "feature_cols = ['Area', 'Convexity', 'AspectRatio', 'BBFill', 'Circularity',\n", + " 'PerimeterToAreaRatio', 'VertexCount',\n", + " 'SheetWidth', 'SheetHeight', 'SheetAspectRatio', 'PartToSheetAreaRatio']\n", + "\n", + "X = features_df[feature_cols].values\n", + "y = label_matrix.values\n", + "\n", + 
"print(f\"Features: {X.shape}, Labels: {y.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4-0006-0000-0000-000000000006", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import GroupShuffleSplit\n", + "from sklearn.multioutput import MultiOutputClassifier\n", + "import xgboost as xgb\n", + "\n", + "# Split by part (all sheet sizes for a part stay in the same split)\n", + "groups = features_df['FileName']\n", + "splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n", + "train_idx, test_idx = next(splitter.split(X, y, groups))\n", + "\n", + "X_train, X_test = X[train_idx], X[test_idx]\n", + "y_train, y_test = y[train_idx], y[test_idx]\n", + "\n", + "print(f\"Train: {len(train_idx)}, Test: {len(test_idx)}\")\n", + "\n", + "# Train XGBoost multi-label classifier\n", + "base_clf = xgb.XGBClassifier(\n", + " n_estimators=200,\n", + " max_depth=6,\n", + " learning_rate=0.1,\n", + " use_label_encoder=False,\n", + " eval_metric='logloss',\n", + " random_state=42\n", + ")\n", + "\n", + "clf = MultiOutputClassifier(base_clf, n_jobs=-1)\n", + "clf.fit(X_train, y_train)\n", + "print(\"Training complete\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1b2c3d4-0007-0000-0000-000000000007", + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import recall_score, precision_score\n", + "import matplotlib.pyplot as plt\n", + "\n", + "y_pred = clf.predict(X_test)\n", + "y_prob = np.array([est.predict_proba(X_test)[:, 1] for est in clf.estimators_]).T\n", + "\n", + "# Per-angle metrics\n", + "recalls = []\n", + "precisions = []\n", + "for i in range(36):\n", + " if y_test[:, i].sum() > 0:\n", + " recalls.append(recall_score(y_test[:, i], y_pred[:, i], zero_division=0))\n", + " precisions.append(precision_score(y_test[:, i], y_pred[:, i], zero_division=0))\n", + "\n", + "print(f\"Mean recall: {np.mean(recalls):.3f}\")\n", + "print(f\"Mean 
precision: {np.mean(precisions):.3f}\")\n",
+    "\n",
+    "# Average angles predicted per run\n",
+    "avg_predicted = y_pred.sum(axis=1).mean()\n",
+    "print(f\"Avg angles predicted per run: {avg_predicted:.1f}\")\n",
+    "\n",
+    "# Plot\n",
+    "fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
+    "axes[0].bar(range(len(recalls)), recalls)\n",
+    "axes[0].set_title('Recall per Angle Bin')\n",
+    "axes[0].set_xlabel('Angle (5-deg bins)')\n",
+    "axes[0].axhline(y=0.95, color='r', linestyle='--', label='Target 95%')\n",
+    "axes[0].legend()\n",
+    "\n",
+    "axes[1].bar(range(len(precisions)), precisions)\n",
+    "axes[1].set_title('Precision per Angle Bin')\n",
+    "axes[1].set_xlabel('Angle (5-deg bins)')\n",
+    "axes[1].axhline(y=0.60, color='r', linestyle='--', label='Target 60%')\n",
+    "axes[1].legend()\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a1b2c3d4-0008-0000-0000-000000000008",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from skl2onnx import convert_sklearn, update_registered_converter\n",
+    "from skl2onnx.common.data_types import FloatTensorType\n",
+    "from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes\n",
+    "from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost\n",
+    "import xgboost as xgb\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# skl2onnx has no built-in converter for XGBoost models; register the\n",
+    "# onnxmltools converter first or convert_sklearn(clf) fails on the\n",
+    "# XGBClassifier estimators inside MultiOutputClassifier.\n",
+    "# (This is why onnxmltools is in requirements.txt.)\n",
+    "update_registered_converter(\n",
+    "    xgb.XGBClassifier, 'XGBoostXGBClassifier',\n",
+    "    calculate_linear_classifier_output_shapes, convert_xgboost,\n",
+    "    options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}\n",
+    ")\n",
+    "\n",
+    "initial_type = [('features', FloatTensorType([None, 11]))]\n",
+    "onnx_model = convert_sklearn(clf, initial_types=initial_type)\n",
+    "\n",
+    "output_path = Path(OUTPUT_PATH)\n",
+    "output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "with open(output_path, 'wb') as f:\n",
+    "    f.write(onnx_model.SerializeToString())\n",
+    "\n",
+    "print(f\"Model saved to {output_path} ({output_path.stat().st_size / 1024:.0f} KB)\")"
+   ]
+  }
+ ]
+}