{
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1b2c3d4-0001-0000-0000-000000000001",
   "metadata": {},
   "source": [
    "# Angle Prediction Model Training\n",
    "Trains an XGBoost multi-label classifier to predict which rotation angles are competitive for a given part geometry and sheet size.\n",
    "\n",
    "**Input:** SQLite database from OpenNest.Training data collection runs\n",
    "**Output:** `angle_predictor.onnx` model file for `OpenNest.Engine/Models/`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0002-0000-0000-000000000002",
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports in one cell so a fresh-kernel Run-All fails fast on a missing dependency.\n",
    "import sqlite3\n",
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xgboost as xgb\n",
    "from skl2onnx import convert_sklearn\n",
    "from skl2onnx.common.data_types import FloatTensorType\n",
    "from sklearn.metrics import precision_score, recall_score\n",
    "from sklearn.model_selection import GroupShuffleSplit\n",
    "from sklearn.multioutput import MultiOutputClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0002-0000-0000-000000000012",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration -- everything a reader might tune lives here.\n",
    "DB_PATH = \"../OpenNestTraining.db\"  # Adjust to your database location\n",
    "OUTPUT_PATH = \"../../OpenNest.Engine/Models/angle_predictor.onnx\"\n",
    "COMPETITIVE_THRESHOLD = 0.95  # Angle is \"competitive\" if >= 95% of best\n",
    "ANGLE_STEP_DEG = 5            # Rotation grid resolution in degrees\n",
    "N_ANGLES = 36                 # Angle bins: 0, 5, ..., 175 degrees\n",
    "SEED = 42                     # Shared random seed for split + model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0003-0000-0000-000000000003",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract training data from SQLite. Each row is one (run, angle, direction)\n",
    "# placement result; PartCount == 0 rows carry no ranking signal and are\n",
    "# excluded up front (missing angles become non-competitive labels later).\n",
    "conn = sqlite3.connect(DB_PATH)\n",
    "\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    p.Area, p.Convexity, p.AspectRatio, p.BBFill, p.Circularity,\n",
    "    p.PerimeterToAreaRatio, p.VertexCount,\n",
    "    r.SheetWidth, r.SheetHeight, r.Id as RunId,\n",
    "    a.AngleDeg, a.Direction, a.PartCount\n",
    "FROM AngleResults a\n",
    "JOIN Runs r ON a.RunId = r.Id\n",
    "JOIN Parts p ON r.PartId = p.Id\n",
    "WHERE a.PartCount > 0\n",
    "\"\"\"\n",
    "\n",
    "df = pd.read_sql_query(query, conn)\n",
    "conn.close()\n",
    "\n",
    "print(f\"Loaded {len(df)} angle result rows\")\n",
    "print(f\"Unique runs: {df['RunId'].nunique()}\")\n",
    "print(f\"Angle range: {df['AngleDeg'].min()}-{df['AngleDeg'].max()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0004-0000-0000-000000000004",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label construction: an angle is \"competitive\" for a run when its best\n",
    "# PartCount (max over H/V directions) reaches COMPETITIVE_THRESHOLD of the\n",
    "# run's overall best angle.\n",
    "\n",
    "# Best count per angle per run (max of H and V)\n",
    "angle_best = (\n",
    "    df.groupby(['RunId', 'AngleDeg'])['PartCount'].max()\n",
    "      .rename('BestCount').reset_index()\n",
    ")\n",
    "\n",
    "# Best count per run (overall best angle)\n",
    "run_best = (\n",
    "    angle_best.groupby('RunId')['BestCount'].max()\n",
    "              .rename('RunBest').reset_index()\n",
    ")\n",
    "\n",
    "# Merge and compute binary labels\n",
    "labels = angle_best.merge(run_best, on='RunId')\n",
    "labels['IsCompetitive'] = (\n",
    "    labels['BestCount'] >= labels['RunBest'] * COMPETITIVE_THRESHOLD\n",
    ").astype(int)\n",
    "\n",
    "# Pivot to a runs x angles binary label matrix\n",
    "label_matrix = labels.pivot_table(\n",
    "    index='RunId', columns='AngleDeg', values='IsCompetitive', fill_value=0\n",
    ")\n",
    "\n",
    "# Reindex so all angle bins (0, 5, ..., 175) exist in fixed column order;\n",
    "# angles with no surviving results (all PartCount == 0) become 0-labels.\n",
    "all_angles = [i * ANGLE_STEP_DEG for i in range(N_ANGLES)]\n",
    "label_matrix = label_matrix.reindex(columns=all_angles, fill_value=0)\n",
    "\n",
    "print(f\"Label matrix: {label_matrix.shape}\")\n",
    "print(f\"Average competitive angles per run: {label_matrix.sum(axis=1).mean():.1f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0005-0000-0000-000000000005",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build feature matrix -- one row per run. The run ids interpolated into the\n",
    "# IN (...) clause come from our own integer label_matrix index (coerced via\n",
    "# int() as a belt-and-braces check); parameterize instead if ids ever come\n",
    "# from an external source.\n",
    "features_query = \"\"\"\n",
    "SELECT DISTINCT\n",
    "    r.Id as RunId, p.FileName,\n",
    "    p.Area, p.Convexity, p.AspectRatio, p.BBFill, p.Circularity,\n",
    "    p.PerimeterToAreaRatio, p.VertexCount,\n",
    "    r.SheetWidth, r.SheetHeight\n",
    "FROM Runs r\n",
    "JOIN Parts p ON r.PartId = p.Id\n",
    "WHERE r.Id IN ({})\n",
    "\"\"\".format(','.join(str(int(x)) for x in label_matrix.index))\n",
    "\n",
    "conn = sqlite3.connect(DB_PATH)\n",
    "features_df = pd.read_sql_query(features_query, conn)\n",
    "conn.close()\n",
    "\n",
    "features_df = features_df.set_index('RunId')\n",
    "\n",
    "# Guard against degenerate geometry before deriving ratio features\n",
    "# (division by a zero sheet dimension would silently produce inf).\n",
    "assert (features_df[['SheetWidth', 'SheetHeight']] > 0).all().all(), \\\n",
    "    \"non-positive sheet dimensions in Runs table\"\n",
    "\n",
    "# Derived features\n",
    "features_df['SheetAspectRatio'] = features_df['SheetWidth'] / features_df['SheetHeight']\n",
    "features_df['PartToSheetAreaRatio'] = features_df['Area'] / (features_df['SheetWidth'] * features_df['SheetHeight'])\n",
    "\n",
    "# Filter outliers (title blocks, etc.)\n",
    "mask = (features_df['BBFill'] >= 0.01) & (features_df['Area'] > 0.1)\n",
    "print(f\"Filtering: {(~mask).sum()} outlier runs removed\")\n",
    "features_df = features_df[mask]\n",
    "label_matrix = label_matrix.loc[features_df.index]\n",
    "\n",
    "feature_cols = ['Area', 'Convexity', 'AspectRatio', 'BBFill', 'Circularity',\n",
    "                'PerimeterToAreaRatio', 'VertexCount',\n",
    "                'SheetWidth', 'SheetHeight', 'SheetAspectRatio', 'PartToSheetAreaRatio']\n",
    "\n",
    "X = features_df[feature_cols].values\n",
    "y = label_matrix.values\n",
    "\n",
    "print(f\"Features: {X.shape}, Labels: {y.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0006-0000-0000-000000000006",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split by part file so every sheet size for a given part lands in the same\n",
    "# split -- prevents leakage of part geometry across train/test.\n",
    "groups = features_df['FileName']\n",
    "splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=SEED)\n",
    "train_idx, test_idx = next(splitter.split(X, y, groups))\n",
    "\n",
    "X_train, X_test = X[train_idx], X[test_idx]\n",
    "y_train, y_test = y[train_idx], y[test_idx]\n",
    "\n",
    "print(f\"Train: {len(train_idx)}, Test: {len(test_idx)}\")\n",
    "\n",
    "# One independent binary XGBoost classifier per angle bin.\n",
    "# NOTE: `use_label_encoder` was deprecated in XGBoost 1.3 and removed in 2.0;\n",
    "# it must not be passed on current versions.\n",
    "base_clf = xgb.XGBClassifier(\n",
    "    n_estimators=200,\n",
    "    max_depth=6,\n",
    "    learning_rate=0.1,\n",
    "    eval_metric='logloss',\n",
    "    random_state=SEED\n",
    ")\n",
    "\n",
    "# NOTE(review): an angle column that is all-0 or all-1 in the training split\n",
    "# yields a degenerate per-angle estimator -- check label balance if fit()\n",
    "# complains about a single class.\n",
    "clf = MultiOutputClassifier(base_clf, n_jobs=-1)\n",
    "clf.fit(X_train, y_train)\n",
    "print(\"Training complete\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0007-0000-0000-000000000007",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate per-angle recall/precision on the held-out split.\n",
    "y_pred = clf.predict(X_test)\n",
    "\n",
    "# Track WHICH bins were evaluable so the bar charts line up with real angle\n",
    "# bins. (Previously the bars were plotted against compacted indices, which\n",
    "# mislabelled every bin after a skipped one.)\n",
    "eval_bins = []\n",
    "recalls = []\n",
    "precisions = []\n",
    "for i in range(N_ANGLES):\n",
    "    if y_test[:, i].sum() > 0:\n",
    "        eval_bins.append(i)\n",
    "        recalls.append(recall_score(y_test[:, i], y_pred[:, i], zero_division=0))\n",
    "        precisions.append(precision_score(y_test[:, i], y_pred[:, i], zero_division=0))\n",
    "\n",
    "print(f\"Mean recall: {np.mean(recalls):.3f}\")\n",
    "print(f\"Mean precision: {np.mean(precisions):.3f}\")\n",
    "\n",
    "# Average angles predicted per run\n",
    "avg_predicted = y_pred.sum(axis=1).mean()\n",
    "print(f\"Avg angles predicted per run: {avg_predicted:.1f}\")\n",
    "\n",
    "# Plot\n",
    "fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
    "axes[0].bar(eval_bins, recalls)\n",
    "axes[0].set_title('Recall per Angle Bin')\n",
    "axes[0].set_xlabel('Angle (5-deg bins)')\n",
    "axes[0].axhline(y=0.95, color='r', linestyle='--', label='Target 95%')\n",
    "axes[0].legend()\n",
    "\n",
    "axes[1].bar(eval_bins, precisions)\n",
    "axes[1].set_title('Precision per Angle Bin')\n",
    "axes[1].set_xlabel('Angle (5-deg bins)')\n",
    "axes[1].axhline(y=0.60, color='r', linestyle='--', label='Target 60%')\n",
    "axes[1].legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1b2c3d4-0008-0000-0000-000000000008",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export to ONNX. skl2onnx has no built-in converter for XGBoost models:\n",
    "# without registering onnxmltools' XGBoost converter first, convert_sklearn\n",
    "# raises MissingShapeCalculator for the wrapped XGBClassifier estimators.\n",
    "# onnxmltools is imported here (not in the top import cell) because it is\n",
    "# only needed for this export step.\n",
    "from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost\n",
    "from skl2onnx import update_registered_converter\n",
    "from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes\n",
    "\n",
    "update_registered_converter(\n",
    "    xgb.XGBClassifier, 'XGBoostXGBClassifier',\n",
    "    calculate_linear_classifier_output_shapes, convert_xgboost,\n",
    "    options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}\n",
    ")\n",
    "\n",
    "# Input width derived from feature_cols so feature changes can't desync the\n",
    "# ONNX signature. NOTE(review): confirm the output tensor layout matches what\n",
    "# OpenNest.Engine's ONNX runtime consumer expects.\n",
    "initial_type = [('features', FloatTensorType([None, len(feature_cols)]))]\n",
    "onnx_model = convert_sklearn(clf, initial_types=initial_type)\n",
    "\n",
    "output_path = Path(OUTPUT_PATH)\n",
    "output_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "output_path.write_bytes(onnx_model.SerializeToString())\n",
    "\n",
    "print(f\"Model saved to {output_path} ({output_path.stat().st_size / 1024:.0f} KB)\")"
   ]
  }
 ]
}