265 lines
9.2 KiB
Plaintext
265 lines
9.2 KiB
Plaintext
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5,
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python",
|
|
"version": "3.11.0"
|
|
}
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a1b2c3d4-0001-0000-0000-000000000001",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Angle Prediction Model Training\n",
|
|
"Trains an XGBoost multi-label classifier to predict which rotation angles are competitive for a given part geometry and sheet size.\n",
|
|
"\n",
|
|
"**Input:** SQLite database from OpenNest.Training data collection runs\n",
|
|
"**Output:** `angle_predictor.onnx` model file for `OpenNest.Engine/Models/`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0002-0000-0000-000000000002",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
# Configuration: all imports, paths, and tunable constants for the notebook.
import sqlite3
import pandas as pd
import numpy as np
from pathlib import Path

# SQLite database produced by OpenNest.Training data-collection runs.
DB_PATH = Path("../OpenNestTraining.db")  # Adjust to your database location
# Destination for the exported ONNX model consumed by OpenNest.Engine.
OUTPUT_PATH = Path("../../OpenNest.Engine/Models/angle_predictor.onnx")
# An angle is labeled "competitive" when its best part count reaches at
# least this fraction of the run's overall best part count.
COMPETITIVE_THRESHOLD = 0.95
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0003-0000-0000-000000000003",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
# Extract training data from SQLite: one row per (run, angle, direction)
# with the part's geometry features attached.
from contextlib import closing

query = """
SELECT
  p.Area, p.Convexity, p.AspectRatio, p.BBFill, p.Circularity,
  p.PerimeterToAreaRatio, p.VertexCount,
  r.SheetWidth, r.SheetHeight, r.Id as RunId,
  a.AngleDeg, a.Direction, a.PartCount
FROM AngleResults a
JOIN Runs r ON a.RunId = r.Id
JOIN Parts p ON r.PartId = p.Id
WHERE a.PartCount > 0
"""

# closing() releases the connection even if the read raises; the original
# bare connect/close pair leaked the handle on any query error.
with closing(sqlite3.connect(DB_PATH)) as conn:
    df = pd.read_sql_query(query, conn)

# Fail fast with a clear message instead of cryptic downstream KeyErrors.
if df.empty:
    raise ValueError(f"No angle results found in {DB_PATH} - run data collection first")

print(f"Loaded {len(df)} angle result rows")
print(f"Unique runs: {df['RunId'].nunique()}")
print(f"Angle range: {df['AngleDeg'].min()}-{df['AngleDeg'].max()}")
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0004-0000-0000-000000000004",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
# Labeling: for each run, find the best PartCount per angle (max over the
# H and V directions), then mark angles within COMPETITIVE_THRESHOLD of
# the run's overall best as positive.

# Best count achieved at each (run, angle), taking the better direction.
angle_best = (
    df.groupby(['RunId', 'AngleDeg'], as_index=False)['PartCount']
      .max()
      .rename(columns={'PartCount': 'BestCount'})
)

# Overall best count for each run (the count at its best angle).
run_best = (
    angle_best.groupby('RunId', as_index=False)['BestCount']
              .max()
              .rename(columns={'BestCount': 'RunBest'})
)

# Join per-angle bests to the run best and flag competitive angles.
labels = angle_best.merge(run_best, on='RunId')
labels['IsCompetitive'] = (
    labels['BestCount'] >= labels['RunBest'] * COMPETITIVE_THRESHOLD
).astype(int)

# One row per run, one binary column per 5-degree angle bin.
label_matrix = labels.pivot_table(
    index='RunId', columns='AngleDeg', values='IsCompetitive', fill_value=0
)

# Guarantee the full 36-column grid (0, 5, ..., 175) in fixed order, even
# if some angles never appear in the data.
all_angles = list(range(0, 180, 5))
label_matrix = label_matrix.reindex(columns=all_angles, fill_value=0)

print(f"Label matrix: {label_matrix.shape}")
print(f"Average competitive angles per run: {label_matrix.sum(axis=1).mean():.1f}")
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0005-0000-0000-000000000005",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
# Build feature matrix - one row per run. Use a parameterized IN clause
# instead of string-formatting run ids into the SQL text.
run_ids = [int(run_id) for run_id in label_matrix.index]  # sqlite3 rejects numpy ints
placeholders = ','.join('?' * len(run_ids))

features_query = f"""
SELECT DISTINCT
  r.Id as RunId, p.FileName,
  p.Area, p.Convexity, p.AspectRatio, p.BBFill, p.Circularity,
  p.PerimeterToAreaRatio, p.VertexCount,
  r.SheetWidth, r.SheetHeight
FROM Runs r
JOIN Parts p ON r.PartId = p.Id
WHERE r.Id IN ({placeholders})
"""

conn = sqlite3.connect(DB_PATH)
try:
    features_df = pd.read_sql_query(features_query, conn, params=run_ids)
finally:
    # Always release the handle, even if the query fails.
    conn.close()

features_df = features_df.set_index('RunId')

# Derived features: sheet geometry and part size relative to the sheet.
features_df['SheetAspectRatio'] = features_df['SheetWidth'] / features_df['SheetHeight']
features_df['PartToSheetAreaRatio'] = features_df['Area'] / (features_df['SheetWidth'] * features_df['SheetHeight'])

# Filter outliers (title blocks, etc.): degenerate or near-empty geometry.
mask = (features_df['BBFill'] >= 0.01) & (features_df['Area'] > 0.1)
print(f"Filtering: {(~mask).sum()} outlier runs removed")
features_df = features_df[mask]
# Keep labels aligned row-for-row with the surviving feature rows.
label_matrix = label_matrix.loc[features_df.index]

feature_cols = ['Area', 'Convexity', 'AspectRatio', 'BBFill', 'Circularity',
                'PerimeterToAreaRatio', 'VertexCount',
                'SheetWidth', 'SheetHeight', 'SheetAspectRatio', 'PartToSheetAreaRatio']

X = features_df[feature_cols].values
y = label_matrix.values

assert len(X) == len(y), "feature/label row count mismatch"
print(f"Features: {X.shape}, Labels: {y.shape}")
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0006-0000-0000-000000000006",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
from sklearn.model_selection import GroupShuffleSplit
from sklearn.multioutput import MultiOutputClassifier
import xgboost as xgb

# Split by part (all sheet sizes for a part stay in the same split) so the
# test set measures generalization to unseen part geometries, not just
# unseen sheet sizes for already-seen parts.
groups = features_df['FileName']
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(splitter.split(X, y, groups))

X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]

print(f"Train: {len(train_idx)}, Test: {len(test_idx)}")

# One binary XGBoost classifier per angle bin via MultiOutputClassifier.
# NOTE: use_label_encoder was deprecated in XGBoost 1.6 and removed in 2.0
# (passing it now raises a TypeError), so it is intentionally omitted.
base_clf = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=6,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=42
)

clf = MultiOutputClassifier(base_clf, n_jobs=-1)
clf.fit(X_train, y_train)
print("Training complete")
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0007-0000-0000-000000000007",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
from sklearn.metrics import recall_score, precision_score
import matplotlib.pyplot as plt

y_pred = clf.predict(X_test)
# (The original also computed per-angle predict_proba into y_prob, but it
# was never used anywhere downstream - dropped to avoid 36 dead calls.)

# Per-angle metrics. Angles with no positive test examples are skipped
# (recall is undefined there); track which angles WERE evaluated so the
# bars line up with real angle values instead of shifting left past gaps.
eval_angles = []
recalls = []
precisions = []
for i, angle in enumerate(all_angles):
    if y_test[:, i].sum() > 0:
        eval_angles.append(angle)
        recalls.append(recall_score(y_test[:, i], y_pred[:, i], zero_division=0))
        precisions.append(precision_score(y_test[:, i], y_pred[:, i], zero_division=0))

print(f"Mean recall: {np.mean(recalls):.3f}")
print(f"Mean precision: {np.mean(precisions):.3f}")

# Average angles predicted per run - how much search the model will save.
avg_predicted = y_pred.sum(axis=1).mean()
print(f"Avg angles predicted per run: {avg_predicted:.1f}")

# Plot recall/precision per angle bin against the actual angle in degrees.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
axes[0].bar(eval_angles, recalls, width=4)
axes[0].set_title('Recall per Angle Bin')
axes[0].set_xlabel('Angle (deg)')
axes[0].axhline(y=0.95, color='r', linestyle='--', label='Target 95%')
axes[0].legend()

axes[1].bar(eval_angles, precisions, width=4)
axes[1].set_title('Precision per Angle Bin')
axes[1].set_xlabel('Angle (deg)')
axes[1].axhline(y=0.60, color='r', linestyle='--', label='Target 60%')
axes[1].legend()

plt.tight_layout()
plt.show()
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "a1b2c3d4-0008-0000-0000-000000000008",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from pathlib import Path

# NOTE(review): skl2onnx has no built-in converter for xgb.XGBClassifier;
# if convert_sklearn fails with an "unsupported operator" error, register
# the XGBoost converter from onnxmltools via
# skl2onnx.update_registered_converter before converting - confirm against
# the installed skl2onnx/onnxmltools versions.

# Derive the input width from the trained matrix instead of hardcoding 11,
# so adding/removing a feature column cannot silently desync the model
# signature from the training data.
n_features = X.shape[1]
initial_type = [('features', FloatTensorType([None, n_features]))]
onnx_model = convert_sklearn(clf, initial_types=initial_type)

output_path = Path(OUTPUT_PATH)
output_path.parent.mkdir(parents=True, exist_ok=True)

output_path.write_bytes(onnx_model.SerializeToString())

print(f"Model saved to {output_path} ({output_path.stat().st_size / 1024:.0f} KB)")
|
|
]
|
|
}
|
|
]
|
|
}
|