# Pick the backend whose metrics report the highest PR-AUC. `results` is
# indexed by backend name; each entry carries a "metrics" dict and the
# "pipeline" object that gets persisted below.
best_backend, best = max(
    results.items(),
    key=lambda entry: entry[1]["metrics"]["pr_auc"],
)

# Persist the winning pipeline.
joblib.dump(best["pipeline"], PIPELINE_PATH)

# Describe the saved artifact: UTC timestamp, backend label, the column
# names of X_train, the numeric/categorical column lists and the threshold.
metadata = {
    "trained_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    "model_type": f"Calibrated({best_backend})",
    "backend": best_backend,
    "features": X_train.columns.tolist(),
    "numeric": numeric,
    "categorical": categorical,
    "threshold": DEFAULT_THRESHOLD,
}
# Metrics are flattened into the top level, rounded to 6 decimal places.
metadata.update(
    {metric: round(value, 6) for metric, value in best["metrics"].items()}
)

with open(METADATA_PATH, "w") as f:
    json.dump(metadata, f, indent=2)

# Echo what was written so the run log is self-describing.
metadata_json = json.dumps(metadata, indent=2)
print(f"Saved pipeline → {PIPELINE_PATH}")
print(f"Saved metadata → {METADATA_PATH}")
print(f"Best backend → {best_backend}")
print(f"\nMetadata:\n{metadata_json}")
Saved pipeline → artifacts/model.joblib
Saved metadata → artifacts/metadata.json
Best backend → xgboost
Metadata:
{
"trained_at": "2026-03-24T12:38:53Z",
"model_type": "Calibrated(xgboost)",
"backend": "xgboost",
"features": [
"year",
"loan_limit",
"Gender",
"approv_in_adv",
"loan_type",
"loan_purpose",
"Credit_Worthiness",
"open_credit",
"business_or_commercial",
"loan_amount",
"term",
"Neg_ammortization",
"interest_only",
"lump_sum_payment",
"construction_type",
"occupancy_type",
"Secured_by",
"total_units",
"income",
"credit_type",
"Credit_Score",
"co_applicant_credit_type",
"age",
"submission_of_application",
"Region",
"Security_Type",
"dtir1"
],
"numeric": [
"year",
"loan_amount",
"term",
"income",
"Credit_Score",
"dtir1"
],
"categorical": [
"loan_limit",
"Gender",
"approv_in_adv",
"loan_type",
"loan_purpose",
"Credit_Worthiness",
"open_credit",
"business_or_commercial",
"Neg_ammortization",
"interest_only",
"lump_sum_payment",
"construction_type",
"occupancy_type",
"Secured_by",
"total_units",
"credit_type",
"co_applicant_credit_type",
"age",
"submission_of_application",
"Region",
"Security_Type"
],
"threshold": 0.5,
"roc_auc": 0.882076,
"pr_auc": 0.822938,
"brier": 0.091336,
"f1": 0.730824,
"precision": 0.859277,
"recall": 0.635781,
"train_time_s": 24.8
}