.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_examples/export_figures.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_examples_export_figures.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_examples_export_figures.py:


Generate comparison PNGs for README.

.. GENERATED FROM PYTHON SOURCE LINES 2-298

.. code-block:: Python


    # ruff: noqa: ANN001, ANN201, D103
    import math
    import os
    import time

    import matplotlib

    # Select the non-interactive backend before pyplot is imported so the
    # script can run without a display.
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from minisom import MiniSom
    from sklearn.cluster import AgglomerativeClustering, KMeans, MiniBatchKMeans
    from sklearn.datasets import load_digits
    from sklearn.decomposition import PCA
    from sklearn.metrics import adjusted_rand_score, davies_bouldin_score, silhouette_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from susi import SOMClustering

    from dbgsom.SomVQ import SomVQ

    EXPORT_DIR = os.path.join(os.path.dirname(__file__), "export")
    os.makedirs(EXPORT_DIR, exist_ok=True)

    # ---------------------------------------------------------------------------
    # Shared data
    # ---------------------------------------------------------------------------

    digits = load_digits()
    X = StandardScaler().fit_transform(digits.data)
    y = digits.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    print(f"Digits: {X.shape}")

    # ---------------------------------------------------------------------------
    # Figure 1: clustering_metrics_digits.png
    # ---------------------------------------------------------------------------


    def compute_qe(X, labels):
        """Return the mean distance of each sample to its cluster centroid.

        The centroid of a cluster is the mean of the rows of ``X`` that carry
        that label; the result is the average Euclidean distance between every
        row and the centroid of its own label, as a Python ``float``.
        """
        # ``inverse[i]`` is the position of ``labels[i]`` in ``unique_labels``,
        # i.e. the row of ``centroids`` that sample ``i`` belongs to.
        unique_labels, inverse = np.unique(labels, return_inverse=True)
        centroids = np.array([X[labels == k].mean(axis=0) for k in unique_labels])
        return float(np.mean(np.linalg.norm(X - centroids[inverse], axis=1)))


    def build_row(X, y_true, labels, name, t, qe=None, te=None):
        """Build one row of the metrics table for a clustering result.

        Parameters
        ----------
        X, y_true, labels
            Samples, ground-truth classes and predicted cluster labels.
        name
            Value stored under ``"Algorithm"``.
        t
            Elapsed time in seconds, stored rounded to three decimals.
        qe
            Quantization error to report; when ``None`` it is derived from
            ``X`` and ``labels`` with :func:`compute_qe`.
        te
            Topographic error to report under ``"TE"``; ``None`` when the
            algorithm does not provide one.
        """
        return {
            "Algorithm": name,
            "Time (s)": round(t, 3),
            "ARI": round(adjusted_rand_score(y_true, labels), 3),
            "Silhouette": round(silhouette_score(X, labels), 3),
            "Davies-Bouldin": round(davies_bouldin_score(X, labels), 3),
            "QE": round(qe if qe is not None else compute_qe(X, labels), 4),
            # Previously accepted but dropped; kept in the table so the value
            # passed by the caller is not lost.
            "TE": None if te is None else round(te, 4),
        }


    print("Training clustering comparison models...")

    t0 = time.perf_counter()
    som_cc = SomVQ(n_iter=500, lambda_=15.8, max_neurons=100, random_state=42)
    som_cc.fit(X_train)
    t_som = time.perf_counter() - t0
    # The grown map decides how many clusters the reference algorithms get.
    n_clust = len(som_cc.neurons_)
    labels_som = som_cc.predict(X_test)

    t0 = time.perf_counter()
    km_cc = KMeans(n_clusters=n_clust, random_state=42, n_init=10)
    km_cc.fit(X_train)
    t_km = time.perf_counter() - t0
    labels_km = km_cc.predict(X_test)

    t0 = time.perf_counter()
    mbkm = MiniBatchKMeans(n_clusters=n_clust, random_state=42, n_init=10)
    mbkm.fit(X_train)
    t_mbkm = time.perf_counter() - t0
    labels_mbkm = mbkm.predict(X_test)

    # NOTE: unlike the models above, this one is fit directly on the test
    # split, so its time covers ``fit_predict`` on ``X_test`` rather than
    # training on ``X_train``.
    t0 = time.perf_counter()
    agg = AgglomerativeClustering(n_clusters=n_clust)
    labels_agg = agg.fit_predict(X_test)
    t_agg = time.perf_counter() - t0

    print(f" DBGSOM: {n_clust} neurons")

    rows = [
        build_row(
            X_test,
            y_test,
            labels_som,
            "DBGSOM",
            t_som,
            qe=som_cc.calculate_quantization_error(X_test),
            te=som_cc.topographic_error_,
        ),
        build_row(X_test, y_test, labels_km, "KMeans", t_km),
        build_row(X_test, y_test, labels_mbkm, "MiniBatchKMeans", t_mbkm),
        build_row(X_test, y_test, labels_agg, "AgglomerativeClustering", t_agg),
    ]
    df = pd.DataFrame(rows).set_index("Algorithm")

    n = len(df)
    colors = sns.color_palette("muted", n_colors=n)

    metric_specs = [
        ("ARI", "ARI (higher is better)"),
        ("Silhouette", "Silhouette (higher is better)"),
        ("Davies-Bouldin", "Davies-Bouldin (lower is better)"),
        ("Time (s)", "Training time in seconds (lower is better)"),
    ]

    fig, axes = plt.subplots(1, 4, figsize=(18, 5))
    fig.suptitle("Clustering metrics — Digits dataset", fontsize=13)

    for ax, (col, label) in zip(axes, metric_specs):
        vals = df[col].astype(float)
        bars = ax.bar(vals.index, vals.values, color=colors)
        ax.set_title(label, fontsize=10)
        ax.set_xticks(range(len(vals)))
        ax.set_xticklabels(vals.index, rotation=35, ha="right", fontsize=9)
        # Print the numeric value on top of each bar.
        for bar, val in zip(bars, vals.values):
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                bar.get_height(),
                f"{val:.3f}",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    plt.tight_layout()
    out1 = os.path.join(EXPORT_DIR, "clustering_metrics_digits.png")
    fig.savefig(out1, dpi=150, bbox_inches="tight")
    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)
    print(f"Saved: {out1}")

    # ---------------------------------------------------------------------------
    # Figure 2: som_comparison.png (Digits only, no Fashion-MNIST)
    # ---------------------------------------------------------------------------


    def _plot_grid_edges(ax, weights_2d, n_rows, n_cols):
        """Draw light-gray segments between neighbouring cells of a grid SOM.

        ``weights_2d`` holds the projected neuron weights in row-major order,
        so grid cell ``(i, j)`` is row ``i * n_cols + j``. Each cell is joined
        to the cell below it and the cell to its right when those exist.
        """
        for i in range(n_rows):
            for j in range(n_cols):
                idx = i * n_cols + j
                for di, dj in [(1, 0), (0, 1)]:
                    ni, nj = i + di, j + dj
                    if 0 <= ni < n_rows and 0 <= nj < n_cols:
                        nb = ni * n_cols + nj
                        ax.plot(
                            [weights_2d[idx, 0], weights_2d[nb, 0]],
                            [weights_2d[idx, 1], weights_2d[nb, 1]],
                            color="lightgray",
                            linewidth=0.8,
                            zorder=1,
                        )


    print("Training SOM comparison models...")

    som_db = SomVQ(
        n_iter=500,
        lambda_=53.5,
        max_neurons=100,
        decay_function="linear",
        sigma_end=1,
        random_state=42,
    )
    t0 = time.perf_counter()
    som_db.fit(X_train)
    t_db = time.perf_counter() - t0
    n_neurons_db = len(som_db.neurons_)
    print(f" DBGSOM: {n_neurons_db} neurons")

    # Size the fixed grids so that side * side2 is close to the number of
    # neurons DBGSOM ended up with.
    side = math.ceil(math.sqrt(n_neurons_db))
    side2 = math.floor(math.sqrt(n_neurons_db))

    som_ms = MiniSom(
        x=side,
        y=side2,
        input_len=X_train.shape[1],
        sigma=0.2 * np.sqrt(side**2),
        learning_rate=1.0,
        random_seed=42,
    )
    som_ms.pca_weights_init(X_train)
    t0 = time.perf_counter()
    som_ms.train_batch(X_train, num_iteration=100 * len(X_train))
    t_ms = time.perf_counter() - t0
    n_neurons_ms = side * side2
    print(f" MiniSom: {n_neurons_ms} neurons")

    susi_som = SOMClustering(
        n_rows=side,
        n_columns=side2,
        n_iter_unsupervised=1000,
        train_mode_unsupervised="online",
        random_state=42,
    )
    t0 = time.perf_counter()
    susi_som.fit(X_train)
    t_susi = time.perf_counter() - t0
    print(f" SuSi: {side * side2} neurons")

    km = KMeans(n_clusters=n_neurons_db, n_init=10, random_state=42)
    t0 = time.perf_counter()
    km.fit(X_train)
    t_km = time.perf_counter() - t0
    print(f" KMeans: {n_neurons_db} centroids")

    # One shared 2-D projection, fit on the full scaled data set, so all four
    # panels use the same axes.
    pca = PCA(n_components=2, random_state=42)
    pca.fit(X)

    fig, axes = plt.subplots(1, 4, figsize=(22, 5))
    fig.suptitle("SOM algorithm comparison — Digits dataset (PCA projection)", fontsize=13)

    # DBGSOM
    weights_db_2d = pca.transform(som_db.weights_)
    hit_counts = np.array(
        [som_db.som_.nodes[node].get("hit_count", 0.0) for node in som_db.neurons_]
    )
    ax = axes[0]
    ax.set_title(f"DBGSOM\n{n_neurons_db} neurons (adaptive)", fontsize=11)
    node_pos = dict(zip(som_db.neurons_, weights_db_2d))
    for u, v in som_db.som_.edges():
        x0, y0 = node_pos[u]
        x1, y1 = node_pos[v]
        ax.plot([x0, x1], [y0, y1], color="lightgray", linewidth=0.8, zorder=1)
    sc = ax.scatter(
        weights_db_2d[:, 0], weights_db_2d[:, 1], c=hit_counts, cmap="bone", s=60, zorder=2
    )
    plt.colorbar(sc, ax=ax, label="Hit count")
    ax.set_xlabel("PC 1")
    ax.set_ylabel("PC 2")

    # MiniSom
    ms_weights_flat = som_ms.get_weights().reshape(-1, X.shape[1])
    weights_ms_2d = pca.transform(ms_weights_flat)
    # Per-neuron activation response (plotted as "Hit count"), flattened in
    # the same order as the weights above.
    ms_hit_counts = som_ms.activation_response(X).reshape(-1)
    ax = axes[1]
    ax.set_title(f"MiniSom\n{n_neurons_ms} neurons ({side}x{side2} fixed)", fontsize=11)
    _plot_grid_edges(ax, weights_ms_2d, side, side2)
    sc = ax.scatter(
        weights_ms_2d[:, 0],
        weights_ms_2d[:, 1],
        c=ms_hit_counts,
        cmap="bone",
        s=60,
        zorder=2,
    )
    plt.colorbar(sc, ax=ax, label="Hit count")
    ax.set_xlabel("PC 1")
    ax.set_ylabel("PC 2")

    # SuSi
    susi_flat = susi_som.unsuper_som_.reshape(-1, X.shape[1])
    weights_susi_2d = pca.transform(susi_flat)
    # Keep every second row/column of the U-matrix so that one value remains
    # per neuron (presumably the in-between entries are neighbour distances).
    susi_umatrix = susi_som.get_u_matrix()[::2, ::2].reshape(-1)
    ax = axes[2]
    ax.set_title(f"SuSi\n{side * side2} neurons ({side}x{side2} fixed)", fontsize=11)
    _plot_grid_edges(ax, weights_susi_2d, side, side2)
    sc = ax.scatter(
        weights_susi_2d[:, 0],
        weights_susi_2d[:, 1],
        c=susi_umatrix,
        cmap="bone",
        s=60,
        zorder=2,
    )
    plt.colorbar(sc, ax=ax, label="U-matrix value")
    ax.set_xlabel("PC 1")
    ax.set_ylabel("PC 2")

    # KMeans
    km_2d = pca.transform(km.cluster_centers_)
    km_counts = np.bincount(km.labels_, minlength=n_neurons_db)
    ax = axes[3]
    ax.set_title(f"KMeans\n{n_neurons_db} centroids (reference, no topology)", fontsize=11)
    sc = ax.scatter(km_2d[:, 0], km_2d[:, 1], c=km_counts, cmap="bone", s=60)
    plt.colorbar(sc, ax=ax, label="Cluster size")
    ax.set_xlabel("PC 1")
    ax.set_ylabel("PC 2")

    plt.tight_layout()
    out2 = os.path.join(EXPORT_DIR, "som_comparison.png")
    fig.savefig(out2, dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"Saved: {out2}")
    print("Done.")


.. _sphx_glr_download_auto_examples_export_figures.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: export_figures.ipynb <export_figures.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: export_figures.py <export_figures.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: export_figures.zip <export_figures.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_