# Source code for hots.plugins.clustering.custom_spectral

# hots/plugins/clustering/custom_spectral.py

"""Clustering plugin: custom spectral clustering for HOTS."""

import numpy as np
import pandas as pd
from numpy.linalg import multi_dot
from scipy.cluster.vq import kmeans2
from scipy.linalg import fractional_matrix_power
from scipy.linalg.lapack import dsyevr

from hots.core.interfaces import ClusteringPlugin
from hots.plugins.clustering.builder import (
    build_matrix_indiv_attr,
    build_similarity_matrix,
)


class CustomSpectralClustering(ClusteringPlugin):
    """Custom spectral clustering plugin using normalized Laplacian.

    The similarity matrix ``W`` is scaled to ``D^-1/2 W D^-1/2`` (``D`` being
    the diagonal matrix of row sums), the eigenvectors belonging to the
    largest eigenvalues form an embedding with one row per individual, and
    the rows of that embedding are grouped with k-means.
    """

    def __init__(self, parameters: dict, instance):
        """Initialize with cluster count and instance configuration.

        Args:
            parameters: Plugin options. ``"nb_clusters"``, when present,
                takes precedence over ``instance.config.clustering.nb_clusters``.
            instance: Object providing ``config`` (read for
                ``clustering.nb_clusters``, ``tick_field``,
                ``individual_field`` and ``metrics``) and ``get_id_map()``.
        """
        self.n_clusters = parameters.get(
            "nb_clusters",
            instance.config.clustering.nb_clusters,
        )
        self.tick_field = instance.config.tick_field
        self.indiv_field = instance.config.individual_field
        self.metrics = instance.config.metrics
        self.id_map = instance.get_id_map()

    def fit(self, df: pd.DataFrame) -> pd.Series:
        """Compute labels by eigen-decomposing the normalized Laplacian.

        Args:
            df: Input data handed to ``build_matrix_indiv_attr`` together
                with the configured tick/individual fields, metrics and id map.

        Returns:
            Integer cluster labels in ``range(n_clusters)``, indexed like the
            matrix returned by ``build_matrix_indiv_attr``. The cluster count
            is clamped to ``[1, number of individuals]``.
        """
        x = build_matrix_indiv_attr(
            df,
            self.tick_field,
            self.indiv_field,
            self.metrics,
            self.id_map,
        )
        w = np.asarray(build_similarity_matrix(x), dtype=float)

        n_samples = w.shape[0]
        if n_samples == 0:
            # Nothing to decompose; return an empty, correctly typed result.
            return pd.Series(np.empty(0, dtype=int), index=x.index)

        # More clusters than individuals is not meaningful, and fewer than
        # one cannot be labelled.
        k = max(1, min(int(self.n_clusters), n_samples))

        embedding = self._spectral_embedding(w, k)
        labels = self._cluster_embedding(embedding, k)
        return pd.Series(labels, index=x.index)

    @staticmethod
    def _spectral_embedding(w: np.ndarray, k: int) -> np.ndarray:
        """Return the ``k`` leading eigenvectors of ``D^-1/2 W D^-1/2``."""
        degrees = w.sum(axis=1)

        # D is diagonal, so D^-1/2 is an element-wise operation. Individuals
        # with non-positive degree get a zero scale instead of inf/NaN.
        inv_sqrt = np.zeros_like(degrees)
        connected = degrees > 0
        inv_sqrt[connected] = 1.0 / np.sqrt(degrees[connected])
        normalized = w * inv_sqrt[:, np.newaxis] * inv_sqrt[np.newaxis, :]

        # Symmetric eigen-solver from the public API; UPLO="U" reads the
        # upper triangle, as the LAPACK ?syevr wrapper does by default.
        eigvals, eigvecs = np.linalg.eigh(normalized, UPLO="U")
        leading = np.argsort(eigvals)[::-1][:k]
        return eigvecs[:, leading]

    @staticmethod
    def _cluster_embedding(embedding: np.ndarray, k: int) -> np.ndarray:
        """Group the rows of ``embedding`` into ``k`` clusters with k-means."""
        n_samples = embedding.shape[0]
        if k == 1:
            return np.zeros(n_samples, dtype=int)

        # Project rows onto the unit sphere so that clustering depends on
        # direction only; all-zero rows are left untouched.
        norms = np.linalg.norm(embedding, axis=1, keepdims=True)
        points = np.divide(
            embedding,
            norms,
            out=np.zeros_like(embedding),
            where=norms > 0,
        )

        # Deterministic farthest-point seeding: results are reproducible and
        # do not depend on a random generator.
        seeds = [0]
        nearest = np.sum((points - points[0]) ** 2, axis=1)
        for _ in range(1, k):
            candidate = int(np.argmax(nearest))
            seeds.append(candidate)
            nearest = np.minimum(
                nearest,
                np.sum((points - points[candidate]) ** 2, axis=1),
            )

        _, labels = kmeans2(points, points[seeds], iter=100, minit="matrix")
        return np.asarray(labels).astype(int)