Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Silhouette coefficients and Silhouette scores

scikit-learn and back-of-the-envelope calculations

import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics, datasets, cluster

Four points only

# Four corners of the square [-1, 1] x [-1, 1], listed with the first
# coordinate varying slowest; labels alternate 0, 1, 0, 1 so that the two
# points sharing a second coordinate end up in the same cluster.
y = np.array([0, 1] * 2)
X = np.array([[first, second] for first in (-1, 1) for second in (-1, 1)])

# Reference values from scikit-learn: one coefficient per sample, then
# their mean. sep='' keeps the value glued to its label on the same line.
print('Silhouette score calculated using scikit-learn')
print('before averaging:', metrics.silhouette_samples(X, y), sep='')
print('after averaging:', metrics.silhouette_score(X, y), sep='')

def _manual_silhouette_samples(X, y):
    """Return the Silhouette coefficient of every sample as a list.

    For each sample:
      a: mean distance to the other members of its own cluster
      b: mean distance to the members of the nearest other cluster
      s: Silhouette coefficient = (b-a) / max(a, b)

    X is indexed as X[i, :] (one row per sample) and y[i] is the cluster
    label of row i.

    A sample that is alone in its cluster gets s = 0 instead of a division
    by zero, and s is also 0 when a and b are both zero.

    Raises:
        ValueError: if y holds fewer than two distinct labels, in which
            case b is undefined.
    """
    labels = np.unique(y)
    if len(labels) < 2:
        raise ValueError('Silhouette coefficients need at least 2 clusters')

    coefficients = []
    for ni in range(len(y)):
        # Per-cluster running sum of distances from sample ni, and the
        # number of members seen in each cluster.
        dist_sum = dict.fromkeys(labels, 0)
        count = dict.fromkeys(labels, 0)
        for nj in range(len(y)):
            dist_sum[y[nj]] += np.linalg.norm(X[ni, :] - X[nj, :])
            count[y[nj]] += 1

        own = y[ni]
        if count[own] == 1:
            # Singleton cluster: a is undefined, so the coefficient is 0.
            coefficients.append(0.0)
            continue

        # The own-cluster sum includes the zero distance of the sample to
        # itself, hence the "- 1" in the denominator.
        a = dist_sum[own] / (count[own] - 1)
        # Nearest other cluster = smallest mean distance among the rest.
        b = min(dist_sum[label] / count[label]
                for label in labels if label != own)
        scale = max(a, b)
        coefficients.append((b - a) / scale if scale > 0 else 0.0)
    return coefficients


print('\nSilhouette score calculated manually back-of-the-envelop')
s = _manual_silhouette_samples(X, y)
print('before averaging:', end='')
print(s)
print('after averaging:', end='')
print(np.mean(s))
Silhouette score calculated using scikit-learn
before averaging:[0.17157288 0.17157288 0.17157288 0.17157288]
after averaging:0.17157287525380985

Silhouette score calculated manually back-of-the-envelop
before averaging:[0.17157287525380985, 0.17157287525380985, 0.17157287525380985, 0.17157287525380985]
after averaging:0.17157287525380985

Two concentric circles

# Seed NumPy's global random number generator and fix the sample count
# used for the generated data sets.
np.random.seed(77)
n_samples = 1000

plt.figure(figsize=(15, 15))
plt.set_cmap('Set1')
# One panel of the 3x3 grid per value of the make_circles `factor` argument.
for panel, factor in enumerate(np.arange(.1, 1, .1), start=1):
    X, y = datasets.make_circles(n_samples=n_samples, factor=factor, noise=.05)
    plt.subplot(3, 3, panel)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.axis('equal')
    plt.axis('off')
    score = metrics.silhouette_score(X, y)
    plt.title(f'f={factor:3.1f}; s={score:4.2f}', size=20)
# Observation: the Silhouette score deteriorates (decreases) as the two
# clusters become less differentiated and eventually begin to overlap.
<Figure size 1080x1080 with 9 Axes>
plt.figure(figsize=(15, 15))
plt.set_cmap('Set1')
# Same two-circle data in every panel, but the labels are overwritten by a
# vertical cut: points with x < cut become 0, points with x >= cut become 1.
for panel, cut in enumerate(np.linspace(-1, 1, 9), start=1):
    X, y = datasets.make_circles(n_samples=n_samples, factor=.5, noise=.05)
    # In-place assignment keeps y's dtype; the boolean mask is stored as 0/1.
    y[:] = X[:, 0] >= cut
    plt.subplot(3, 3, panel)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.axis('equal')
    plt.axis('off')
    score = metrics.silhouette_score(X, y)
    plt.title(f'cut={cut:3.1f}; s={score:4.2f}', size=20)
<Figure size 1080x1080 with 9 Axes>
# Three blob centres per panel; the separation shrinks from 9 down to 0,
# where all three centres coincide at the origin.
centerss = [
    [[-9, -9], [-9, 9], [9, 9]],
    [[-5, -5], [-5, 5], [5, 5]],
    [[-1, -1], [-1, 1], [1, 1]],
    [[0, 0], [0, 0], [0, 0]],
]
plt.figure(figsize=(10, 10))
plt.set_cmap('Set1')
for panel, centers in enumerate(centerss, start=1):
    plt.subplot(2, 2, panel)
    X, y = datasets.make_blobs(n_samples=n_samples, centers=centers)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.axis('equal')
    score = metrics.silhouette_score(X, y)
    plt.title(f's={score:4.2f}', size=15)
<Figure size 720x720 with 4 Axes>