Python ile makine öğrenmesi 3 K Means Clustering Kümeleme

Показать описание

Python ile makine öğrenmesi 3 K Means Clustering Kümeleme

Python kodu:

from time import time
import numpy as np

from sklearn import metrics

import pandas as pd

# Imputer configured to treat the sentinel value -12345 as "missing" and to
# fill it using the 'mean' strategy.
# NOTE(review): `imp` is never fitted or applied in the visible part of the
# script -- confirm it is actually run on `veriyeni` elsewhere.
imp = SimpleImputer(missing_values=-12345, strategy='mean')
# Intended to clean all columns so they are ready to be processed.

# Feature matrix: columns 0..29 of `veriyeni` (defined outside this excerpt).
inputt = veriyeni[:,0:30]

# Settings reported in the benchmark header and reused by the K-Means runs.
n_clusters = 4
sample_size = 90
# NOTE(review): the slice above keeps 30 columns, yet 31 is reported here --
# confirm which number is intended.
n_features = 31

# NOTE(review): range(1, 89) yields 88 values (1..88), one distinct value per
# entry; presumably a stand-in for ground-truth labels -- verify that its
# length matches the number of rows in `inputt`.
labels = range(1, 89)

# Summary line for the run configuration.
print("clusters: %d, \t n_samples %d, \t n_features %d"
% (n_clusters, sample_size, n_features))

# Horizontal rule followed by the column header of the benchmark table.
print(82 * '_')
print('init\t\ttime\tinertia\thomo\tcompl\tv-meas\tARI\tAMI\tsilhouette')

def bench_k_means(estimator, name, data):
    """Fit ``estimator`` on ``data`` and print one row of the benchmark table.

    The printed row matches the header emitted before the first call:
    init name, fit time, inertia, homogeneity, completeness, V-measure,
    adjusted Rand index (ARI), adjusted mutual information (AMI) and
    silhouette coefficient.

    Parameters
    ----------
    estimator : object
        Clustering estimator exposing ``fit``, ``inertia_`` and ``labels_``
        (``KMeans`` instances are passed by the callers in this script).
    name : str
        Label printed in the first column of the row.
    data : array-like
        Samples to cluster; also used for the silhouette computation.

    Notes
    -----
    Reads the module-level ``labels`` (reference labels for the supervised
    scores) and ``sample_size`` (number of samples used for the silhouette
    estimate).
    NOTE(review): the scoring functions require ``labels`` to contain one
    entry per row of ``data`` -- verify this for the data set in use.
    """
    t0 = time()
    estimator.fit(data)
    # Only the fitting step is timed; scoring happens afterwards.
    fit_time = time() - t0
    predicted = estimator.labels_
    print('%-9s\t%.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f'
          % (name, fit_time, estimator.inertia_,
             metrics.homogeneity_score(labels, predicted),
             metrics.completeness_score(labels, predicted),
             metrics.v_measure_score(labels, predicted),
             metrics.adjusted_rand_score(labels, predicted),
             metrics.adjusted_mutual_info_score(labels, predicted),
             metrics.silhouette_score(data, predicted,
                                      metric='euclidean',
                                      sample_size=sample_size)))

# Benchmark K-Means with the k-means++ seeding strategy.
bench_k_means(KMeans(init='k-means++', n_clusters=n_clusters, n_init=4),
              name="k-means++", data=inputt)

# Benchmark K-Means seeded with the principal components of the data.
# The initial centres are given explicitly, so the seeding is deterministic
# and a single initialisation (n_init=1) is sufficient.
pca = PCA(n_components=n_clusters).fit(inputt)
bench_k_means(KMeans(init=pca.components_, n_clusters=n_clusters, n_init=1),
              name="PCA-based",
              data=inputt)
print(82 * '_')

# #############################################################################
# Visualize the results on PCA-reduced data

# Project the samples onto two principal components so they can be drawn
# in a 2-D plot, and create the K-Means model used for the visualisation.
reduced_data = PCA(n_components=2).fit_transform(inputt)
kmeans = KMeans(init='k-means++', n_clusters=n_clusters, n_init=4)
# NOTE(review): no `kmeans.fit(reduced_data)` call is visible in this
# excerpt -- the model must be fitted before it is used for plotting.

# Step size of the mesh. Decrease to increase the quality of the VQ.
h = .02

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max] x [y_min, y_max].
# The bounds are padded by 1 on every side of the reduced data.
x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1

# Obtain labels for each point in mesh. Use last trained model.

# Put the result into a color plot
aspect='auto', origin='lower')

# Plot the centroids as a white X
marker='x', s=169, linewidths=3,
color='w', zorder=10)
'Centroids are marked with white cross')

lastkmeans = KMeans(init='k-means++', n_clusters=n_clusters, n_init=4)

for val in inputt: