bokeh.models import ColumnDataSource, Button, Select, Div from bokeh.sampledata.

data mining

Description

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from bokeh.models import ColumnDataSource, Button, Select, Div
from bokeh.sampledata.iris import flowers
from bokeh.plotting import figure, curdoc, show
from bokeh.layouts import column, row


# In[2]:


# Load the dataset: take an independent deep copy of the bokeh sample frame
# and discard the 'species' column, keeping every other column.
data = flowers.copy(deep=True).drop(columns=['species'])


# In[6]:


# k-medoid algorithm using given medoids
#
# This setup cell has to run before the distance computation further down,
# because that code reads m, k, pca_data and initial_medoids.

# Number of samples (rows of the feature table).
m = len(data)

# Dimension reduction: project the features onto 3 principal components,
# giving one row per sample and 3 columns.
from sklearn.decomposition import PCA
pca_components = PCA(n_components=3)
pca_data = pca_components.fit_transform(data)

# Initialize the given medoids.
# NOTE(review): the values in `medoids` are taken to be row indices of the
# samples chosen as medoids -- confirm. Under that reading the medoid
# coordinates must be looked up in pca_data; using the index values
# themselves as coordinates would measure every distance to the arbitrary
# point (24, 74, 124) and only "works" because there happen to be 3 PCA
# components as well as 3 medoids.
medoids = [24, 74, 124]
k = len(medoids)
initial_medoids = pca_data[medoids]  # one row of coordinates per medoid


# In[194]:


# Manhattan (L1) distance from each sample to each medoid, computed one
# sample at a time: dist_matrix[i, j] is the distance between sample i and
# medoid j.
dist_matrix = np.empty((m, k))
for i, point in enumerate(pca_data):
    # (3,) - (k, 3) broadcasts to (k, 3); the 1-norm along axis 1 then gives
    # one distance per medoid.
    dist = np.linalg.norm(point - initial_medoids, ord=1, axis=1)
    dist_matrix[i, :] = dist

# The same distances in a single broadcasted expression, as a cross-check.
# The inserted axis turns pca_data into (m, 1, 3) so that subtracting the
# (k, 3) medoid array yields (m, k, 3); summing the absolute differences over
# the last axis leaves the (m, k) distance table, flattened here to keep
# dist_another one-dimensional as before.
dist_another = np.sum(
    np.abs(pca_data[:, np.newaxis, :] - initial_medoids), axis=-1
).ravel()
dist_matrix_another = dist_another.reshape((m, k))


Related Questions in data mining category