Want to plot 3d scatter plot with color picking up from the fourth attribute which is cluster no - scatter-plot

# -*- coding: utf-8 -*-
"""
Created on Thu Feb 16 18:17:32 2023
#author: avnth
"""
import seaborn as sb
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from mpl_toolkits import mplot3d
import plotly.express as px
dta=pd.read_csv("D:/XLRI/Term-4/ML/Assignment-2/Prpd_2.csv")
dta.head()
dta1=dta.drop("Cid",axis=1,inplace=False)
#dta1=dta1.iloc[:,1:4]
dta1=pd.DataFrame(dta1)
dta1.head()
dta1.describe()
dta1=pd.DataFrame(dta1)
dta1.describe()
ncl=[]
for i in range(1,15):
kn=KMeans(n_clusters=i)
kn.fit(dta1)
ncl.append(kn.inertia_)
plt.plot(range(1,15),ncl)
#silhoute method
sil = []
for n in range(2,15):
kn1=KMeans(n_clusters = n)
kn1.fit(dta1)
# labels = kn1.labels_
sil.append(silhouette_score(dta1,kn1.labels_, metric = 'euclidean'))
plt.plot(range(2,15),sil)
#Davies Bouldin Index method
db = []
K1 = range(2,8)
for l in K1:
kn2 = (KMeans(n_clusters = l) )
kn2.fit(dta1)
db.append(davies_bouldin_score(dta1,kn2.labels_))
plt.plot(range(2,8),db)
sa=sc()
sa.fit(dta1)
tdta1=sa.transform(dta1)
tdta1=pd.DataFrame(tdta1)
kmc=KMeans(n_clusters=6)
kmc.fit(tdta1)
clus=kmc.predict(tdta1)
dta["clus"]=clus
dta.head()
clus4=dta[dta.clus==4]
clus4.describe()
clus0=dta[dta.clus==0]
clus0.describe()
clus5=dta[dta.clus==5]
clus5.describe()
clus3=dta[dta.clus==3]
clus3.describe()
sb.scatterplot("Recency","Frequency",data=dta,hue="clus")
sb.scatterplot("Frequency","Money",data=dta,hue="clus")
# Creating dataset
z = dta.Recency
x = dta.Frequency
y = dta.Money
z.head()
x.head()
y.head()
# Creating figure
#fig = plt.figure()
#ax = fig.add_subplot(111,projection ="3d")
#dta=pd.DataFrame(dta)
#dta.head()
#for a in range(0,5):
# ax.scatter(dta.Frequency[dta.clus==a],dta.Recency[dta.clus==a],dta.Money[dta.clus==a],label=a,hue="clus")
#ax.legend()
#plt.title("simple 3D scatter plot")
#plt.show()
#df = px.data.iris()
#fig = px.scatter_3d(df, x='sepal_length', y='sepal_width', z='petal_width',color='petal_length',symbol='species')
#fig=plt.figure()
Hello Frieds,
I am newbie to python. Just learning. I have taken a dataset and clustered it. Now, I want to plot it in 3d scatter plot with a 4th dimension that is my cluster as color. For each cluster no new color should appear. So a data point will be plotted as x,y,z attribute but it will have color based on 4th column that is my cluster number. I know how to do it in 2d with hue. But I am unable to find similar thing in 3d plot. Any help will be appreicated. Atatching my code too.
I tried many libraries from online tutorial but I am not egtting exactly what I am looking for. I have attached a sample for how I want it to be plotted. Sample taken from plotly.com This is just replication how I want to plot.

enter image description here
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(z,x,y, marker=".", c=dta["clus"], s=50, cmap="RdBu")
plt.legend(clus)
plt.title("4D scatterplot")
ax.set_xlabel("Recency")
ax.set_ylabel("Frequency")
ax.set_zlabel("Money")
plt.show()

Related

Seaborn PairGrid: pairplot two data set with different transparency

I'd like to make a PairGrid plot with the seaborn library.
I have two classed data: a training set and one-target point.
I'd like to plot the one-target point as opaque, however, the samples in the training set should be transparent.
And I'd like to plot the one-target point also in lower cells.
Here is my code and image:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
data = pd.read_csv("data.csv")
g = sns.PairGrid(data, hue='type')
g.map_upper(sns.scatterplot, alpha=0.2, palette="husl")
g.map_lower(sns.kdeplot, lw=3, palette="husl")
g.map_diag(sns.kdeplot, lw=3, palette="husl")
g.add_legend()
plt.show()
And the data.csv is like belows:
logP tPSA QED HBA HBD type
0 -2.50000 200.00 0.300000 8 1 Target 1
1 1.68070 87.31 0.896898 3 2 Training set
2 3.72930 44.12 0.862259 4 0 Training set
3 2.29702 91.68 0.701022 6 3 Training set
4 -2.21310 102.28 0.646083 8 2 Training set
You can reassign the dataframe used after partial plotting. E.g. g.data = data[data['type'] == 'Target 1']. So, you can first plot the training dataset, change g.data and then plot the target with other parameters.
The following example supposes the first row of the iris dataset is used as training data. A custom legend is added (this might provoke a warning that should be ignored).
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
iris = sns.load_dataset('iris')
g = sns.PairGrid(iris)
color_for_trainingset = 'paleturquoise'
# color_for_trainingset = sns.color_palette('husl', 2) [-1] # this is the color from the question
g.map_upper(sns.scatterplot, alpha=0.2, color=color_for_trainingset)
g.map_lower(sns.kdeplot, color=color_for_trainingset)
g.map_diag(sns.kdeplot, lw=3, color=color_for_trainingset)
g.data = iris.iloc[:1]
# g.data = data[data['type'] == 'Target 1']
g.map_upper(sns.scatterplot, alpha=1, color='red')
g.map_lower(sns.scatterplot, alpha=1, color='red', zorder=3)
handles = [Line2D([], [], color='red', ls='', marker='o', label='target'),
Line2D([], [], color=color_for_trainingset, lw=3, label='training set')]
g.add_legend(handles=handles)
plt.show()

contextily making weird background maps

This is my code:
import pandas as pd
import geoplot as gplt
import geopandas as gpd
import geoplot.crs as gcrs
import contextily
df = pd.read_csv('dataframe_master.csv', index_col='id')
crs = {'init': 'epsg:4326'}
geometry = [geometry.Point(xy) for xy in zip(df['latitude'], df['longitude'])]
df_geo = gpd.GeoDataFrame(df_geo, crs=crs, geometry=geometry)
test = df_geo[:200000]
test = test.to_crs(epsg=3857)
ax = test.plot(marker='o', markersize=1)
contextily.add_basemap(ax)
plt.show()
And it generates this image:
image, which doesn't show a background map and seems a little distorted.
My coordinate data was originally made with the RD-coordinaten standard (EPSG:28992), which I converted to EPSG:4326 with this code:
lon_l = []
lat_l = []
p1 = Proj(init='epsg:28992')
p2 = Proj(proj='latlong',datum='WGS84')
for row in range(len(df)):
lon, lat, z = transform(p1, p2, df.iloc[row, 7], df.iloc[row, 8], 0.0)
lon_l.append(lon)
lat_l.append(lat)
I did a sanity check on the longitude latitude output by comparing to some online converters, and the output points to the correct locations.
I tried following this solution: https://gis.stackexchange.com/questions/348339/using-crs-epsg3857-but-misalignment-between-stamen-background-and-coordinates-o in case my conversion was missing the "towgs84"part, but the image still looked the same with a slightly different colour.
I figured it out! I should've listed longitude before latitude when building the geometry.
geometry = [geometry.Point(xy) for xy in zip(df['longitude'], df['latitude'])]

When using rasterize=True with datashader, how do I get transparency where count=0 to see the underlying tile?

Currently, when I do this:
import pandas as pd
import hvplot.pandas
df = pd.util.testing.makeDataFrame()
plot = df.hvplot.points('A', 'B', tiles=True, rasterize=True, geo=True,
aggregator='count')
I can't see the underlying tile source.
To see the underlying tile source philippjfr suggested setting the color bar limits slightly higher than 0 and set the min clipping_colors to transparent:
plot = plot.redim.range(**{'Count': (0.25, 1)})
plot = plot.opts('Image', clipping_colors={'min': 'transparent'})
Now the underlying tile source is viewable.
Full Code:
import pandas as pd
import hvplot.pandas
df = pd.util.testing.makeDataFrame()
plot = df.hvplot.points('A', 'B', tiles=True, rasterize=True, geo=True,
aggregator='count')
plot = plot.redim.range(**{'Count': (0.25, 1)})
plot = plot.opts('Image', clipping_colors={'min': 'transparent'})
plot

Keras Realtime Augmentation adding Noise and Contrast

Keras provides an ImageDataGenerator class for realtime augmentation, but it does not include contrast adjustment and addition of noise.
How can we apply a random level of noise and a random contrast adjustment during training? Could these functions be added to the 'preprocessing_function' parameter in the datagen?
Thank you.
You could indeed add noise with preprocessing_function.
Example script:
import random
import numpy as np
def add_noise(img):
'''Add random noise to an image'''
VARIABILITY = 50
deviation = VARIABILITY*random.random()
noise = np.random.normal(0, deviation, img.shape)
img += noise
np.clip(img, 0., 255.)
return img
# Prepare data-augmenting data generator
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
rescale=1./255,
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
zoom_range=0.2,
preprocessing_function=add_noise,
)
# Load a single image as our example
from keras.preprocessing import image
img_path = 'cat_by_irene_mei_flickr.png'
img = image.load_img(img_path, target_size=(299,299))
# Generate distorted images
images = [img]
img_arr = image.img_to_array(img)
img_arr = img_arr.reshape((1,) + img_arr.shape)
for batch in datagen.flow(img_arr, batch_size=1):
images.append( image.array_to_img(batch[0]) )
if len(images) >= 4:
break
# Display
import matplotlib.pyplot as plt
f, xyarr = plt.subplots(2,2)
xyarr[0,0].imshow(images[0])
xyarr[0,1].imshow(images[1])
xyarr[1,0].imshow(images[2])
xyarr[1,1].imshow(images[3])
plt.show()
Example images generated by the script:
From the Keras docs:
preprocessing_function: function that will be implied on each input. The function will run before any other modification on it. The function should take one argument: one image (Numpy tensor with rank 3), and should output a Numpy tensor with the same shape.
So, I created a simple function and then used the image augmentation functions from the imgaug module. Note that imgaug requires images to be rank 4.
I found in this blog that you can do something as simple as:
from keras.layers import GaussianNoise
model.add(Dense(32))
model.add(GaussianNoise(0.1))
model.add(Activation('relu'))
model.add(Dense(32))
...
Unfortunately, I can't find an analogous way to adjust/augment the contrast. But you can, according to this post, augment the brightness with
from keras.preprocessing.image import ImageDataGenerator
ImageDataGenerator(brightness_range=[range_min,range_max])

How to select irregular shapes in a image

Using python code we are able to create image segments as shown in the screenshot. our requirement is how to select specific segment in the image and apply different color to it ?
The following is our python snippet
from skimage.segmentation import felzenszwalb, slic,quickshift
from skimage.segmentation import mark_boundaries
from skimage.util import img_as_float
import matplotlib.pyplot as plt
from skimage import measure
from skimage import restoration
from skimage import img_as_float
image = img_as_float(io.imread("leaf.jpg"))
segments = quickshift(image, ratio=1.0, kernel_size=20, max_dist=10,return_tree=False, sigma=0, convert2lab=True, random_seed=42)
fig = plt.figure("Superpixels -- %d segments" % (500))
ax = fig.add_subplot(1, 1, 1)
ax.imshow(mark_boundaries(image, segments))
plt.axis("off")
plt.show()
do this:
seg_num = 64 # desired segment to be colored
color = float64([1,0,0]) # red color
image[segments == 64] = color # assign color to the segment
You can use OpenCV python module - example:

Resources