This is my code:
import pandas as pd
import geoplot as gplt
import geopandas as gpd
import geoplot.crs as gcrs
import contextily
df = pd.read_csv('dataframe_master.csv', index_col='id')
crs = {'init': 'epsg:4326'}
geometry = [geometry.Point(xy) for xy in zip(df['latitude'], df['longitude'])]
df_geo = gpd.GeoDataFrame(df_geo, crs=crs, geometry=geometry)
test = df_geo[:200000]
test = test.to_crs(epsg=3857)
ax = test.plot(marker='o', markersize=1)
contextily.add_basemap(ax)
plt.show()
And it generates this image:
image, which doesn't show a background map and seems a little distorted.
My coordinate data was originally made with the RD-coordinaten standard (EPSG:28992), which I converted to EPSG:4326 with this code:
lon_l = []
lat_l = []
p1 = Proj(init='epsg:28992')
p2 = Proj(proj='latlong',datum='WGS84')
for row in range(len(df)):
lon, lat, z = transform(p1, p2, df.iloc[row, 7], df.iloc[row, 8], 0.0)
lon_l.append(lon)
lat_l.append(lat)
I did a sanity check on the longitude latitude output by comparing to some online converters, and the output points to the correct locations.
I tried following this solution: https://gis.stackexchange.com/questions/348339/using-crs-epsg3857-but-misalignment-between-stamen-background-and-coordinates-o in case my conversion was missing the "towgs84"part, but the image still looked the same with a slightly different colour.
I figured it out! I should've listed longitude before latitude when building the geometry.
geometry = [geometry.Point(xy) for xy in zip(df['longitude'], df['latitude'])]
Related
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 16 18:17:32 2023
#author: avnth
"""
import seaborn as sb
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from mpl_toolkits import mplot3d
import plotly.express as px
dta=pd.read_csv("D:/XLRI/Term-4/ML/Assignment-2/Prpd_2.csv")
dta.head()
dta1=dta.drop("Cid",axis=1,inplace=False)
#dta1=dta1.iloc[:,1:4]
dta1=pd.DataFrame(dta1)
dta1.head()
dta1.describe()
dta1=pd.DataFrame(dta1)
dta1.describe()
ncl=[]
for i in range(1,15):
kn=KMeans(n_clusters=i)
kn.fit(dta1)
ncl.append(kn.inertia_)
plt.plot(range(1,15),ncl)
#silhoute method
sil = []
for n in range(2,15):
kn1=KMeans(n_clusters = n)
kn1.fit(dta1)
# labels = kn1.labels_
sil.append(silhouette_score(dta1,kn1.labels_, metric = 'euclidean'))
plt.plot(range(2,15),sil)
#Davies Bouldin Index method
db = []
K1 = range(2,8)
for l in K1:
kn2 = (KMeans(n_clusters = l) )
kn2.fit(dta1)
db.append(davies_bouldin_score(dta1,kn2.labels_))
plt.plot(range(2,8),db)
sa=sc()
sa.fit(dta1)
tdta1=sa.transform(dta1)
tdta1=pd.DataFrame(tdta1)
kmc=KMeans(n_clusters=6)
kmc.fit(tdta1)
clus=kmc.predict(tdta1)
dta["clus"]=clus
dta.head()
clus4=dta[dta.clus==4]
clus4.describe()
clus0=dta[dta.clus==0]
clus0.describe()
clus5=dta[dta.clus==5]
clus5.describe()
clus3=dta[dta.clus==3]
clus3.describe()
sb.scatterplot("Recency","Frequency",data=dta,hue="clus")
sb.scatterplot("Frequency","Money",data=dta,hue="clus")
# Creating dataset
z = dta.Recency
x = dta.Frequency
y = dta.Money
z.head()
x.head()
y.head()
# Creating figure
#fig = plt.figure()
#ax = fig.add_subplot(111,projection ="3d")
#dta=pd.DataFrame(dta)
#dta.head()
#for a in range(0,5):
# ax.scatter(dta.Frequency[dta.clus==a],dta.Recency[dta.clus==a],dta.Money[dta.clus==a],label=a,hue="clus")
#ax.legend()
#plt.title("simple 3D scatter plot")
#plt.show()
#df = px.data.iris()
#fig = px.scatter_3d(df, x='sepal_length', y='sepal_width', z='petal_width',color='petal_length',symbol='species')
#fig=plt.figure()
Hello Frieds,
I am newbie to python. Just learning. I have taken a dataset and clustered it. Now, I want to plot it in 3d scatter plot with a 4th dimension that is my cluster as color. For each cluster no new color should appear. So a data point will be plotted as x,y,z attribute but it will have color based on 4th column that is my cluster number. I know how to do it in 2d with hue. But I am unable to find similar thing in 3d plot. Any help will be appreicated. Atatching my code too.
I tried many libraries from online tutorial but I am not egtting exactly what I am looking for. I have attached a sample for how I want it to be plotted. Sample taken from plotly.com This is just replication how I want to plot.
enter image description here
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(z,x,y, marker=".", c=dta["clus"], s=50, cmap="RdBu")
plt.legend(clus)
plt.title("4D scatterplot")
ax.set_xlabel("Recency")
ax.set_ylabel("Frequency")
ax.set_zlabel("Money")
plt.show()
I am having trouble adding a basemap to my map. My geodataframe is created using X and Y coords of a bunch of points.
gdf = geo.GeoDataFrame(
df, geometry=gpd.points_from_xy(df['X'], df['Y']))
gdf.set_crs(epsg=3857)
Which look like this:
After using contexily to get a basemap, I cannot get the basemap to properly show up. The coords should be showing the bottom of the Mississippi River Basin.
ax = gdf.plot(color="red", figsize=(9, 9))
cx.add_basemap(ax, zoom=0, crs= gdf.crs)
Let me know if there is anything wrong with my code as to why it is not showing up.
Thanks!
It looks like your data is in WGS84/EPSG:4326 (i.e. lat/lon) coordinates. So I think you're confusing geopandas.GeoDataFrame.set_crs, which tells geopandas what the CRS of the data is, with geopandas.GeoDataFrame.to_crs, which transforms the data from the current CRS to the new one you specify. Also note that neither of these operations are in-place by default. So I think you want:
gdf = geo.GeoDataFrame(
df, geometry=gpd.points_from_xy(df['X'], df['Y'])
)
gdf = gdf.set_crs("epsg:4326")
gdf_mercator = gdf.to_crs("epsg:3857")
This really is same as #Michael Delgado answer. It's simpler to state the CRS at GeoDataFrame construction time. Also make sure you are using correct CRS
MWE
import geopandas as gpd
import geopandas as geo
import pandas as pd
import contextily as cx
# construct a dataframe with X and Y of some points in US
places = gpd.read_file(
gpd.datasets.get_path("naturalearth_cities"),
mask=gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")).loc[
lambda d: d["iso_a3"].eq("USA")
],
)
df = pd.DataFrame({"X": places.geometry.x, "Y": places.geometry.y})
# user code, state CRS at construction time
gdf = geo.GeoDataFrame(
df, geometry=gpd.points_from_xy(df["X"], df["Y"]), crs="epsg:4326"
)
ax = gdf.plot(color="red", figsize=(9, 9))
cx.add_basemap(ax, zoom=0, crs=gdf.crs)
I got a dataframe with the following columns Name (string), size (num), latitude (num), longitude (num), geometry (shapely.geometry.point.Point).
When i'm plotting my points on a map and are trying to annotate each point the annotation is not shown at all. My guess is that this is due to the projection im using.
Here are the lines of codes im running:
import geopandas as gpd
import geoplot as gplt
proj = gplt.crs.AlbersEqualArea()
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'projection': proj})
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
gplt.pointplot(gdf, hue='size', s=15, ax=ax, cmap=palette, legend=True, zorder=10)
for idx, row in gdf.iterrows():
plt.annotate(s=row['Name'], xy=[row['latitude'],row['longitude']])
plt.show()
You need coordinate transformation in
plt.annotate(s=row['Name'], xy=[row['latitude'],row['longitude']])
The transformation should be
xtran = gplt.crs.ccrs.AlbersEqualArea()
Replace that line with
x, y = xtran.transform_point(row['longitude'], row['latitude'], ccrs.PlateCarree())
plt.annotate( s=row['Name'], xy=[x, y] )
I am trying to plot river basins on a map. In order to reduce the size of the resulting vector graphics, I am applying GeoSeries.simplify().
import cartopy
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import geopandas as gpd
# %%% Earth
fig = plt.figure()
latlon_proj = ccrs.PlateCarree()
axis_proj = ccrs.Orthographic()
ax = plt.axes(
projection=axis_proj
)
# %%% Major River Basins
mrb_basins = gpd.read_file('mrb_basins.json') # 520 entries
mrb_basins['geometry'] = mrb_basins['geometry'].simplify(0.1)
for shape in mrb_basins['geometry']:
feat = cartopy.feature.ShapelyFeature(
[shape],
latlon_proj,
facecolor='red',
)
ax.add_feature(feat)
mrb_basins.plot()
The problem is, the resulting map of the earth is fully covered by a red shape.
This is not the case, if I remove the line mrb_basins['geometry'] = mrb_basins['geometry'].simplify(0.1).
How can I simplify the geometries whilst keeping their integrity?
The data set of major river basins is available here.
GeoSeries.simplify() does not always return valid geometries due to the underlying simplification algorithm used by GEOS. And cartopy has trouble to plot invalid geometries.
You need to fix your geometries before passing them to cartopy. The simple trick is to call buffer(0).
mrb_basins['geometry'] = mrb_basins['geometry'].simplify(0.1).buffer(0)
Then your code works fine.
Currently, when I do this:
import pandas as pd
import hvplot.pandas
df = pd.util.testing.makeDataFrame()
plot = df.hvplot.points('A', 'B', tiles=True, rasterize=True, geo=True,
aggregator='count')
I can't see the underlying tile source.
To see the underlying tile source philippjfr suggested setting the color bar limits slightly higher than 0 and set the min clipping_colors to transparent:
plot = plot.redim.range(**{'Count': (0.25, 1)})
plot = plot.opts('Image', clipping_colors={'min': 'transparent'})
Now the underlying tile source is viewable.
Full Code:
import pandas as pd
import hvplot.pandas
df = pd.util.testing.makeDataFrame()
plot = df.hvplot.points('A', 'B', tiles=True, rasterize=True, geo=True,
aggregator='count')
plot = plot.redim.range(**{'Count': (0.25, 1)})
plot = plot.opts('Image', clipping_colors={'min': 'transparent'})
plot