In xarray there is a method called to_dataframe(), see:
http://xarray.pydata.org/en/stable/pandas.html
With this method a DataArray can be converted to a pandas DataFrame.
How can I convert an xarray DataArray to a geopandas GeoDataFrame, i.e. like the above but with polygons of the grid cells included?
This code will create a GDF with a geometry column containing a series of POINT objects corresponding to the lat/lon coordinates in my xarray.
import geopandas as gpd
import xarray as xr
# Open the NetCDF file as an xarray Dataset.
xds = xr.open_dataset('yourfile.nc')
# Select the variable of interest from the Dataset opened above
# (the original indexed an undefined name `x`).
xarr = xds['value_column']
# Flatten to a DataFrame; reset_index() turns the index levels into ordinary
# columns so that df.lon and df.lat can be read below.
df = xarr.to_dataframe().reset_index()
# Build the GeoDataFrame: one POINT geometry per row from the lon/lat columns.
# NOTE(review): no CRS is set here; if lon/lat are WGS84 degrees, pass
# crs="EPSG:4326" -- confirm against the dataset.
gdf = gpd.GeoDataFrame(
    df.value_column, geometry=gpd.points_from_xy(df.lon, df.lat))
Related
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 16 18:17:32 2023
#author: avnth
"""
import seaborn as sb
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.metrics import silhouette_score
from sklearn.metrics import davies_bouldin_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from mpl_toolkits import mplot3d
import plotly.express as px
# Load the data; the analysis below uses its Cid, Recency, Frequency and
# Money columns.
dta = pd.read_csv("D:/XLRI/Term-4/ML/Assignment-2/Prpd_2.csv")
dta.head()

# Clustering features: every column except Cid (presumably a customer id --
# confirm). drop() already returns a new DataFrame, so no pd.DataFrame()
# re-wrap is needed.
dta1 = dta.drop("Cid", axis=1, inplace=False)
#dta1=dta1.iloc[:,1:4]
dta1.head()
dta1.describe()

# Elbow method: K-means inertia for k = 1..14.
ncl = []
for i in range(1, 15):
    kn = KMeans(n_clusters=i)
    kn.fit(dta1)
    ncl.append(kn.inertia_)
plt.plot(range(1, 15), ncl)

# Silhouette method: silhouette score for k = 2..14 (undefined for k = 1).
sil = []
for n in range(2, 15):
    kn1 = KMeans(n_clusters=n)
    kn1.fit(dta1)
    sil.append(silhouette_score(dta1, kn1.labels_, metric='euclidean'))
plt.plot(range(2, 15), sil)

# Davies-Bouldin index method for k = 2..7.
db = []
K1 = range(2, 8)
for l in K1:
    kn2 = KMeans(n_clusters=l)
    kn2.fit(dta1)
    db.append(davies_bouldin_score(dta1, kn2.labels_))
plt.plot(K1, db)

# Standardise the features, then fit the final 6-cluster model.
# NOTE(review): the three diagnostics above are fitted on the unscaled dta1
# while the final model uses standardised data -- confirm this is intended.
sa = sc()
sa.fit(dta1)
tdta1 = sa.transform(dta1)
tdta1 = pd.DataFrame(tdta1)
kmc = KMeans(n_clusters=6)
kmc.fit(tdta1)
clus = kmc.predict(tdta1)

# Attach the cluster number to the original rows.
dta["clus"] = clus
dta.head()

# Summary statistics for selected clusters.
# NOTE(review): KMeans is created without random_state, so which group gets
# which cluster number may differ between runs.
clus4 = dta[dta.clus == 4]
clus4.describe()
clus0 = dta[dta.clus == 0]
clus0.describe()
clus5 = dta[dta.clus == 5]
clus5.describe()
clus3 = dta[dta.clus == 3]
clus3.describe()

# 2D views coloured by cluster. x and y are passed as keywords because
# seaborn >= 0.12 no longer accepts them positionally.
sb.scatterplot(x="Recency", y="Frequency", data=dta, hue="clus")
sb.scatterplot(x="Frequency", y="Money", data=dta, hue="clus")

# Creating dataset: the three columns used as axes of the 3D plot.
z = dta.Recency
x = dta.Frequency
y = dta.Money
z.head()
x.head()
y.head()
# Creating figure
#fig = plt.figure()
#ax = fig.add_subplot(111,projection ="3d")
#dta=pd.DataFrame(dta)
#dta.head()
#for a in range(0,5):
# ax.scatter(dta.Frequency[dta.clus==a],dta.Recency[dta.clus==a],dta.Money[dta.clus==a],label=a,hue="clus")
#ax.legend()
#plt.title("simple 3D scatter plot")
#plt.show()
#df = px.data.iris()
#fig = px.scatter_3d(df, x='sepal_length', y='sepal_width', z='petal_width',color='petal_length',symbol='species')
#fig=plt.figure()
Hello Friends,
I am a newbie to Python and am just learning. I have taken a dataset and clustered it. Now I want to plot it as a 3D scatter plot with a 4th dimension, my cluster, shown as color. Each cluster should have one color, with no extra colors appearing. So a data point will be plotted by its x, y, z attributes, but its color will be based on the 4th column, which is my cluster number. I know how to do it in 2D with hue, but I am unable to find a similar thing for a 3D plot. Any help will be appreciated. Attaching my code too.
I tried many libraries from online tutorials, but I am not getting exactly what I am looking for. I have attached a sample of how I want it to be plotted. The sample is taken from plotly.com; it is just a replication of how I want the plot to look.
enter image description here
# 3D scatter of Recency / Frequency / Money with the cluster number as colour.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Keep the returned collection so the legend can be derived from its colours.
scatter = ax.scatter(z, x, y, marker=".", c=dta["clus"], s=50, cmap="RdBu")
# One legend entry per distinct cluster value. The original plt.legend(clus)
# passed the per-row label array as legend labels instead.
ax.legend(*scatter.legend_elements(), title="clus")
plt.title("4D scatterplot")
ax.set_xlabel("Recency")
ax.set_ylabel("Frequency")
ax.set_zlabel("Money")
plt.show()
I am having trouble adding a basemap to my map. My geodataframe is created using X and Y coords of a bunch of points.
# Build a GeoDataFrame with one point geometry per row from df's X/Y columns.
gdf = geo.GeoDataFrame(
df, geometry=gpd.points_from_xy(df['X'], df['Y']))
# NOTE(review): the return value of set_crs is discarded here; set_crs is not
# in-place by default, so gdf itself is left without this CRS.
gdf.set_crs(epsg=3857)
Which look like this:
After using contextily to get a basemap, I cannot get the basemap to show up properly. The coords should be showing the bottom of the Mississippi River Basin.
# Plot the points in red and keep the result in ax for the basemap call.
ax = gdf.plot(color="red", figsize=(9, 9))
# Add a contextily basemap to ax at zoom level 0, passing gdf.crs as the CRS.
cx.add_basemap(ax, zoom=0, crs= gdf.crs)
Let me know if there is anything wrong with my code as to why it is not showing up.
Thanks!
It looks like your data is in WGS84/EPSG:4326 (i.e. lat/lon) coordinates. So I think you're confusing geopandas.GeoDataFrame.set_crs, which tells geopandas what the CRS of the data is, with geopandas.GeoDataFrame.to_crs, which transforms the data from the current CRS to the new one you specify. Also note that neither of these operations is in-place by default. So I think you want:
# Build a GeoDataFrame with one point geometry per row from df's X/Y columns.
gdf = geo.GeoDataFrame(
df, geometry=gpd.points_from_xy(df['X'], df['Y'])
)
# Declare the CRS the coordinates are already in; the result is reassigned
# because set_crs is not in-place by default.
gdf = gdf.set_crs("epsg:4326")
# Transform the data to EPSG:3857 and keep the result as a separate frame.
gdf_mercator = gdf.to_crs("epsg:3857")
This is really the same as @Michael Delgado's answer. It's simpler to state the CRS at GeoDataFrame construction time. Also make sure you are using the correct CRS.
MWE
import geopandas as gpd
import geopandas as geo
import pandas as pd
import contextily as cx
# construct a dataframe with X and Y of some points in US:
# read the naturalearth_cities dataset, passing the rows of naturalearth_lowres
# whose iso_a3 equals "USA" as the mask.
# NOTE(review): gpd.datasets was deprecated in geopandas 0.14 and removed in
# 1.0 -- confirm the installed version still provides it.
places = gpd.read_file(
gpd.datasets.get_path("naturalearth_cities"),
mask=gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")).loc[
lambda d: d["iso_a3"].eq("USA")
],
)
# Keep only the point coordinates as plain X/Y columns.
df = pd.DataFrame({"X": places.geometry.x, "Y": places.geometry.y})
# user code, state CRS at construction time
gdf = geo.GeoDataFrame(
df, geometry=gpd.points_from_xy(df["X"], df["Y"]), crs="epsg:4326"
)
# Plot the points in red, then add the basemap using the CRS stored on gdf.
ax = gdf.plot(color="red", figsize=(9, 9))
cx.add_basemap(ax, zoom=0, crs=gdf.crs)
I got a dataframe with the following columns Name (string), size (num), latitude (num), longitude (num), geometry (shapely.geometry.point.Point).
When I plot my points on a map and try to annotate each point, the annotation is not shown at all. My guess is that this is due to the projection I'm using.
Here are the lines of codes im running:
import geopandas as gpd
import geoplot as gplt
# NOTE(review): plt, df and palette are used below but are not defined within
# this snippet.
# Projection object handed to the axes created on the next line.
proj = gplt.crs.AlbersEqualArea()
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={'projection': proj})
# Point geometries are built from (longitude, latitude), in that order.
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.longitude, df.latitude))
# Draw the points on ax, coloured by the 'size' column.
gplt.pointplot(gdf, hue='size', s=15, ax=ax, cmap=palette, legend=True, zorder=10)
# Annotate each row with its Name.
# NOTE(review): xy is given as raw [latitude, longitude] values, with no
# transformation into the axes' projection.
for idx, row in gdf.iterrows():
plt.annotate(s=row['Name'], xy=[row['latitude'],row['longitude']])
plt.show()
You need coordinate transformation in
plt.annotate(s=row['Name'], xy=[row['latitude'],row['longitude']])
The transformation should be
xtran = gplt.crs.ccrs.AlbersEqualArea()
Replace that line with
# Convert the row's lon/lat from PlateCarree into the coordinates of xtran.
# PlateCarree is reached through gplt.crs.ccrs, the same path used to build
# xtran, because a bare `ccrs` name is not imported in this snippet.
x, y = xtran.transform_point(
    row['longitude'], row['latitude'], gplt.crs.ccrs.PlateCarree())
# Pass the label positionally: the keyword was renamed from `s` to `text`
# in matplotlib 3.3, and the positional form works with both.
plt.annotate(row['Name'], xy=[x, y])
I am making a scatter plot in seaborn and I want to add some text to each point of the scatter plot according to my data ("Countries" column in the hap_educ and hap_rel tables). I think I need a loop to do this, but I cannot figure out how to do it for seaborn. Here is the code I use:
https://ibb.co/hZ9NBV0
https://ibb.co/ZYLdgkt
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
# Set up working directory
os.chdir(r'D:/PROJECT CSS/')
# Import the three source tables from xlsx files.
educ = pd.read_excel(r'D:\PROJECT CSS\educ.xlsx')
happiness= pd.read_excel(r'D:\PROJECT CSS\happiness edited.xlsx')
religious=pd.read_excel(r'D:\PROJECT CSS\religious edited.xlsx')
# Merge the data into 2 tables, joining each on the 'Country' column.
# NOTE(review): the surrounding text calls the column "Countries"; the code
# merges on 'Country' -- confirm which name the files use.
hap_rel = pd.merge(religious, happiness, on ='Country')
hap_educ= pd.merge(educ, happiness, on ='Country')
# Plot 1: 'Score' on x against 'Pupil teacher ratio' on y.
p1=sns.regplot(x =hap_educ['Score'], y =hap_educ['Pupil teacher ratio'], data=hap_educ, label='Countries')
plt.xlabel("Index of happiness")
plt.ylabel("Pupil / teacher ratio")
# Plot 2: 'Score' on x against the 'Yes' column on y.
# NOTE(review): no new figure is created between the two regplot calls --
# confirm whether they are meant to share one set of axes.
p2=sns.regplot(x=hap_rel['Score'], y=hap_rel['Yes'], data=hap_rel)
plt.xlabel("Index of happiness")
plt.ylabel("Percent of religious people(1=100%)")
Expect to see each point to be Annotated with Country name from my table
The basic way to display GeoDataFrame in Altair:
import altair as alt
import geopandas as gpd
# Enable Altair's 'notebook' renderer.
# NOTE(review): renderer names depend on the Altair version -- confirm that
# 'notebook' is available in the installed release.
alt.renderers.enable('notebook')
# Load the naturalearth_lowres dataset bundled with geopandas.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Wrap the African rows as inline GeoJSON data for Altair.
data = alt.InlineData(values = world[world.continent=='Africa'].__geo_interface__, #geopandas to geojson
# root object type is "FeatureCollection" but we need its features
format = alt.DataFormat(property='features',type='json'))
# Draw the shapes coloured by pop_est, with name and pop_est in the tooltip.
alt.Chart(data).mark_geoshape(
).encode(
color='properties.pop_est:Q', # GeoDataFrame fields are accessible through a "properties" object
tooltip=['properties.name:N','properties.pop_est:Q']
).properties(
width=500,
height=300
)
But it will crash if I add a column with NaN or DateTime values.
First, you can use world = alt.utils.sanitize_dataframe(world) to convert columns with JSON-incompatible types.
Or you can use gpdvega module to simplify code.
import altair as alt
import geopandas as gpd
import gpdvega
# Enable Altair's 'notebook' renderer.
# NOTE(review): renderer names depend on the Altair version -- confirm that
# 'notebook' is available in the installed release.
alt.renderers.enable('notebook')
# Load the naturalearth_lowres dataset bundled with geopandas.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# The GeoDataFrame slice is passed to alt.Chart directly and its columns are
# referenced by their plain names (no "properties." prefix).
alt.Chart(world[world.continent=='Africa']).mark_geoshape(
).encode(
color='pop_est',
tooltip=['name','pop_est']
).properties(
width=500,
height=300
)
Just pip install gpdvega and import gpdvega. Altair will then work with a GeoDataFrame just like a regular DataFrame. See the documentation for details.