I have this error:
No such file or directory: 'Survey.xlsx'
import pandas as pd
import streamlit as st
import plotly.express as px
from PIL import Image
st.set_page_config(page_title='Survey Results')
st.header('Survey Results 2021')
st.subheader('Was the tutorial helpful?')
### --- LOAD DATAFRAME
excel_file = 'Survey.xlsx'
sheet_name = 'DATA'
df = pd.read_excel(excel_file,
                   sheet_name=sheet_name,
                   usecols='B:D',
                   header=3)
df_participants = pd.read_excel(excel_file,
                                sheet_name=sheet_name,
                                usecols='F:G',
                                header=3)
df_participants.dropna(inplace=True)
What am I doing wrong that the file can't be found? The file is in the same folder as the script itself.
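One likely cause: the working directory is wherever streamlit run was launched from, not necessarily the script's folder, so a bare file name may not resolve. A minimal sketch, assuming only the file name from above, that builds the path from the script's own location:
from pathlib import Path
import pandas as pd
# Resolve the workbook relative to this script instead of the current
# working directory, which can differ under `streamlit run`.
excel_file = Path(__file__).parent / 'Survey.xlsx'
df = pd.read_excel(excel_file, sheet_name='DATA', usecols='B:D', header=3)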
Related
Here is the Python code. The output is always: name 'Path' is not defined.
import tkinter as tk
from tkinter import filedialog
from tkinter import *
import pandas as pd
f = open("test_filedialog.txt", "w")
f.close()
def openfile():
    Path = filedialog.askopenfilenames(title="Select files", filetypes=[("CSV files", "*.csv"), ("all files", "*.*")])
    f = open("test_filedialog.txt", "w")
    f.write(str(Path))
    f.close()
x = tk.Tk()
title = x.title("Title bar")
label1 = tk.Label(x, text="Hello", width=25, height=5)
label1.pack()
button1 = tk.Button(x, text="Exit", width=10, height=1, command=x.destroy)
button1.pack()
button2 = tk.Button(x, text="open file", width=15, height=2, command=openfile)
button2.pack()
x.mainloop()
# from matplotlib import pyplot as plt
daten = pd.read_csv(Path, header=None, sep=';')  # now reads in the selected columns --> the question is how we do this for column ranges
df = daten.drop(range(2), axis=1)
new_header = df.iloc[0]
df = df[1:]
df.columns = new_header
auswahl = df.loc[:, 158.936:159.01]
print(auswahl)
if __name__ == "__main__":
    root = tkinter.Tk()
    app = testApp(root)
    root.mainloop()
I tried to define 'Path' as an extra class, but unfortunately that did not lead anywhere.
It would be nice if you have a solution to the problem. Perhaps how to define 'Path' as an extra class, but another solution would also be fine :)
In Python, a variable defined inside a function does not have global scope. You should define a Path variable that is the path to a CSV file. The error is at line 38.
import pandas as pd
path = './data.csv' # path to csv file
df = pd.read_csv(path)
print(df.to_string())
Actually, I should have tried to run the script. The above did not fix the error; here is a corrected version:
import tkinter as tk
from tkinter import filedialog
from tkinter import *
import pandas as pd
def read_csv(Path):
    daten = pd.read_csv(Path, header=None, sep=',')
    print(daten)
    # df = daten.drop(range(2), axis=1)
    # new_header = df.iloc[0]
    # df = df[1:]
    # df.columns = new_header
    # auswahl = df.loc[:, 158.936:159.01]
    # print(auswahl)

def openfile():
    Path = filedialog.askopenfilenames(title="Select files", filetypes=[("CSV files", "*.csv"), ("all files", "*.*")])[0]
    read_csv(Path)  # do the file operation in this function

x = tk.Tk()
title = x.title("Title bar")
label1 = tk.Label(x, text="Hello", width=25, height=5)
label1.pack()
button1 = tk.Button(x, text="Exit", width=10, height=1, command=x.destroy)
button1.pack()
button2 = tk.Button(x, text="open file", width=15, height=2, command=openfile)
button2.pack()
x.mainloop()
# from matplotlib import pyplot as plt
if __name__ == "__main__":
    root = tk.Tk()
    # app = testApp(root)
    root.mainloop()
The askopenfilenames function does not return a file object but a tuple of file names. Also, you should do the file operations in the openfile function. Hope it helps.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
(ds_train, ds_test), ds_info = tfds.load("mnist", split=['train', 'test'],
                                         shuffle_files=True, with_info=True)
def normalize(image, label):
    image = tf.cast(image, tf.float32)
    return image / 255.0, label

AUTOTUNE = tf.data.experimental.AUTOTUNE
new_ds = ds_train.map(normalize, num_parallel_calls=AUTOTUNE)
When I execute 'ds_train.map', it shows me the error below:
TypeError: in user code:

    TypeError: tf__normalize() missing 1 required positional argument: 'label'
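For context, a likely cause: without as_supervised=True, tfds.load yields each element as a dict of features ({'image': ..., 'label': ...}), so map calls normalize with a single dict argument and label is never supplied. A minimal sketch of that fix, reusing the code above:
import tensorflow as tf
import tensorflow_datasets as tfds
# as_supervised=True makes each element an (image, label) tuple,
# which matches normalize's two-argument signature.
(ds_train, ds_test), ds_info = tfds.load("mnist", split=['train', 'test'],
                                         shuffle_files=True, with_info=True,
                                         as_supervised=True)
def normalize(image, label):
    image = tf.cast(image, tf.float32)
    return image / 255.0, label
new_ds = ds_train.map(normalize, num_parallel_calls=tf.data.experimental.AUTOTUNE)
Alternatively, keep the dict elements and unpack them inside a one-argument function, returning tf.cast(sample['image'], tf.float32) / 255.0 together with sample['label'].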
I have a JSON file with lat/long coordinates, which I try to convert to UTM ("x", "y") in PySpark.
The .json file looks like this:
{"positionmessage":{"latitude": 51.822872161865234,"longitude": 4.905614852905273}}
{"positionmessage":{"latitude": 51.819644927978516, "longitude": 4.961687088012695}}
I read the JSON file in PySpark and try to convert to UTM ('x', 'y' coordinates) with the following script:
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, StructField, StructType, StringType, IntegerType, DateType, FloatType, TimestampType, DoubleType
from pyspark.sql.functions import *
appName = "PySpark"
master = "local"
file_name = "lat_lon.JSON"
# Create Spark session
spark = SparkSession.builder \
    .appName(appName) \
    .master(master) \
    .getOrCreate()

schema = StructType([
    StructField("positionmessage",
                StructType([
                    StructField('latitude', DoubleType(), True),
                    StructField('longitude', DoubleType(), True),
                ]))])
df = spark.read.schema(schema).json(file_name).select("positionmessage.*")
Up to here there is no problem; the problem arises when I try to convert to UTM coordinates using the pyproj package (which worked in Pandas).
from pyspark.sql.functions import array, pandas_udf, PandasUDFType
from pyproj import Proj
from pandas import Series
# using the decorator 'pandas_udf' to wrap the function
@pandas_udf('array<double>', PandasUDFType.SCALAR)
def get_utm(x):
    pp = Proj(proj='utm', zone=31, ellps='WGS84', preserve_units=False)
    return Series([pp(e[0], e[1]) for e in x])

df = df.withColumn('utm', get_utm(array('longitude', 'latitude'))) \
       .selectExpr("*", "utm[0] as X", "utm[1] as Y")
df.show()
I get the error "python worker failed to connect back", but there does not seem to be a problem with the code itself. What can the problem be?
You can use a plain UDF rather than a Pandas UDF:
@udf(returnType=ArrayType(DoubleType()))
def get_utm(long, lat):
    pp = Proj(proj='utm', zone=31, ellps='WGS84', preserve_units=False)
    return pp(long, lat)

result = df.withColumn('utm', get_utm('longitude', 'latitude')).selectExpr("*", "utm[0] as X", "utm[1] as Y")
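For completeness, a self-contained sketch of the same approach with the imports it relies on, assuming the df from the question; the list(...) wrapper is a defensive choice so the return value is unambiguously an array:
from pyproj import Proj
from pyspark.sql.functions import udf
from pyspark.sql.types import ArrayType, DoubleType

@udf(returnType=ArrayType(DoubleType()))
def get_utm(long, lat):
    # Build the projection inside the UDF so it is constructed on the
    # workers rather than serialized from the driver.
    pp = Proj(proj='utm', zone=31, ellps='WGS84', preserve_units=False)
    return list(pp(long, lat))

result = df.withColumn('utm', get_utm('longitude', 'latitude')) \
           .selectExpr("*", "utm[0] as X", "utm[1] as Y")
result.show()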
I am trying to load images of varying sizes into a Dask DataFrame column and save the dataframe to HDF5 file format.
Here's the standard approach:
import glob
import dask.dataframe as dd
import pandas as pd
import numpy as np
from skimage.io import imread
dir = '/Users/petioptrv/Downloads/mask'
filenames = glob.glob(dir + '/*.png')[:5]
df = pd.DataFrame({"paths": filenames})
ddf = dd.from_pandas(df, npartitions=2)
ddf['images'] = ddf['paths'].apply(imread, meta=('images', np.uint8))
ddf.to_hdf('test.h5', '/data')
I get the following error message:
...
File "/Users/petioptrv/miniconda3/envs/dask/lib/python3.7/site-packages/pandas/io/pytables.py", line 2214, in set_atom_string
item=item, type=inferred_type
TypeError: Cannot serialize the column [images] because
its data contents are [mixed] object dtype
Essentially, PyTables detects that the column has an object dtype and checks if it's of type str. It's not, so it throws an exception.
I can probably hack it by opening the images into byte-arrays and converting those to strings, but that is far from the ideal scenario.
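For illustration, a rough sketch of that hack, base64-encoding each file to a plain string so PyTables sees a str column; the helper read_as_b64 is hypothetical, not part of the original code:
import base64
import glob
import dask.dataframe as dd
import pandas as pd

def read_as_b64(path):
    # Hypothetical helper: read the raw PNG bytes and base64-encode
    # them, so the column holds plain str values PyTables can store.
    with open(path, 'rb') as f:
        return base64.b64encode(f.read()).decode('ascii')

filenames = glob.glob('mask/*.png')[:5]
ddf = dd.from_pandas(pd.DataFrame({"paths": filenames}), npartitions=2)
ddf['images'] = ddf['paths'].apply(read_as_b64, meta=('images', str))
ddf.to_hdf('test.h5', '/data')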
Try specifying the data_columns as suggested in this issue.
ddf.to_hdf('test.h5', '/data', format='table', data_columns=['images'])
I have been attempting to download a plot created using Plotly on Google Colaboratory. So far, I have tried changing
files.download('foo.svg')
to
files.download('foo')
and I still get no results. I navigated to the files on Google Colab and nothing shows up there.
import numpy as np
import pandas as pd
from plotly.offline import iplot
import plotly.graph_objs as go
from google.colab import files
def enable_plotly_in_cell():
    import IPython
    from plotly.offline import init_notebook_mode
    display(IPython.core.display.HTML('''<script src="/static/components/requirejs/require.js"></script>'''))
    init_notebook_mode(connected=False)

# this actually shows the plot
enable_plotly_in_cell()
N = 500
x = np.linspace(0, 1, N)
y = np.random.randn(N)
df = pd.DataFrame({'x': x, 'y': y})
df.head()
data = [
    go.Scatter(
        x=df['x'],  # assign x as the dataframe column 'x'
        y=df['y']
    )
]
iplot(data, image='svg', filename='foo')
files.download('foo.svg')
This is the error I am getting:
OSErrorTraceback (most recent call last)
<ipython-input-18-31523eb02a59> in <module>()
29 iplot(data,image = 'svg', filename = 'foo')
30
---> 31 files.download('foo.svg')
32
/usr/local/lib/python2.7/dist-packages/google/colab/files.pyc in download(filename)
140 msg = 'Cannot find file: {}'.format(filename)
141 if _six.PY2:
--> 142 raise OSError(msg)
143 else:
144 raise FileNotFoundError(msg) # pylint: disable=undefined-variable
OSError: Cannot find file: foo.svg
To save vector or raster images (e.g. SVGs or PNGs) from Plotly figures you need to have Kaleido (preferred) or Orca (legacy) installed, which is actually possible using the following commands in Colab:
Kaleido:
!pip install kaleido
Orca:
!pip install "plotly>=4.0.0"
!wget https://github.com/plotly/orca/releases/download/v1.2.1/orca-1.2.1-x86_64.AppImage -O /usr/local/bin/orca
!chmod +x /usr/local/bin/orca
!apt-get install xvfb libgtk2.0-0 libgconf-2-4
Once either of the above is done, you can use the following code to make, show, and export a figure (using Plotly version 4):
import plotly.graph_objects as go
fig = go.Figure( go.Scatter(x=[1,2,3], y=[1,3,2] ) )
fig.show()
fig.write_image("image.svg")
fig.write_image("image.png")
The files can then be downloaded with:
from google.colab import files
files.download('image.svg')
files.download('image.png')
Try this, it works for me:
import plotly.graph_objects as go
from google.colab import files

fig = go.Figure(...)  # build your figure
fig.write_html("file.html")  # write as HTML (or use fig.write_image for an image)
files.download("file.html")  # download your file