How to get rid of positional argument error while using map function in tensorflow dataset - tensorflow-datasets

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
# Load the MNIST train and test splits together with the dataset metadata.
# NOTE(review): `as_supervised` is not passed, so per the tfds.load docs each
# element is a feature dictionary rather than an (image, label) tuple. A
# single-argument element like that is consistent with the
# "missing 1 required positional argument: 'label'" error quoted below.
(ds_train, ds_test), ds_info = tfds.load(
    "mnist",
    split=["train", "test"],
    shuffle_files=True,
    with_info=True,
)
def normalize(image, label):
    """Cast `image` to float32, divide it by 255.0, and return it with `label`.

    `label` is passed through untouched.
    """
    as_float = tf.cast(image, tf.float32)
    scaled = as_float / 255.0
    return scaled, label
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Apply `normalize` to every element of the training split, passing the
# AUTOTUNE constant as the parallelism setting.
new_ds = ds_train.map(normalize, num_parallel_calls=AUTOTUNE)
When I execute 'ds_train.map', it shows me the error below:
TypeError: in user code:
TypeError: tf__normalize() missing 1 required positional argument:
'label'

Related

Error when importing sklearn in pipeline component

When I run this simple pipeline (in GCP's Vertex AI Workbench) I get an error:
ModuleNotFoundError: No module named 'sklearn'
Here is my code:
from kfp.v2 import compiler
from kfp.v2.dsl import pipeline, component
from google.cloud import aiplatform
@component(
    # NOTE: "sklearn" is kept exactly as asked in the question. The answer
    # below explains that the installable package name is "scikit-learn",
    # which is why this component fails with ModuleNotFoundError as written.
    packages_to_install=["sklearn"],
    base_image="python:3.9",
)
def test_sklearn():
    """Pipeline component whose only action is to import sklearn."""
    import sklearn
@pipeline(
    pipeline_root=PIPELINE_ROOT,
    name="sklearn-pipeline",
)
def pipeline():
    """Define a pipeline made of a single step that runs `test_sklearn`."""
    # The decorator expression above is evaluated before this `def` rebinds
    # the name `pipeline`, so reusing the imported decorator's name works.
    test_sklearn()
# Compile the pipeline function into a JSON template on disk.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="sklearn_pipeline.json",
)

# Build a pipeline job from that template and run it under the given
# service account.
job = aiplatform.PipelineJob(
    display_name=PIPELINE_DISPLAY_NAME,
    template_path="sklearn_pipeline.json",
    pipeline_root=PIPELINE_ROOT,
    location=REGION,
)
job.run(service_account=SERVICE_ACCOUNT)
What do I do wrong? :)
It seems that the package name sklearn no longer works after a version upgrade. You need to change the value of packages_to_install from "sklearn" to "scikit-learn" in the @component block.

AttributeError: 'NoneType' object has no attribute 'drvsupport' when using Fiona driver

when I run the following code:
import geopandas as gpd
from shapely.geometry import Point, Polygon
import pandas as pd
# Set the 'KML' entry of fiona's supported_drivers mapping to 'rw', reaching
# fiona through the attribute geopandas exposes at gpd.io.file.fiona.
# This is the statement that raises the AttributeError quoted below.
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
# Read the KML file, explicitly requesting the 'KML' driver.
my_map = gpd.read_file('mymap.kml', driver='KML')
# Bare expression; presumably there to display the result in a notebook cell.
my_map
I get this error:
gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
AttributeError: 'NoneType' object has no attribute 'drvsupport'
Can anyone please help to solve this issue?
Recent versions of geopandas import fiona dynamically, and gpd.io.file.fiona is initially None.
My fix was to change to:
# Import the driver table from fiona directly instead of going through
# gpd.io.file.fiona, which is initially None in recent geopandas versions.
from fiona.drvsupport import supported_drivers
# Note: this sets the 'LIBKML' entry, not the 'KML' key used in the question.
supported_drivers['LIBKML'] = 'rw'
Using the latest version of python, geopandas and fiona this worked for me:
# Import fiona itself and set the 'KML' entry on its own driver table,
# bypassing the geopandas attribute that is None in the question.
import fiona
fiona.drvsupport.supported_drivers['KML'] = 'rw'

STREAMLIT [Errno 2] No such file or directory: 'NAME.xlsx'

I have this error:
No such file or directory: 'Survey.xlsx'
import pandas as pd
import streamlit as st
import plotly.express as px
from PIL import Image
# Page chrome: browser tab title, page header and sub-header.
st.set_page_config(page_title='Survey Results')
st.header('Survey Results 2021')
st.subheader('Was the tutorial helpful?')

### --- LOAD DATAFRAME
# NOTE(review): a bare file name is resolved against the current working
# directory of the running process, which is not necessarily the folder
# that contains this script.
excel_file = 'Survey.xlsx'
sheet_name = 'DATA'

# Columns B:D of the sheet, with the header taken from row index 3.
df = pd.read_excel(
    excel_file,
    sheet_name=sheet_name,
    usecols='B:D',
    header=3,
)

# Columns F:G of the same sheet, with rows containing missing values dropped.
df_participants = pd.read_excel(
    excel_file,
    sheet_name=sheet_name,
    usecols='F:G',
    header=3,
).dropna()
What am I doing wrong that the file can't be found? The file is in the same folder as the script itself.

Cannot import Interaction Type Discord Components

I installed Discord-Components by running pip install --upgrade discord-components in the Shell.
Then I imported it in my code with the line from discord_components import DiscordComponents, Button, ButtonStyle, InteractionType,
but I cannot import InteractionType:
File "main.py", line 19, in <module>
from discord_components import DiscordComponents, Button, ButtonStyle, InteractionType
ImportError: cannot import name 'InteractionType' from 'discord_components' (/opt/virtualenvs/python3/lib/python3.8/site-packages/discord_components/__init__.py)
InteractionType was removed in 2.0.0, as stated in the latest (gitbook) docs.
Docs link: https://devkiki7000.gitbook.io/discord-components/change-log/2.0.0#breaking-changes
Use integers to replace them.
# Integer stand-ins for the InteractionType members that can no longer be
# imported from discord_components.
(
    ChannelMessageWithSource,
    DeferredChannelMessageWithSource,
    DeferredUpdateMessage,
) = (4, 5, 6)

PySpark: How do I solve 'python worker failed to connect back' error when using pyproj package in Pandas UDF? (Converting lat/long to UTM coordinates)

I have a json file with lat/long coordinates, which I try to convert to UTM ("x", "y") in PySpark.
The .json file looks like this:
{"positionmessage":{"latitude": 51.822872161865234,"longitude": 4.905614852905273}}
{"positionmessage":{"latitude": 51.819644927978516, "longitude": 4.961687088012695}}
I read the json file in pyspark and try to convert to UTM ('x', 'y'-coord) in PySpark with the following script:
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, StructField, StructType, StringType, IntegerType, DateType, FloatType, TimestampType, DoubleType
from pyspark.sql.functions import *
appName = "PySpark"
master = "local"
file_name = "lat_lon.JSON"

# Create Spark session
spark = (
    SparkSession.builder
    .appName(appName)
    .master(master)
    .getOrCreate()
)

# Each record nests two nullable double fields, latitude and longitude,
# under the "positionmessage" key.
schema = StructType([
    StructField(
        "positionmessage",
        StructType([
            StructField('latitude', DoubleType(), True),
            StructField('longitude', DoubleType(), True),
        ]),
    ),
])

# Read the JSON file with the explicit schema and lift the nested fields
# to top-level columns.
df = spark.read.schema(schema).json(file_name).select("positionmessage.*")
Until here no problem; the problem arises when I try to convert to UTM coordinates using the pyproj package (which worked in Pandas).
from pyspark.sql.functions import array, pandas_udf, PandasUDFType
from pyproj import Proj
from pandas import Series
# using decorator 'pandas_udf' to wrap the function.
@pandas_udf('array<double>', PandasUDFType.SCALAR)
def get_utm(x):
    """Project each coordinate pair in `x` to UTM zone 31 (WGS84 ellipsoid).

    Args:
        x: iterable of indexable pairs (presumably a pandas Series, since
            this is a scalar pandas UDF). The caller below builds each pair
            as (longitude, latitude), so element 0 is the longitude and
            element 1 the latitude.

    Returns:
        A pandas Series holding one projected pair per input element.
    """
    pp = Proj(proj='utm', zone=31, ellps='WGS84', preserve_units=False)
    return Series([pp(e[0], e[1]) for e in x])
# Add the projected pair as column 'utm', then expose its two entries as the
# separate columns X and Y.
df = (
    df.withColumn('utm', get_utm(array('longitude', 'latitude')))
    .selectExpr("*", "utm[0] as X", "utm[1] as Y")
)
df.show()
I get the error "python worker failed to connect back", but there does not seem to be a problem with the code itself. What could the problem be?
You can use a plain UDF rather than Pandas UDF:
@udf(returnType=ArrayType(DoubleType()))
def get_utm(long, lat):
    """Project one longitude/latitude pair to UTM zone 31 (WGS84 ellipsoid).

    Args:
        long: longitude value of a single row.
        lat: latitude value of a single row.

    Returns:
        The pair produced by calling the pyproj projection on (long, lat),
        declared to Spark as an array of doubles.
    """
    pp = Proj(proj='utm', zone=31, ellps='WGS84', preserve_units=False)
    return pp(long, lat)
# Apply the row-wise UDF to the two coordinate columns, then split the
# resulting array into the separate columns X and Y.
result = (
    df.withColumn('utm', get_utm('longitude', 'latitude'))
    .selectExpr("*", "utm[0] as X", "utm[1] as Y")
)

Resources