how can i make this script suitable for converting excel files with more than one sheet inside? - performance

import pandas as pd
from xlsx2csv import Xlsx2csv
from io import StringIO
def read_excel(path: str, sheet_name: str) -> pd.DataFrame:
buffer = StringIO() #to read and
Xlsx2csv(path, outputencoding="utf-8", sheet_name=sheet_name).convert(buffer)
buffer.seek(0)
df = pd.read_csv(buffer)
return df
how can i make this script suitable for converting excel files with more than one sheet inside? It works only for xlsx file with one sheet at the moment...

Do you really need to use xlsx2csv module? If not, you could try this with Pandas.
import pandas as pd
for sheet in ['Sheet1', 'Sheet2']:
df = pd.read_excel('sample.xlsx', sheetname=sheet)

Related

How to convert yolo annotations to coco format. Json?

I want to convert my labels in yolo format to coco format
I have tried
https://github.com/Taeyoung96/Yolo-to-COCO-format-converter
And
Pylabel
They all have a bugs.
I want to train on detectron 2 but it fails to load the dataset because of the wrong json file.
Thanks everybody
Could you try with this tool (disclaimer: I'm the author)? It is not (yet) a Python package so you need to downloads the repo first. This should ressemble something like:
from ObjectDetectionEval import *
from pathlib import Path
def main() -> None:
path = Path("/path/to/annotations/") # Where the .txt files are
names_file = Path("/path/to/classes.names")
save_file = Path("coco.json")
annotations = AnnotationSet.from_yolo(gts_path).map_labels(names)
# If you need to change the labels
# names = Annotation.parse_names_file(names_file)
# annotations.map_labels(names)
annotations.save_coco(save_file)
if __name__ == "__main__":
main()
If you need more control (coordinate format, images location and extension, etc.) you should use the more generic AnnotationSet.from_txt(). If it does not suit your needs you can easily implement your own parser using AnnotationSet.from_folder().

How do I create a prefetch dataset from a folder of images?

I am trying to input a dataset from Kaggle into this notebook from the Tensorflow docs in order to train a CycleGAN model. My current approach is to download the folders into my notebook and loop through the paths of each image and use cv2.imread(path) to add the uint8 image data to a list. But this doesn't work and I know my current approach is wrong because the code provided by google requires a Prefetch dataset.
Here's my current code (excluding the opencv part)
import os
# specify the img directory path
art_path = "/content/abstract-art-gallery/Abstract_gallery/Abstract_gallery/"
land_path = "/content/landscape-pictures/"
def grab_path(folder, i_count=100):
res = []
for file in range(i_count):
if os.listdir(folder)[0].endswith(('.jpg', '.png', 'jpeg')):
img_path = folder + os.listdir(folder)[0]
res.append(img_path)
return res
art_path, land_path = grab_path(art_path), grab_path(land_path)
print(art_path)
print(land_path)
The error in the code comes here:
train_horses = train_horses.cache().map(
preprocess_image_train, num_parallel_calls=AUTOTUNE).shuffle(
BUFFER_SIZE).batch(BATCH_SIZE)
Is there a simpler approach to this problem?
import pathlib
import tensorflow as tf
import numpy as np
#tf.autograph.experimental.do_not_convert
def read_image(path):
image_string = tf.io.read_file(path)
image = DataUtils.decode_image(image_string,(image_size))
return image
AUTO = tf.data.experimental.AUTOTUNE
paths = np.array([x for x in pathlib.Path(IMAGE_PATHS_DIR).rglob('*.jpg')])
dataset = tf.data.Dataset.from_tensor_slices((paths.astype(str)))
dataset = dataset.map(self.read_image)
dataset = dataset.shuffle(2048)
dataset = dataset.prefetch(AUTOTUNE)

How to get text file data into table format using pandas in jupyter notebook?

I have a data like this in a text file
subject:
'math','science','physics','chemistry'
marks:
'75','62','92','90'
How to print this data in table fomat usingpandas in jupyternote book ?
Just read all data and individually remove ', like so:
from io import StringIO
df_marks = pd.read_csv(StringIO("""'math','science','physics','chemistry'
'75','62','92','90'"""))
df_marks.columns = df_marks.columns.str.strip("'")
df_marks = df_marks.apply(lambda x: x.str.strip("'"))
df_marks

reading multiple images in python

I want to read multiple images in python I'm using this code but when I run it,nothing happens.
Could you tell me what is the problem?
import glob , cv2
import numpy as np
def read_img(img_list , img):
n=cv2.imread(img)
img_list.append(n)
return img_list
path = glob.glob("02291G0AR/*.bmp")
list_ = []
cv_image = [read_img(list_,img) for img in path]
print(cv_image)
"02291G0AR" is the folder where my images are save in. and it's near my code file
Perhaps it's just a matter of the print function not being the adequate one to use. I'd try:
for img in cv_image:
cv2.imshow('image',img)

how to read image from wand.image.Image without saving it to drive

what changes should i do in this code so i don't have to save image to disk in step [A] then again read it from disk in step [B]. as showing in code. can anyone help me this with changes in the code or some tips?
import io
import os
import six
from google.cloud import vision
from google.cloud import translate
from google.cloud.vision import types
import json
from wand.image import Image
client = vision.ImageAnnotatorClient()
sample_pdf = Image(filename='CMB72_CMB0720160.pdf[0]', resolution=500)
blank = Image(filename='Untitled.png')
all_ = sample_pdf.clone()
polling_ = sample_pdf.clone()
voters = sample_pdf.clone()
all_.crop(3000,2800,3800,3860)
polling_.crop(870,4330,2900,4500)
voters.crop(1300,4980,2000,5250)
blank.composite(all_,left=0,top=0)
blank.composite(voters,left=0,top=1100)
blank.composite(polling_,left=0,top=1420)
blank.save('CMB72_CMB0720122.jpg')---------------[A]
file_name = 'CMB72_CMB0720122.jpg'-------------|
with io.open(file_name,'rb') as image_file:----|>[B]
content = image_file.read()---------------|
image = types.Image(content= content)
image_context = vision.types.ImageContext(
language_hints=['hi'])
response = client.document_text_detection(image=image)
texts = response.text_annotations
file = open('jin.txt','w+',encoding='utf-8')
file.write(texts[0].description)
file.close()
Use the wand.image.Image.make_blob method.
content = blank.make_blob('JPEG')

Resources