VersionNotFoundError when writing to an S3 bucket from a custom dataset - kedro

I have a custom dataset which I am using to write a popmon report to an S3 bucket:
class ReportDataset(AbstractVersionedDataSet):
    def __init__(self, filepath: str, version: Version = None, credentials: Dict[str, Any] = None):
        _credentials = deepcopy(credentials) or {}
        protocol, path = get_protocol_and_path(filepath)
        self._protocol = protocol
        self._fs = fsspec.filesystem(self._protocol, **_credentials)
        super().__init__(
            filepath=PurePosixPath(path),
            version=version,
            exists_function=self._fs.exists,
            glob_function=self._fs.glob,
        )

    def _load(self):
        raise DataSetError("Write Only Dataset")

    def _save(self, data) -> None:
        """Saves data to the specified filepath."""
        save_path = get_filepath_str(self._get_save_path(), self._protocol)
        save_dir = Path(save_path).parent
        save_dir.mkdir(parents=True, exist_ok=True)
        with open(save_path, "w+") as file:
            file.write(data.to_html())
And I am getting the error below:
raise VersionNotFoundError(f"Did not find any versions for {self}")
kedro.io.core.VersionNotFoundError: Did not find any versions for ReportDataset(filepath=, protocol=s3, version=Version(load=None, save='2022-04-20T16.45.05.872Z'))
With the same code I am able to write to a local folder location. I am using kedro==0.17.4.
Any suggestions?
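One possible cause, offered as a hedged guess rather than a confirmed fix: _save creates directories with Path.mkdir and writes with the builtin open, both of which only touch the local filesystem. On S3 nothing is actually written, so the versioned lookup via self._fs.glob finds no file and raises VersionNotFoundError. A minimal sketch of _save routed through the dataset's own fsspec filesystem instead (same class and helpers as above):

    def _save(self, data) -> None:
        """Save the report through fsspec so s3:// (or any protocol) works."""
        save_path = get_filepath_str(self._get_save_path(), self._protocol)
        # self._fs was built from the dataset's protocol, so this writes to S3
        # when the catalog filepath is s3://...; no local mkdir is needed.
        with self._fs.open(save_path, "w") as file:
            file.write(data.to_html())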

Related

How to save user data to a database instead of a pickle or a JSON file when posting videos to YouTube using Django and the Data v3 API

I'm trying to upload videos to YouTube using Django and MSSQL, and I want to store the user data in the DB so that I can log in from multiple accounts and post videos.
The official documentation provided by YouTube uses a file-based approach: after login, all the user data gets saved to a file. I don't want to store any data in a file, since keeping credentials in files would be a huge risk and not good practice. So how can I bypass this step, save the data directly to the DB, and retrieve it when I want to post videos from a specific account?
In short, I want to replace the pickle-file implementation with storing it in the database.
Here's my code:
def youtubeAuthenticate():
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "client_secrets.json"
    creds = None
    # the file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # if there are no (valid) credentials available, let the user log in
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)
            creds = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)
    return build(api_service_name, api_version, credentials=creds)
@api_view(['GET', 'POST'])
def postVideoYT(request):
    youtube = youtubeAuthenticate()
    print('yt', youtube)
    try:
        initialize_upload(youtube, request.data)
    except HttpError as e:
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
    return Response("Hello")
def initialize_upload(youtube, options):
    print('options', options)
    print("title", options['title'])
    # tags = None
    # if options.keywords:
    #     tags = options.keywords.split(",")
    body = dict(
        snippet=dict(
            title=options['title'],
            description=options['description'],
            tags=options['keywords'],
            categoryId=options['categoryId']
        ),
        status=dict(
            privacyStatus=options['privacyStatus']
        )
    )
    # Call the API's videos.insert method to create and upload the video.
    insert_request = youtube.videos().insert(
        part=",".join(body.keys()),
        body=body,
        media_body=MediaFileUpload(options['file'], chunksize=-1, resumable=True)
    )
    path = pathlib.Path(options['file'])
    ext = path.suffix
    getSize = os.path.getsize(options['file'])
    resumable_upload(insert_request, ext, getSize)
# This method implements an exponential backoff strategy to resume a
# failed upload.
def resumable_upload(insert_request, ext, getSize):
    response = None
    error = None
    retry = 0
    while response is None:
        try:
            print("Uploading file...")
            status, response = insert_request.next_chunk()
            if response is not None:
                respData = response
                if 'id' in response:
                    print("Video id '%s' was successfully uploaded." % response['id'])
                else:
                    exit("The upload failed with an unexpected response: %s" % response)
        except HttpError as e:
            if e.resp.status in RETRIABLE_STATUS_CODES:
                error = "A retriable HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
            else:
                raise
        except RETRIABLE_EXCEPTIONS as e:
            error = "A retriable error occurred: %s" % e
        if error is not None:
            print(error)
            retry += 1
            if retry > MAX_RETRIES:
                exit("No longer attempting to retry.")
            max_sleep = 2 ** retry
            sleep_seconds = random.random() * max_sleep
            print("Sleeping %f seconds and then retrying..." % sleep_seconds)
            time.sleep(sleep_seconds)
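One way to replace the token.pickle file is to keep the serialized credentials in a Django model and load/save them around the OAuth flow. A minimal sketch, assuming a hypothetical YouTubeCredential model (the model and field names are illustrative, not part of the YouTube API):

import pickle
from django.db import models

class YouTubeCredential(models.Model):
    account_email = models.EmailField(unique=True)
    token_blob = models.BinaryField()  # pickled google-auth Credentials object

def load_creds(account_email):
    # Return stored Credentials, or None if this account has none yet
    row = YouTubeCredential.objects.filter(account_email=account_email).first()
    return pickle.loads(row.token_blob) if row else None

def save_creds(account_email, creds):
    # Upsert the freshly obtained or refreshed credentials
    YouTubeCredential.objects.update_or_create(
        account_email=account_email,
        defaults={"token_blob": pickle.dumps(creds)},
    )

In youtubeAuthenticate(), the two open("token.pickle", ...) blocks would then become load_creds(...) and save_creds(...) calls keyed by whichever account is posting.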

How do I log out inactive user sessions in Odoo 14?

This is the Odoo http.py file where I tried to modify the code for session logout:
def session_gc(session_store):
    if random.random() < 0.001:  ###!!! original: 0.001
        # we keep sessions for one week
        last_week = time.time() - 601  ###!!! original: 60*60*24*7
        for fname in os.listdir(session_store.path):
            path = os.path.join(session_store.path, fname)
            try:
                if os.path.getmtime(path) < last_week:
                    os.unlink(path)
            except OSError:
                pass
Create a .py file, e.g. session.py, and add the following code:
import psycopg2
import os
import json
import random
import werkzeug.contrib.sessions
import time

import odoo
from odoo import http
from odoo.tools.func import lazy_property


def with_cursor(func):
    def wrapper(self, *args, **kwargs):
        tries = 0
        while True:
            tries += 1
            try:
                return func(self, *args, **kwargs)
            except psycopg2.InterfaceError as e:
                if tries > 4:
                    raise e
                self._open_connection()
    return wrapper


class PGSessionStore(werkzeug.contrib.sessions.SessionStore):
    # FIXME This class is NOT thread-safe. Only use in worker mode
    def __init__(self, uri, session_class=None):
        super(PGSessionStore, self).__init__(session_class)
        self._uri = uri
        self._open_connection()
        self._setup_db()

    def __del__(self):
        self._cr.close()

    def _open_connection(self):
        cnx = odoo.sql_db.db_connect(self._uri, allow_uri=True)
        self._cr = cnx.cursor()
        self._cr.autocommit(True)

    @with_cursor
    def _setup_db(self):
        self._cr.execute("""
            CREATE TABLE IF NOT EXISTS http_sessions (
                sid varchar PRIMARY KEY,
                write_date timestamp without time zone NOT NULL,
                payload text NOT NULL
            )
        """)

    @with_cursor
    def save(self, session):
        payload = json.dumps(dict(session))
        self._cr.execute("""
            INSERT INTO http_sessions(sid, write_date, payload)
            VALUES (%(sid)s, now() at time zone 'UTC', %(payload)s)
            ON CONFLICT (sid)
            DO UPDATE SET payload = %(payload)s,
                          write_date = now() at time zone 'UTC'
        """, dict(sid=session.sid, payload=payload))

    @with_cursor
    def delete(self, session):
        self._cr.execute("DELETE FROM http_sessions WHERE sid=%s", [session.sid])

    @with_cursor
    def get(self, sid):
        self._cr.execute("UPDATE http_sessions SET write_date = now() at time zone 'UTC' WHERE sid=%s", [sid])
        self._cr.execute("SELECT payload FROM http_sessions WHERE sid=%s", [sid])
        try:
            data = json.loads(self._cr.fetchone()[0])
        except Exception:
            return self.new()
        return self.session_class(data, sid, False)

    @with_cursor
    def gc(self):
        self._cr.execute(
            "DELETE FROM http_sessions WHERE now() at time zone 'UTC' - write_date > '2 hours'"
        )


def session_gc(session_store):
    """
    Global cleaning of sessions, using either the standard way (delete session files)
    or the DB way.
    """
    if random.random() < 0.001:
        # we keep sessions for two hours
        if hasattr(session_store, 'gc'):
            session_store.gc()
            return
        two_hours = time.time() - 60*60*2
        for fname in os.listdir(session_store.path):
            path = os.path.join(session_store.path, fname)
            try:
                if os.path.getmtime(path) < two_hours:
                    os.unlink(path)
            except OSError:
                pass


class Root(http.Root):
    @lazy_property
    def session_store(self):
        """
        Store sessions in the DB rather than on the FS if the config permits.
        """
        # Setup http sessions
        session_db = odoo.tools.config.get('session_db')
        if session_db:
            return PGSessionStore(session_db, session_class=http.OpenERPSession)
        path = odoo.tools.config.session_dir
        return werkzeug.contrib.sessions.FilesystemSessionStore(path, session_class=http.OpenERPSession)


http.session_gc = session_gc
http.root = Root()
The code above hooks into the http module of the odoo package. The configured timeout is two hours and can be changed within the gc and session_gc functions.
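For the DB-backed branch to be taken, a session_db entry has to be present in the Odoo configuration, since the code reads it via odoo.tools.config.get('session_db'). An illustrative excerpt (the connection URI is a placeholder; adjust it to your PostgreSQL setup):

[options]
session_db = postgresql://odoo:odoo@localhost:5432/odoo_sessions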

AWS Sagemaker custom PyTorch model inference on raw image input

I am new to AWS SageMaker. I have a custom CV PyTorch model locally and have deployed it to a SageMaker endpoint. I used custom inference.py code to define the model_fn, input_fn, output_fn and predict_fn methods, so I'm able to generate predictions on JSON input containing a URL to the image. The code is quite straightforward:
def input_fn(request_body, content_type='application/json'):
    logging.info('Deserializing the input data...')
    image_transform = transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    if content_type:
        if content_type == 'application/json':
            input_data = json.loads(request_body)
            url = input_data['url']
            logging.info(f'Image url: {url}')
            image_data = Image.open(requests.get(url, stream=True).raw)
            return image_transform(image_data)
    raise Exception(f'Requested unsupported ContentType in content_type {content_type}')
Then I am able to invoke the endpoint with this code:
client = boto3.client('runtime.sagemaker')
inp = {"url": url}
inp = json.loads(json.dumps(inp))
response = client.invoke_endpoint(EndpointName='ENDPOINT_NAME',
                                  Body=json.dumps(inp),
                                  ContentType='application/json')
The problem is that, for the same URL, the request returns a slightly different image array locally than on SageMaker, which is why I obtain slightly different predictions. To check that at least the model weights are the same, I want to generate predictions on the image itself, downloaded both locally and on SageMaker. But I fail when trying to pass the image as input to the endpoint. E.g.:
def input_fn(request_body, content_type='application/json'):
    logging.info('Deserializing the input data...')
    image_transform = transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    if content_type == 'application/x-image':
        image_data = request_body
        return image_transform(image_data)
    raise Exception(f'Requested unsupported ContentType in content_type {content_type}')
Invoking the endpoint, I get this error:
ParamValidationError: Parameter validation failed:
Invalid type for parameter Body, value: {'img': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=630x326 at 0x7F78A61461D0>}, type: <class 'dict'>, valid types: <class 'bytes'>, <class 'bytearray'>, file-like object
Does anybody know how to generate SageMaker predictions on images with a PyTorch model?
As always, after asking I found a solution. As the error suggested, I had to convert the input to bytes or a bytearray. For those who may need the solution:
from io import BytesIO

img = Image.open(open(PATH, 'rb'))
img_byte_arr = BytesIO()
img.save(img_byte_arr, format=img.format)
img_byte_arr = img_byte_arr.getvalue()

client = boto3.client('runtime.sagemaker')
response = client.invoke_endpoint(EndpointName='ENDPOINT_NAME',
                                  Body=img_byte_arr,
                                  ContentType='application/x-image')
response_body = response['Body']
print(response_body.read())
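Note that with raw bytes, the server-side input_fn also has to rebuild a PIL image before the transforms can run; the dict-wrapped version above would fail inside image_transform. A minimal sketch of that branch (assuming the same image_transform as above):

from io import BytesIO
from PIL import Image

def input_fn(request_body, content_type='application/x-image'):
    if content_type == 'application/x-image':
        # request_body arrives as raw bytes; decode it into a PIL image first
        image_data = Image.open(BytesIO(request_body)).convert('RGB')
        return image_transform(image_data)
    raise Exception(f'Requested unsupported ContentType in content_type {content_type}')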

I want to pick the file name from event logs using a Python Lambda function

I want to get the file name from the CloudWatch log for a file I've uploaded to an S3 bucket, but it gives me KeyError: 'Records'. I checked the logs as well, and everything in my code matches the log event.
Here is my code:
def lambda_handler(event, context):
    s3 = boto3.client('s3')
    if event:
        print("Event:", event)
        for Records in event["Records"]:
            file_obj = event["Records"][0]["s3"]["object"]["key"]
            print("FileObj", file_obj)
            filename = str(file_obj['s3']['object']['key'])
            print("Filename:", filename)
            fileObj = s3.get_object(Bucket="prcbucket", key=filename)
            print("FileObj", fileObj)
The following should be sufficient to retrieve the key:
def lambda_handler(event, context):
    key = event['Records'][0]['s3']['object']['key']
    print(key)
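If the function is also triggered without an S3 event (for example by a console test event), event has no 'Records' key, which would explain the KeyError. A defensive sketch, assuming you only ever need the object keys:

def lambda_handler(event, context):
    # S3 put notifications carry a 'Records' list; other invocations may not
    for record in event.get('Records', []):
        key = record['s3']['object']['key']
        print("Key:", key)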

Gstreamer critical on Opencv3.0 with VideoCapture [duplicate]

I'm working on a GStreamer-based program using Python and the GObject introspection bindings. I'm trying to build this pipeline:
videomixer name=mix ! autovideosink \
uridecodebin uri=v4l2:///dev/video0 ! mix.
The pipeline works perfectly using gst-launch-1.0, but my Python program gives the errors:
(minimal.py:12168): GStreamer-CRITICAL **: gst_element_link_pads_full: assertion 'GST_IS_ELEMENT (src)' failed
on_error(): (GError('Internal data flow error.',), 'gstbasesrc.c(2865): gst_base_src_loop (): /GstPipeline:pipeline0/GstURIDecodeBin:uridecodebin0/GstV4l2Src:source:\nstreaming task paused, reason not-linked (-1)')
My code:
#!/usr/bin/python3
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst, Gtk, GdkX11, GstVideo

GObject.threads_init()
Gst.init(None)


class Source:
    def __init__(self, uri, pipeline, mixer):
        self.uri = uri
        self.pipeline = pipeline
        self.mixer = mixer
        self.src = Gst.ElementFactory.make('uridecodebin', None)
        self.pipeline.add(self.src)
        self.src.set_property('uri', uri)
        self.src.connect('pad-added', self.on_pad_added, self.src, self.mixer)

    def on_pad_added(self, element, pad, src, dest):
        name = pad.query_caps(None).to_string()
        print('on_pad_added:', name)
        if name.startswith('video/'):
            src.link(dest)


class Main:
    def __init__(self):
        self.window = Gtk.Window()
        self.window.connect('destroy', self.quit)
        self.window.set_default_size(1280, 720)
        self.drawingarea = Gtk.DrawingArea()
        self.window.add(self.drawingarea)
        self.pipeline = Gst.Pipeline()
        self.bus = self.pipeline.get_bus()
        self.bus.add_signal_watch()
        self.bus.connect('message::error', self.on_error)
        self.bus.enable_sync_message_emission()
        self.bus.connect('sync-message::element', self.on_sync_message)
        self.mixer = Gst.ElementFactory.make('videomixer', None)
        self.sink = Gst.ElementFactory.make('autovideosink', None)
        self.pipeline.add(self.mixer)
        self.pipeline.add(self.sink)
        self.mixer.link(self.sink)
        video = Source('v4l2:///dev/video0', self.pipeline, self.mixer)

    def run(self):
        self.window.show_all()
        # You need to get the XID after window.show_all(). You shouldn't get it
        # in the on_sync_message() handler because threading issues will cause
        # segfaults there.
        self.xid = self.drawingarea.get_property('window').get_xid()
        self.pipeline.set_state(Gst.State.PLAYING)
        Gtk.main()

    def quit(self, window):
        self.pipeline.set_state(Gst.State.NULL)
        Gtk.main_quit()

    def on_sync_message(self, bus, msg):
        if msg.get_structure().get_name() == 'prepare-window-handle':
            msg.src.set_window_handle(self.xid)

    def on_error(self, bus, msg):
        print('on_error():', msg.parse_error())


main = Main()
main.run()
I figured out the problem: I was linking the dynamically-created pad incorrectly.
src.link(dest)
Should have been:
pad.link(dest.get_compatible_pad(pad, None))
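For completeness, the corrected pad-added handler would look roughly like this (same names as in the code above):

def on_pad_added(self, element, pad, src, dest):
    name = pad.query_caps(None).to_string()
    print('on_pad_added:', name)
    if name.startswith('video/'):
        # Link the new decodebin pad to a compatible pad on the mixer
        pad.link(dest.get_compatible_pad(pad, None))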
If the element has not been added to the pipeline, this error will also occur. Ensure that the problematic element is added to the pipeline.
