I have a custom dataset that I'm using to write a popmon report to an S3 bucket:
from copy import deepcopy
from pathlib import Path, PurePosixPath
from typing import Any, Dict

import fsspec
from kedro.io.core import (
    AbstractVersionedDataSet,
    DataSetError,
    Version,
    get_filepath_str,
    get_protocol_and_path,
)


class ReportDataset(AbstractVersionedDataSet):
    def __init__(self, filepath: str, version: Version = None, credentials: Dict[str, Any] = None):
        _credentials = deepcopy(credentials) or {}
        protocol, path = get_protocol_and_path(filepath)
        self._protocol = protocol
        self._fs = fsspec.filesystem(self._protocol, **_credentials)
        super().__init__(
            filepath=PurePosixPath(path),
            version=version,
            exists_function=self._fs.exists,
            glob_function=self._fs.glob,
        )

    def _load(self):
        raise DataSetError("Write-only dataset")

    def _save(self, data) -> None:
        """Saves data to the specified filepath."""
        save_path = get_filepath_str(self._get_save_path(), self._protocol)
        save_dir = Path(save_path).parent
        save_dir.mkdir(parents=True, exist_ok=True)
        with open(save_path, "w+") as file:
            file.write(data.to_html())
And I'm getting the error below:
raise VersionNotFoundError(f"Did not find any versions for {self}")
kedro.io.core.VersionNotFoundError: Did not find any versions for ReportDataset(filepath=, protocol=s3, version=Version(load=None, save='2022-04-20T16.45.05.872Z'))
With the same code I am able to write to a local folder. I am using kedro==0.17.4.
Any suggestions?
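For reference, here is a minimal sketch of what I understand a fully fsspec-based _save would lookple like, writing through self._fs instead of the local Path.mkdir/open calls (an untested assumption on my part, not confirmed working code):

    def _save(self, data) -> None:
        """Sketch: write through the dataset's fsspec filesystem, not the local one."""
        save_path = get_filepath_str(self._get_save_path(), self._protocol)
        # fsspec handles the protocol; S3 has no real directories to pre-create
        with self._fs.open(save_path, "w") as file:
            file.write(data.to_html())
        # clear fsspec's listing cache so the new version is visible to glob/exists
        self._fs.invalidate_cache(save_path)

Locally the original code behaves the same, but on S3 the write in the sketch goes through s3fs rather than the local filesystem.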
I'm trying to upload videos to YouTube using Django and MSSQL. I want to store the user data in the DB so that I can log in from multiple accounts and post videos.
The official documentation provided by YouTube uses the file system: after login, all the user data gets saved to a file. I don't want to store any data in a file, since keeping credentials in files is a risk and not good practice. So how can I bypass this step, save the data directly to the DB, and retrieve it when I want to post videos from a specific account?
In short, I want to replace the pickle-file implementation with storing the credentials in the database (see the sketch after the code below).
Here's my code:
import os
import pathlib
import pickle
import random
import time

from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaFileUpload
from rest_framework.decorators import api_view
from rest_framework.response import Response

# constants as in the official YouTube upload sample (RETRIABLE_EXCEPTIONS simplified)
SCOPES = ["https://www.googleapis.com/auth/youtube.upload"]
MAX_RETRIES = 10
RETRIABLE_STATUS_CODES = [500, 502, 503, 504]
RETRIABLE_EXCEPTIONS = (IOError,)


def youtubeAuthenticate():
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "client_secrets.json"
    creds = None
    # the file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # if there are no (valid) credentials available, let the user log in
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)
            creds = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)
    return build(api_service_name, api_version, credentials=creds)


@api_view(['GET', 'POST'])
def postVideoYT(request):
    youtube = youtubeAuthenticate()
    print('yt', youtube)
    try:
        initialize_upload(youtube, request.data)
    except HttpError as e:
        print("An HTTP error %d occurred:\n%s" % (e.resp.status, e.content))
    return Response("Hello")


def initialize_upload(youtube, options):
    print('options', options)
    print("title", options['title'])
    # tags = None
    # if options.keywords:
    #     tags = options.keywords.split(",")
    body = dict(
        snippet=dict(
            title=options['title'],
            description=options['description'],
            tags=options['keywords'],
            categoryId=options['categoryId']
        ),
        status=dict(
            privacyStatus=options['privacyStatus']
        )
    )
    # call the API's videos.insert method to create and upload the video
    insert_request = youtube.videos().insert(
        part=",".join(body.keys()),
        body=body,
        media_body=MediaFileUpload(options['file'], chunksize=-1, resumable=True)
    )
    path = pathlib.Path(options['file'])
    ext = path.suffix
    getSize = os.path.getsize(options['file'])
    resumable_upload(insert_request, ext, getSize)


# This method implements an exponential backoff strategy to resume a
# failed upload.
def resumable_upload(insert_request, ext, getSize):
    response = None
    error = None
    retry = 0
    while response is None:
        try:
            print("Uploading file...")
            status, response = insert_request.next_chunk()
            if response is not None:
                if 'id' in response:
                    print("Video id '%s' was successfully uploaded." % response['id'])
                else:
                    exit("The upload failed with an unexpected response: %s" % response)
        except HttpError as e:
            if e.resp.status in RETRIABLE_STATUS_CODES:
                error = "A retriable HTTP error %d occurred:\n%s" % (e.resp.status, e.content)
            else:
                raise
        except RETRIABLE_EXCEPTIONS as e:
            error = "A retriable error occurred: %s" % e
        if error is not None:
            print(error)
            retry += 1
            if retry > MAX_RETRIES:
                exit("No longer attempting to retry.")
            max_sleep = 2 ** retry
            sleep_seconds = random.random() * max_sleep
            print("Sleeping %f seconds and then retrying..." % sleep_seconds)
            time.sleep(sleep_seconds)
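One way to replace token.pickle with the database is to persist the credentials as JSON rather than pickling them. A minimal sketch, assuming a hypothetical Django model named StoredCredentials (the model and helper names are illustrative, not from the question):

    import json

    from django.db import models
    from google.oauth2.credentials import Credentials


    class StoredCredentials(models.Model):
        # one row per YouTube account; token_json holds the serialized credentials
        account = models.CharField(max_length=255, unique=True)
        token_json = models.TextField()


    def save_creds(account, creds):
        # Credentials.to_json() serializes the token, refresh token, client id/secret and scopes
        StoredCredentials.objects.update_or_create(
            account=account, defaults={"token_json": creds.to_json()}
        )


    def load_creds(account):
        row = StoredCredentials.objects.filter(account=account).first()
        if row is None:
            return None
        return Credentials.from_authorized_user_info(json.loads(row.token_json), SCOPES)

youtubeAuthenticate would then call load_creds(account) instead of reading token.pickle, and save_creds(account, creds) after the OAuth flow completes. This also makes the multi-account requirement straightforward, since each account maps to its own row.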
This is the Odoo http.py file where I am trying to modify the code for session logout:
def session_gc(session_store):
    if random.random() < 0.001:  ###!!! 0.001:
        # we keep session one week
        last_week = time.time() - 601  ###!!! 60*60*24*7
        for fname in os.listdir(session_store.path):
            path = os.path.join(session_store.path, fname)
            try:
                if os.path.getmtime(path) < last_week:
                    os.unlink(path)
            except OSError:
                pass
Create a .py file, e.g. session.py, and add the following code:
import psycopg2
import os
import json
import random
import werkzeug.contrib.sessions
import time

import odoo
from odoo import http
from odoo.tools.func import lazy_property


def with_cursor(func):
    def wrapper(self, *args, **kwargs):
        tries = 0
        while True:
            tries += 1
            try:
                return func(self, *args, **kwargs)
            except psycopg2.InterfaceError as e:
                if tries > 4:
                    raise e
                self._open_connection()
    return wrapper


class PGSessionStore(werkzeug.contrib.sessions.SessionStore):
    # FIXME This class is NOT thread-safe. Only use in worker mode
    def __init__(self, uri, session_class=None):
        super(PGSessionStore, self).__init__(session_class)
        self._uri = uri
        self._open_connection()
        self._setup_db()

    def __del__(self):
        self._cr.close()

    def _open_connection(self):
        cnx = odoo.sql_db.db_connect(self._uri, allow_uri=True)
        self._cr = cnx.cursor()
        self._cr.autocommit(True)

    @with_cursor
    def _setup_db(self):
        self._cr.execute("""
            CREATE TABLE IF NOT EXISTS http_sessions (
                sid varchar PRIMARY KEY,
                write_date timestamp without time zone NOT NULL,
                payload text NOT NULL
            )
        """)

    @with_cursor
    def save(self, session):
        payload = json.dumps(dict(session))
        self._cr.execute("""
            INSERT INTO http_sessions(sid, write_date, payload)
            VALUES (%(sid)s, now() at time zone 'UTC', %(payload)s)
            ON CONFLICT (sid)
            DO UPDATE SET payload = %(payload)s,
                          write_date = now() at time zone 'UTC'
        """, dict(sid=session.sid, payload=payload))

    @with_cursor
    def delete(self, session):
        self._cr.execute("DELETE FROM http_sessions WHERE sid=%s", [session.sid])

    @with_cursor
    def get(self, sid):
        self._cr.execute("UPDATE http_sessions SET write_date = now() at time zone 'UTC' WHERE sid=%s", [sid])
        self._cr.execute("SELECT payload FROM http_sessions WHERE sid=%s", [sid])
        try:
            data = json.loads(self._cr.fetchone()[0])
        except Exception:
            return self.new()
        return self.session_class(data, sid, False)

    @with_cursor
    def gc(self):
        self._cr.execute(
            "DELETE FROM http_sessions WHERE now() at time zone 'UTC' - write_date > '2 hours'"
        )


def session_gc(session_store):
    """
    Global cleaning of sessions using either the standard way (delete session files)
    or the DB way.
    """
    if random.random() < 0.001:
        # we keep sessions for two hours
        if hasattr(session_store, 'gc'):
            session_store.gc()
            return
        two_hours = time.time() - 60*60*2
        for fname in os.listdir(session_store.path):
            path = os.path.join(session_store.path, fname)
            try:
                if os.path.getmtime(path) < two_hours:
                    os.unlink(path)
            except OSError:
                pass


class Root(http.Root):
    @lazy_property
    def session_store(self):
        """
        Store sessions in the DB rather than on the FS if the config parameter permits it.
        """
        # Setup http sessions
        session_db = odoo.tools.config.get('session_db')
        if session_db:
            return PGSessionStore(session_db, session_class=http.OpenERPSession)
        path = odoo.tools.config.session_dir
        return werkzeug.contrib.sessions.FilesystemSessionStore(path, session_class=http.OpenERPSession)


http.session_gc = session_gc
http.root = Root()
This code inherits from the http file in the Odoo module. The configured timeout is two hours; it can be changed within the session_gc function (and in the interval used by PGSessionStore.gc).
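To activate the DB-backed store, the session_store property above reads a session_db key from the Odoo configuration. A sketch of the corresponding odoo.conf entry, assuming a PostgreSQL connection URI (the values shown are illustrative):

    [options]
    ; connection URI for the dedicated session database
    session_db = postgres://odoo:odoo@localhost:5432/sessions

When the key is absent, the code falls back to the standard filesystem session store.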
I am new to AWS SageMaker. I have a custom CV PyTorch model locally and have deployed it to a SageMaker endpoint. I used custom inference.py code to define the model_fn, input_fn, output_fn and predict_fn methods, so I'm able to generate predictions on JSON input containing a URL to the image. The code is quite straightforward:
import json
import logging

import requests
from PIL import Image
from torchvision import transforms


def input_fn(request_body, content_type='application/json'):
    logging.info('Deserializing the input data...')
    image_transform = transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    if content_type == 'application/json':
        input_data = json.loads(request_body)
        url = input_data['url']
        logging.info(f'Image url: {url}')
        image_data = Image.open(requests.get(url, stream=True).raw)
        return image_transform(image_data)
    raise Exception(f'Requested unsupported ContentType in content_type {content_type}')
Then I am able to invoke the endpoint with this code:
import json

import boto3

client = boto3.client('runtime.sagemaker')
inp = {"url": url}
response = client.invoke_endpoint(EndpointName='ENDPOINT_NAME',
                                  Body=json.dumps(inp),
                                  ContentType='application/json')
The problem is that locally the URL request returns a slightly different image array than it does on SageMaker, which is why I get slightly different predictions for the same URL. To check that at least the model weights are the same, I want to generate predictions on the image itself, downloaded both locally and on SageMaker. But I fail when trying to pass an image as input to the endpoint. E.g.:
def input_fn(request_body, content_type='application/json'):
    logging.info('Deserializing the input data...')
    image_transform = transforms.Compose([
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    if content_type == 'application/x-image':
        image_data = request_body
        return image_transform(image_data)
    raise Exception(f'Requested unsupported ContentType in content_type {content_type}')
When invoking the endpoint I get this error:
ParamValidationError: Parameter validation failed:
Invalid type for parameter Body, value: {'img': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=630x326 at 0x7F78A61461D0>}, type: <class 'dict'>, valid types: <class 'bytes'>, <class 'bytearray'>, file-like object
Does anybody know how to generate SageMaker predictions with a PyTorch model on images?
As always, after asking I found a solution. As the error suggested, I had to convert the input to bytes or a bytearray. For those who may need it:
from io import BytesIO

img = Image.open(open(PATH, 'rb'))
img_byte_arr = BytesIO()
img.save(img_byte_arr, format=img.format)
img_byte_arr = img_byte_arr.getvalue()

client = boto3.client('runtime.sagemaker')
response = client.invoke_endpoint(EndpointName='ENDPOINT_NAME',
                                  Body=img_byte_arr,
                                  ContentType='application/x-image')
response_body = response['Body']
print(response_body.read())
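Note that on the server side the application/x-image branch shown earlier still passes request_body straight into the transform. A minimal sketch of the decoding step that would presumably be needed there, assuming the raw bytes arrive in request_body (my reconstruction, not the asker's confirmed code):

    from io import BytesIO

    from PIL import Image

    def input_fn(request_body, content_type='application/x-image'):
        if content_type == 'application/x-image':
            # request_body arrives as raw bytes; wrap it so PIL can decode it
            image_data = Image.open(BytesIO(request_body))
            return image_transform(image_data)  # image_transform as defined above
        raise Exception(f'Requested unsupported ContentType in content_type {content_type}')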
I want to get the file name from a CloudWatch log for a file I've uploaded to an S3 bucket, but it gives me KeyError: 'Records'. I checked the logs as well, and everything in my code matches the log event.
Here is my code:
def lambda_handler(event, context):
    s3 = boto3.client('s3')
    if event:
        print("Event:", event)
        for Records in event["Records"]:
            file_obj = event["Records"][0]["s3"]["object"]["key"]
            print("FileObj", file_obj)
            filename = str(file_obj['s3']['object']['key'])
            print("Filename:", filename)
            fileObj = s3.get_object(Bucket="prcbucket", key=filename)
            print("FileObj", fileObj)
The following should be sufficient to retrieve the key:
def lambda_handler(event, context):
    key = event['Records'][0]['s3']['object']['key']
    print(key)
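If the handler can also be invoked without S3 records (for example by a console test event), a defensive variant avoids the KeyError entirely; a sketch, assuming the goal is simply to print each uploaded key:

    def lambda_handler(event, context):
        # event.get() returns an empty list when the invocation carries no S3 records,
        # so a bare test event no longer raises KeyError: 'Records'
        for record in event.get('Records', []):
            key = record['s3']['object']['key']
            print(key)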
I'm working on a GStreamer-based program using Python and the GObject introspection bindings. I'm trying to build this pipeline:
videomixer name=mix ! autovideosink \
uridecodebin uri=v4l2:///dev/video0 ! mix.
The pipeline works perfectly using gst-launch-1.0, but my Python program gives the errors:
(minimal.py:12168): GStreamer-CRITICAL **: gst_element_link_pads_full: assertion 'GST_IS_ELEMENT (src)' failed
on_error(): (GError('Internal data flow error.',), 'gstbasesrc.c(2865): gst_base_src_loop (): /GstPipeline:pipeline0/GstURIDecodeBin:uridecodebin0/GstV4l2Src:source:\nstreaming task paused, reason not-linked (-1)')
My code:
#!/usr/bin/python3
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst, Gtk, GdkX11, GstVideo

GObject.threads_init()
Gst.init(None)


class Source:
    def __init__(self, uri, pipeline, mixer):
        self.uri = uri
        self.pipeline = pipeline
        self.mixer = mixer
        self.src = Gst.ElementFactory.make('uridecodebin', None)
        self.pipeline.add(self.src)
        self.src.set_property('uri', uri)
        self.src.connect('pad-added', self.on_pad_added, self.src, self.mixer)

    def on_pad_added(self, element, pad, src, dest):
        name = pad.query_caps(None).to_string()
        print('on_pad_added:', name)
        if name.startswith('video/'):
            src.link(dest)


class Main:
    def __init__(self):
        self.window = Gtk.Window()
        self.window.connect('destroy', self.quit)
        self.window.set_default_size(1280, 720)
        self.drawingarea = Gtk.DrawingArea()
        self.window.add(self.drawingarea)

        self.pipeline = Gst.Pipeline()
        self.bus = self.pipeline.get_bus()
        self.bus.add_signal_watch()
        self.bus.connect('message::error', self.on_error)
        self.bus.enable_sync_message_emission()
        self.bus.connect('sync-message::element', self.on_sync_message)

        self.mixer = Gst.ElementFactory.make('videomixer', None)
        self.sink = Gst.ElementFactory.make('autovideosink', None)
        self.pipeline.add(self.mixer)
        self.pipeline.add(self.sink)
        self.mixer.link(self.sink)

        video = Source('v4l2:///dev/video0', self.pipeline, self.mixer)

    def run(self):
        self.window.show_all()
        # You need to get the XID after window.show_all(). You shouldn't get it
        # in the on_sync_message() handler because threading issues will cause
        # segfaults there.
        self.xid = self.drawingarea.get_property('window').get_xid()
        self.pipeline.set_state(Gst.State.PLAYING)
        Gtk.main()

    def quit(self, window):
        self.pipeline.set_state(Gst.State.NULL)
        Gtk.main_quit()

    def on_sync_message(self, bus, msg):
        if msg.get_structure().get_name() == 'prepare-window-handle':
            msg.src.set_window_handle(self.xid)

    def on_error(self, bus, msg):
        print('on_error():', msg.parse_error())


main = Main()
main.run()
I figured out the problem: I was linking the dynamically created pad incorrectly.
src.link(dest)
Should have been:
pad.link(dest.get_compatible_pad(pad, None))
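In context, the corrected handler would look like this (a sketch reconstructed from the code above):

    def on_pad_added(self, element, pad, src, dest):
        name = pad.query_caps(None).to_string()
        print('on_pad_added:', name)
        if name.startswith('video/'):
            # link the newly created decodebin pad to a compatible
            # (request) sink pad on the videomixer
            sink_pad = dest.get_compatible_pad(pad, None)
            pad.link(sink_pad)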
This error can also occur if an element has not been added to the pipeline. Ensure that the problematic element is added to the pipeline before linking.
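For example (illustrative element names):

    mixer = Gst.ElementFactory.make('videomixer', None)
    sink = Gst.ElementFactory.make('autovideosink', None)
    # elements must be added to the pipeline before they are linked
    pipeline.add(mixer)
    pipeline.add(sink)
    mixer.link(sink)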