I am trying to deploy a ML model using Streamlit and Pycaret on Heroku.
When I try deploying the app, I get the following error:
ModuleNotFoundError: No module named 'pycaret.internal'
Traceback:
File "/app/.heroku/python/lib/python3.6/site-packages/streamlit/ScriptRunner.py", line 322, in _run_script
exec(code, module.__dict__)
File "/app/Final.py", line 11, in <module>
tuned_cat=joblib.load('Cat.pkl')
File "/app/.heroku/python/lib/python3.6/site-packages/joblib/numpy_pickle.py", line 585, in load
obj = _unpickle(fobj, filename, mmap_mode)
File "/app/.heroku/python/lib/python3.6/site-packages/joblib/numpy_pickle.py", line 504, in _unpickle
obj = unpickler.load()
File "/app/.heroku/python/lib/python3.6/pickle.py", line 1050, in load
dispatch[key[0]](self)
File "/app/.heroku/python/lib/python3.6/pickle.py", line 1338, in load_global
klass = self.find_class(module, name)
File "/app/.heroku/python/lib/python3.6/pickle.py", line 1388, in find_class
__import__(module, level=0)
I have the following dependencies in the requirements.txt file:
pycaret==1.0.0
streamlit==0.58.0
I had to go with the pycaret version 1.0 because when I chose the latest version the slug size on Heroku would go beyond 500M and it would not deploy. Compiled slug size: 552M is too large (max is 500M).
Link to Github
Final.py:
import streamlit as st
import joblib
from pycaret.classification import *
tuned_cat=joblib.load('Cat.pkl')
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('CPU')
def run():
add_selectbox = st.sidebar.selectbox(
"What would you like to do?",
("Online Prediction","Batch Prediction"))
st.sidebar.info('This app is created to predict if the applicant should be granted a loan or not.')
st.title("Loan Prediction App")
if add_selectbox == 'Online Prediction':
gender = st.selectbox('Gender',['Female','Male'])
married = st.selectbox('Married',['No','Yes'])
depend = st.selectbox('Dependents',['0','1','2','3+'])
edu = st.selectbox('Education',['Graduate','Not Graduate'])
self = st.selectbox('Self Employed',['No','Yes'])
app_inc = st.number_input ('Applicant Income')
co_inc = st.number_input ('Coapplicant Income')
amt = st.number_input ('Loan Amount')
term = st.number_input ('Loan Amount Term')
credit = st.selectbox('Credit History',['0','1'])
prop_are = st.selectbox('Property Area',['Rural','Semiurban','Urban'])
output=""
test_df = pd.DataFrame()
test_df['Gender']= [gender]
test_df['Married']=[married]
test_df['Dependents']=[depend]
test_df['Education']=[edu]
test_df['Self_Employed']=[self]
test_df['ApplicantIncome']=[app_inc]
test_df['CoapplicantIncome']=[co_inc]
test_df['LoanAmount']=[amt]
test_df['Loan_Amount_Term']=[term]
test_df['Credit_History']=[credit]
test_df['Property_Area']=[prop_are]
if st.button("Predict"):
Cat_pred=predict_model(tuned_cat,data=test_df)['Label']
output = Cat_pred.values
if(output==0):
text="Rejected"
st.error(text)
elif(output==1):
text="Approved"
st.success(text)
if add_selectbox == 'Batch Prediction':
file_upload = st.file_uploader("Upload excel file for predictions", type=["xlsx"])
if file_upload is not None:
data = pd.read_excel(file_upload)
st.success('File uploaded successfully!')
Cat_pred=predict_model(tuned_cat,data=data)['Label']
data['Prediction']=Cat_pred
st.write(data)
st.markdown(get_table_download_link(data), unsafe_allow_html=True)
import base64
def get_table_download_link(df):
"""Generates a link allowing the data in a given panda dataframe to be downloaded
in: dataframe
out: href string
"""
csv = df.to_csv(index=False)
b64 = base64.b64encode(
csv.encode()
).decode() # some strings <-> bytes conversions necessary here
return f'Download csv file'
if __name__ == '__main__':
run()
Related
I'm trying to remove some directorys from my desktop.
The name of the directorys follow the format year+month (Ex.: 201808), i need to remove the directorys 2+ months before (Ex.: 201705, 201709, 201806).
Using the os.remove(path), the python return a error:
Traceback (most recent call last):
File "C:\Users\Usuario\Desktop\teste.py", line 36, in <module>
os.remove(caminhoPastaFinal)
PermissionError: [WinError 5] Acesso negado: 'C:\\Users\\Usuario\\Desktop\\Área de testes\\pasta1\\pasta2\\pasta3\\pasta4\\201712'
I tried to run the script as admin in the CMD, but i got the same error.
I'm using Windows 10.
Why i don't have permission to do that?
Follow the code:
import os
from datetime import *
def verificarNome(nomePasta):
mes=nomePasta[-2:]
ano=nomePasta[:-2]
if ano<anoAtual:
return True
elif mes<=mesAtual:
return True
return False
dataAtual = datetime.now()
anoAtual = str(dataAtual.year)
mesAtual = dataAtual.month
if mesAtual < 10:
mesAtual = "0"+str(mesAtual-2)
else:
mesAtual = str(mesAtual-2)
caminhoPai = 'C:\\Users\\Usuario\\Desktop\\Área de testes'
for caminhoPasta in os.listdir(caminhoPai): #Logo farei uma função recursiva que diminua esse código, mas ainda tenho que estudá-las
caminhoFilho1 = caminhoPai+"\\"+caminhoPasta
for caminhoPasta2 in os.listdir(caminhoFilho1):
caminhoFilho2 = caminhoFilho1+"\\"+caminhoPasta2
for caminhoPasta3 in os.listdir(caminhoFilho2):
caminhoFilho3 = caminhoFilho2+"\\"+caminhoPasta3
for caminhoPasta4 in os.listdir(caminhoFilho3):
caminhoFilho4 = caminhoFilho3+"\\"+caminhoPasta4
arrayPastasVerificar = os.listdir(caminhoFilho4)
for pastaFinal in arrayPastasVerificar:
if verificarNome(pastaFinal):
caminhoPastaFinal = caminhoFilho4+"\\"+pastaFinal
os.remove(caminhoPastaFinal)
import os
import shutil
import datetime
month = datetime.date.today().replace(day=1)
oneMonthBefore= (month - datetime.timedelta(days=1)).replace(day=1)
twoMonthBefore= (oneMonthBefore - datetime.timedelta(days=1)).replace(day=1)
root = 'C:\\Users\\Admin\\Desktop\\Directorys'
for path, dirs, files in os.walk(root):
for dir in dirs[:]:
try:
dirDate = datetime.datetime.strptime(dir, '%Y%m')
except ValueError:
continue
if dirDate < twoMonthBefore:
dirs.remove(dir)
shutil.rmtree(os.path.join(path, dir))
Here is what I have, I know this works without encryption and I can run
ansible-vault edit common.yml
with
ANSIBLE_VAULT_PASSWORD_FILE=~/.vault_pass.txt
set in the env.
from collections import namedtuple
from ansible.parsing.dataloader import DataLoader
from ansible.vars import VariableManager
from ansible.inventory import Inventory
from ansible.playbook import Playbook
from ansible.executor.playbook_executor import PlaybookExecutor
variable_manager = VariableManager()
loader = DataLoader()
inventory = Inventory(loader=loader, variable_manager=variable_manager, host_list='playbooks/hosts')
playbook_path = 'playbooks/' + PROJECT + '.yml'
Options = namedtuple('Options', ['connection', 'forks', 'become', 'become_method', 'become_user', 'check', 'listhosts', 'listtasks', 'listtags', 'syntax', 'module_path', 'vault_password_file'])
options = Options(connection='ssh', forks=5, become=None, become_method=None, become_user=None, check=False, listhosts=False, listtasks=False, listtags=False, syntax=False, module_path="", vault_password_file=os.environ['ANSIBLE_VAULT_PASSWORD_FILE'])
variable_manager.extra_vars = {'CAP_VERSION': CAP_VERSION, 'cluster': PROJECT + '-' + ENVIRONMENT, 'environ': ENVIRONMENT, 'rpm': rpmSource, 'VRSN': ARTI_BRANCH }
passwords = {}
pbex = PlaybookExecutor(playbooks=[playbook_path], inventory=inventory, variable_manager=variable_manager, loader=loader, options=options, passwords=passwords)
results = pbex.run()
It fails to decrypt the common.yml
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/opt/ansible/ansible/lib/ansible/executor/playbook_executor.py", line 125, in run
all_vars = self._variable_manager.get_vars(loader=self._loader, play=play)
File "/opt/ansible/ansible/lib/ansible/vars/__init__.py", line 304, in get_vars
data = preprocess_vars(loader.load_from_file(vars_file))
File "/opt/ansible/ansible/lib/ansible/parsing/dataloader.py", line 119, in load_from_file
(file_data, show_content) = self._get_file_contents(file_name)
File "/opt/ansible/ansible/lib/ansible/parsing/dataloader.py", line 178, in _get_file_contents
data = self._vault.decrypt(data, filename=b_file_name)
File "/opt/ansible/ansible/lib/ansible/parsing/vault/__init__.py", line 264, in decrypt
raise AnsibleError(msg)
ansible.errors.AnsibleError: Decryption failed on /ansible/playbooks/vars/common.yml
In ansible 2.2.2 (not sure about other versions since the API can change frequently):
You can manually set the password in the python script like so:
loader = DataLoader()
loader.set_vault_password('mypass')
Or you could load the password from your vault password file:
import os
loader = DataLoader()
with open('{}/.vault_pass.txt'.format(os.path.expanduser('~')), 'r') as file:
loader.set_vault_password(file.read().splitlines()[0])
You can skip importing os and just put in your absolute path to the .vault_pass.txt file.
If you are sure your ANSIBLE_VAULT_PASSWORD_FILE is set in env:
import os
loader = DataLoader()
with open(os.environ['ANSIBLE_VAULT_PASSWORD_FILE'], 'r') as file:
loader.set_vault_password(file.read().splitlines()[0])
I am new to multiprocessing in python.I am extracting some features from a list of 70,000 URLs. I have them from 2 different files. After the feature extraction process I pass the result to a list and then to a CSV file.
The code runs but then stops with the error.I tried to catch the error but it produced another one.
Python version = 3.5
from feature_extractor import Feature_extraction
import pandas as pd
from pandas.core.frame import DataFrame
import sys
from multiprocessing.dummy import Pool as ThreadPool
import threading as thread
from multiprocessing import Process,Manager,Array
import time
class main():
lst = None
def __init__(self):
manager = Manager()
self.lst = manager.list()
self.dostuff()
self.read_lst()
def feature_extraction(self,url):
if self.lst is None:
self.lst = []
features = Feature_extraction(url)
self.lst.append(features.get_features())
print(len(self.lst))
def Pool(self,url):
pool = ThreadPool(8)
results = pool.map(self.feature_extraction, url)
def dostuff(self):
df = pd.read_csv('verified_online.csv',encoding='latin-1')
df['label'] = df['phish_id'] * 0
mal_urls = df['url']
df2 = pd.read_csv('new.csv')
df2['label'] = df['phish_id']/df['phish_id']
ben_urls = df2['urls']
t = Process(target=self.Pool,args=(mal_urls,))
t2 = Process(target=self.Pool,args=(ben_urls,))
t.start()
t2.start()
t.join()
t2.join
def read_lst(self):
nw_df = DataFrame(list(self.lst))
nw_df.columns = ['Redirect count','ssl_classification','url_length','hostname_length','subdomain_count','at_sign_in_url','exe_extension_in_request_url','exe_extension_in_landing_url',
'ip_as_domain_name','no_of_slashes_in requst_url','no_of_slashes_in_landing_url','no_of_dots_in_request_url','no_of_dots_in_landing_url','tld_value','age_of_domain',
'age_of_last_modified','content_length','same_landing_and_request_ip','same_landing_and_request_url']
frames = [df['label'],df2['label']]
new_df = pd.concat(frames)
new_df = new_df.reset_index()
nw_df['label'] = new_df['label']
nw_df.to_csv('dataset.csv', sep=',', encoding='latin-1')
if __name__ == '__main__':
start_time = time.clock()
try:
main()
except BrokenPipeError:
print("broken pipe....")
pass
print (time.clock() - start_time, "seconds")
Error Traceback
Process Process-3:
Traceback (most recent call last):
File "F:\Continuum\Anaconda3\lib\multiprocessing\connection.py", line 312, in _recv_bytes
nread, err = ov.GetOverlappedResult(True)
BrokenPipeError: [WinError 109] The pipe has been ended
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:\Continuum\Anaconda3\lib\multiprocessing\process.py", line 249, in _bootstrap
self.run()
File "F:\Continuum\Anaconda3\lib\multiprocessing\process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "H:\Projects\newoproject\src\main.py", line 33, in Pool
results = pool.map(self.feature_extraction, url)
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 608, in get
raise self._value
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "H:\Projects\newoproject\src\main.py", line 26, in feature_extraction
self.lst.append(features.get_features())
File "<string>", line 2, in append
File "F:\Continuum\Anaconda3\lib\multiprocessing\managers.py", line 717, in _callmethod
kind, result = conn.recv()
File "F:\Continuum\Anaconda3\lib\multiprocessing\connection.py", line 250, in recv
buf = self._recv_bytes()
File "F:\Continuum\Anaconda3\lib\multiprocessing\connection.py", line 321, in _recv_bytes
raise EOFError
EOFError
My response is late and does not address the posted problem directly; but hopefully will provide a clue to others who encounter similar errors.
Errors that I encountered:
BrokenPipeError
WinError 109 The pipe has been ended &
WinError 232 The pipe is being closed
Observed with Python 36 on Windows 7, when:
(1) the same async function was submitted multiple times, each time with a different instance of a multiprocessing data store, a Queue in my case (multiprocessing.Manager().Queue())
AND
(2) the references to the Queues were saved in short-life local variables in the enveloping function.
The errors were occurring despite the fact that the Queues, shared with the successfully spawned and executing async-functions, had items and would still be in active use (put() & get()) at the time of exception.
The error consistently occurred when the same async_func was called the 2nd time with a 2nd instance of the Queue. Immediately after apply_async() of the function, the connection to the 1st Queue supplied to the async_func the 1st time, would get broken.
The issue got resolved when the references to the Queues were saved in non-overlapping (like a Queue-list) & longer-life variables (like variables returned to functions higher in the call-stack) in the enveloping function.
I finally understood example how to replace pickle with dill from the following discussion: pickle-dill.
For example, the following code worked for me
import os
import dill
import multiprocessing
def run_dill_encoded(what):
fun, args = dill.loads(what)
return fun(*args)
def apply_async(pool, fun, args):
return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))
if __name__ == '__main__':
pool = multiprocessing.Pool(5)
results = [apply_async(pool, lambda x: x*x, args=(x,)) for x in range(1,7)]
output = [p.get() for p in results]
print(output)
I tried to apply the same philosophy to pymongo. The following code
import os
import dill
import multiprocessing
import pymongo
def run_dill_encoded(what):
fun, args = dill.loads(what)
return fun(*args)
def apply_async(pool, fun, args):
return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))
def write_to_db(value_to_insert):
client = pymongo.MongoClient('localhost', 27017)
db = client['somedb']
collection = db['somecollection']
result = collection.insert_one({"filed1": value_to_insert})
client.close()
if __name__ == '__main__':
pool = multiprocessing.Pool(5)
results = [apply_async(pool, write_to_db, args=(x,)) for x in ['one', 'two', 'three']]
output = [p.get() for p in results]
print(output)
produces error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\...\temp2.py", line 10, in run_dill_encoded
return fun(*args)
File "C:\...\temp2.py", line 21, in write_to_db
client = pymongo.MongoClient('localhost', 27017)
NameError: name 'pymongo' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/.../temp2.py", line 32, in <module>
output = [p.get() for p in results]
File "C:/.../temp2.py", line 32, in <listcomp>
output = [p.get() for p in results]
File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
raise self._value
NameError: name 'pymongo' is not defined
Process finished with exit code 1
What is wrong?
As I mentioned in the comments, you need to put an import pymongo inside the function write_to_db. This is because when the function is serialized, it does not take along any of the global references with it when it is shipped to the other process space.
I have already looked at and tried the resolutions to this question that others have posted. One user said that to try and change my setup.py file from:
from distutils.core import setup
import py2exe
setup(console=["dev.py"])
to
from distutils.core import setup
import py2exe
setup(service=["dev.py"])
I got the following results:
running py2exe
*** searching for required modules ***
Traceback (most recent call last):
File "C:\Python27\Scripts\distutils-setup.py", line 5, in <module>
setup(service=["C:\Python27\Scripts\dev.py"])
File "C:\Python27\lib\distutils\core.py", line 152, in setup
dist.run_commands()
File "C:\Python27\lib\distutils\dist.py", line 953, in run_commands
self.run_command(cmd)
File "C:\Python27\lib\distutils\dist.py", line 972, in run_command
cmd_obj.run()
File "C:\Python27\lib\site-packages\py2exe\build_exe.py", line 243, in run
self._run()
File "C:\Python27\lib\site-packages\py2exe\build_exe.py", line 296, in _run
self.find_needed_modules(mf, required_files, required_modules)
File "C:\Python27\lib\site-packages\py2exe\build_exe.py", line 1274, in
find_needed_modules
mf.import_hook(mod)
File "C:\Python27\lib\site-packages\py2exe\mf.py", line 719, in import_hook
return Base.import_hook(self,name,caller,fromlist,level)
File "C:\Python27\lib\site-packages\py2exe\mf.py", line 136, in import_hook
q, tail = self.find_head_package(parent, name)
File "C:\Python27\lib\site-packages\py2exe\mf.py", line 204, in find_head_package
raise ImportError, "No module named " + qname
ImportError: No module named dev
Now, when I run py2exe with "console" in my setup script it works fine, but the service doesn't start and I get the error. When I run py2exe with "service" in my setup script py2exe doesn't run and tells me it can't find my module.
I have tried to re-install py2exe to no resolution. I have also tried to change:
def SvcDoRun(self):
servicemanager.LogMsg(servicemanager.EVENTLOG_INFORMATION_TYPE,
servicemanager.PYS_SERVICE_STARTED,
(self._svc_name_,''))
to
def SvcDoRun(self):
self.ReportServiceStatus(win32service.SERVICE_RUNNING)
win32event.WaitForSingleObject(self.hWaitStop, win32event.INFINITE)
Didn't make a difference either. CAN ANYONE HELP ME PLEASE? Here is what I am working on. It monitors a server and spits back a text file every 60 seconds which I use to monitor my servers at any given minute. Any help you guys and gals can give would be great.
import win32serviceutil
import win32service
import win32event
import servicemanager
import socket
import wmi
import _winreg
from time import sleep
import os
class SrvMonSvc (win32serviceutil.ServiceFramework):
_svc_name_ = "SrvMonSvc"
_svc_display_name_ = "Server Monitor"
def __init__(self,args):
win32serviceutil.ServiceFramework.__init__(self,args)
self.hWaitStop = win32event.CreateEvent(None,0,0,None)
socket.setdefaulttimeout(60)
def SvcStop(self):
self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
win32event.SetEvent(self.hWaitStop)
def SvcDoRun(self):
servicemanager.LogMsg(servicemanager.EVENTLOG_INFORMATION_TYPE,
servicemanager.PYS_SERVICE_STARTED,
(self._svc_name_,''))
self.main()
def main(self):
host = wmi.WMI(namespace="root/default").StdRegProv
try:
result, api = host.GetStringValue(
hDefKey = _winreg.HKEY_LOCAL_MACHINE,
sSubKeyName = "SOFTWARE\Server Monitor",
sValueName = "API")
if api == None:
raise Exception
else:
pass
except:
exit()
while 1 == 1:
with open("C:/test.txt", "wb") as b:
computer = wmi.WMI(computer="exsan100")
for disk in computer.Win32_LogicalDisk (DriveType=3):
name = disk.caption
size = round(float(disk.Size)/1073741824, 2)
free = round(float(disk.FreeSpace)/1073741824, 2)
used = round(float(size), 2) - round(float(free), 2)
for mem in computer.Win32_OperatingSystem():
a_mem = (int(mem.FreePhysicalMemory)/1024)
for me in computer.Win32_ComputerSystem():
t_mem = (int(me.TotalPhysicalMemory)/1048576)
u_mem = t_mem - a_mem
for cpu in computer.Win32_Processor():
load = cpu.LoadPercentage
print >>b, api
print >>b, name
print >>b, size
print >>b, used
print >>b, t_mem
print >>b, u_mem
print >>b, load
b.close()
date_list = []
stamp = time.strftime("%c",time.localtime(time.time()))
date_list.append(stamp)
name = re.sub(r"[^\w\s]", "",date_list[0])
os.rename("C:/test.txt", ("C:/%s.txt" % name))
try:
sleep(60.00)
except:
exit()
if __name__ == '__main__':
win32serviceutil.HandleCommandLine(SrvMonSvc)
Have you progressed from your original problem. I had similar problem with a python service and found out that it was missing DLLs since the 'System Path' (not the user path) was not complete.
Running pythonservice.exe with -debug from the command prompt was not a problem because it used correct PATH environment variable, but if your service is installed as a System service it's worth checking out if the System Path variable has all the paths for the required DLLs (MSVC, Python, System32). For me it was missing the python DLLs path, after that it worked again.