(Skip to hash-tags if in a hurry)
This program will only work if it ends on the image showing.
I want to use it as a function inside another looping program, but it will not work. It will display the stats of the Pokemon(p.whatever), but the image will not show. The image will show in IDLE Python 3.4, but not the terminal. I've been stuck on this for months.
Here is the program that works(in IDLE Python 3.4, not the terminal):
import pykemon

# Ask for a Pokemon by name or id number and keep prompting until the
# pykemon lookup succeeds.
print('What are you looking for?')
askedpokemon = input().strip()
while True:
    try:
        # Only an all-digit answer is an id; testing for "contains a digit"
        # missed '0' and misrouted names that merely include a digit.
        if askedpokemon.isdigit():
            p = pykemon.get(pokemon_id=askedpokemon)
        else:
            askedpokemon = askedpokemon.lower()
            p = pykemon.get(pokemon=askedpokemon)
    except pykemon.exceptions.ResourceNotFoundError:
        print(askedpokemon + " is not a valid Pokemon name or id number.")
        print('Try another')
        askedpokemon = input().strip()
    else:
        break

# Turn askedpokemon into the id number by stripping the API path from
# resource_uri (the '/api/v1/pokemon/' prefix and the remaining slashes).
askedpokemon = p.resource_uri
askedpokemon = askedpokemon.replace('/api/v1/pokemon/', ' ')
askedpokemon = askedpokemon.replace('/', ' ')
askedpokemon = askedpokemon.strip()

print(p)
# p.types is used as a mapping; only its keys (the type names) are shown.
for type_name in p.types:
    print(' Type: ' + str(type_name))
print(' HP: ' + str(p.hp))
print(' Attack: ' + str(p.attack))
print('Defense: ' + str(p.defense))
print(' Sp Atk: ' + str(p.sp_atk))
print(' Sp Def: ' + str(p.sp_def))
print(' Speed: ' + str(p.speed))
print('Exp Yield: ' + str(p.exp))
#######################################################
import base64
import time
import urllib
import urllib.request
import tkinter as tk

# Download the artwork for the Pokemon id held in `askedpokemon` and show it
# in a Tk window.
root = tk.Tk()
# Template URL; '526' is a placeholder replaced by the zero-padded id.
url = "http://assets22.pokemon.com/assets/cms2/img/pokedex/full/526.png"
askedpokemon = askedpokemon.zfill(3)
url = url.replace('526', askedpokemon)
# The context manager closes the connection even if the read fails.
with urllib.request.urlopen(url) as u:
    raw_data = u.read()
# base64.encodestring() was removed in Python 3.9; b64encode() is the
# supported way to produce the base64 data PhotoImage accepts.
b64_data = base64.b64encode(raw_data)
image = tk.PhotoImage(data=b64_data)
label = tk.Label(root, image=image)
label.pack()
# Without the event loop the window is destroyed as soon as the script ends
# when run from a terminal (IDLE keeps the process alive, which hid the bug).
root.mainloop()
##########################################################
Below is the working program with its modules.
https://drive.google.com/file/d/0B3Q4wQpL0nDUYWFFSjV3cUhXVWc/view?usp=sharing
Here is an mcve that illustrates the problem. Call the file tem.py.
# Minimal example: create a Tk window and show one image inside a label.
import tkinter as tk
root = tk.Tk()
# Keep the PhotoImage bound to a name; the label below displays it.
image = tk.PhotoImage(file='python.png')
label = tk.Label(image=image)
label.pack()
# NOTE: the script ends here without calling root.mainloop(), which is the
# problem this example is meant to reproduce when run from a terminal.
When you run in a terminal, this runs, but the root window closes after label.pack(), before you can see it. Either put root.mainloop() at the end of the code or run with python -i tem.py (as IDLE, in effect, does). The -i says to switch from batch to interactive mode after the end of the program instead of closing. IDLE does this so one can interact with the live program before it is closed.
Related
I have Python code that uses pytesseract and multiprocessing. When I start the code manually from PyCharm, it works fine with any number of threads. When I start the code with Windows Task Scheduler with 'threads=1', it also works fine.
However, if I start the code with Windows Task Scheduler with 'threads=2' or more, it finishes without processing the images and without any errors.
I get log messages like the ones below. The script starts but does nothing, and there are no errors in the Windows logs.
2020-05-24 13:09:31,834;START
2020-05-24 13:09:31,834;threads: 2
2020-05-24 13:10:31,832;START
2020-05-24 13:10:31,832;threads: 2
2020-05-24 13:11:31,851;START
2020-05-24 13:11:31,851;threads: 2
Code
from PIL import Image
import pytesseract
from pytesseract import Output
import datetime
from glob import glob
import os
import multiprocessing as multiprocessing
import cv2
import logging
def loggerinit(name, filename, overwrite):
    """Return the logger *name*, set to INFO and writing to *filename*.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        filename: Path of the UTF-8 log file.
        overwrite: When true the file is truncated on open; otherwise new
            records are appended (the previous, always-append behaviour).

    Returns:
        The configured ``logging.Logger``.

    Calling this twice for the same logger and file reuses the existing
    handler, so records are not written twice.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    target = os.path.abspath(filename)
    for handler in logger.handlers:
        # FileHandler stores the absolute path it was opened with.
        if isinstance(handler, logging.FileHandler) and handler.baseFilename == target:
            return logger
    # create the logging file handler
    fh = logging.FileHandler(filename, mode='w' if overwrite else 'a',
                             encoding='UTF-8')
    fh.setFormatter(logging.Formatter('%(asctime)s;%(message)s'))
    # add handler to logger object
    logger.addHandler(fh)
    return logger
def getfiles(dirname, mask):
    """Return the paths under *dirname* whose names match the glob *mask*."""
    pattern = os.path.join(dirname, mask)
    matches = glob(pattern)
    return matches
def tess_file(fname):
    """Run Tesseract orientation/script detection (OSD) on one image file.

    Args:
        fname: Path of the image to analyse.

    Returns:
        ``(fname, osd_dict)`` on success, or ``(fname, None)`` when the image
        cannot be read or the OCR step fails, so one bad file does not abort
        a whole ``Pool.map`` run.
    """
    pytesseract.pytesseract.tesseract_cmd = 'C://Tesseract-OCR//tesseract.exe'
    # NOTE(review): the original assigned 'C:/Program Files/Tesseract-OCR/tessdata'
    # to a local TESSDATA_PREFIX variable, which had no effect; export it via
    # os.environ if Tesseract really needs it - confirm the correct path first.
    # Keep each Tesseract process single-threaded; parallelism comes from the pool.
    os.environ['OMP_THREAD_LIMIT'] = '1'
    try:
        img = cv2.imread(fname)
        if img is None:
            # cv2.imread signals an unreadable/missing file by returning None.
            logging.getLogger('tess').error('Cannot read image: %s', fname)
            return fname, None
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # The grayscale image has one channel, so expand it with GRAY2RGB;
        # BGR2RGB requires a 3- or 4-channel input.
        rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
        im_for_T = Image.fromarray(rgb)
        tess_data = pytesseract.image_to_osd(im_for_T, output_type=Output.DICT)
    except Exception:
        # Record the traceback instead of failing silently.
        logging.getLogger('tess').exception('OCR failed for %s', fname)
        return fname, None
    return fname, tess_data
if __name__ == '__main__':
    # NOTE(review): 'tess.log' and 'Croped' are relative paths, so they are
    # resolved against the working directory the script is started from -
    # confirm the scheduled task's "Start in" directory.
    logger = loggerinit('tess', 'tess.log', False)
    files = getfiles('Croped', '*.jpg')
    t1 = datetime.datetime.now()
    logger.info('START')
    threads = 2
    logger.info('threads: ' + str(threads))
    p = multiprocessing.Pool(threads)
    try:
        results = p.map(tess_file, files)
    finally:
        # Always release the worker processes, even if map() raised.
        p.close()
        p.join()
    e = []
    # tess_file always returns a (filename, data) tuple; data is None on failure.
    for fname, tess_data in results:
        if tess_data is None:
            e.append('OCR error: ' + fname)
        else:
            print(fname, ". rotate: ", tess_data['rotate'])
    t2 = datetime.datetime.now()
    delta = (t2 - t1).total_seconds()
    print('Total time: ', delta)
    print('Files: ', len(files))
    logger.info('Files: ' + str(len(files)))
    logger.info('Stop.' + 'Total time: ' + str(delta))
    # Print errors if any exist, and log them so unattended runs record them.
    for ee in e:
        print(ee)
        logger.error(ee)
What's wrong? How can I fix this issue?
I have used this website over a hundred times and it has helped me so much with my coding (in Python, Arduino, terminal commands and the Windows command prompt). I thought I would share some knowledge that I found for things that Stack Overflow could not help me with, but which may be helpful for others in a similar situation. So have a look at the code below. I hope it helps people with creating their own backup code. I am most proud of the "while '\r\n' in output" part of the code below:
output = child0.readline()
while '\r\n' in output:
msg.log(output.replace('\r\n', ''), logMode + 's')
output = child0.readline()
This helps find the EOF when the program has finished running. Hence you can output the terminal program's output as the program is running.
I will be adding a Windows version to this code too. Possibly with robocopy.
Any questions with the below code, please do not hesitate to ask. NB: I changed people's names and removed my username and passwords.
#!/usr/bin/python
# Written by irishcream24, amateur coder
import subprocess
import sys
import os.path
import logAndError # my own library to handle errors and log events
from inspect import currentframe as CF # help with logging
from inspect import getframeinfo as GFI # help with logging
import threading
import fcntl
import pexpect
import time
import socket
import time as t
from sys import platform
# Select platform-specific settings; any other platform is rejected.
# NOTE(review): portSearch / portResultPosition are not used in the code
# visible here - presumably serial-port lookup settings; confirm.
if platform == "win32":
    import msvcrt
    portSearch = "Uno"
    portResultPosition = 1
elif platform == "darwin":
    portSearch = "usb"
    portResultPosition = 0
else:
    print('Unknown operating system')
    print('Ending Program...')
    # Non-zero status so callers (cron, shells) can tell the run failed.
    sys.exit(1)

# Check if another instance of the program is running, if so, then stop second.
pid_file = 'program.pid'
# fp stays open for the life of the process: the lock is tied to this handle.
fp = open(pid_file, 'w')
try:
    fcntl.lockf(fp, fcntl.LOCK_EX | fcntl.LOCK_NB)
except IOError:
    # another instance is running
    print("Program already running, stopping the second instance...")
    sys.exit(1)

# Determine where main program files are stored
directory = os.path.dirname(os.path.realpath(__file__))
# To print stderr to both screen and file
errCounter = 0
exitFlag = [0]


class tee:
    """File-like object that duplicates every write onto two streams.

    The very first write made through any tee instance is prefixed with a
    timestamp on both streams and sets ``exitFlag[0]`` to 1.
    """

    def __init__(self, _fd1, _fd2):
        self.fd1 = _fd1
        self.fd2 = _fd2

    def __del__(self):
        # Close the wrapped streams, but never the interpreter's own
        # stdout/stderr.
        for stream in (self.fd1, self.fd2):
            if stream != sys.stdout and stream != sys.stderr:
                stream.close()

    def write(self, text):
        global errCounter
        global exitFlag
        targets = (self.fd1, self.fd2)
        if errCounter == 0:
            # First output: stamp both streams and raise the exit flag.
            for stream in targets:
                stream.write('%s: ' % t.strftime("%d/%m/%y %H:%M"))
            errCounter = 1
            exitFlag[0] = 1
        for stream in targets:
            stream.write(text)

    def flush(self):
        for stream in (self.fd1, self.fd2):
            stream.flush()
# Error and log handling
errMode = 'pf'  # p = print to screen, f = print to file, e = end program
errorFileAddress = '%s/errorFile.txt' % directory
outputlog = open(errorFileAddress, "a")
# Everything written to stderr is also appended to the error file.
sys.stderr = tee(sys.stderr, outputlog)
logFileAddress = '%s/log.txt' % directory
logMode = 'pf'  # p = print to screen, f = print to file
msg = logAndError.logAndError(errorFileAddress, logFileAddress)

# Set computer to be backed up (first command-line argument, with a default)
sourceComputer = 'DebbieMac'
try:
    sourceComputer = sys.argv[1]
except IndexError:
    print('No source argument given.')
if sourceComputer not in ('SamMac', 'DebbieMac', 'mediaCentre', 'garageComputer'):
    msg.error('incorrect source computer supplied!', errMode, GFI(CF()).lineno, exitFlag)
    # Invalid arguments are a failure, so exit with a non-zero status.
    sys.exit(1)

# Source and destination setup (second command-line argument, with a default)
backupRoute = 'network'
try:
    backupRoute = sys.argv[2]
except IndexError:
    print('No back up route argument given.')
if backupRoute not in ('network', 'direct', 'testNetwork', 'testDirect'):
    msg.error('incorrect backup route supplied!', errMode, GFI(CF()).lineno, exitFlag)
    sys.exit(1)
# Source, destination and exclude dictionaries
# Keys are built at run time by mainThread() as
# '<computer> <route> source', '<computer> <route> destination' and
# '<computer> exclude', so the spelling of each key must match the accepted
# sourceComputer / backupRoute values exactly.
# NOTE(review): there are no 'SamMac testDirect ...' entries and no entries
# at all for 'mediaCentre' or 'garageComputer'; those combinations would
# raise KeyError in mainThread() - confirm whether that is intended.
v = {
'SamMac network source' : '/Users/SamJones',
'SamMac network destination' : '/Volumes/Seagate/Sam_macbook_backup/Backups',
'SamMac direct source' : '/Users/SamJones',
# Spaces in the direct-attached volume name are backslash-escaped.
'SamMac direct destination' : '/Volumes/Seagate\ Backup\ Plus\ Drive/Sam_macbook_backup/Backups',
'SamMac testNetwork source' : '/Users/SamJones/Documents/Arduino/arduino_sketches-master',
'SamMac testNetwork destination' : '/Volumes/Seagate/Sam_macbook_backup/Arduino',
# Patterns passed to rsync as --exclude options.
'SamMac exclude' : ['.*', '.Trash', 'Library', 'Pictures'],
'DebbieMac network source' : '/Users/DebbieJones',
'DebbieMac network destination' : '/Volumes/Seagate/Debbie_macbook_backup/Backups',
'DebbieMac direct source' : '/Users/DebbieJones',
'DebbieMac direct destination' : '/Volumes/Seagate\ Backup\ Plus\ Drive/Debbie_macbook_backup/Backups',
'DebbieMac testNetwork source': '/Users/DebbieJones/testFolder',
'DebbieMac testNetwork destination' : '/Volumes/Seagate/Debbie_macbook_backup',
'DebbieMac testDirect source' : '/Users/DebbieJones/testFolder',
'DebbieMac testDirect destination' : '/Volumes/Seagate\ Backup\ Plus\ Drive/Debbie_macbook_backup',
'DebbieMac exclude' : ['.*', '.Trash', 'Library', 'Pictures']
}
# Main threading code
class mainThreadClass(threading.Thread):
    """Thread that logs its PID, runs mainThread(), then logs completion."""

    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        # Announce which process performs the backup before starting it.
        msg.log('Starting backup PID: {0}'.format(os.getpid()), logMode)
        mainThread()
        msg.log('Process completed successfully\n', logMode)
def mainThread():
    """Mount the network share if required, then run rsync and log its output.

    Only the "darwin" platform does any work; on "win32" the function
    returns immediately.
    """
    if platform != "darwin":
        # Start-up accepts only win32 and darwin, and win32 has no backup
        # steps implemented.
        return

    if 'network' in backupRoute:
        # Connect to SeagateBackup
        if not os.path.ismount('/Volumes/Seagate'):
            msg.log('Mounting Seagate Backup Hub', logMode)
            commandM = 'mount volume'
            # NOTE(review): credentials are embedded in this command string.
            smbPoint = '"smb://username:password#mediacentre/Seagate"'
            mountCommand = "%s '%s %s'" % ('osascript -e', commandM, smbPoint)
            childM = pexpect.spawn(mountCommand, timeout = None)
            childM.expect(pexpect.EOF)
        else:
            msg.log('Seagate already mounted', logMode)

    # Use rsync to backup files
    excludeArgs = ''.join(
        "--exclude '%s' " % pattern for pattern in v['%s exclude' % sourceComputer])
    backupDirArg = '--delete --backup-dir="../PreviousBackups/%s" ' % time.strftime("%d-%m-%y %H%M")
    routeKey = '%s %s' % (sourceComputer, backupRoute)
    endpoints = '%s %s' % (v['%s source' % routeKey], v['%s destination' % routeKey])
    commandR = 'rsync -avb ' + excludeArgs + backupDirArg + endpoints
    msg.log(commandR, logMode)
    msg.log('Running rsync...rsync output below', logMode)
    child0 = pexpect.spawn(commandR, timeout = None)

    # Handling command output
    # If no '\r\n' in readline() output, then EOF reached
    while True:
        output = child0.readline()
        if '\r\n' not in output:
            break
        msg.log(output.replace('\r\n', ''), logMode + 's')
    return
if __name__ == '__main__':
    # Build the single backup worker thread and launch it straight away.
    threadMain = mainThreadClass()
    threadMain.start()
logAndError.py
# to handle errors
import time
import sys
import threading
class logAndError:
    """Write timestamped error and log messages to the screen and/or files.

    Mode strings are sets of flag characters:
        p = print to screen, f = append to file,
        s = simple (log() only: omit the date stamp),
        e = end program (error() only: suppresses the file write; this
            method itself does not terminate the program).
    """

    def __init__(self, errorAddress, logAddress):
        """Remember the error-file and log-file paths."""
        self.errorAddress = errorAddress
        self.logAddress = logAddress
        # Serialises log() calls coming from different threads.
        self.lock = threading.RLock()

    def error(self, message, errMode, lineNumber=None, exitFlag=None):
        """Report *message* as an error according to *errMode*.

        lineNumber and exitFlag are accepted for caller compatibility but
        are not used here. The exitFlag default is None rather than a shared
        mutable list.
        """
        message = '%s: %s' % (time.strftime("%d/%m/%y %H:%M"), message)
        # p = print to screen, f = print to file, e = end program
        if 'p' in errMode:
            print(message)
        if 'f' in errMode and 'e' not in errMode:
            # "with" closes the file even if the write fails.
            with open(self.errorAddress, 'a') as errorFile:
                errorFile.write('%s\n' % message)
        return

    def log(self, logmsg, logMode):
        """Log *logmsg* according to *logMode* (see the class docstring)."""
        with self.lock:
            # s = simple (no date stamp)
            if 's' in logMode:
                line = logmsg
            else:
                line = '%s: %s' % (time.strftime("%d/%m/%y %H:%M"), logmsg)
            if 'p' in logMode:
                print(line)
            if 'f' in logMode:
                with open(self.logAddress, 'a') as logFile:
                    logFile.write('%s\n' % line)
        return
As suggested here I use it to hide my command prompt in my setup.py file. It does hide my command prompt but the app does not work. Basically I am trying to make a Windows native Microsoft MSI for my GUI that I have built for youtube-dl command line tool that is used to consume media from some of the most popular video hosting sites. Any help is much appreciated. Here is my app.py:-
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
from tkinter import filedialog
from tkinter.ttk import Progressbar
import youtube_dl
import threading
import os
# Default target: the "Downloads" folder under the current user's home directory.
download_folder = os.path.expanduser("~")+"/Downloads/"
# Folder picked through choose_directory(); an empty string means clicked()
# falls back to download_folder.
download_folder_chosen = ""
# Main application window.
window = Tk()
window.title("IOB Youtube Downloader")
window.geometry('510x100')
def my_hook(d):
    """youtube_dl progress hook: mirror the download state in the widgets.

    *d* is the status dictionary handed over by youtube_dl; its 'status'
    entry selects which widgets are updated.
    """
    if not d:
        # No status information at all: show a generic failure message.
        bar_lbl.configure(text="Download Error. Please try again !!!")
        bar_lbl.grid(column=1, row=2)
        return

    status = d['status']
    if status == 'downloading':
        # '_percent_str' carries padding spaces and a '%' sign; strip both.
        percent_done = d['_percent_str'].replace(" ", "").replace("%", "")
        bar['value'] = percent_done
        bar.grid(column=1, row=2, pady=15)
        bar_lbl.configure(text=percent_done + "%")
        bar_lbl.grid(column=1, row=3)
        # Block further input while a download is running.
        txt['state'] = DISABLED
        btn['state'] = DISABLED
    elif status == 'finished':
        bar.grid_forget()
        txt['state'] = NORMAL
        btn['state'] = NORMAL
        bar_lbl.configure(text="Download Completed !!!")
        bar_lbl.grid(column=1, row=2)
        messagebox.showinfo('IOB Youtube Downloader', 'Download Complete')
    elif status == 'error':
        print("\n"*10)
        print(d)
        messagebox.showerror('IOB Youtube Downloader', 'Download Error')
def start_thread():
    """Start clicked() on a new thread (used as the Download button command)."""
    worker = threading.Thread(target=clicked)
    worker.start()
def clicked():
    """Download the URL typed into the entry box to the selected folder.

    Progress is reported through my_hook(); any failure is shown in an error
    dialog.
    """
    import logging

    res = txt.get()
    if download_folder_chosen != "":
        location = download_folder_chosen + "/"
    else:
        location = download_folder

    # When the app is frozen with base="Win32GUI" there is no console, and
    # youtube_dl's default console output then fails. Route its messages to a
    # logger with a do-nothing handler so nothing is written to stdout/stderr.
    quiet_logger = logging.getLogger('youtube_dl')
    if not quiet_logger.handlers:
        quiet_logger.addHandler(logging.NullHandler())

    ydl_opts = {
        'progress_hooks': [my_hook],
        'format': 'best',
        'outtmpl': location + u'%(title)s-%(id)s.%(ext)s',
        'logger': quiet_logger,
        'quiet': True,
        'no_warnings': True,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([res])
    except Exception:
        # Catch download failures only; a bare except would also trap
        # SystemExit and KeyboardInterrupt.
        messagebox.showerror('IOB Youtube Downloader', 'Download Error')
def choose_directory():
    """Let the user pick the download folder and confirm the choice.

    Cancelling the dialog keeps the previously selected folder instead of
    clearing it and showing an empty location.
    """
    global download_folder_chosen
    current_directory = filedialog.askdirectory()
    if not current_directory:
        # askdirectory() returns an empty value when the dialog is cancelled.
        return
    download_folder_chosen = current_directory
    messagebox.showinfo('IOB Youtube Downloader', 'Download Location:- ' + download_folder_chosen)
# Build the widgets and start the Tk event loop.
style = ttk.Style()
style.theme_use('default')
style.configure("blue.Horizontal.TProgressbar", background='blue')
# Use the style configured above; the former 'black.' prefix named a style
# that was never configured, so the blue background was not applied.
bar = Progressbar(window, length=200, style='blue.Horizontal.TProgressbar')
# bar and bar_lbl are placed on the grid by my_hook() once a download starts.
bar_lbl = Label(window, text="")
lbl = Label(window, text="Paste URL")
lbl.grid(column=0, row=0)
txt = Entry(window, width=60)
txt.grid(column=1, row=0)
btn = Button(window, text="Download", command=start_thread)
btn.grid(column=2, row=0)
btn2 = Button(window, text="...", command=choose_directory)
btn2.grid(column=3, row=0)
window.iconbitmap('favicon.ico')
window.mainloop()
And here is my setup.py file that I use to build the bundle exe using cx_Freeze.
from cx_Freeze import setup, Executable
import sys
import os
# cx_Freeze build script for the downloader GUI.
# "Win32GUI" is the base that hides the console window on Windows.
base = "Win32GUI" if sys.platform == 'win32' else None

# Tell the build where the Tcl/Tk runtime of this Python install lives.
os.environ["TCL_LIBRARY"] = r"C:\Python35\tcl\tcl8.6"
os.environ["TK_LIBRARY"] = r"C:\Python35\tcl\tk8.6"

# Extra files copied next to the frozen executable.
bundled_files = [
    r"C:\Python35\DLLs\tk86t.dll",
    r"C:\Python35\DLLs\tcl86t.dll",
    r"E:\Youtube_Downloader\Src\favicon.ico",
]
build_exe_options = {"packages": ["tkinter",], "include_files": bundled_files}

gui_executable = Executable(
    r"downloader.py",
    # base=base, <---- Here setting the base
    shortcutName="IOB Youtube Downloader",
    shortcutDir="DesktopFolder",
    icon="favicon.ico"
)

setup(
    name = "IOB Youtube Downloader",
    options = {"build_exe": build_exe_options},
    version = "1.0",
    author = "IO-Bridges",
    description = "Download videos from all popular video streaming sites.",
    executables = [gui_executable]
)
I need to print ZPL with dynamic content.
I need your help. My content is dynamic, so please help me.
Is it possible to print this word? Note that the content is dynamic.
Please provide the ZPL code.
If you want to type bold you can use this
^FO340,128^FDbold^FS
^FO339,128^FDbold^FS
Another option (External fonts usage for underline, italic and bold)
http://labelary.com/docs.html
There is no simple way to bold or italicize text within ZPL. The fonts the printer has are very basic and can't be changed like that.
Complex font settings (italic, bold, serif ) are actually sent as compressed images to ZPL printers (you can check this with ZebraDesigner).
The format is called Z64, which is based on LZ77.
These two pages contain interesting code in Java to write a converter :
http://www.jcgonzalez.com/img-to-zpl-online
https://gist.github.com/trevarj/1255e5cbc08fb3f79c3f255e25989a18
...still I'm not sure whether the CRC part of the conversion will remain the same in the future, as this is probably vendor-dependent.
Here is a Python port of the first script :
import cv2
import base64
import matplotlib.pyplot as plt
import io
import numpy
# Threshold on the channel sum R+G+B (50% of 768): createBody() treats pixels
# whose sum is above it as white ('0') and all others as black ('1').
blacklimit=int(50* 768/100)
# When true, process() run-length encodes the hex body with encode_hex_ascii().
compress=False
# Set by createBody(): width_byte * image height.
total=0
# Set by createBody(): number of bytes needed for one image row.
width_byte=0
# Repeat-count -> letter table, filled by init_map_code().
mapCode = dict()
# File the generated ZPL is written to by the script at the bottom.
LOCAL_PATH="C://DEV//printer//zebra_debug.txt"
'''
class StringBuilder(object):
def __init__(self):
self._stringio = io.StringIO()
def __str__(self):
return self._stringio.getvalue()
def getvalue(self):
return self._stringio.getvalue()
def append(self, *objects, sep=' ', end=''):
print(*objects, sep=sep, end=end, file=self._stringio)
'''
def init_map_code():
    """Fill the global mapCode table with the repeat-count letters.

    Counts 1..19 map to 'G'..'Y'; the multiples of 20 from 20 to 400 map to
    'g'..'z'.
    """
    global mapCode
    # Single steps: 1 -> 'G', 2 -> 'H', ..., 19 -> 'Y'.
    for count, letter in enumerate("GHIJKLMNOPQRSTUVWXY", start=1):
        mapCode[count] = letter
    # Multiples of twenty: 20 -> 'g', 40 -> 'h', ..., 400 -> 'z'.
    for step, letter in enumerate("ghijklmnopqrstuvwxyz", start=1):
        mapCode[20 * step] = letter
def numberToBase(n, b):
    """Return the digits of *n* in base *b*, most significant digit first.

    Zero is returned as ``[0]``.
    """
    if n == 0:
        return [0]
    collected = []
    while n:
        # Peel off the least significant digit on every pass.
        n, remainder = divmod(n, b)
        collected.append(int(remainder))
    collected.reverse()
    return collected
def four_byte_binary(binary_str):
    """Convert a string of binary digits to upper-case hex, at least 2 digits.

    Values up to 15 are left-padded with one '0'. Debug lines are printed
    for every input other than "00000000".
    """
    decimal = int(binary_str, 2)
    noisy = binary_str != "00000000"
    upper_hex = hex(decimal).upper()   # e.g. '0XFF'
    digits = upper_hex[2:]             # drop the '0X' prefix
    if decimal > 15:
        returned = digits
    else:
        if noisy:
            print("cut=" + upper_hex)
        # Single hex digit: pad to two characters.
        returned = "0" + digits
        if noisy:
            print("low10=" + returned)
    if noisy:
        print(binary_str + "\t" + str(decimal) + "\t" + returned + "\t")
    return returned
def createBody(img):
    """Convert an image to rows of hex text, one bit per pixel.

    Args:
        img: Array indexed as ``img[row, column]`` whose ``shape`` is
            (height, width, channels); the first three channel values of each
            pixel are summed (an OpenCV BGR image as loaded by the script
            below).

    Returns:
        A string with one line per image row. Each line holds two hex digits
        per group of eight pixels (bit '1' = dark pixel); the last group of a
        row is padded with '0' bits.

    Side effects:
        Sets the globals ``width_byte`` (bytes per row) and ``total``
        (``width_byte * height``) and prints debugging output.
    """
    global blacklimit
    global width_byte
    global total
    height, width, colmap = img.shape
    print(height)
    print(width)
    print(colmap)
    # Bytes per row, rounded up; integer arithmetic keeps the value an int.
    width_byte = (width + 7) // 8
    total = width_byte * height
    print(height)
    print("\n")
    print(width)
    print("\n")
    sb = []
    i = 0
    for h in range(height):
        bits = []
        for w in range(width):
            color = img[h, w]
            # Convert each channel to a Python int before adding: 8-bit
            # channel values would otherwise wrap around and never exceed
            # blacklimit.
            total_color = int(color[0]) + int(color[1]) + int(color[2])
            bits.append('0' if total_color > blacklimit else '1')
            if len(bits) == 8 or w == (width - 1):
                # Pad an incomplete trailing group with white ('0') bits.
                byte_str = "".join(bits).ljust(8, '0')
                if byte_str != "00000000":
                    print(i)
                sb.append(four_byte_binary(byte_str))
                i = i + 1
                bits = []
        sb.append("\n")
    print(blacklimit)
    return ''.join(sb)
def _repeat_prefix(count):
    """Return the mapCode letters that together stand for *count* repeats.

    Counts above 419 (the largest value one 'z' plus one single-step letter
    can express) are written as several chained letters.
    """
    letters = []
    while count > 419:
        letters.append(mapCode[400])
        count -= 400
    multi20 = (count // 20) * 20
    resto20 = count % 20
    if multi20:
        letters.append(mapCode[multi20])
    if resto20:
        letters.append(mapCode[resto20])
    return ''.join(letters)


def encode_hex_ascii(code):
    """Run-length encode the hex body produced by createBody().

    Args:
        code: Hex text with one "\n"-terminated line per image row.

    Returns:
        The compressed text: every run becomes ``<repeat letters><hex digit>``,
        a row ending in a full-width run of '0' or 'F' becomes ',' or '!',
        and a row identical to the previous one becomes ':'. An empty *code*
        yields ''.

    Reads the globals ``width_byte`` (bytes per row) and ``mapCode``.
    """
    if not code:
        return ''
    max_linea = width_byte * 2   # hex digits per row
    sb_code = []
    sb_linea = []
    previous_line = None
    counter = 1
    aux = code[0]
    first_char = False
    for current in code[1:]:
        if first_char:
            # First digit of a new row starts a fresh run.
            aux = current
            first_char = False
            continue
        if current == "\n":
            if counter >= max_linea and aux == '0':
                sb_linea.append(",")
            elif counter >= max_linea and aux == 'F':
                sb_linea.append("!")
            else:
                sb_linea.append(_repeat_prefix(counter) + aux)
            counter = 1
            first_char = True
            line = ''.join(sb_linea)
            # ':' tells the printer to repeat the previous row.
            sb_code.append(":" if line == previous_line else line)
            previous_line = line
            sb_linea = []
            continue
        if aux == current:
            counter = counter + 1
        else:
            sb_linea.append(_repeat_prefix(counter) + aux)
            counter = 1
            aux = current
    return ''.join(sb_code)
def head_doc():
    """Return the ZPL header: label start plus the ^GFA graphic field prefix.

    Uses the globals ``total`` (written twice) and ``width_byte`` as set by
    createBody().
    """
    global total
    global width_byte
    byte_count = str(int(total))
    row_bytes = str(int(width_byte))
    return "^XA ^FO0,0^GFA,{0},{0},{1}, ".format(byte_count, row_bytes)
def foot_doc():
    """Return the ZPL trailer: field separator followed by end-of-label."""
    field_end = "^FS"
    label_end = "^XZ"
    return field_end + label_end
def process(img):
    """Build the complete ZPL document for *img*.

    The hex body comes from createBody(); when the global ``compress`` is
    true it is run-length encoded with encode_hex_ascii(). Intermediate
    results are printed for debugging.
    """
    global compress
    init_map_code()
    cuerpo = createBody(img)
    for piece in ("CUERPO\n", cuerpo, "\n"):
        print(piece)
    if compress:
        cuerpo = encode_hex_ascii(cuerpo)
        for piece in ("COMPRESS\n", cuerpo, "\n"):
            print(piece)
    # The header is built after createBody() has set total / width_byte.
    return "".join((head_doc(), cuerpo, foot_doc()))
# Convert one image to ZPL and write the result to LOCAL_PATH.
img = cv2.imread("C:\\Users\\ftheeten\\Pictures\\out.jpg", cv2.IMREAD_COLOR )
if img is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise IOError("could not read the input image")
compress=True
# Assignment (the original '==' only compared and discarded the result).
blacklimit = int(50* 768/100)
test=process(img)
# The context manager closes the file even if the write fails.
with open(LOCAL_PATH, 'w') as zpl_file:
    zpl_file.write(test)
I'm working with Graphite monitoring using Carbon and Ceres as the storage method. I have some problems with correcting bad data. It seems that (due to various problems) I've ended up with overlapping files. That is, since Carbon / Ceres stores the data as timestamp#interval.slice, I can have two or more files with overlapping time ranges.
There are two kinds of overlaps:
File A: +------------+ orig file
File B: +-----+ subset
File C: +---------+ overlap
This is causing problems because the existing tools available (ceres-maintenance defrag and rollup) don't cope with these overlaps. Instead, they skip the directory and move on. This is a problem, obviously.
I've created a script that fixes this problem, as follows:
For subsets, just delete the subset file.
For overlaps, use the file system 'truncate' on the original file at the point where the next file starts. While it is possible to cut off the start of the overlapping file and rename it properly, I would suggest that this is fraught with danger.
I've found that it's possible to do this in two ways:
Walk the dirs and iterate over the files, fixing as you go, and find the file subsets, remove them;
Walk the dirs and fix all the problems in a dir before moving on. This is BY FAR the faster approach, since the dir walk is hugely time consuming.
Code:
#!/usr/bin/env python2.6
################################################################################
import io
import os
import time
import sys
import string
import logging
import unittest
import datetime
import random
import zmq
import json
import socket
import traceback
import signal
import select
import simplejson
import cPickle as pickle
import re
import shutil
import collections
from pymongo import Connection
from optparse import OptionParser
from pprint import pprint, pformat
################################################################################
class SliceFile(object):
    """Metadata for one slice file, derived from its name and size on disk.

    The base name has the form ``<timeStart>#<freq>.<extension>``.
    """

    def __init__(self, fname):
        self.name = fname
        # Strip the directory part, then split "<start>#<freq>.<ext>".
        leaf = fname.rsplit('/', 1)[-1]
        pieces = leaf.split('#')
        self.timeStart = int(pieces[0])
        self.freq = int(pieces[1].split('.')[0])
        # The remaining fields stay None until setVars() reads the file size.
        self.size = None
        self.numPoints = None
        self.timeEnd = None
        self.deleted = False

    def __repr__(self):
        template = "Name: %s, tstart=%s tEnd=%s, freq=%s, size=%s, npoints=%s."
        return template % (
            self.name, self.timeStart, self.timeEnd, self.freq, self.size, self.numPoints)

    def setVars(self):
        """Read the file size and derive the point count and end time."""
        byteCount = os.path.getsize(self.name)
        self.size = byteCount
        # Eight bytes are counted per data point.
        self.numPoints = int(byteCount / 8)
        self.timeEnd = self.timeStart + (self.numPoints * self.freq)
################################################################################
class CeresOverlapFixup(object):
    """Remove subset slice files and truncate overlapping ones under a base dir.

    Progress and errors are written to stdout and to
    "ceresOverlapFixup.log". The counters ``subsets``, ``truncated`` and
    ``dirsExamined`` are each incremented in exactly one place so the status
    line reports true totals.
    """

    def __del__(self):
        import datetime
        # __init__ may have failed before LOGFILE existed, or the file may
        # already be closed.
        logfile = getattr(self, "LOGFILE", None)
        if logfile is None or logfile.closed:
            return
        self.writeLog("Ending at %s" % (str(datetime.datetime.today())))
        self.LOGFILE.flush()
        self.LOGFILE.close()

    def __init__(self):
        self.verbose = False
        self.debug = False
        self.LOGFILE = open("ceresOverlapFixup.log", "a")
        self.badFilesList = set()
        self.truncated = 0
        self.subsets = 0
        self.dirsExamined = 0
        self.lastStatusTime = 0

    def getOptionParser(self):
        """Return a fresh OptionParser."""
        return OptionParser()

    def getOptions(self):
        """Parse the command line into self.debug, self.verbose, self.basedir."""
        parser = self.getOptionParser()
        parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="debug mode for this program, writes debug messages to logfile." )
        parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="verbose mode for this program, prints a lot to stdout." )
        parser.add_option("-b", "--basedir", action="store", type="string", dest="basedir", default=None, help="base directory location to start converting." )
        (options, args) = parser.parse_args()
        self.debug = options.debug
        self.verbose = options.verbose
        self.basedir = options.basedir
        assert self.basedir, "must provide base directory."

    # Examples:
    # ./updateOperations/1346805360#60.slice
    # ./updateOperations/1349556660#60.slice
    # ./updateOperations/1346798040#60.slice
    def getFileData(self, inFilename):
        """Return a SliceFile for *inFilename* with its size fields filled in."""
        ret = SliceFile(inFilename)
        ret.setVars()
        return ret

    def removeFile(self, inFilename):
        """Delete *inFilename* and count it as a removed subset file."""
        os.remove(inFilename)
        self.subsets += 1

    def truncateFile(self, fname, newSize):
        """Truncate the existing file *fname* to *newSize* bytes.

        Failures are logged, not raised. ``self.truncated`` is incremented
        only when the truncate succeeded.
        """
        if self.verbose:
            self.writeLog("Truncating file, name=%s, newsize=%s" % (pformat(fname), pformat(newSize)))
        IFD = None
        try:
            # No O_CREAT: a file that has vanished must not be recreated.
            IFD = os.open(fname, os.O_RDWR)
            os.ftruncate(IFD, newSize)
            self.truncated += 1
        except Exception:
            self.writeLog("Exception during truncate: %s" % (traceback.format_exc()))
        finally:
            if IFD is not None:
                try:
                    os.close(IFD)
                except OSError:
                    pass
        return

    def printStatus(self):
        """Log a status line, at most once every 10 seconds."""
        now = self.getNowTime()
        if ((now - self.lastStatusTime) > 10):
            self.writeLog("Status: time=%d, Walked %s dirs, subsetFilesRemoved=%s, truncated %s files." % (now, self.dirsExamined, self.subsets, self.truncated))
            self.lastStatusTime = now

    def fixupThisDir(self, inPath, inFiles):
        """Fix all subset and overlap problems among the slice files of one dir.

        Args:
            inPath: Directory being processed.
            inFiles: Names of the files in that directory. Directories
                without a '.ceres-node' entry are skipped.
        """
        if '.ceres-node' not in inFiles:
            # Not a slice directory.
            return
        sliceNames = sorted(
            x for x in inFiles if x != '.ceres-node' and x.count('#') > 0)
        fileObjList = []
        for thisFile in sliceNames:
            wholeFilename = os.path.join(inPath, thisFile)
            try:
                fileObjList.append(self.getFileData(wholeFilename))
            except Exception:
                self.badFilesList.add(wholeFilename)
                self.writeLog("ERROR: file %s, %s" % (wholeFilename, traceback.format_exc()))
        # Order by numeric start time; a plain string sort misorders names
        # whose timestamps have different digit counts.
        fileObjList.sort(key=lambda thisObj: (thisObj.timeStart, thisObj.name))
        while fileObjList:
            self.printStatus()
            firstFile = fileObjList[0]
            removedFiles = set()
            for curFile in fileObjList[1:]:
                if (curFile.timeEnd <= firstFile.timeEnd):
                    # have subset file. elim. (removeFile counts the subset)
                    self.removeFile(curFile.name)
                    removedFiles.add(curFile.name)
                    if self.verbose:
                        self.writeLog("Subset file situation. First=%s, overlap=%s" % (firstFile, curFile))
            if removedFiles:
                fileObjList = [x for x in fileObjList if x.name not in removedFiles]
            if (len(fileObjList) < 2):
                break
            secondFile = fileObjList[1]
            # LT is right. FirstFile's timeEnd is always the first open time after first is done.
            # so, first starts#100, len=2, end=102, positions used=100,101. second start#102 == OK.
            if (secondFile.timeStart < firstFile.timeEnd):
                # truncate first file.
                # file_A (last): +---------+
                # file_B (curr): +----------+
                # solve by truncating previous file at startpoint of current file.
                newLenFile_A_seconds = int(secondFile.timeStart - firstFile.timeStart)
                newFile_A_datapoints = newLenFile_A_seconds // firstFile.freq
                newFile_A_bytes = int(newFile_A_datapoints) * 8
                if newFile_A_bytes:
                    # truncateFile counts the truncation itself.
                    self.truncateFile(firstFile.name, newFile_A_bytes)
                    if self.verbose:
                        self.writeLog("Truncate situation. First=%s, overlap=%s" % (firstFile, secondFile))
                # A zero-byte result leaves the first file as it is.
            # The first file is settled; continue with the rest.
            fileObjList = fileObjList[1:]

    def getNowTime(self):
        """Return the current time in seconds since the epoch."""
        return time.time()

    def walkDirStructure(self):
        """Walk self.basedir, fixing every directory, and log the elapsed time."""
        startTime = self.getNowTime()
        self.lastStatusTime = startTime
        self.okayFiles = 0
        for (thisPath, theseDirs, theseFiles) in os.walk(self.basedir):
            self.printStatus()
            self.fixupThisDir(thisPath, theseFiles)
            # Every walked directory is counted once, here only.
            self.dirsExamined += 1
        endTime = time.time()
        self.printStatus()
        # Arguments match the labels: "now" is the end time.
        self.writeLog( "now = %s, started at %s, elapsed time = %s seconds." % (endTime, startTime, endTime - startTime))
        self.writeLog( "Done.")

    def writeLog(self, instring):
        """Print *instring* and append it as one line to the log file."""
        print(instring)
        self.LOGFILE.write("%s\n" % (instring,))
        self.LOGFILE.flush()

    def main(self):
        """Parse the options and process the whole directory tree."""
        self.getOptions()
        self.walkDirStructure()