How to break a Windows oplock only on DeleteFile? - windows

Here is my scenario:
I have an application that will constantly have a file open for reading.
Another application that I don't have control over will constantly be writing to that file. Once in a while, it will delete that file. When that happens, I want to close the handles in my application so that the deletion happens successfully.
I'm following the techniques from https://devblogs.microsoft.com/oldnewthing/20130415-00/?p=4663 and currently have:
(Note: I'm using pywin32 as a wrapper for the Windows API)
import win32file
import win32con
import win32event
import pywintypes
import winioctlcon
from oplock import *
in_buffer = REQUEST_OPLOCK_INPUT_BUFFER()
in_buffer.StructureVersion = REQUEST_OPLOCK_CURRENT_VERSION
in_buffer.StructureLength = sizeof(in_buffer)
in_buffer.RequestOplockLevel = OPLOCK_LEVEL_CACHE_READ | OPLOCK_LEVEL_CACHE_WRITE | OPLOCK_LEVEL_CACHE_HANDLE
in_buffer.Flags = REQUEST_OPLOCK_INPUT_FLAG_REQUEST
out_buffer = REQUEST_OPLOCK_OUTPUT_BUFFER()
out_buffer.StructureVersion = REQUEST_OPLOCK_CURRENT_VERSION
out_buffer.StructureLength = sizeof(out_buffer)
overlapped = pywintypes.OVERLAPPED()
overlapped.hEvent = win32event.CreateEvent(None, False, False, None)
f1 = win32file.CreateFile("test.txt", win32file.GENERIC_READ, win32con.FILE_SHARE_READ | win32con.FILE_SHARE_WRITE | win32con.FILE_SHARE_DELETE, None, win32con.OPEN_EXISTING, win32file.FILE_FLAG_OVERLAPPED, None)
win32file.DeviceIoControl(f1, FSCTL_REQUEST_OPLOCK, in_buffer, out_buffer, overlapped)
win32file.GetOverlappedResult(f1, overlapped, True)
Some definitions here:
from ctypes import *
from ctypes.wintypes import *
class REQUEST_OPLOCK_INPUT_BUFFER(Structure):
_fields_ = [
("StructureVersion", WORD),
("StructureLength", WORD),
("RequestOplockLevel", DWORD),
("Flags", DWORD)
]
class REQUEST_OPLOCK_OUTPUT_BUFFER(Structure):
_fields_ = [
("StructureVersion", WORD),
("StructureLength", WORD),
("OriginalOplockLevel", DWORD),
("NewOplockLevel", DWORD),
("Flags", DWORD),
("AccessMode", DWORD),
("ShareMode", WORD)
]
def CTL_CODE(DeviceType, Function, Method, Access):
return (DeviceType << 16) | (Access << 14) | (Function << 2) | Method
FILE_DEVICE_FILE_SYSTEM = 0x00000009
METHOD_BUFFERED = 0x0
FILE_ANY_ACCESS = 0x0
FSCTL_REQUEST_OPLOCK = CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 144, METHOD_BUFFERED, FILE_ANY_ACCESS)
REQUEST_OPLOCK_CURRENT_VERSION = 0x0
REQUEST_OPLOCK_INPUT_FLAG_REQUEST = 0x1
REQUEST_OPLOCK_INPUT_FLAG_ACK = 0x2
REQUEST_OPLOCK_INPUT_FLAG_COMPLETE_ACK_ON_CLOSE = 0x4
REQUEST_OPLOCK_OUTPUT_FLAG_ACK_REQUIRED = 0x1
REQUEST_OPLOCK_OUTPUT_FLAG_MODES_PROVIDED = 0x2
OPLOCK_LEVEL_CACHE_READ = 0x1
OPLOCK_LEVEL_CACHE_HANDLE = 0x2
OPLOCK_LEVEL_CACHE_WRITE = 0x4`
The oplock seems to work but I'm not sure how to change the sensitivity so that it only applies to when the file is deleted since it is breaking even on file open and writes. I've tried modifying the oplock level and tried with 2 handles, but that didn't seem to work.

Related

How does the shortcut property sheet change icons instantly?

When you change the icon in the shortcut property sheet, the shortcut icon changes instantly.
When you run the Python script multiple times, you see the shortcut icons change each time to a random icon. However, when you comment out the create_internet_shortcut call in main, the icon of the manually written Internet shortcut "Bing.url" stops changing. There must be some additional processing that happens in the COM method after SHChangeNotify, which alone is not enough.
What do I need to do to make the Internet shortcut icon change when the URL file is written manually without using COM?
I have also tried calling SHChangeNotify 4 different times, but that didn't work. See the update_all_icons function and commented out call in main.
Install:
pip install pywin32
Code:
#!/usr/bin/env python
from typing import Any, Tuple
import os
import random
import traceback
from pathlib import Path
from win32com import storagecon
from win32comext.shell import shell, shellcon
import win32gui
import pythoncom
import pywintypes
system_icon_file = r"%SystemRoot%\System32\shell32.dll"
system_icon_max_index = 326
desktop_path = Path(r"~\Desktop").expanduser()
def create_internet_shortcut(path: str, target: str = None) -> None:
shortcut = pythoncom.CoCreateInstance(
shell.CLSID_InternetShortcut,
None,
pythoncom.CLSCTX_INPROC_SERVER,
shell.IID_IUniformResourceLocator
)
persist_file = shortcut.QueryInterface(pythoncom.IID_IPersistFile)
persist_file.Load(path)
if not isinstance(target, str):
target = shortcut.GetURL()
if target is None:
target = "https://www.bing.com/"
shortcut.SetURL(target)
property_set_storage = shortcut.QueryInterface(pythoncom.IID_IPropertySetStorage)
property_storage = property_set_storage.Open(shell.FMTID_Intshcut, storagecon.STGM_READWRITE)
icon_file = system_icon_file
icon_index = random.randint(0, system_icon_max_index)
property_storage.WriteMultiple((shellcon.PID_IS_ICONFILE, shellcon.PID_IS_ICONINDEX), (icon_file, icon_index))
property_storage.Commit(storagecon.STGC_DEFAULT)
persist_file.Save(path, 0)
def create_link_shortcut(path: str, target: str = "%SystemDrive%") -> None:
shortcut = pythoncom.CoCreateInstance(
shell.CLSID_ShellLink,
None,
pythoncom.CLSCTX_INPROC_SERVER,
shell.IID_IShellLink
)
persist_file = shortcut.QueryInterface(pythoncom.IID_IPersistFile)
try:
persist_file.Load(path)
except pywintypes.com_error:
traceback.print_exc()
shortcut.SetPath(target)
icon_file = system_icon_file
icon_index = random.randint(0, system_icon_max_index)
shortcut.SetIconLocation(icon_file, icon_index)
persist_file.Save(path, 0)
def write_internet_shortcut(path: str, target: str = None) -> None:
if not isinstance(target, str):
target = "https://www.bing.com/"
icon_file = system_icon_file
icon_index = random.randint(0, system_icon_max_index)
text = f"""[InternetShortcut]
URL={target}
IconFile={icon_file}
IconIndex={icon_index}"""
with open(path, "w", encoding="utf8") as file:
file.write(text)
def update_icon(path: str) -> None:
shell.SHChangeNotify(shellcon.SHCNE_UPDATEITEM, shellcon.SHCNF_PATHW | shellcon.SHCNF_FLUSHNOWAIT, path, None)
def update_all_icons(path: str) -> None:
result, info = shell.SHGetFileInfo(
path,
0,
shellcon.SHGFI_ICON | shellcon.SHGFI_DISPLAYNAME | shellcon.SHGFI_TYPENAME
)
hIcon, iIcon, dwAttributes, displayName, typeName = info
print("Info:", info)
if iIcon > 0:
shell.SHChangeNotify(shellcon.SHCNE_UPDATEIMAGE, shellcon.SHCNF_DWORD | shellcon.SHCNF_FLUSHNOWAIT, iIcon, None)
shell.SHChangeNotify(shellcon.SHCNE_ASSOCCHANGED, shellcon.SHCNF_DWORD | shellcon.SHCNF_FLUSHNOWAIT, iIcon, None)
shell.SHChangeNotify(shellcon.SHCNE_UPDATEITEM, shellcon.SHCNF_PATHW | shellcon.SHCNF_FLUSHNOWAIT, path, None)
shell.SHChangeNotify(shellcon.SHCNE_UPDATEDIR, shellcon.SHCNF_PATHW | shellcon.SHCNF_FLUSHNOWAIT, str(desktop_path.resolve()), None)
win32gui.DestroyIcon(hIcon)
def main(*args: Tuple[Any, ...]) -> None:
create_internet_shortcut(str(desktop_path.joinpath("COM Internet.url").resolve()))
create_link_shortcut(str(desktop_path.joinpath("COM link.lnk").resolve()))
bing_file_path = str(desktop_path.joinpath("Bing.url").resolve())
write_internet_shortcut(bing_file_path)
update_icon(bing_file_path)
# update_all_icons(bing_file_path)
if __name__ == "__main__":
main()

cx_freeze tkinter application not working with multiprocessing in Windows

I created a basic Tkinter GUI that has one button that when you click on it, it is supposed to use multiprocessing in order to open up 5 Chrome instances of "https://www.google.com." When I compile the script using cx_freeze, I click on the new exe and the button does nothing. Here is the code:
main.py
from tkinter import *
from selenium import webdriver
import os, multiprocessing
import numpy as np
def func():
print("bot started")
home = "https://www.google.com"
chromeOptions = webdriver.ChromeOptions()
user_txt_path = []
user_txt_path.append('\\chromedriver.exe')
filename = os.path.dirname(sys.argv[0])
path_to_chromedriver = str(os.path.abspath(filename) + user_txt_path[0])
driver = webdriver.Chrome(chrome_options=chromeOptions, executable_path=path_to_chromedriver) #DRIVER DRIVER!
driver.get_cookies()
driver.get(home)
def callback():
num = 5
for n in np.arange(num):
p = multiprocessing.Process(target=func)
p.start()
master = Tk()
b = Button(master, text="OK", command=callback)
b.pack()
mainloop()
setup.py
from cx_Freeze import setup, Executable
import sys
import os
from pathlib import Path
user_txt_path = []
base = "Console"#"Win32GUI"
user_txt_path.append('chromedriver.exe')
import os.path
PYTHON_INSTALL_DIR = os.path.dirname(os.path.dirname(os.__file__))
os.environ['TCL_LIBRARY'] = os.path.join(PYTHON_INSTALL_DIR, 'tcl', 'tcl8.6')
os.environ['TK_LIBRARY'] = os.path.join(PYTHON_INSTALL_DIR, 'tcl', 'tk8.6')
buildOptions = {"include_files": [user_txt_path[0], 'tcl86t.dll', 'tk86t.dll'], "packages": ['encodings', "os"], "includes": ['numpy.core._methods', 'numpy.lib.format'], "excludes": []}
executables = [
Executable('main.py', base=base)
]
home_path = str(Path.home())
pathname = str(os.path.dirname(sys.argv[0]))
setup(name='numbers',
version = '1.0',
#description = 'test',
#shortcutName="TachySloth",
#shortcutDir="DesktopFolder",
options = dict(build_exe = buildOptions),
executables = executables
)
Note that you need all of the include_files in your parent directory with main.py.

I am trying to bundle my Python(3.5.3) Tkinter app using cx_Freeze(5.1.1). When I hide my command prompt the app doesn't work.

As suggested here I use it to hide my command prompt in my setup.py file. It does hide my command prompt but the app does not work. Basically I am trying to make a Windows native Microsoft MSI for my GUI that I have built for youtube-dl command line tool that is used to consume media from some of the most popular video hosting sites. Any help is much appreciated. Here is my app.py:-
from tkinter import *
from tkinter import ttk
from tkinter import messagebox
from tkinter import filedialog
from tkinter.ttk import Progressbar
import youtube_dl
import threading
import os
download_folder = os.path.expanduser("~")+"/Downloads/"
download_folder_chosen = ""
window = Tk()
window.title("IOB Youtube Downloader")
window.geometry('510x100')
def my_hook(d):
if d:
if d['status'] == 'downloading':
percent_done = d['_percent_str']
percent_done = percent_done.replace(" ", "")
percent_done = percent_done.replace("%", "")
bar['value'] = percent_done
bar.grid(column=1, row=2, pady=15)
bar_lbl.configure(text=percent_done + "%")
bar_lbl.grid(column=1, row=3)
txt['state'] = DISABLED
btn['state'] = DISABLED
if d['status'] == 'finished':
bar.grid_forget()
txt['state'] = NORMAL
btn['state'] = NORMAL
bar_lbl.configure(text="Download Completed !!!")
bar_lbl.grid(column=1, row=2)
messagebox.showinfo('IOB Youtube Downloader', 'Download Complete')
if d['status'] == 'error':
print("\n"*10)
print(d)
messagebox.showerror('IOB Youtube Downloader', 'Download Error')
else:
bar_lbl.configure(text="Download Error. Please try again !!!")
bar_lbl.grid(column=1, row=2)
def start_thread():
t1 = threading.Thread(target=clicked, args=())
t1.start()
def clicked():
res = txt.get()
if download_folder_chosen != "":
location = download_folder_chosen + "/"
else:
location = download_folder
ydl_opts = {
'progress_hooks': [my_hook],
'format': 'best',
'outtmpl': location + u'%(title)s-%(id)s.%(ext)s',
}
try:
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([res])
except:
messagebox.showerror('IOB Youtube Downloader', 'Download Error')
def choose_directory():
global download_folder_chosen
current_directory = filedialog.askdirectory()
download_folder_chosen = current_directory
messagebox.showinfo('IOB Youtube Downloader', 'Download Location:- ' + download_folder_chosen)
style = ttk.Style()
style.theme_use('default')
style.configure("blue.Horizontal.TProgressbar", background='blue')
bar = Progressbar(window, length=200, style='black.Horizontal.TProgressbar')
bar_lbl = Label(window, text="")
lbl = Label(window, text="Paste URL")
lbl.grid(column=0, row=0)
txt = Entry(window,width=60)
txt.grid(column=1, row=0)
btn = Button(window, text="Download", command=start_thread)
btn.grid(column=2, row=0)
btn2 = Button(window, text="...", command=choose_directory)
btn2.grid(column=3, row=0)
window.iconbitmap('favicon.ico')
window.mainloop()
And here is my setup.py file that I use to build the bundle exe using cx_Freeze.
from cx_Freeze import setup, Executable
import sys
import os
base = None
if sys.platform == 'win32':
base = "Win32GUI"
os.environ["TCL_LIBRARY"] = r"C:\Python35\tcl\tcl8.6"
os.environ["TK_LIBRARY"] = r"C:\Python35\tcl\tk8.6"
setup(
name = "IOB Youtube Downloader",
options = {"build_exe": {"packages":["tkinter",], "include_files":[r"C:\Python35\DLLs\tk86t.dll", r"C:\Python35\DLLs\tcl86t.dll", r"E:\Youtube_Downloader\Src\favicon.ico"]}},
version = "1.0",
author = "IO-Bridges",
description = "Download videos from all popular video streaming sites.",
executables = [Executable
(
r"downloader.py",
# base=base, <---- Here setting the base
shortcutName="IOB Youtube Downloader",
shortcutDir="DesktopFolder",
icon="favicon.ico"
)]
)

Reading memory and Access is Denied

I need to access all memory of a running process in my local Windows 7-64bit. I am new to winapi.
Here is my problem; Whenever I try to Open a process and reads its memory, I get Access is Denied error.
I searched and found something. It is said that If I run the main process as Administrator and use PROCESS_ALL_ACCESS on OpenProcess, I would have enough right to do it as it is said. OK, I did it. but nothing is changed. On reading memory, I still get Access is Denied.
So, I kept searching and found another thing which is enabling SeDebugPrivilege. I have also done that but nothing is changed. I still get the error.
I've read the quest and his answer here;
Windows Vista/Win7 Privilege Problem: SeDebugPrivilege & OpenProcess .
But as I said, I am really new to winapi. I could not solve my problem yet. Is there anything which which I need to configure in my local operating system?
Here is my Python code with pywin32;
from _ctypes import byref, sizeof, Structure
from ctypes import windll, WinError, c_buffer, c_void_p, create_string_buffer
from ctypes.wintypes import *
import win32security
import win32api
import gc
import ntsecuritycon
from struct import Struct
from win32con import PROCESS_ALL_ACCESS
from struct import calcsize
MEMORY_STATES = {0x1000: "MEM_COMMIT", 0x10000: "MEM_FREE", 0x2000: "MEM_RESERVE"}
MEMORY_PROTECTIONS = {0x10: "PAGE_EXECUTE", 0x20: "PAGE_EXECUTE_READ", 0x40: "PAGEEXECUTE_READWRITE",
0x80: "PAGE_EXECUTE_WRITECOPY", 0x01: "PAGE_NOACCESS", 0x04: "PAGE_READWRITE",
0x08: "PAGE_WRITECOPY"}
MEMORY_TYPES = {0x1000000: "MEM_IMAGE", 0x40000: "MEM_MAPPED", 0x20000: "MEM_PRIVATE"}
class MEMORY_BASIC_INFORMATION(Structure):
_fields_ = [
("BaseAddress", c_void_p),
("AllocationBase", c_void_p),
("AllocationProtect", DWORD),
("RegionSize", UINT),
("State", DWORD),
("Protect", DWORD),
("Type", DWORD)
]
class SYSTEM_INFO(Structure):
_fields_ = [("wProcessorArchitecture", WORD),
("wReserved", WORD),
("dwPageSize", DWORD),
("lpMinimumApplicationAddress", DWORD),
("lpMaximumApplicationAddress", DWORD),
("dwActiveProcessorMask", DWORD),
("dwNumberOfProcessors", DWORD),
("dwProcessorType", DWORD),
("dwAllocationGranularity", DWORD),
("wProcessorLevel", WORD),
("wProcessorRevision", WORD)]
class PyMEMORY_BASIC_INFORMATION:
def __init__(self, MBI):
self.MBI = MBI
self.set_attributes()
def set_attributes(self):
self.BaseAddress = self.MBI.BaseAddress
self.AllocationBase = self.MBI.AllocationBase
self.AllocationProtect = MEMORY_PROTECTIONS.get(self.MBI.AllocationProtect, self.MBI.AllocationProtect)
self.RegionSize = self.MBI.RegionSize
self.State = MEMORY_STATES.get(self.MBI.State, self.MBI.State)
# self.Protect = self.MBI.Protect # uncomment this and comment next line if you want to do a bitwise check on Protect.
self.Protect = MEMORY_PROTECTIONS.get(self.MBI.Protect, self.MBI.Protect)
self.Type = MEMORY_TYPES.get(self.MBI.Type, self.MBI.Type)
ASSUME_ALIGNMENT = True
class TARGET:
"""Given a ctype (initialized or not) this coordinates all the information needed to read, write and compare."""
def __init__(self, ctype):
self.alignment = 1
self.ctype = ctype
# size of target data
self.size = sizeof(ctype)
self.type = ctype._type_
# get the format type needed for struct.unpack/pack.
while hasattr(self.type, "_type_"):
self.type = self.type._type_
# string_buffers and char arrays have _type_ 'c'
# but that makes it slightly slower to unpack
# so swap is for 's'.
if self.type == "c":
self.type = "s"
# calculate byte alignment. this speeds up scanning substantially
# because we can read and compare every alignment bytes
# instead of every single byte.
# although if we are scanning for a string the alignment is defaulted to 1 \
# (im not sure if this is correct).
elif ASSUME_ALIGNMENT:
# calc alignment
divider = 1
for i in xrange(4):
divider *= 2
if not self.size % divider:
self.alignment = divider
# size of target ctype.
self.type_size = calcsize(self.type)
# length of target / array length.
self.length = self.size / self.type_size
self.value = getattr(ctype, "raw", ctype.value)
# the format string used for struct.pack/unpack.
self.format = str(self.length) + self.type
# efficient packer / unpacker for our own format.
self.packer = Struct(self.format)
def get_packed(self):
"""Gets the byte representation of the ctype value for use with WriteProcessMemory."""
return self.packer.pack(self.value)
def __str__(self):
return str(self.ctype)[:10] + "..." + " <" + str(self.value)[:10] + "..." + ">"
class Memory(object):
def __init__(self, process_handle, target):
self._process_handle = process_handle
self._target = target
self.found = []
self.__scann_process()
def __scann_process(self):
"""scans a processes pages for the target value."""
si = SYSTEM_INFO()
psi = byref(si)
windll.kernel32.GetSystemInfo(psi)
base_address = si.lpMinimumApplicationAddress
max_address = si.lpMaximumApplicationAddress
page_address = base_address
while page_address < max_address:
page_address = self.__scan_page(page_address)
if len(self.found) >= 60000000:
print("[Warning] Scan ended early because too many addresses were found to hold the target data.")
break
gc.collect()
return self.found
def __scan_page(self, page_address):
"""Scans the entire page for TARGET instance and returns the next page address and found addresses."""
information = self.VirtualQueryEx(page_address)
base_address = information.BaseAddress
region_size = information.RegionSize
next_region = base_address + region_size
size = self._target.size
target_value = self._target.value
step = self._target.alignment
unpacker = self._target.packer.unpack
if information.Type != "MEM_PRIVATE" or \
region_size < size or \
information.State != "MEM_COMMIT" or \
information.Protect not in ["PAGE_EXECUTE_READ", "PAGEEXECUTE_READWRITE", "PAGE_READWRITE"]:
return next_region
page_bytes = self.ReadMemory(base_address, region_size)
for i in xrange(0, (region_size - size), step):
partial = page_bytes[i:i + size]
if unpacker(partial)[0] == target_value:
self.found.append(base_address + i)
del page_bytes # free the buffer
return next_region
def ReadMemory(self, address, size):
cbuffer = c_buffer(size)
success = windll.kernel32.ReadProcessMemory(
self._process_handle,
address,
cbuffer,
size,
0)
assert success, "ReadMemory Failed with success == %s and address == %s and size == %s.\n%s" % (
success, address, size, WinError(win32api.GetLastError()))
return cbuffer.raw
def VirtualQueryEx(self, address):
MBI = MEMORY_BASIC_INFORMATION()
MBI_pointer = byref(MBI)
size = sizeof(MBI)
success = windll.kernel32.VirtualQueryEx(
self._process_handle,
address,
MBI_pointer,
size)
assert success, "VirtualQueryEx Failed with success == %s.\n%s" % (
success, WinError(win32api.GetLastError())[1])
assert success == size, "VirtualQueryEx Failed because not all data was written."
return PyMEMORY_BASIC_INFORMATION(MBI)
def AdjustPrivilege(priv):
flags = win32security.TOKEN_ADJUST_PRIVILEGES | win32security.TOKEN_QUERY
p = win32api.GetCurrentProcess()
htoken = win32security.OpenProcessToken(p, flags)
id = win32security.LookupPrivilegeValue(None, priv)
newPrivileges = [(id, win32security.SE_PRIVILEGE_ENABLED)]
win32security.AdjustTokenPrivileges(htoken, 0, newPrivileges)
win32api.CloseHandle(htoken)
def OpenProcess(pid=win32api.GetCurrentProcessId()):
# ntsecuritycon.SE_DEBUG_NAME = "SeDebugPrivilege"
AdjustPrivilege(ntsecuritycon.SE_DEBUG_NAME)
phandle = windll.kernel32.OpenProcess( \
PROCESS_ALL_ACCESS,
0,
pid)
assert phandle, "Failed to open process!\n%s" % WinError(win32api.GetLastError())[1]
return phandle
PID = 22852
process_handle = OpenProcess(PID)
Memory(process_handle, TARGET(create_string_buffer("1456")))
Here is the error I always get;
AssertionError: ReadMemory Failed with success == 0 and address == 131072 and size == 4096.
[Error 5] Access is denied.
I do not know what information else about my code and my personal Windows 7 operating system, I should provide to you. If you need to know more, please ask it from me, I will provide it to solve that problem.
I guess, this is about a lack of configuration in my operating system , not about pywin32. I'll be waiting for your solutions.

Graphite / Carbon / Ceres node overlap

I'm working with Graphite monitoring using Carbon and Ceres as the storage method. I have some problems with correcting bad data. It seems that (due to various problems) I've ended up with overlapping files. That is, since Carbon / Ceres stores the data as timestamp#interval.slice, I can have two or more files with overlapping time ranges.
There are two kinds of overlaps:
File A: +------------+ orig file
File B: +-----+ subset
File C: +---------+ overlap
This is causing problems because the existing tools available (ceres-maintenance defrag and rollup) don't cope with these overlaps. Instead, they skip the directory and move on. This is a problem, obviously.
I've created a script that fixes this problem, as follows:
For subsets, just delete the subset file.
For overlaps, using the file system 'truncate' on the orig file at the point where the next file starts. While it is possible to cut off the start of the overlap file and rename it properly, I would suggest that this is fraught with danger.
I've found that it's possible to do this in two ways:
Walk the dirs and iterate over the files, fixing as you go, and find the file subsets, remove them;
Walk the dirs and fix all the problems in a dir before moving on. This is BY FAR the faster approach, since the dir walk is hugely time consuming.
Code:
#!/usr/bin/env python2.6
################################################################################
import io
import os
import time
import sys
import string
import logging
import unittest
import datetime
import random
import zmq
import json
import socket
import traceback
import signal
import select
import simplejson
import cPickle as pickle
import re
import shutil
import collections
from pymongo import Connection
from optparse import OptionParser
from pprint import pprint, pformat
################################################################################
class SliceFile(object):
def __init__(self, fname):
self.name = fname
basename = fname.split('/')[-1]
fnArray = basename.split('#')
self.timeStart = int(fnArray[0])
self.freq = int(fnArray[1].split('.')[0])
self.size = None
self.numPoints = None
self.timeEnd = None
self.deleted = False
def __repr__(self):
out = "Name: %s, tstart=%s tEnd=%s, freq=%s, size=%s, npoints=%s." % (
self.name, self.timeStart, self.timeEnd, self.freq, self.size, self.numPoints)
return out
def setVars(self):
self.size = os.path.getsize(self.name)
self.numPoints = int(self.size / 8)
self.timeEnd = self.timeStart + (self.numPoints * self.freq)
################################################################################
class CeresOverlapFixup(object):
def __del__(self):
import datetime
self.writeLog("Ending at %s" % (str(datetime.datetime.today())))
self.LOGFILE.flush()
self.LOGFILE.close()
def __init__(self):
self.verbose = False
self.debug = False
self.LOGFILE = open("ceresOverlapFixup.log", "a")
self.badFilesList = set()
self.truncated = 0
self.subsets = 0
self.dirsExamined = 0
self.lastStatusTime = 0
def getOptionParser(self):
return OptionParser()
def getOptions(self):
parser = self.getOptionParser()
parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="debug mode for this program, writes debug messages to logfile." )
parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="verbose mode for this program, prints a lot to stdout." )
parser.add_option("-b", "--basedir", action="store", type="string", dest="basedir", default=None, help="base directory location to start converting." )
(options, args) = parser.parse_args()
self.debug = options.debug
self.verbose = options.verbose
self.basedir = options.basedir
assert self.basedir, "must provide base directory."
# Examples:
# ./updateOperations/1346805360#60.slice
# ./updateOperations/1349556660#60.slice
# ./updateOperations/1346798040#60.slice
def getFileData(self, inFilename):
ret = SliceFile(inFilename)
ret.setVars()
return ret
def removeFile(self, inFilename):
os.remove(inFilename)
#self.writeLog("removing file: %s" % (inFilename))
self.subsets += 1
def truncateFile(self, fname, newSize):
if self.verbose:
self.writeLog("Truncating file, name=%s, newsize=%s" % (pformat(fname), pformat(newSize)))
IFD = None
try:
IFD = os.open(fname, os.O_RDWR|os.O_CREAT)
os.ftruncate(IFD, newSize)
os.close(IFD)
self.truncated += 1
except:
self.writeLog("Exception during truncate: %s" % (traceback.format_exc()))
try:
os.close(IFD)
except:
pass
return
def printStatus(self):
now = self.getNowTime()
if ((now - self.lastStatusTime) > 10):
self.writeLog("Status: time=%d, Walked %s dirs, subsetFilesRemoved=%s, truncated %s files." % (now, self.dirsExamined, self.subsets, self.truncated))
self.lastStatusTime = now
def fixupThisDir(self, inPath, inFiles):
# self.writeLog("Fixing files in dir: %s" % (inPath))
if not '.ceres-node' in inFiles:
# self.writeLog("--> Not a slice directory, skipping.")
return
self.dirsExamined += 1
sortedFiles = sorted(inFiles)
sortedFiles = [x for x in sortedFiles if ((x != '.ceres-node') and (x.count('#') > 0)) ]
lastFile = None
fileObjList = []
for thisFile in sortedFiles:
wholeFilename = os.path.join(inPath, thisFile)
try:
curFile = self.getFileData(wholeFilename)
fileObjList.append(curFile)
except:
self.badFilesList.add(wholeFilename)
self.writeLog("ERROR: file %s, %s" % (wholeFilename, traceback.format_exc()))
# name is timeStart, really.
fileObjList = sorted(fileObjList, key=lambda thisObj: thisObj.name)
while fileObjList:
self.printStatus()
changes = False
firstFile = fileObjList[0]
removedFiles = []
for curFile in fileObjList[1:]:
if (curFile.timeEnd <= firstFile.timeEnd):
# have subset file. elim.
self.removeFile(curFile.name)
removedFiles.append(curFile.name)
self.subsets += 1
changes = True
if self.verbose:
self.writeLog("Subset file situation. First=%s, overlap=%s" % (firstFile, curFile))
fileObjList = [x for x in fileObjList if x.name not in removedFiles]
if (len(fileObjList) < 2):
break
secondFile = fileObjList[1]
# LT is right. FirstFile's timeEnd is always the first open time after first is done.
# so, first starts#100, len=2, end=102, positions used=100,101. second start#102 == OK.
if (secondFile.timeStart < firstFile.timeEnd):
# truncate first file.
# file_A (last): +---------+
# file_B (curr): +----------+
# solve by truncating previous file at startpoint of current file.
newLenFile_A_seconds = int(secondFile.timeStart - firstFile.timeStart)
newFile_A_datapoints = int(newLenFile_A_seconds / firstFile.freq)
newFile_A_bytes = int(newFile_A_datapoints) * 8
if (not newFile_A_bytes):
fileObjList = fileObjList[1:]
continue
assert newFile_A_bytes, "Must have size. newLenFile_A_seconds=%s, newFile_A_datapoints=%s, newFile_A_bytes=%s." % (newLenFile_A_seconds, newFile_A_datapoints, newFile_A_bytes)
self.truncateFile(firstFile.name, newFile_A_bytes)
if self.verbose:
self.writeLog("Truncate situation. First=%s, overlap=%s" % (firstFile, secondFile))
self.truncated += 1
fileObjList = fileObjList[1:]
changes = True
if not changes:
fileObjList = fileObjList[1:]
def getNowTime(self):
return time.time()
def walkDirStructure(self):
startTime = self.getNowTime()
self.lastStatusTime = startTime
updateStatsDict = {}
self.okayFiles = 0
emptyFiles = 0
for (thisPath, theseDirs, theseFiles) in os.walk(self.basedir):
self.printStatus()
self.fixupThisDir(thisPath, theseFiles)
self.dirsExamined += 1
endTime = time.time()
# time.sleep(11)
self.printStatus()
self.writeLog( "now = %s, started at %s, elapsed time = %s seconds." % (startTime, endTime, endTime - startTime))
self.writeLog( "Done.")
def writeLog(self, instring):
print instring
print >> self.LOGFILE, instring
self.LOGFILE.flush()
def main(self):
self.getOptions()
self.walkDirStructure()

Resources