Reading memory and Access is Denied - winapi

I need to access all the memory of a running process on my local Windows 7 64-bit machine. I am new to winapi.
Here is my problem: whenever I try to open a process and read its memory, I get an "Access is denied" error.
I searched and found a suggestion: if I run the main process as Administrator and use PROCESS_ALL_ACCESS in OpenProcess, I should have enough rights to do it. OK, I did that, but nothing changed; on reading memory I still get "Access is denied".
So I kept searching and found another suggestion, which is enabling SeDebugPrivilege. I have also done that, but nothing changed. I still get the error.
I've read the question and its answer here:
Windows Vista/Win7 Privilege Problem: SeDebugPrivilege & OpenProcess
But as I said, I am really new to winapi and I could not solve my problem yet. Is there anything I still need to configure in my operating system?
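For what it's worth, here is a minimal check (a sketch only, not part of my main script) that I understand should show whether SeDebugPrivilege is actually being granted; as far as I know, AdjustTokenPrivileges can report success while setting ERROR_NOT_ALL_ASSIGNED when the token does not hold the privilege:
import win32api
import win32security
import winerror
import ntsecuritycon

flags = win32security.TOKEN_ADJUST_PRIVILEGES | win32security.TOKEN_QUERY
htoken = win32security.OpenProcessToken(win32api.GetCurrentProcess(), flags)
luid = win32security.LookupPrivilegeValue(None, ntsecuritycon.SE_DEBUG_NAME)
win32security.AdjustTokenPrivileges(htoken, 0, [(luid, win32security.SE_PRIVILEGE_ENABLED)])
if win32api.GetLastError() == winerror.ERROR_NOT_ALL_ASSIGNED:
    print("SeDebugPrivilege was NOT actually granted to this token.")
else:
    print("SeDebugPrivilege enabled.")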
Here is my Python code with pywin32:
from _ctypes import byref, sizeof, Structure
from ctypes import windll, WinError, c_buffer, c_void_p, create_string_buffer
from ctypes.wintypes import *
import win32security
import win32api
import gc
import ntsecuritycon
from struct import Struct
from win32con import PROCESS_ALL_ACCESS
from struct import calcsize

MEMORY_STATES = {0x1000: "MEM_COMMIT", 0x10000: "MEM_FREE", 0x2000: "MEM_RESERVE"}
MEMORY_PROTECTIONS = {0x10: "PAGE_EXECUTE", 0x20: "PAGE_EXECUTE_READ", 0x40: "PAGE_EXECUTE_READWRITE",
                      0x80: "PAGE_EXECUTE_WRITECOPY", 0x01: "PAGE_NOACCESS", 0x04: "PAGE_READWRITE",
                      0x08: "PAGE_WRITECOPY"}
MEMORY_TYPES = {0x1000000: "MEM_IMAGE", 0x40000: "MEM_MAPPED", 0x20000: "MEM_PRIVATE"}


class MEMORY_BASIC_INFORMATION(Structure):
    _fields_ = [
        ("BaseAddress", c_void_p),
        ("AllocationBase", c_void_p),
        ("AllocationProtect", DWORD),
        ("RegionSize", UINT),
        ("State", DWORD),
        ("Protect", DWORD),
        ("Type", DWORD)
    ]


class SYSTEM_INFO(Structure):
    _fields_ = [("wProcessorArchitecture", WORD),
                ("wReserved", WORD),
                ("dwPageSize", DWORD),
                ("lpMinimumApplicationAddress", DWORD),
                ("lpMaximumApplicationAddress", DWORD),
                ("dwActiveProcessorMask", DWORD),
                ("dwNumberOfProcessors", DWORD),
                ("dwProcessorType", DWORD),
                ("dwAllocationGranularity", DWORD),
                ("wProcessorLevel", WORD),
                ("wProcessorRevision", WORD)]


class PyMEMORY_BASIC_INFORMATION:
    def __init__(self, MBI):
        self.MBI = MBI
        self.set_attributes()

    def set_attributes(self):
        self.BaseAddress = self.MBI.BaseAddress
        self.AllocationBase = self.MBI.AllocationBase
        self.AllocationProtect = MEMORY_PROTECTIONS.get(self.MBI.AllocationProtect, self.MBI.AllocationProtect)
        self.RegionSize = self.MBI.RegionSize
        self.State = MEMORY_STATES.get(self.MBI.State, self.MBI.State)
        # self.Protect = self.MBI.Protect  # uncomment this and comment the next line to do a bitwise check on Protect.
        self.Protect = MEMORY_PROTECTIONS.get(self.MBI.Protect, self.MBI.Protect)
        self.Type = MEMORY_TYPES.get(self.MBI.Type, self.MBI.Type)


ASSUME_ALIGNMENT = True


class TARGET:
    """Given a ctype (initialized or not) this coordinates all the information needed to read, write and compare."""
    def __init__(self, ctype):
        self.alignment = 1
        self.ctype = ctype
        # size of target data
        self.size = sizeof(ctype)
        self.type = ctype._type_
        # get the format type needed for struct.unpack/pack.
        while hasattr(self.type, "_type_"):
            self.type = self.type._type_
        # string_buffers and char arrays have _type_ 'c'
        # but that makes it slightly slower to unpack,
        # so swap it for 's'.
        if self.type == "c":
            self.type = "s"
        # calculate byte alignment. this speeds up scanning substantially
        # because we can read and compare every `alignment` bytes
        # instead of every single byte.
        # although if we are scanning for a string the alignment is defaulted to 1
        # (I'm not sure if this is correct).
        elif ASSUME_ALIGNMENT:
            # calc alignment
            divider = 1
            for i in xrange(4):
                divider *= 2
                if not self.size % divider:
                    self.alignment = divider
        # size of target ctype.
        self.type_size = calcsize(self.type)
        # length of target / array length.
        self.length = self.size / self.type_size
        self.value = getattr(ctype, "raw", ctype.value)
        # the format string used for struct.pack/unpack.
        self.format = str(self.length) + self.type
        # efficient packer / unpacker for our own format.
        self.packer = Struct(self.format)

    def get_packed(self):
        """Gets the byte representation of the ctype value for use with WriteProcessMemory."""
        return self.packer.pack(self.value)

    def __str__(self):
        return str(self.ctype)[:10] + "..." + " <" + str(self.value)[:10] + "..." + ">"


class Memory(object):
    def __init__(self, process_handle, target):
        self._process_handle = process_handle
        self._target = target
        self.found = []
        self.__scann_process()

    def __scann_process(self):
        """Scans a process's pages for the target value."""
        si = SYSTEM_INFO()
        psi = byref(si)
        windll.kernel32.GetSystemInfo(psi)
        base_address = si.lpMinimumApplicationAddress
        max_address = si.lpMaximumApplicationAddress
        page_address = base_address
        while page_address < max_address:
            page_address = self.__scan_page(page_address)
            if len(self.found) >= 60000000:
                print("[Warning] Scan ended early because too many addresses were found to hold the target data.")
                break
            gc.collect()
        return self.found

    def __scan_page(self, page_address):
        """Scans the entire page for the TARGET instance and returns the next page address and found addresses."""
        information = self.VirtualQueryEx(page_address)
        base_address = information.BaseAddress
        region_size = information.RegionSize
        next_region = base_address + region_size
        size = self._target.size
        target_value = self._target.value
        step = self._target.alignment
        unpacker = self._target.packer.unpack
        if information.Type != "MEM_PRIVATE" or \
           region_size < size or \
           information.State != "MEM_COMMIT" or \
           information.Protect not in ["PAGE_EXECUTE_READ", "PAGE_EXECUTE_READWRITE", "PAGE_READWRITE"]:
            return next_region
        page_bytes = self.ReadMemory(base_address, region_size)
        for i in xrange(0, (region_size - size), step):
            partial = page_bytes[i:i + size]
            if unpacker(partial)[0] == target_value:
                self.found.append(base_address + i)
        del page_bytes  # free the buffer
        return next_region

    def ReadMemory(self, address, size):
        cbuffer = c_buffer(size)
        success = windll.kernel32.ReadProcessMemory(
            self._process_handle,
            address,
            cbuffer,
            size,
            0)
        assert success, "ReadMemory Failed with success == %s and address == %s and size == %s.\n%s" % (
            success, address, size, WinError(win32api.GetLastError()))
        return cbuffer.raw

    def VirtualQueryEx(self, address):
        MBI = MEMORY_BASIC_INFORMATION()
        MBI_pointer = byref(MBI)
        size = sizeof(MBI)
        success = windll.kernel32.VirtualQueryEx(
            self._process_handle,
            address,
            MBI_pointer,
            size)
        assert success, "VirtualQueryEx Failed with success == %s.\n%s" % (
            success, WinError(win32api.GetLastError())[1])
        assert success == size, "VirtualQueryEx Failed because not all data was written."
        return PyMEMORY_BASIC_INFORMATION(MBI)


def AdjustPrivilege(priv):
    flags = win32security.TOKEN_ADJUST_PRIVILEGES | win32security.TOKEN_QUERY
    p = win32api.GetCurrentProcess()
    htoken = win32security.OpenProcessToken(p, flags)
    id = win32security.LookupPrivilegeValue(None, priv)
    newPrivileges = [(id, win32security.SE_PRIVILEGE_ENABLED)]
    win32security.AdjustTokenPrivileges(htoken, 0, newPrivileges)
    win32api.CloseHandle(htoken)


def OpenProcess(pid=win32api.GetCurrentProcessId()):
    # ntsecuritycon.SE_DEBUG_NAME == "SeDebugPrivilege"
    AdjustPrivilege(ntsecuritycon.SE_DEBUG_NAME)
    phandle = windll.kernel32.OpenProcess(
        PROCESS_ALL_ACCESS,
        0,
        pid)
    assert phandle, "Failed to open process!\n%s" % WinError(win32api.GetLastError())[1]
    return phandle


PID = 22852
process_handle = OpenProcess(PID)
Memory(process_handle, TARGET(create_string_buffer("1456")))
Here is the error I always get:
AssertionError: ReadMemory Failed with success == 0 and address == 131072 and size == 4096.
[Error 5] Access is denied.
I do not know what other information about my code or my Windows 7 machine I should provide. If you need to know more, please ask and I will add it.
My guess is that this is a missing configuration in my operating system rather than a pywin32 issue. I'll be waiting for your solutions.

Related

Fine tuning Bert for NER attempt on Mac OS

I'm using a MacBook Air with macOS Monterey 12.5 (there are updates available, up to Ventura 13.1).
My Python version is 3.10.8, and I also tried 3.11.
Pylance pointed out that the imports I was trying to use were not being resolved, so I changed the VS Code interpreter to Python 3.10.
Anyway, here's the code:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from transformers import BertTokenizerFast
from transformers import BertForTokenClassification
from torch.utils.data import Dataset, DataLoader

df = pd.read_csv('ner.csv')

labels = [i.split() for i in df['labels'].values.tolist()]
unique_labels = set()
for lb in labels:
    [unique_labels.add(i) for i in lb if i not in unique_labels]
# print(unique_labels)
labels_to_ids = {k: v for v, k in enumerate(sorted(unique_labels))}
ids_to_labels = {v: k for v, k in enumerate(sorted(unique_labels))}
# print(labels_to_ids)

text = df['text'].values.tolist()
example = text[36]
# print(example)

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
text_tokenized = tokenizer(example, padding='max_length', max_length=512, truncation=True, return_tensors='pt')
'''
print(text_tokenized)
print(tokenizer.decode(text_tokenized.input_ids[0]))
'''

def align_label_example(tokenized_input, labels):
    word_ids = tokenized_input.word_ids()
    previous_word_idx = None
    label_ids = []
    for word_idx in word_ids:
        if word_idx is None:
            label_ids.append(-100)
        elif word_idx != previous_word_idx:
            try:
                label_ids.append(labels_to_ids[labels[word_idx]])
            except:
                label_ids.append(-100)
        else:
            label_ids.append(labels_to_ids[labels[word_idx]] if label_all_tokens else -100)
        previous_word_idx = word_idx
    return label_ids

label = labels[36]
label_all_tokens = False
new_label = align_label_example(text_tokenized, label)
'''
print(new_label)
print(tokenizer.convert_ids_to_tokens(text_tokenized['input_ids'][0]))
'''

def align_label(texts, labels):
    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=512, truncation=True)
    word_ids = tokenized_inputs.word_ids()
    previous_word_idx = None
    label_ids = []
    for word_idx in word_ids:
        if word_idx is None:
            label_ids.append(-100)
        elif word_idx != previous_word_idx:
            try:
                label_ids.append(labels_to_ids[labels[word_idx]])
            except:
                label_ids.append(-100)
        else:
            try:
                label_ids.append(labels_to_ids[labels[word_idx]] if label_all_tokens else -100)
            except:
                label_ids.append(-100)
        previous_word_idx = word_idx
    return label_ids

class DataSequence(torch.utils.data.Dataset):
    def __init__(self, df):
        lb = [i.split() for i in df['labels'].values.tolist()]
        txt = df['text'].values.tolist()
        self.texts = [tokenizer(str(i),
                                padding='max_length', max_length=512, truncation=True, return_tensors='pt') for i in txt]
        self.labels = [align_label(i, j) for i, j in zip(txt, lb)]

    def __len__(self):
        return len(self.labels)

    def get_batch_data(self, idx):
        # (missing from the snippet as pasted; __getitem__ calls it, so presumably
        # it just returns the tokenized text for this index)
        return self.texts[idx]

    def get_batch_labels(self, idx):
        return torch.LongTensor(self.labels[idx])

    def __getitem__(self, idx):
        batch_data = self.get_batch_data(idx)
        batch_labels = self.get_batch_labels(idx)
        return batch_data, batch_labels

df = df[0:1000]
df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42),
                                     [int(.8 * len(df)), int(.9 * len(df))])

class BertModel(torch.nn.Module):
    def __init__(self):
        super(BertModel, self).__init__()
        self.bert = BertForTokenClassification.from_pretrained('bert-base-cased', num_labels=len(unique_labels))

    def forward(self, input_id, mask, label):
        output = self.bert(input_ids=input_id, attention_mask=mask, labels=label, return_dict=False)
        return output

def train_loop(model, df_train, df_val):
    train_dataset = DataSequence(df_train)
    val_dataset = DataSequence(df_val)
    train_dataloader = DataLoader(train_dataset, num_workers=4, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, num_workers=4, batch_size=BATCH_SIZE)
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
    if use_cuda:
        model = model.cuda()
    best_acc = 0
    best_loss = 1000
    for epoch_num in range(EPOCHS):
        total_acc_train = 0
        total_loss_train = 0
        model.train()
        for train_data, train_label in tqdm(train_dataloader):
            train_label = train_label.to(device)
            mask = train_data['attention_mask'].squeeze(1).to(device)
            input_id = train_data['input_ids'].squeeze(1).to(device)
            optimizer.zero_grad()
            loss, logits = model(input_id, mask, train_label)
            for i in range(logits.shape[0]):
                logits_clean = logits[i][train_label[i] != -100]
                label_clean = train_label[i][train_label[i] != -100]
                predictions = logits_clean.argmax(dim=1)
                acc = (predictions == label_clean).float().mean()
                total_acc_train += acc
                total_loss_train += loss.item()
            loss.backward()
            optimizer.step()
        model.eval()
        total_acc_val = 0
        total_loss_val = 0
        for val_data, val_label in val_dataloader:
            val_label = val_label.to(device)
            mask = val_data['attention_mask'].squeeze(1).to(device)
            input_id = val_data['input_ids'].squeeze(1).to(device)
            loss, logits = model(input_id, mask, val_label)
            for i in range(logits.shape[0]):
                logits_clean = logits[i][val_label[i] != -100]
                label_clean = val_label[i][val_label[i] != -100]
                predictions = logits_clean.argmax(dim=1)
                acc = (predictions == label_clean).float().mean()
                total_acc_val += acc
                total_loss_val += loss.item()
        val_accuracy = total_acc_val / len(df_val)
        val_loss = total_loss_val / len(df_val)
        print(
            f'Epochs: {epoch_num + 1} | Loss: {total_loss_train / len(df_train): .3f} | Accuracy: {total_acc_train / len(df_train): .3f} | Val_Loss: {total_loss_val / len(df_val): .3f} | Accuracy: {total_acc_val / len(df_val): .3f}')

LEARNING_RATE = 5e-3
EPOCHS = 5
BATCH_SIZE = 2

model = BertModel()
train_loop(model, df_train, df_val)
And the debugger says:
Exception has occurred: RuntimeError (note: full exception trace is shown but execution is paused at: <module>)
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
File "/Users/filipedonatti/Projects/pyCodes/second_try.py", line 141, in train_loop
for train_data, train_label in tqdm(train_dataloader):
File "/Users/filipedonatti/Projects/pyCodes/second_try.py", line 197, in <module>
train_loop(model, df_train, df_val)
File "<string>", line 1, in <module> (Current frame)
By the way, despite being on a Mac I have Anaconda Navigator installed, but I have been running this code in VS Code. I installed numpy, torch, datasets and the other libraries with pip3 (from Homebrew's Python).
I'm at a loss. I can run the code in a Google Colab or Jupyter notebook, and I know that training models on my humble Mac is not advisable, but I am just practicing so that I can later train and use the model on a much more powerful machine.
Please help me with this issue, I've been trying to find a solution for days.
Peace and happy holidays.
I've tried solving the issue by writing:
if __name__ == '__main__':
    freeze_support()
I've tried using this:
import parallelTestModule
extractor = parallelTestModule.ParallelExtractor()
extractor.runInParallel(numProcesses=2, numThreads=4)
So...
It turns out the correct way to solve this is to wrap the training in a function, like so:
def run():
    model = BertModel()
    torch.multiprocessing.freeze_support()
    print('loop')
    train_loop(model, df_train, df_val)

if __name__ == '__main__':
    run()
This replaces the model creation and the train_loop call at the end of the script. Issue solved. For more, see this link: https://github.com/pytorch/pytorch/issues/5858

Is it useless to use the Lock() in multiprocess Pool()? [duplicate]

I am having trouble with the multiprocessing module. I am using a Pool of workers with its map method to concurrently analyze lots of files. Each time a file has been processed I would like to have a counter updated so that I can keep track of how many files remain to be processed. Here is sample code:
import os
import multiprocessing

counter = 0

def analyze(file):
    # Analyze the file.
    global counter
    counter += 1
    print counter

if __name__ == '__main__':
    files = os.listdir('/some/directory')
    pool = multiprocessing.Pool(4)
    pool.map(analyze, files)
I cannot find a solution for this.
The problem is that the counter variable is not shared between your processes: each separate process is creating its own local instance and incrementing that.
See this section of the documentation for some techniques you can employ to share state between your processes. In your case you might want to share a Value instance between your workers.
Here's a working version of your example (with some dummy input data). Note it uses global values which I would really try to avoid in practice:
from multiprocessing import Pool, Value
from time import sleep

counter = None

def init(args):
    ''' store the counter for later use '''
    global counter
    counter = args

def analyze_data(args):
    ''' increment the global counter, do something with the input '''
    global counter
    # += operation is not atomic, so we need to get a lock:
    with counter.get_lock():
        counter.value += 1
    print counter.value
    return args * 10

if __name__ == '__main__':
    #inputs = os.listdir(some_directory)
    #
    # initialize a cross-process counter and the input lists
    #
    counter = Value('i', 0)
    inputs = [1, 2, 3, 4]
    #
    # create the pool of workers, ensuring each one receives the counter
    # as it starts.
    #
    p = Pool(initializer=init, initargs=(counter,))
    i = p.map_async(analyze_data, inputs, chunksize=1)
    i.wait()
    print i.get()
Counter class without the race-condition bug:
class Counter(object):
    def __init__(self):
        self.val = multiprocessing.Value('i', 0)

    def increment(self, n=1):
        with self.val.get_lock():
            self.val.value += n

    @property
    def value(self):
        return self.val.value
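For completeness, here is roughly how that Counter could be plugged into your Pool example (a sketch only; it reuses the Counter class above and passes it through the pool initializer, the same trick as in jkp's answer):
import multiprocessing
import os

counter = None

def init_worker(shared_counter):
    # runs once in each worker process; make the shared counter visible there
    global counter
    counter = shared_counter

def analyze(file):
    # ... analyze the file ...
    counter.increment()
    print(counter.value)

if __name__ == '__main__':
    counter = Counter()  # the Counter class defined above
    files = os.listdir('/some/directory')
    pool = multiprocessing.Pool(4, initializer=init_worker, initargs=(counter,))
    pool.map(analyze, files)
    print(counter.value)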
An extremely simple example, changed from jkp's answer:
from multiprocessing import Pool, Value
from time import sleep

counter = Value('i', 0)

def f(x):
    global counter
    with counter.get_lock():
        counter.value += 1
    print("counter.value:", counter.value)
    sleep(1)
    return x

with Pool(4) as p:
    r = p.map(f, range(1000*1000))
A faster Counter class, without using the built-in lock of Value twice:
class Counter(object):
    def __init__(self, initval=0):
        self.val = multiprocessing.RawValue('i', initval)
        self.lock = multiprocessing.Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1

    @property
    def value(self):
        return self.val.value
https://eli.thegreenplace.net/2012/01/04/shared-counter-with-pythons-multiprocessing
https://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.Value
https://docs.python.org/2/library/multiprocessing.html#multiprocessing.sharedctypes.RawValue
Here is a solution to your problem based on a different approach from that proposed in the other answers. It uses message passing with multiprocessing.Queue objects (instead of shared memory with multiprocessing.Value objects) and process-safe (atomic) built-in increment and decrement operators += and -= (instead of introducing custom increment and decrement methods) since you asked for it.
First, we define a class Subject for instantiating an object that will be local to the parent process and whose attributes are to be incremented or decremented:
import multiprocessing

class Subject:
    def __init__(self):
        self.x = 0
        self.y = 0
Next, we define a class Proxy for instantiating an object that will be the remote proxy through which the child processes will request the parent process to retrieve or update the attributes of the Subject object. The interprocess communication will use two multiprocessing.Queue attributes, one for exchanging requests and one for exchanging responses. Requests are of the form (sender, action, *args) where sender is the sender name, action is the action name ('get', 'set', 'increment', or 'decrement' the value of an attribute), and args is the argument tuple. Responses are of the form value (to 'get' requests):
class Proxy(Subject):
    def __init__(self, request_queue, response_queue):
        self.__request_queue = request_queue
        self.__response_queue = response_queue

    def _getter(self, target):
        sender = multiprocessing.current_process().name
        self.__request_queue.put((sender, 'get', target))
        return Decorator(self.__response_queue.get())

    def _setter(self, target, value):
        sender = multiprocessing.current_process().name
        action = getattr(value, 'action', 'set')
        self.__request_queue.put((sender, action, target, value))

    @property
    def x(self):
        return self._getter('x')

    @property
    def y(self):
        return self._getter('y')

    @x.setter
    def x(self, value):
        self._setter('x', value)

    @y.setter
    def y(self, value):
        self._setter('y', value)
Then, we define the class Decorator to decorate the int objects returned by the getters of a Proxy object in order to inform its setters whether the increment or decrement operators += and -= have been used by adding an action attribute, in which case the setters request an 'increment' or 'decrement' operation instead of a 'set' operation. The increment and decrement operators += and -= call the corresponding augmented assignment special methods __iadd__ and __isub__ if they are defined, and fall back on the assignment special methods __add__ and __sub__ which are always defined for int objects (e.g. proxy.x += value is equivalent to proxy.x = proxy.x.__iadd__(value) which is equivalent to proxy.x = type(proxy).x.__get__(proxy).__iadd__(value) which is equivalent to type(proxy).x.__set__(proxy, type(proxy).x.__get__(proxy).__iadd__(value))):
class Decorator(int):
    def __iadd__(self, other):
        value = Decorator(other)
        value.action = 'increment'
        return value

    def __isub__(self, other):
        value = Decorator(other)
        value.action = 'decrement'
        return value
Then, we define the function worker that will be run in the child processes and request the increment and decrement operations:
def worker(proxy):
    proxy.x += 1
    proxy.y -= 1
Finally, we define a single request queue to send requests to the parent process, and multiple response queues to send responses to the child processes:
if __name__ == '__main__':
    subject = Subject()
    request_queue = multiprocessing.Queue()
    response_queues = {}
    processes = []
    for index in range(4):
        sender = 'child {}'.format(index)
        response_queues[sender] = multiprocessing.Queue()
        proxy = Proxy(request_queue, response_queues[sender])
        process = multiprocessing.Process(
            target=worker, args=(proxy,), name=sender)
        processes.append(process)
    running = len(processes)
    for process in processes:
        process.start()
    while subject.x != 4 or subject.y != -4:
        sender, action, *args = request_queue.get()
        print(sender, 'requested', action, *args)
        if action == 'get':
            response_queues[sender].put(getattr(subject, args[0]))
        elif action == 'set':
            setattr(subject, args[0], args[1])
        elif action == 'increment':
            setattr(subject, args[0], getattr(subject, args[0]) + args[1])
        elif action == 'decrement':
            setattr(subject, args[0], getattr(subject, args[0]) - args[1])
    for process in processes:
        process.join()
The program is guaranteed to terminate when += and -= are process-safe. If you remove process-safety by commenting the corresponding __iadd__ or __isub__ of Decorator then the program will only terminate by chance (e.g. proxy.x += value is equivalent to proxy.x = proxy.x.__iadd__(value) but falls back to proxy.x = proxy.x.__add__(value) if __iadd__ is not defined, which is equivalent to proxy.x = proxy.x + value which is equivalent to proxy.x = type(proxy).x.__get__(proxy) + value which is equivalent to type(proxy).x.__set__(proxy, type(proxy).x.__get__(proxy) + value), so the action attribute is not added and the setter requests a 'set' operation instead of an 'increment' operation).
Example process-safe session (atomic += and -=):
child 0 requested get x
child 0 requested increment x 1
child 0 requested get y
child 0 requested decrement y 1
child 3 requested get x
child 3 requested increment x 1
child 3 requested get y
child 2 requested get x
child 3 requested decrement y 1
child 1 requested get x
child 2 requested increment x 1
child 2 requested get y
child 2 requested decrement y 1
child 1 requested increment x 1
child 1 requested get y
child 1 requested decrement y 1
Example process-unsafe session (non-atomic += and -=):
child 2 requested get x
child 1 requested get x
child 0 requested get x
child 2 requested set x 1
child 2 requested get y
child 1 requested set x 1
child 1 requested get y
child 2 requested set y -1
child 1 requested set y -1
child 0 requested set x 1
child 0 requested get y
child 0 requested set y -2
child 3 requested get x
child 3 requested set x 2
child 3 requested get y
child 3 requested set y -3 # the program stalls here
A more sophisticated solution, based on lock-free atomic operations, as given by the example in the atomics library README:
from multiprocessing import Process, shared_memory
import atomics

def fn(shmem_name: str, width: int, n: int) -> None:
    shmem = shared_memory.SharedMemory(name=shmem_name)
    buf = shmem.buf[:width]
    with atomics.atomicview(buffer=buf, atype=atomics.INT) as a:
        for _ in range(n):
            a.inc()
    del buf
    shmem.close()

if __name__ == "__main__":
    # setup
    width = 4
    shmem = shared_memory.SharedMemory(create=True, size=width)
    buf = shmem.buf[:width]
    total = 10_000
    # run processes to completion
    p1 = Process(target=fn, args=(shmem.name, width, total // 2))
    p2 = Process(target=fn, args=(shmem.name, width, total // 2))
    p1.start(), p2.start()
    p1.join(), p2.join()
    # print results and cleanup
    with atomics.atomicview(buffer=buf, atype=atomics.INT) as a:
        print(f"a[{a.load()}] == total[{total}]")
    del buf
    shmem.close()
    shmem.unlink()
(atomics could be installed via pip install atomics on most of the major platforms)
This is a different solution, and the simplest to my taste.
The reasoning is that you create an empty list and append to it each time your function executes, then print len(list) to check progress.
Here is an example based on your code:
import os
import multiprocessing

counter = []

def analyze(file):
    # Analyze the file.
    counter.append(' ')
    print len(counter)

if __name__ == '__main__':
    files = os.listdir('/some/directory')
    pool = multiprocessing.Pool(4)
    pool.map(analyze, files)
For future visitors, the hack to add a counter to multiprocessing is as follows:
from multiprocessing.pool import ThreadPool

counter = []

def your_function(url):
    # function/process
    counter.append(' ')  # you can append anything
    return len(counter)

pool = ThreadPool()
result = pool.map(your_function, urls)
Hope this will help.
I'm working on a progress bar in PyQt5, so I use a thread and a pool together:
import threading
import multiprocessing as mp
from queue import Queue

def multi(x):
    return x * x

def pooler(q):
    with mp.Pool() as pool:
        count = 0
        for i in pool.imap_unordered(multi, range(100)):
            print(count, i)
            count += 1
            q.put(count)

def main():
    q = Queue()
    t = threading.Thread(target=pooler, args=(q,))
    t.start()
    print('start')
    process = 0
    while process < 100:
        process = q.get()
        print('p', process)

if __name__ == '__main__':
    main()
I put this in a QThread worker and it works with acceptable latency.

Loading test data using batch Tensorflow

The following code is my pipeline for reading images and labels from files:
import tensorflow as tf
import numpy as np
import tflearn.data_utils
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import sys

# process labels in the input file
def process_label(label):
    info = np.zeros(6)
    ...
    return info

def read_label_file(file):
    f = open(file, "r")
    filepaths = []
    labels = []
    lines = []
    for line in f:
        tokens = line.split(",")
        filepaths.append([tokens[0], tokens[1], tokens[2]])
        labels.append(process_label(tokens[3:]))
        lines.append(line)
    return filepaths, np.vstack(labels), lines

def get_data_batches(params):
    # reading labels and file path
    train_filepaths, train_labels, train_line = read_label_file(params.train_info)
    test_filepaths, test_labels, test_line = read_label_file(params.test_info)

    # convert string into tensors
    train_images = ops.convert_to_tensor(train_filepaths)
    train_labels = ops.convert_to_tensor(train_labels)
    train_line = ops.convert_to_tensor(train_line)
    test_images = ops.convert_to_tensor(test_filepaths)
    test_labels = ops.convert_to_tensor(test_labels)
    test_line = ops.convert_to_tensor(test_line)

    # create input queues
    train_input_queue = tf.train.slice_input_producer([train_images, train_labels, train_line], shuffle=params.shuffle)
    test_input_queue = tf.train.slice_input_producer([test_images, test_labels, test_line], shuffle=False)

    # process path and string tensor into an image and a label
    train_image = None
    for i in range(train_input_queue[0].get_shape()[0]):
        file_content = tf.read_file(params.path_prefix + train_input_queue[0][i])
        train_imageT = (tf.to_float(tf.image.decode_jpeg(file_content, channels=params.num_channels))) * (1.0 / 255)
        train_imageT = tf.image.resize_images(train_imageT, [params.load_size[0], params.load_size[1]])
        train_imageT = tf.random_crop(train_imageT, size=[params.crop_size[0], params.crop_size[1], params.num_channels])
        train_imageT = tf.image.random_flip_up_down(train_imageT)
        train_imageT = tf.image.per_image_standardization(train_imageT)
        if (i == 0):
            train_image = train_imageT
        else:
            train_image = tf.concat([train_image, train_imageT], 2)
    train_label = train_input_queue[1]
    train_lineInfo = train_input_queue[2]

    test_image = None
    for i in range(test_input_queue[0].get_shape()[0]):
        file_content = tf.read_file(params.path_prefix + test_input_queue[0][i])
        test_imageT = tf.to_float(tf.image.decode_jpeg(file_content, channels=params.num_channels)) * (1.0 / 255)
        test_imageT = tf.image.resize_images(test_imageT, [params.load_size[0], params.load_size[1]])
        test_imageT = tf.image.central_crop(test_imageT, (params.crop_size[0] + 0.0) / params.load_size[0])
        test_imageT = tf.image.per_image_standardization(test_imageT)
        if (i == 0):
            test_image = test_imageT
        else:
            test_image = tf.concat([test_image, test_imageT], 2)
    test_label = test_input_queue[1]
    test_lineInfo = test_input_queue[2]

    # define tensor shape
    train_image.set_shape([params.crop_size[0], params.crop_size[1], params.num_channels * 3])
    train_label.set_shape([66])
    test_image.set_shape([params.crop_size[0], params.crop_size[1], params.num_channels * 3])
    test_label.set_shape([66])

    # collect batches of images before processing
    train_image_batch, train_label_batch, train_lineno = tf.train.batch([train_image, train_label, train_lineInfo], batch_size=params.batch_size, num_threads=params.num_threads, allow_smaller_final_batch=True)
    test_image_batch, test_label_batch, test_lineno = tf.train.batch([test_image, test_label, test_lineInfo], batch_size=params.test_size, num_threads=params.num_threads, allow_smaller_final_batch=True)

    if (params.loadSlice == 'all'):
        return train_image_batch, train_label_batch, train_lineno, test_image_batch, test_label_batch, test_lineno
    elif params.loadSlice == 'train':
        return train_image_batch, train_label_batch
    elif params.loadSlice == 'test':
        return test_image_batch, test_label_batch
    elif params.loadSlice == 'train_info':
        return train_image_batch, train_label_batch, train_lineno
    elif params.loadSlice == 'test_info':
        return test_image_batch, test_label_batch, test_lineno
    else:
        return train_image_batch, train_label_batch, test_image_batch, test_label_batch
I want to use the same pipeline for loading the test data. The size of my test data is huge and I cannot load all of it at once.
I have 20453 test examples, which is not an integer multiple of the batch size (here 512).
How can I read all of my test examples via this pipeline exactly once, and then measure the performance on them?
Currently, I am using this code for batching my test data and it does not work. It always reads a full batch from the queue, even when I set allow_smaller_final_batch to True:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "checkpoints2/snapshot-16")
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    more = True
    num_examples = 0
    while (more):
        img_test, lbl_test, lbl_line = sess.run([test_image_batch, test_label_batch, test_lineno])
        print(lbl_test.shape)
        size = lbl_test.shape[0]
        num_examples += size
        if size < args.batch_size:
            more = False
    sess.close()
This is the code of my model:
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.normalization import batch_normalization
from tflearn.layers.estimator import regression
from tflearn.activations import relu

def get_alexnet(x, num_output):
    network = conv_2d(x, 64, 11, strides=4)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = max_pool_2d(network, 3, strides=2)
    network = conv_2d(network, 192, 5)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = max_pool_2d(network, 3, strides=2)
    network = conv_2d(network, 384, 3)
    network = batch_normalization(network, epsilon=0.0001)
    network = relu(network)
    network = conv_2d(network, 256, 3)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = conv_2d(network, 256, 3)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = max_pool_2d(network, 3, strides=2)
    network = fully_connected(network, 4096)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = dropout(network, 0.5)
    network = fully_connected(network, 4096)
    network = batch_normalization(network, epsilon=0.001)
    network = relu(network)
    network = dropout(network, 0.5)
    network1 = fully_connected(network, num_output)
    network2 = fully_connected(network, 12)
    network3 = fully_connected(network, 6)
    return network1, network2, network3
This can simply be achieved by setting num_epochs=1 and allow_smaller_final_batch=True!
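Roughly, the relevant pieces would look like this (a sketch only; the decoding/preprocessing from your pipeline is elided and the names are illustrative):
# build the test queue so that it stops after exactly one pass over the data
test_input_queue = tf.train.slice_input_producer(
    [test_images, test_labels], num_epochs=1, shuffle=False)
# ... decode/preprocess test_input_queue[0] into test_image, keep test_label ...
test_image_batch, test_label_batch = tf.train.batch(
    [test_image, test_label], batch_size=512,
    num_threads=1, allow_smaller_final_batch=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())  # num_epochs keeps its counter in a local variable
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    num_examples = 0
    try:
        while True:
            lbl = sess.run(test_label_batch)
            num_examples += lbl.shape[0]  # the last batch may be smaller than 512
    except tf.errors.OutOfRangeError:
        pass  # queue exhausted after exactly one epoch
    finally:
        coord.request_stop()
        coord.join(threads)
    print(num_examples)  # should equal 20453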
One solution is to set batch_size to the size of the test set.

How do I move Windows desktop icons using python 2.7?

I am trying to write a python routine to save and restore desktop icon positions. I am using 32-bit python 2.7 on Windows 7 x64. Using information from here (stack exchange), I am able to read the icon names and positions from the foreign process list view that Windows uses to store this info, but I fail when using LVM_SETITEMPOSITION to set new positions (or restore the positions). All of the icons end up in the same exact spot on the desktop. 'Auto arrange' and 'align to grid' are off. The relevant code is towards the bottom. WARNING: if you run this code all of your icons will be in a pile :(
import ctypes

class LVITEMW(ctypes.Structure):
    _fields_ = [
        ('mask', ctypes.c_uint32),
        ('iItem', ctypes.c_int32),
        ('iSubItem', ctypes.c_int32),
        ('state', ctypes.c_uint32),
        ('stateMask', ctypes.c_uint32),
        ('pszText', ctypes.c_uint64),
        ('cchTextMax', ctypes.c_int32),
        ('iImage', ctypes.c_int32),
        ('lParam', ctypes.c_uint64),  # On 32 bit should be c_long
        ('iIndent', ctypes.c_int32),
        ('iGroupId', ctypes.c_int32),
        ('cColumns', ctypes.c_uint32),
        ('puColumns', ctypes.c_uint64),
        ('piColFmt', ctypes.c_int64),
        ('iGroup', ctypes.c_int32),
    ]

class POINT(ctypes.Structure):
    _fields_ = [('x', ctypes.c_int), ('y', ctypes.c_int)]

def icon_save_restore(savedicons=None, restore=False):
    import struct, commctrl, win32gui, win32con, win32api
    dthwnd = win32gui.FindWindow(None, 'Program Manager')
    ukhwnd = win32gui.GetWindow(dthwnd, win32con.GW_CHILD)
    slvhwnd = win32gui.GetWindow(ukhwnd, win32con.GW_CHILD)
    pid = ctypes.create_string_buffer(4)
    p_pid = ctypes.addressof(pid)
    ctypes.windll.user32.GetWindowThreadProcessId(slvhwnd, p_pid)
    hProcHnd = ctypes.windll.kernel32.OpenProcess(win32con.PROCESS_ALL_ACCESS, False, struct.unpack("i", pid)[0])
    pBuffertxt = ctypes.windll.kernel32.VirtualAllocEx(hProcHnd, 0, 4096, win32con.MEM_RESERVE | win32con.MEM_COMMIT, win32con.PAGE_READWRITE)
    copied = ctypes.create_string_buffer(4)
    p_copied = ctypes.addressof(copied)
    lvitem = LVITEMW()
    lvitem.mask = ctypes.c_uint32(commctrl.LVIF_TEXT)
    lvitem.pszText = ctypes.c_uint64(pBuffertxt)
    lvitem.cchTextMax = ctypes.c_int32(4096)
    lvitem.iSubItem = ctypes.c_int32(0)
    pLVI = ctypes.windll.kernel32.VirtualAllocEx(hProcHnd, 0, 4096, win32con.MEM_RESERVE | win32con.MEM_COMMIT, win32con.PAGE_READWRITE)
    win32api.SetLastError(0)
    ctypes.windll.kernel32.WriteProcessMemory(hProcHnd, pLVI, ctypes.addressof(lvitem), ctypes.sizeof(lvitem), p_copied)
    num_items = win32gui.SendMessage(slvhwnd, commctrl.LVM_GETITEMCOUNT)
    if restore is False:
        p = POINT()
        pBufferpnt = ctypes.windll.kernel32.VirtualAllocEx(hProcHnd, 0, ctypes.sizeof(p), win32con.MEM_RESERVE | win32con.MEM_COMMIT, win32con.PAGE_READWRITE)
        icons = {}
        for i in xrange(num_items):
            # Get icon text
            win32gui.SendMessage(slvhwnd, commctrl.LVM_GETITEMTEXT, i, pLVI)
            target_bufftxt = ctypes.create_string_buffer(4096)
            ctypes.windll.kernel32.ReadProcessMemory(hProcHnd, pBuffertxt, ctypes.addressof(target_bufftxt), 4096, p_copied)
            key = target_bufftxt.value
            # Get icon position
            win32api.SendMessage(slvhwnd, commctrl.LVM_GETITEMPOSITION, i, pBufferpnt)
            p = POINT()
            ctypes.windll.kernel32.ReadProcessMemory(hProcHnd, pBufferpnt, ctypes.addressof(p), ctypes.sizeof(p), p_copied)
            icons[key] = (i, p)
        ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pLVI, 0, win32con.MEM_RELEASE)
        ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pBuffertxt, 0, win32con.MEM_RELEASE)
        ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pBufferpnt, 0, win32con.MEM_RELEASE)
        win32api.CloseHandle(hProcHnd)
        return icons
    else:  # RESTORE ICON POSITIONS -- PROBLEM IS HERE SOMEWHERE!!!
        win32gui.SendMessage(slvhwnd, win32con.WM_SETREDRAW, 0, 0)
        for i in xrange(num_items):
            # Get icon text
            win32gui.SendMessage(slvhwnd, commctrl.LVM_GETITEMTEXT, i, pLVI)
            target_bufftxt = ctypes.create_string_buffer(4096)
            ctypes.windll.kernel32.ReadProcessMemory(hProcHnd, pBuffertxt, ctypes.addressof(target_bufftxt), 4096, p_copied)
            key = target_bufftxt.value
            if key in savedicons.keys():
                # Set icon position
                p = savedicons[key][1]  # p is a ctypes POINT
                p_lng = point_to_long(p)  # explicitly convert to HIWORD/LOWORD and c_long
                # Reserve space for the input variable in the foreign process and get a pointer to that memory
                pBufferpnt = ctypes.windll.kernel32.VirtualAllocEx(hProcHnd, 0, ctypes.sizeof(p_lng), win32con.MEM_RESERVE | win32con.MEM_COMMIT, win32con.PAGE_READWRITE)
                # Write the desired coordinates into the space just created
                ret = ctypes.windll.kernel32.WriteProcessMemory(hProcHnd, pBufferpnt, ctypes.addressof(p_lng), ctypes.sizeof(p_lng), p_copied)
                if ret == 0:
                    raise WindowsError
                # Send the message to change the position for that item's index and the pointer to the new position
                ret = win32gui.SendMessage(slvhwnd, commctrl.LVM_SETITEMPOSITION, i, pBufferpnt)
                if ret == 0:
                    raise WindowsError
                # Release the reserved memory for the variable (I recognize that I probably don't need to alloc/free this within the loop)
                ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pBufferpnt, 0, win32con.MEM_RELEASE)
        win32gui.SendMessage(slvhwnd, win32con.WM_SETREDRAW, 1, 0)
        ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pLVI, 0, win32con.MEM_RELEASE)
        ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pBuffertxt, 0, win32con.MEM_RELEASE)
        win32api.CloseHandle(hProcHnd)
        return None

def point_to_long(p):
    ret = (p.y * 0x10000) + (p.x & 0xFFFF)
    return ctypes.c_long(ret)

if __name__ == '__main__':
    mysavedicons = icon_save_restore(restore=False)
    icon_save_restore(mysavedicons, restore=True)
I think there may be a problem with either 1) something to do with the 32-bit and 64-bit memory address spaces (although the other components, where I write the LVITEM structure or read the icon text, work OK), or 2) some issue in the way I am converting the coordinate information or calling SendMessage for LVM_SETITEMPOSITION. Any insight or help would be greatly appreciated.
Turns out that there is a version of the message that uses full 32-bit coordinates (LVM_SETITEMPOSITION32), which I wish MSDN had cross-referenced in the documentation: https://msdn.microsoft.com/en-us/library/windows/desktop/bb761194(v=vs.85).aspx
It accepts the POINT structure directly, so there is no need to convert to HIWORD/LOWORD. Before posting, I did try using a 32-bit shift and a long long (the 64-bit version of long) with LVM_SETITEMPOSITION, and that didn't work either. In any case, with this change everything works as expected.
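For anyone hitting the same wall, the restore step then looks roughly like this (a sketch using the variables from my code above; if your commctrl module does not expose the constant, commctrl.h defines LVM_SETITEMPOSITION32 as LVM_FIRST + 49):
LVM_SETITEMPOSITION32 = commctrl.LVM_FIRST + 49  # takes a pointer to a POINT, not packed words

p = savedicons[key][1]  # the saved ctypes POINT
# reserve space for a POINT inside the foreign process and copy the coordinates there
pBufferpnt = ctypes.windll.kernel32.VirtualAllocEx(
    hProcHnd, 0, ctypes.sizeof(p),
    win32con.MEM_RESERVE | win32con.MEM_COMMIT, win32con.PAGE_READWRITE)
ctypes.windll.kernel32.WriteProcessMemory(
    hProcHnd, pBufferpnt, ctypes.addressof(p), ctypes.sizeof(p), p_copied)
# pass the remote pointer as lParam; no HIWORD/LOWORD packing needed
win32gui.SendMessage(slvhwnd, LVM_SETITEMPOSITION32, i, pBufferpnt)
ctypes.windll.kernel32.VirtualFreeEx(hProcHnd, pBufferpnt, 0, win32con.MEM_RELEASE)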

Graphite / Carbon / Ceres node overlap

I'm working with Graphite monitoring using Carbon and Ceres as the storage method. I have some problems with correcting bad data. It seems that (due to various problems) I've ended up with overlapping files. That is, since Carbon / Ceres stores the data as timestamp#interval.slice, I can have two or more files with overlapping time ranges.
There are two kinds of overlaps:
File A:   +------------+          orig file
File B:      +-----+              subset
File C:            +---------+    overlap
This is causing problems because the existing tools available (ceres-maintenance defrag and rollup) don't cope with these overlaps. Instead, they skip the directory and move on. This is a problem, obviously.
I've created a script that fixes this problem, as follows:
For subsets, just delete the subset file.
For overlaps, use the filesystem truncate() on the original file at the point where the next file starts (the size calculation is sketched after the list below). While it is possible to cut off the start of the overlapping file and rename it properly, I would suggest that this is fraught with danger.
I've found that it's possible to do this in two ways:
Walk the dirs and iterate over the files, fixing as you go: find the subset files and remove them;
Walk the dirs and fix all the problems in a dir before moving on. This is BY FAR the faster approach, since the dir walk is hugely time-consuming.
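To make the truncation rule concrete, the size calculation is just this (a sketch; the full script below does the same thing inline):
# Slice files are named "<start_timestamp>#<frequency>.slice" and hold 8-byte datapoints.
# When the second file starts before the first one ends, truncate the first file
# so that it stops exactly where the second one begins.
def truncated_size_bytes(first_start, first_freq, second_start):
    seconds_to_keep = second_start - first_start
    datapoints_to_keep = seconds_to_keep // first_freq
    return datapoints_to_keep * 8

# e.g. a 60-second-resolution slice starting at 1346805360, overlapped by one
# starting at 1346805960, is truncated to (600 // 60) * 8 = 80 bytes.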
Code:
#!/usr/bin/env python2.6
################################################################################
import io
import os
import time
import sys
import string
import logging
import unittest
import datetime
import random
import zmq
import json
import socket
import traceback
import signal
import select
import simplejson
import cPickle as pickle
import re
import shutil
import collections
from pymongo import Connection
from optparse import OptionParser
from pprint import pprint, pformat
################################################################################

class SliceFile(object):
    def __init__(self, fname):
        self.name = fname
        basename = fname.split('/')[-1]
        fnArray = basename.split('#')
        self.timeStart = int(fnArray[0])
        self.freq = int(fnArray[1].split('.')[0])
        self.size = None
        self.numPoints = None
        self.timeEnd = None
        self.deleted = False

    def __repr__(self):
        out = "Name: %s, tstart=%s tEnd=%s, freq=%s, size=%s, npoints=%s." % (
            self.name, self.timeStart, self.timeEnd, self.freq, self.size, self.numPoints)
        return out

    def setVars(self):
        self.size = os.path.getsize(self.name)
        self.numPoints = int(self.size / 8)
        self.timeEnd = self.timeStart + (self.numPoints * self.freq)

################################################################################

class CeresOverlapFixup(object):
    def __del__(self):
        import datetime
        self.writeLog("Ending at %s" % (str(datetime.datetime.today())))
        self.LOGFILE.flush()
        self.LOGFILE.close()

    def __init__(self):
        self.verbose = False
        self.debug = False
        self.LOGFILE = open("ceresOverlapFixup.log", "a")
        self.badFilesList = set()
        self.truncated = 0
        self.subsets = 0
        self.dirsExamined = 0
        self.lastStatusTime = 0

    def getOptionParser(self):
        return OptionParser()

    def getOptions(self):
        parser = self.getOptionParser()
        parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="debug mode for this program, writes debug messages to logfile.")
        parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="verbose mode for this program, prints a lot to stdout.")
        parser.add_option("-b", "--basedir", action="store", type="string", dest="basedir", default=None, help="base directory location to start converting.")
        (options, args) = parser.parse_args()
        self.debug = options.debug
        self.verbose = options.verbose
        self.basedir = options.basedir
        assert self.basedir, "must provide base directory."

    # Examples:
    # ./updateOperations/1346805360#60.slice
    # ./updateOperations/1349556660#60.slice
    # ./updateOperations/1346798040#60.slice
    def getFileData(self, inFilename):
        ret = SliceFile(inFilename)
        ret.setVars()
        return ret

    def removeFile(self, inFilename):
        os.remove(inFilename)
        #self.writeLog("removing file: %s" % (inFilename))
        self.subsets += 1

    def truncateFile(self, fname, newSize):
        if self.verbose:
            self.writeLog("Truncating file, name=%s, newsize=%s" % (pformat(fname), pformat(newSize)))
        IFD = None
        try:
            IFD = os.open(fname, os.O_RDWR | os.O_CREAT)
            os.ftruncate(IFD, newSize)
            os.close(IFD)
            self.truncated += 1
        except:
            self.writeLog("Exception during truncate: %s" % (traceback.format_exc()))
            try:
                os.close(IFD)
            except:
                pass
        return

    def printStatus(self):
        now = self.getNowTime()
        if ((now - self.lastStatusTime) > 10):
            self.writeLog("Status: time=%d, Walked %s dirs, subsetFilesRemoved=%s, truncated %s files." % (now, self.dirsExamined, self.subsets, self.truncated))
            self.lastStatusTime = now

    def fixupThisDir(self, inPath, inFiles):
        # self.writeLog("Fixing files in dir: %s" % (inPath))
        if not '.ceres-node' in inFiles:
            # self.writeLog("--> Not a slice directory, skipping.")
            return
        self.dirsExamined += 1
        sortedFiles = sorted(inFiles)
        sortedFiles = [x for x in sortedFiles if ((x != '.ceres-node') and (x.count('#') > 0))]
        lastFile = None
        fileObjList = []
        for thisFile in sortedFiles:
            wholeFilename = os.path.join(inPath, thisFile)
            try:
                curFile = self.getFileData(wholeFilename)
                fileObjList.append(curFile)
            except:
                self.badFilesList.add(wholeFilename)
                self.writeLog("ERROR: file %s, %s" % (wholeFilename, traceback.format_exc()))
        # name is timeStart, really.
        fileObjList = sorted(fileObjList, key=lambda thisObj: thisObj.name)
        while fileObjList:
            self.printStatus()
            changes = False
            firstFile = fileObjList[0]
            removedFiles = []
            for curFile in fileObjList[1:]:
                if (curFile.timeEnd <= firstFile.timeEnd):
                    # have subset file. elim.
                    self.removeFile(curFile.name)
                    removedFiles.append(curFile.name)
                    self.subsets += 1
                    changes = True
                    if self.verbose:
                        self.writeLog("Subset file situation. First=%s, overlap=%s" % (firstFile, curFile))
            fileObjList = [x for x in fileObjList if x.name not in removedFiles]
            if (len(fileObjList) < 2):
                break
            secondFile = fileObjList[1]
            # LT is right. FirstFile's timeEnd is always the first open time after first is done.
            # so, first starts#100, len=2, end=102, positions used=100,101. second start#102 == OK.
            if (secondFile.timeStart < firstFile.timeEnd):
                # truncate first file.
                # file_A (last): +---------+
                # file_B (curr):       +----------+
                # solve by truncating previous file at startpoint of current file.
                newLenFile_A_seconds = int(secondFile.timeStart - firstFile.timeStart)
                newFile_A_datapoints = int(newLenFile_A_seconds / firstFile.freq)
                newFile_A_bytes = int(newFile_A_datapoints) * 8
                if (not newFile_A_bytes):
                    fileObjList = fileObjList[1:]
                    continue
                assert newFile_A_bytes, "Must have size. newLenFile_A_seconds=%s, newFile_A_datapoints=%s, newFile_A_bytes=%s." % (newLenFile_A_seconds, newFile_A_datapoints, newFile_A_bytes)
                self.truncateFile(firstFile.name, newFile_A_bytes)
                if self.verbose:
                    self.writeLog("Truncate situation. First=%s, overlap=%s" % (firstFile, secondFile))
                self.truncated += 1
                fileObjList = fileObjList[1:]
                changes = True
            if not changes:
                fileObjList = fileObjList[1:]

    def getNowTime(self):
        return time.time()

    def walkDirStructure(self):
        startTime = self.getNowTime()
        self.lastStatusTime = startTime
        updateStatsDict = {}
        self.okayFiles = 0
        emptyFiles = 0
        for (thisPath, theseDirs, theseFiles) in os.walk(self.basedir):
            self.printStatus()
            self.fixupThisDir(thisPath, theseFiles)
            self.dirsExamined += 1
        endTime = time.time()
        # time.sleep(11)
        self.printStatus()
        self.writeLog("now = %s, started at %s, elapsed time = %s seconds." % (startTime, endTime, endTime - startTime))
        self.writeLog("Done.")

    def writeLog(self, instring):
        print instring
        print >> self.LOGFILE, instring
        self.LOGFILE.flush()

    def main(self):
        self.getOptions()
        self.walkDirStructure()
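Presumably the script ends with a standard entry point along these lines (it is not shown in the post, so this is an assumption):
if __name__ == '__main__':
    CeresOverlapFixup().main()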
