How do I get results from the blocking function? - python-asyncio

import asyncio
import time

def abab():
    for i in range(10):
        print(i)
        time.sleep(1)
    return 10000

async def ab():
    while True:
        print(111)
        time.sleep(1)
        await asyncio.sleep(0.01)
async def main():
    abc = asyncio.create_task(ab())
    loop = asyncio.get_event_loop()
    a = loop.run_in_executor(None, abab)
    await abc

asyncio.run(main())
While ab() keeps printing 111, I want to get abab's result (10000) after the 10 seconds it takes to run.
abab must remain a blocking function, not an async function.
This example prints:
0 111 1 111 2 111 3 111 4 111 5 111 6 111 7 111 8 111 9 111 111 111 111...
but I want it to print 10000 when the result is returned:
0 111 1 111 2 111 3 111 4 111 5 111 6 111 7 111 8 111 9 111 10000 111 111 111...
This is just an example; in the actual code, abab is a blocking function that cannot be modified. I can't think of a solution to this. I need help.

If you await the result of run_in_executor from somewhere, you will receive the return value of the blocking function it executed. Since you already use create_task to run ab() in the background, nothing stops you from just awaiting the call to run_in_executor to pick up the result of abab():
async def main():
    # spawn ab() in the background
    abc = asyncio.create_task(ab())
    loop = asyncio.get_event_loop()
    # wait for abab() to finish, allowing ab() to run
    abab_result = await loop.run_in_executor(None, abab)
    # print (or otherwise process) the result of abab
    print(abab_result)
    # continue waiting for ab()
    await abc
With this change, the output from the program matches the one you requested.
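As a side note, on Python 3.9 and later the same pattern can be written with asyncio.to_thread, which is a shorthand for running a blocking callable in the default thread pool executor. A minimal sketch of the same main(), assuming Python 3.9+:

async def main():
    abc = asyncio.create_task(ab())
    # asyncio.to_thread runs the blocking abab() in the default thread pool
    abab_result = await asyncio.to_thread(abab)
    print(abab_result)
    await abc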

Related

Count number of connections in each network using DAX

I have a dataset like this in Power BI, with connections between the "Participant ID" and "Knows Participant" columns:
Participant ID   Knows Participant
111              353
111              777
111              112
111              249
112              143
112              144
113              111
113              244
114              NaN
115              113
...              ...
777              111
777              398
777              114
778              NaN
779              112
3499             NaN
I've built a network chart. However, there are a lot of 1-1 connections that are not very useful for visualization, so I want to exclude them (see image):
Is it possible to count the number of connections in each network using DAX and then use this value to filter out all nodes with only one connection (circled in red)? Or maybe filter out one-connection nodes using another approach?
I've tried to make a calculated column using DAX:
Connection Column =
COUNTROWS(
    FILTER(
        Table,
        EARLIER(Table[Knows Participant]) = Table[Knows Participant]
    )
)
However, it only counts duplicate values in the "Knows Participant" column, not the number of connections in each network.
Example of desired output:
Participant ID   Knows Participant   Number of Connections in the Network
111              353                 4
353              444                 4
444              551                 4
551              987                 4
112              143                 1
220              190                 1
333              337                 2
337              410                 2
765                                  0
You need the PATH functions as you're essentially trying to flatten a hierarchy and then exclude certain parts of it. The following help page gives a good rundown of the approach to take.
https://learn.microsoft.com/en-us/dax/understanding-functions-for-parent-child-hierarchies-in-dax
You can add a calculated column to the table with an expression like this:
VAR pIdLinksCount =
    CALCULATE(COUNTROWS('tbl'), ALL('tbl'[Knows Participant]))
VAR neighbourLinksCount =
    IF(
        pIdLinksCount = 1,
        -- if pIdLinksCount = 1, count the neighbour's links
        VAR neighbourId =
            CALCULATETABLE(VALUES('tbl'[Knows Participant]))
        RETURN
            CALCULATE(
                COUNTROWS('tbl'),
                ALL(),  -- removes all filters from the data model
                'tbl'[Participant ID] = neighbourId  -- applies a filter to the [Participant ID] column
                -- ,'tbl'[Participant ID] IN neighbourId  -- alternatively try this; I believe it is not necessary
            ),
        2  -- returns 2 if pIdLinksCount > 1, making result at least 4, so the final result > 2 test passes
    )
VAR result = pIdLinksCount + neighbourLinksCount
RETURN
    IF(
        result > 2,
        1,
        0
    )
The idea is to check the neighbour too: whether it has more than one link.
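For intuition, the "number of connections in the network" the question asks for is the edge count of each connected component of the participant graph. As a cross-check outside DAX (illustrative only, not part of the Power BI solution), here is a small union-find sketch in Python over the edges from the desired-output example:

from collections import Counter

# Edges taken from the desired-output example in the question.
edges = [(111, 353), (353, 444), (444, 551), (551, 987),
         (112, 143), (220, 190), (333, 337), (337, 410)]

parent = {}

def find(x):
    # union-find "find" with path halving
    parent.setdefault(x, x)
    while parent[x] != x:
        parent[x] = parent[parent[x]]
        x = parent[x]
    return x

def union(a, b):
    parent[find(a)] = find(b)

for a, b in edges:
    union(a, b)

# Edge count per connected component, keyed by the component's root.
component_edges = Counter(find(a) for a, b in edges)
for a, b in edges:
    print(a, b, component_edges[find(a)])

This prints each edge with its component's edge count (4 for the 111-353-444-551-987 chain, 1 for 112-143 and 220-190, 2 for 333-337-410), matching the desired output above.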

How to get the shap values for the masked language modeling task using transformer?

I am trying to get the SHAP values for the masked language modeling task using a transformer model. I get the error KeyError: 'label' when I input a single data sample to get the explanation. My complete code and error trace are as follows:
import transformers
import shap
from transformers import RobertaTokenizer, RobertaForMaskedLM, pipeline
import torch
model = RobertaForMaskedLM.from_pretrained('microsoft/codebert-base-mlm')
tokenizer = RobertaTokenizer.from_pretrained('microsoft/codebert-base-mlm')
code_example = "if (x <mask> 10)"
fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)
explainer = shap.Explainer(fill_mask)
shap_values = explainer(['x {tokenizer.mask_token} 10'])
The following is the error trace:
KeyError Traceback (most recent call last)
<ipython-input-12-bb3832d1772d> in <module>
6 # explain the model on two sample inputs
7 explainer = shap.Explainer(fill_mask)
----> 8 shap_values = explainer(['x {tokenizer.mask_token} 10'])
9 print(shap_values)
10 # visualize the first prediction's explanation for the POSITIVE output class
5 frames
/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py in __call__(self, max_evals, fixed_context, main_effects, error_bounds, batch_size, outputs, silent, *args)
136 return super().__call__(
137 *args, max_evals=max_evals, fixed_context=fixed_context, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
--> 138 outputs=outputs, silent=silent
139 )
140
/usr/local/lib/python3.7/dist-packages/shap/explainers/_explainer.py in __call__(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *args, **kwargs)
266 row_result = self.explain_row(
267 *row_args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
--> 268 batch_size=batch_size, outputs=outputs, silent=silent, **kwargs
269 )
270 values.append(row_result.get("values", None))
/usr/local/lib/python3.7/dist-packages/shap/explainers/_partition.py in explain_row(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, fixed_context, *row_args)
159 # if not fixed background or no base value assigned then compute base value for a row
160 if self._curr_base_value is None or not getattr(self.masker, "fixed_background", False):
--> 161 self._curr_base_value = fm(m00.reshape(1, -1), zero_index=0)[0] # the zero index param tells the masked model what the baseline is
162 f11 = fm(~m00.reshape(1, -1))[0]
163
/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py in __call__(self, masks, zero_index, batch_size)
65
66 else:
---> 67 return self._full_masking_call(masks, batch_size=batch_size)
68
69 def _full_masking_call(self, masks, zero_index=None, batch_size=None):
/usr/local/lib/python3.7/dist-packages/shap/utils/_masked_model.py in _full_masking_call(self, masks, zero_index, batch_size)
142
143 joined_masked_inputs = tuple([np.concatenate(v) for v in all_masked_inputs])
--> 144 outputs = self.model(*joined_masked_inputs)
145 _assert_output_input_match(joined_masked_inputs, outputs)
146 all_outputs.append(outputs)
/usr/local/lib/python3.7/dist-packages/shap/models/_transformers_pipeline.py in __call__(self, strings)
33 val = [val]
34 for obj in val:
---> 35 output[i, self.label2id[obj["label"]]] = sp.special.logit(obj["score"]) if self.rescale_to_logits else obj["score"]
36 return output
KeyError: 'label'

Cannot run pool.amap() twice in pathos.multiprocessing

I want to parallelize the simulation of multiple agents. Because I want my results to be instances of a class, I use pathos.multiprocessing instead of multiprocessing to avoid serialization issues.
I do it like this:
import pathos.multiprocessing as mp
def sim_agent(T):  # simulate a single agent for T periods
    ag = agent()
    for t in range(T):
        ag.step()
    return ag

def simulate_parallel(N, T):
    if __name__ == '__main__':
        pool = mp.ProcessPool()
        results = pool.amap(sim_agent, [T] * N)
        agents = results.get()
        return agents
I can run simulate_parallel once. When I do it again, I get an error:
--> 142 agents = results.get()
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/pool.py in get(self, timeout)
655 return self._value
656 else:
--> 657 raise self._value
658
659 def _set(self, i, obj):
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/pool.py in _handle_tasks(taskqueue, put, outqueue, pool, cache)
429 break
430 try:
--> 431 put(task)
432 except Exception as e:
433 job, idx = task[:2]
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/connection.py in send(self, obj)
207 self._check_closed()
208 self._check_writable()
--> 209 self._send_bytes(_ForkingPickler.dumps(obj))
210
211 def recv_bytes(self, maxlength=None):
~/anaconda3/anaconda3/lib/python3.7/site-packages/multiprocess/connection.py in _send_bytes(self, buf)
394 n = len(buf)
395 # For wire compatibility with 3.2 and lower
--> 396 header = struct.pack("!i", n)
397 if n > 16384:
398 # The payload is large so Nagle's algorithm won't be triggered
error: 'i' format requires -2147483648 <= number <= 2147483647
I also get the same error when I try to use pool.amap() in other functions. Even when I make N and T small, the behavior is the same: I can run simulate_parallel once but not a second time. Why could that be? Thanks!

AWS Elasticsearch TransportError(413, '')

We are using the Python elasticsearch API, version "elasticsearch-6.8.0", with AWS Elasticsearch version 6.7.
It only happens sometimes, on some machines (even when we make the queries within seconds of each other from different machines, one will succeed and the other will throw the error), so it is difficult to reproduce. We are using a paginated search like this:
def scrolled_search_query(self, index, query, size=es_config.ES_SIZE,
                          scroll=es_config.ES_SCROLL,
                          request_timeout=es_config.ES_TIMEOUT,
                          docs_only=False):
    data = self.es_conn.search(index=index, size=size, scroll=scroll, body=query,
                               request_timeout=request_timeout)
    while len(data['hits']['hits']) > 0:
        for hit in data['hits']['hits']:
            if docs_only:
                yield hit['_source']
            else:
                yield hit
        data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
                                   request_timeout=request_timeout)
The error message is utterly useless, basically just saying that there's a 413, like this:
TransportError: TransportError(413, '')
From looking around at forums, it seems the problem is because ES is attempting to return a large payload. However, since we're using scroll pagination, I don't know why this would be an issue because each payload should be well below the 100MB limit. Since the error message is so terrible, I can't really see what it's trying to do in the database. It seems like the database is calculating the total data for the query and rejecting it without taking into account pagination. It's normal for us to request large payloads, and paginate them.
The full exception looks like this; what else do you need?
---------------------------------------------------------------------------
TransportError Traceback (most recent call last)
<ipython-input-3-a3fd964bf999> in <module>
41 blah=blah,
42 blah=blah,
---> 43 start_date=start_date)
44
45 df = df.drop(['#blah', '#blah'], axis=1)
~/Documents/Repositories/blah/blah/downloader.py in get_data(self, data_type, blah, blah, device, blah, start_date, end_date)
186 return self.gatorEs.get_blah(es_requests=self.es_requests, blah=blah, blah=blah, blah=blah,
--> 188 start_date=start_date, end_date=end_date)
189
190 if data_type == 'blah':
~/Documents/Repositories/gator_access/gator_access/es_gator_api.py in get_blah(self, es_requests, blah, blah, blah, blah, blah, start_date, end_date)
180 logger=self.logger)
181 if not data: continue
--> 182 es_data += data
183
184 self.logger.info(" ".join(["Download took {t} seconds for p:'{p}' d:'{d}'",
~/Documents/Repositories/gator_core/gator_core/elasticsearch/es_requests.py in scrolled_search_query(self, index, query, size, scroll, request_timeout, docs_only)
144
145 data = self.es_conn.scroll(scroll_id=data['_scroll_id'], scroll=scroll,
--> 146 request_timeout=request_timeout)
147
148
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/utils.py in _wrapped(*args, **kwargs)
82 if p in kwargs:
83 params[p] = kwargs.pop(p)
---> 84 return func(*args, params=params, **kwargs)
85
86 return _wrapped
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/client/__init__.py in scroll(self, scroll_id, body, params)
1353 """
1354 return self.transport.perform_request(
-> 1355 "GET", _make_path("_search", "scroll", scroll_id), params=params, body=body
1356 )
1357
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/transport.py in perform_request(self, method, url, headers, params, body)
351 headers=headers,
352 ignore=ignore,
--> 353 timeout=timeout,
354 )
355
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/http_requests.py in perform_request(self, method, url, params, body, timeout, ignore, headers)
139 raw_data,
140 )
--> 141 self._raise_error(response.status_code, raw_data)
142
143 self.log_request_success(
~/anaconda3/envs/prognostic/lib/python3.6/site-packages/elasticsearch/connection/base.py in _raise_error(self, status_code, raw_data)
160
161 raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
--> 162 status_code, error_message, additional_info
163 )
TransportError: TransportError(413, '')

Transformations with desired values

I'm trying a process with PowerCenter Designer but I cannot achieve the desired result.
I have these initial data:
CODE CODE2 OPTION
001 A 89
001 A 55
001 A 12
002 B 25
002 A 59
025 A 44
For each CODE, I need the following: if there are several records for a CODE, the record with the highest value in the OPTION field gets the value 111111 in the OPTION2 field; if there is only one record for a CODE, it also gets 111111. I do this with a Sorter transformation in PowerCenter; that part is not complicated.
What I cannot do is the next step: the record with the second-highest OPTION value should get the OPTION value of the first record in OPTION2, and so on.
OUTPUT:
CODE CODE2 OPTION OPTION2
001 A 89 111111
001 A 55 89
001 A 12 55
002 A 59 111111
002 B 25 59
025 A 44 111111
How could I get this?
What transformations should I use?
Thanks! ^^
You can sort it by CODE and by OPTION in descending order. Then, in an Expression transformation, hold the previous record's values in variable ports. Ports are evaluated top to bottom and variable ports keep their values from the previous row, so v_OPTION2 can read v_PREV_CODE and v_PREV_OPTION before they are overwritten:
v_OPTION2 = IIF(ISNULL(v_PREV_CODE) OR CODE != v_PREV_CODE,
                111111,
                v_PREV_OPTION)
out_OPTION2 = v_OPTION2
v_PREV_OPTION = OPTION
v_PREV_CODE = CODE
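If you want to sanity-check this carry-the-previous-row logic outside PowerCenter, here is a rough pandas equivalent built on the sample data (illustrative only; the column names match the question):

import pandas as pd

df = pd.DataFrame({
    "CODE":   ["001", "001", "001", "002", "002", "025"],
    "CODE2":  ["A", "A", "A", "B", "A", "A"],
    "OPTION": [89, 55, 12, 25, 59, 44],
})

# Sort by CODE ascending and OPTION descending, as the Sorter transformation does.
df = df.sort_values(["CODE", "OPTION"], ascending=[True, False])

# Within each CODE, OPTION2 is the previous row's OPTION; the first row gets 111111.
df["OPTION2"] = df.groupby("CODE")["OPTION"].shift(1).fillna(111111).astype(int)
print(df)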
