Filter tweets in tweepy.StreamListener on_data method - filter

I understand from many articles on Stack Overflow that the filter method in the tweepy.streaming.Stream class applies a logical OR between the track and locations arguments,
so the call below will return tweets that either come from the USA bounding box or contain one of the tracked keywords:
streamObj = tweepy.streaming.Stream(oauthObject,
                                    EchoStreamListener(api=apiInstance,
                                                       dump_json=args.json,
                                                       numtweets=args.numtweets))
keyWordList = ['panthers', 'falcon']
GEOBOX_USA = [-125, 25.1, -60.5, 49.1]
streamObj.filter(locations=GEOBOX_USA, track=keyWordList, languages=['en'])
This solution (How to add a location filter to tweepy module) that checks keywords in the on_status method works great, but since I need to store the entire JSON payload I think I have to use on_data instead.
So I changed on_data (as shown in the code below), but I get this error:
File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 294, in _run
raise exception
KeyError: 'text'
# -*- coding: utf-8 -*-
from types import *
import sys
import tweepy
import json
import argparse
import io

class EchoStreamListener(tweepy.StreamListener):
    def __init__(self, api, dump_json=False, numtweets=0):
        self.api = api
        self.dump_json = dump_json
        self.count = 0
        self.limit = int(numtweets)
        super(tweepy.StreamListener, self).__init__()

    # def on_status(self, status):
    #     if any(keyWord in status.text.lower() for keyWord in keyWordList):
    #         print status.text
    #         self.count += 1
    #         return False if self.count == self.limit else True
    #     else:
    #         return True  # Don't kill the stream

    def on_data(self, tweet):
        tweet_data = json.loads(tweet)  # parse the JSON payload so it can be used as a normal dictionary
        if any(keyWord in tweet_data['text'] for keyWord in keyWordList):
            if self.dump_json:
                print json.dumps(tweet_data)
                saveFile.write(unicode(tweet) + "\n")
                self.count += 1
                return False if self.count == self.limit else True
            else:
                print tweet_data['created_at','name','text'].encode("utf-8").rstrip()

    def on_error(self, status_code):
        print >> sys.stderr, 'Encountered error with status code:', status_code
        return True  # Don't kill the stream
def get_parser():
    parser = argparse.ArgumentParser(add_help=True)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '-j', '--json',
        action='store_true',
        help='dump each tweet as a json string'
    )
    group.add_argument(
        '-t', '--text',
        dest='json',
        action='store_false',
        help='dump each tweet\'s text'
    )
    parser.add_argument(
        '-n', '--numtweets',
        metavar='numtweets',
        help='set number of tweets to retrieve'
    )
    return parser

if __name__ == '__main__':
    oauthObject = tweepy.OAuthHandler(myconsumer_key, myconsumer_secret)
    oauthObject.set_access_token(myaccess_key, myaccess_secret)
    apiInstance = tweepy.API(oauthObject)

    parser = get_parser()
    args = parser.parse_args()

    streamObj = tweepy.streaming.Stream(oauthObject,
                                        EchoStreamListener(api=apiInstance,
                                                           dump_json=args.json,
                                                           numtweets=args.numtweets))
    keyWordList = ['panthers', 'falcon']
    GEOBOX_USA = [-125, 25.1, -60.5, 49.1]

    saveFile = io.open('/Users/deepaktanna/raw_tweets.json', 'w', encoding='utf-8')
    streamObj.filter(locations=GEOBOX_USA, languages=['en'])
    saveFile.close()
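For reference, the KeyError: 'text' usually means the stream delivered a message that is not a tweet: limit notices, delete notices and similar control messages have no 'text' field, so on_data has to guard before indexing into the parsed JSON. A minimal sketch of such a guard (reusing the keyWordList and saveFile globals from the script above):

    def on_data(self, tweet):
        tweet_data = json.loads(tweet)
        if 'text' not in tweet_data:
            return True  # not a tweet (limit/delete notice); keep the stream alive
        if any(keyWord in tweet_data['text'].lower() for keyWord in keyWordList):
            if self.dump_json:
                saveFile.write(unicode(tweet) + "\n")  # store the full JSON payload
            else:
                print tweet_data['text'].encode("utf-8").rstrip()
            self.count += 1
            return False if self.count == self.limit else True
        return True  # no keyword match; keep listening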

Related

Urwid and Multiprocessing

I am trying to sequence some actions in urwid.
I made a timer which runs in the background and communicates with the main process,
like this:
from multiprocessing import Process, Pipe
import time
import urwid

def show_or_exit(key):
    if key in ('q', 'Q'):
        raise urwid.ExitMainLoop()

class midiloop(urwid.Frame):
    def __init__(self):
        self.message = urwid.Text('Press Space', align='center')
        self.filler = urwid.Filler(self.message, "middle")
        super().__init__(urwid.Frame(self.filler))

    def keypress(self, size, key):
        if key == " ":
            self.seq()
        else:
            return key

    def timer(self, conn):
        x = 0
        while True:
            if conn.poll() == False:
                pass
            else:
                z = conn.recv()
                if z == "kill":
                    return()
            conn.send(x)
            x += 1
            time.sleep(0.05)

    def seq(self):
        self.parent_conn, self.child_conn = Pipe()
        self.p = Process(target=self.timer, args=(self.child_conn,))
        self.p.start()
        while True:
            if self.parent_conn.poll(None):
                self.y = self.parent_conn.recv()
                self.message.set_text(str(self.y))
                loop.draw_screen()
            if self.y > 100:
                self.parent_conn.send("kill")
                self.message.set_text("Press Space")
                return()

if __name__ == '__main__':
    midiloop = midiloop()
    loop = urwid.MainLoop(midiloop, unhandled_input=show_or_exit, handle_mouse=True)
    loop.run()
The problem is that I'm blocking urwid's MainLoop with the while True:.
Can anyone give me a solution to listen for the key Q to quit the program before it reaches the end of the loop, and more generally to interact with urwid while communicating with the subprocess?
It seems to be rather complicated to combine multiprocessing and urwid.
Since you're using a timer and your class is called midiloop, I'm going to guess that maybe you want to implement a mini sequencer.
One possible way of implementing that is to use an asyncio loop instead of urwid's MainLoop and schedule events with the loop.call_later() function. I've implemented a simple drum machine with that approach in the past, using urwid for drawing the sequencer, asyncio for scheduling the play events and simpleaudio to play. You can see the code for that here: https://github.com/eliasdorneles/kickit
If you still want to implement communication with multiprocessing, I think your best bet is to use urwid.AsyncioEventLoop and the aiopipe helper for duplex communication.
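Even without asyncio, the blocking while True: can be avoided by driving the counter with urwid's own alarm mechanism, which keeps the main loop responsive to key presses. A rough sketch of that idea (no multiprocessing, just the non-blocking 0.05 s timer from the question):

import urwid

def show_or_exit(key):
    if key in ('q', 'Q'):
        raise urwid.ExitMainLoop()

message = urwid.Text('0', align='center')
fill = urwid.Filler(message, 'middle')
loop = urwid.MainLoop(fill, unhandled_input=show_or_exit)

counter = [0]  # mutable so the callback can update it

def tick(main_loop, user_data):
    counter[0] += 1
    message.set_text(str(counter[0]))   # the screen is redrawn automatically after the alarm fires
    main_loop.set_alarm_in(0.05, tick)  # re-schedule instead of looping

loop.set_alarm_in(0.05, tick)
loop.run()  # 'q' exits at any time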
It's not very minimal, I'm afraid. However, I did spend a day writing this Urwid frontend that starts, stops and communicates with a subprocess.
import os
import sys
from multiprocessing import Process, Pipe, Event
from collections import deque
import urwid


class suppress_stdout_stderr(object):
    """
    Suppresses stdout and stderr by piping them to dev null...
    The same place I send bad faith replies to my tweets
    """
    def __enter__(self):
        self.outnull_file = open(os.devnull, 'w')
        self.errnull_file = open(os.devnull, 'w')
        self.old_stdout_fileno_undup = sys.stdout.fileno()
        self.old_stderr_fileno_undup = sys.stderr.fileno()
        self.old_stdout_fileno = os.dup(sys.stdout.fileno())
        self.old_stderr_fileno = os.dup(sys.stderr.fileno())
        self.old_stdout = sys.stdout
        self.old_stderr = sys.stderr
        os.dup2(self.outnull_file.fileno(), self.old_stdout_fileno_undup)
        os.dup2(self.errnull_file.fileno(), self.old_stderr_fileno_undup)
        sys.stdout = self.outnull_file
        sys.stderr = self.errnull_file
        return self

    def __exit__(self, *_):
        sys.stdout = self.old_stdout
        sys.stderr = self.old_stderr
        os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
        os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
        os.close(self.old_stdout_fileno)
        os.close(self.old_stderr_fileno)
        self.outnull_file.close()
        self.errnull_file.close()


def subprocess_main(transmit, stop_process):
    with suppress_stdout_stderr():
        import time
        yup = ['yuuuup', 'yuuuuup', 'yeaup', 'yeoop']
        nope = ['noooooooe', 'noooope', 'nope', 'nope']
        mesg = 0
        i = 0
        while True:
            i = i % len(yup)
            if transmit.poll():
                mesg = transmit.recv()
            if mesg == 'Yup':
                transmit.send(yup[i])
            if mesg == 'Nope':
                transmit.send(nope[i])
            if stop_process.wait(0):
                break
            i += 1
            time.sleep(2)


class SubProcess:
    def __init__(self, main):
        """
        Handles forking, stopping and communication with a subprocess
        :param main: subprocess method to run; method signature is
            def main(transmit, stop_process):
                transmit: a multiprocessing Pipe used to send data to the parent process
                stop_process: a multiprocessing Event to set when you want the process to exit
        """
        self.main = main
        self.recv, self.transmit = None, None
        self.stop_process = None
        self.proc = None

    def fork(self):
        """
        Forks and starts the subprocess
        """
        self.recv, self.transmit = Pipe(duplex=True)
        self.stop_process = Event()
        self.proc = Process(target=self.main, args=(self.transmit, self.stop_process))
        self.proc.start()

    def write_pipe(self, item):
        self.recv.send(item)

    def read_pipe(self):
        """
        Reads data sent by the process into a list and returns it
        :return:
        """
        item = []
        if self.recv is not None:
            try:
                while self.recv.poll():
                    item += [self.recv.recv()]
            except:
                pass
        return item

    def stop(self):
        """
        Sets the event to tell the process to exit.
        note: this is co-operative multi-tasking, the process must respect the flag or this won't work!
        """
        self.stop_process.set()
        self.proc.join()


class UrwidFrontend:
    def __init__(self, subprocess_main):
        """
        Urwid frontend to control the subprocess and display its output
        """
        self.title = 'Urwid Frontend Demo'
        self.choices = 'Start Subprocess|Quit'.split('|')
        self.response = None
        self.item = deque(maxlen=10)
        self.event_loop = urwid.SelectEventLoop()
        # start the heartbeat
        self.event_loop.alarm(0, self.heartbeat)
        self.main = urwid.Padding(self.main_menu(), left=2, right=2)
        self.top = urwid.Overlay(self.main, urwid.SolidFill(u'\N{MEDIUM SHADE}'),
                                 align='center', width=('relative', 60),
                                 valign='middle', height=('relative', 60),
                                 min_width=20, min_height=9)
        self.loop = urwid.MainLoop(self.top, palette=[('reversed', 'standout', ''), ], event_loop=self.event_loop)
        self.subprocess = SubProcess(subprocess_main)

    def exit_program(self, button):
        raise urwid.ExitMainLoop()

    def main_menu(self):
        body = [urwid.Text(self.title), urwid.Divider()]
        for c in self.choices:
            button = urwid.Button(c)
            urwid.connect_signal(button, 'click', self.handle_button, c)
            body.append(urwid.AttrMap(button, None, focus_map='reversed'))
        return urwid.ListBox(urwid.SimpleFocusListWalker(body))

    def subproc_menu(self):
        self.response = urwid.Text('Waiting ...')
        body = [self.response, urwid.Divider()]
        choices = ['Yup', 'Nope', 'Stop Subprocess']
        for c in choices:
            button = urwid.Button(c)
            urwid.connect_signal(button, 'click', self.handle_button, c)
            body.append(urwid.AttrMap(button, None, focus_map='reversed'))
        listbox = urwid.ListBox(urwid.SimpleFocusListWalker(body))
        return listbox

    def update_subproc_menu(self, text):
        self.response.set_text(text)

    def handle_button(self, button, choice):
        if choice == 'Start Subprocess':
            self.main.original_widget = self.subproc_menu()
            self.subprocess.fork()
            self.item = deque(maxlen=10)
        if choice == 'Stop Subprocess':
            self.subprocess.stop()
            self.main.original_widget = self.main_menu()
        if choice == 'Quit':
            self.exit_program(button)
        if choice == 'Yup':
            self.subprocess.write_pipe('Yup')
        if choice == 'Nope':
            self.subprocess.write_pipe('Nope')

    def heartbeat(self):
        """
        heartbeat that runs 24 times per second
        """
        # read from the process
        self.item.append(self.subprocess.read_pipe())
        # display it
        if self.response is not None:
            self.update_subproc_menu(['Subprocess started\n', f'{self.item}\n', ])
            self.loop.draw_screen()
        # set the next beat
        self.event_loop.alarm(1 / 24, self.heartbeat)

    def run(self):
        self.loop.run()


if __name__ == "__main__":
    app = UrwidFrontend(subprocess_main)
    app.run()

How to pass flowfile attribute inside a python class in NiFi?

Goal: add a filename field value to the CSV using ExecuteScript in Python, by reading the flowfile attribute.
Problem: how do I pass the flowfile so that I can get the attribute and include it in the outputStream write?
The sample code below does not work for getting the filename attribute.
class PyStreamCallback(StreamCallback):
    def __init__(self, flowFile):
        self.ff = flowFile
        pass
    def process(self, inputStream, outputStream):
        text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        list_index = 0
        textArr = []
        newText = ''
        for t in text.splitlines():
            list_index += 1
            t = t + '|' + str(list_index) + '|"' + t + '"|' + self.ff.getAttribute('filename')
            textArr.append(t)
        newText = '\n'.join(textArr)
        outputStream.write(bytearray(newText.encode('utf-8')))

flowFile = session.get()
if (flowFile != None):
    flowFile = session.write(flowFile, PyStreamCallback())
    session.transfer(flowFile, REL_SUCCESS)
Declare a global variable to hold the filename attribute value. Sample code snippet:
import os
import sys
from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback
from org.apache.nifi.processors.script import ExecuteScript
from org.python.core.util.FileUtil import wrap
from io import StringIO

global file_name

# Define a subclass of StreamCallback for use in session.write()
class PyStreamCallback(StreamCallback):
    def __init__(self):
        pass
    def process(self, inputStream, outputStream):
        with wrap(inputStream) as f:
            lines = f.readlines()
        # append the filename attribute to every line
        lines = [line.strip() + '|' + file_name + '\n' for line in lines]
        with wrap(outputStream, 'w') as filehandle:
            filehandle.writelines("%s" % line for line in lines)
# end class

flowFile = session.get()
if (flowFile != None):
    try:
        file_name = flowFile.getAttribute('filename')
        flowFile = session.write(flowFile, PyStreamCallback())
        session.transfer(flowFile, ExecuteScript.REL_SUCCESS)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        excp = str(exc_type) + str(fname) + str(exc_tb.tb_lineno)
        attrMap = {'exception': str(excp)}
        flowFile = session.putAllAttributes(flowFile, attrMap)
        session.transfer(flowFile, ExecuteScript.REL_FAILURE)
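Alternatively, the approach from the question itself works if the flow file is actually passed into the callback's constructor; the original snippet failed because session.write() was given PyStreamCallback() with no argument even though __init__ requires one. A sketch of that variant (same ExecuteScript bindings for session and REL_SUCCESS):

from org.apache.commons.io import IOUtils
from java.nio.charset import StandardCharsets
from org.apache.nifi.processor.io import StreamCallback

class PyStreamCallback(StreamCallback):
    def __init__(self, flowFile):
        # keep a reference to the flow file so its attributes are visible in process()
        self.ff = flowFile
    def process(self, inputStream, outputStream):
        text = IOUtils.toString(inputStream, StandardCharsets.UTF_8)
        out_lines = [line + '|' + self.ff.getAttribute('filename') for line in text.splitlines()]
        outputStream.write(bytearray('\n'.join(out_lines).encode('utf-8')))

flowFile = session.get()
if flowFile is not None:
    flowFile = session.write(flowFile, PyStreamCallback(flowFile))  # pass the flow file in
    session.transfer(flowFile, REL_SUCCESS)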

Why does Google AutoML Sample Python Code Not Run?

The sample Google AutoML prediction Python code causes an error on execution. The recommended invocation is "python predict.py YOUR_LOCAL_IMAGE_FILE YOUR_PROJECT_ID YOUR_MODEL_ID". The error is:
File "predict.py", line 25
print get_prediction(content, project_id, model_id)
^
SyntaxError: invalid syntax
(Thanks in advance)
Google sample code
import sys

from google.cloud import automl_v1beta1
from google.cloud.automl_v1beta1.proto import service_pb2

def get_prediction(content, project_id, model_id):
    prediction_client = automl_v1beta1.PredictionServiceClient()
    name = 'projects/{}/locations/us-central1/models/{}'.format(project_id, model_id)
    payload = {'image': {'image_bytes': content}}
    params = {}
    request = prediction_client.predict(name, payload, params)
    return request  # waits till request is returned

if __name__ == '__main__':
    file_path = sys.argv[1]
    project_id = sys.argv[2]
    model_id = sys.argv[3]
    with open(file_path, 'rb') as ff:
        content = ff.read()
    print get_prediction(content, project_id, model_id)
The problem is the last line of the code, the print: it uses the Python 2 print statement, which is invalid syntax when the script is run with Python 3, where print must be called as a function (running it with a Python 2 interpreter would also make the original sample work unchanged). Change the last line from:
print get_prediction(content, project_id, model_id)
to
print(get_prediction(content, project_id, model_id))
The full corrected sample:
import sys

from google.cloud import automl_v1beta1
from google.cloud.automl_v1beta1.proto import service_pb2

# 'content' is base-64-encoded image data.
def get_prediction(content, project_id, model_id):
    prediction_client = automl_v1beta1.PredictionServiceClient()
    name = 'projects/{}/locations/us-central1/models/{}'.format(project_id, model_id)
    payload = {'image': {'image_bytes': content}}
    params = {}
    request = prediction_client.predict(name, payload, params)
    return request  # waits till request is returned

if __name__ == '__main__':
    file_path = sys.argv[1]
    project_id = sys.argv[2]
    model_id = sys.argv[3]
    with open(file_path, 'rb') as ff:
        content = ff.read()
    print(get_prediction(content, project_id, model_id))
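Once the script runs, the returned PredictResponse can also be inspected in code rather than just printed; a small usage sketch for a classification model (field names follow the v1beta1 response, and it assumes authentication is already configured via GOOGLE_APPLICATION_CREDENTIALS):

response = get_prediction(content, project_id, model_id)
for result in response.payload:  # one AnnotationPayload per predicted label
    print(result.display_name, result.classification.score)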

Unable to fetch value from JSR223 Sampler

I have a JSR223 Sampler in JMeter with the following code:
import com.jayway.jsonpath.JsonPath
import org.apache.commons.lang3.RandomUtils
import org.apache.jmeter.samplers.SampleResult

def options = JsonPath.read(prev.getResponseDataAsString(), '$.options')
if (options.size() == "1" || options.size() == "2") {
    def randomOption = options.get(0)
    def code = randomOption.get("code")
    vars.put('code1', code)
    def values = randomOption.get('values')
    def randomValue = values.get(RandomUtils.nextInt(0, values.size()))
    def value = randomValue.get('value')
    vars.put('valueF', value)

    def options2 = JsonPath.read(prev.getResponseDataAsString(), '$.options')
    def randomOption2 = options2.get(1)
    def code2 = randomOption2.get("code")
    vars.put('code2', code2)
    def values2 = randomOption2.get('values')
    def randomValue2 = values2.get(RandomUtils.nextInt(0, values.size()))
    def value2 = randomValue2.get('value')
    vars.put('valueF2', value2)
}
else {
    vars.put('no loop', 'Not enterd into loop')
}

vars.put('counts', new groovy.json.JsonSlurper().parse(prev.getResponseData()).options.size() as String)

def size = com.jayway.jsonpath.JsonPath.read(prev.getResponseDataAsString(), '$.options_available')
if (size == []) {
    vars.put('size', 'NonConfigurable')
}
else {
    vars.put('size', 'Configurable')
}
I am unable to get the values of code1, valueF, code2 and valueF2 outside of the sampler. Any possible help is appreciated!
Try amending this line:
def value = randomValue.get('value')
to
def value = randomValue.get('value') as String
Similarly for the "code" variable:
def code = randomOption.get("code") as String
Get used to looking into the jmeter.log file when you face an issue with JMeter; in the majority of cases it contains enough troubleshooting information.
If you need further assistance on this topic, update the question with the full response you're trying to parse. In the meantime:
Groovy: Parsing and Producing JSON
JayWay JSonPath
Groovy is the New Black
The JSONArray size should be obtained with length(); change your code to:
if (options.length() == 1 || options.length() == 2) {

Python using multiple decorators on a method and logging the method name inside each decorator

Assume I have two decorator functions in a file log.py:
import logging
import time

def timeit(logger, level='DEBUG'):
    def timeit_decorator(method):
        def timeit_wrapper(*args, **kw):
            ts = time.time()
            result = method(*args, **kw)
            te = time.time()
            logger.log(logging.getLevelName(level), '%2.4f sec' % (te - ts),
                       extra=dict(filename=method.__code__.co_filename,
                                  funcName=method.__code__.co_name))
            return result
        return timeit_wrapper
    return timeit_decorator
And I have a file test.py with one function that uses both decorators, like this:
@timeit(logger=LOGGER)
@logargs(logger=LOGGER)
def test(arg1='something'):
    pass
When I run test.py, one of the decorators prints the module, func & lineno as [test.py:7 - test() ],
and the other one prints [log.py:6 - timeit_wrapper()].
How do I make both decorators print the actual method, module & lineno, i.e. [test.py:7 - test() ]?
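One way to make both decorators report the original function is to apply functools.wraps in every wrapper and resolve the innermost function with inspect.unwrap before reading its code object (assuming Python 3, where functools.wraps sets __wrapped__ and inspect.unwrap follows it; the logargs decorator would need @functools.wraps as well). A sketch of timeit adjusted that way, logging the location in the message itself so it works with a stock logger:

import functools
import inspect
import logging
import time

def timeit(logger, level='DEBUG'):
    def timeit_decorator(method):
        @functools.wraps(method)               # copies __name__/__module__ and sets __wrapped__
        def timeit_wrapper(*args, **kw):
            original = inspect.unwrap(method)  # walk past any inner wrappers, e.g. logargs
            ts = time.time()
            result = method(*args, **kw)
            te = time.time()
            logger.log(logging.getLevelName(level),
                       '%2.4f sec [%s:%d - %s()]' % (te - ts,
                                                     original.__code__.co_filename,
                                                     original.__code__.co_firstlineno,
                                                     original.__name__))
            return result
        return timeit_wrapper
    return timeit_decorator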
