This is my streaming code
# Build the Spark session used for the streaming query.
session = get_session(SparkConf())
lookup = '/Users/vahagn/stream'

# Schema of the incoming JSON records: two string columns.
userSchema = StructType().add("auction_id", "string").add("dma", "string")

# Read the JSON files under the directory as a streaming DataFrame.
auctions = session.readStream.schema(userSchema).json("/Users/vahagn/stream/")

# Running count of rows per auction_id.
inputDF = auctions.groupBy("auction_id").count()

# Parenthesised print behaves the same on Python 2 and is valid on Python 3.
print(inputDF.isStreaming)
inputDF.printSchema()

# Write the aggregated counts to the console in "update" output mode and
# block until the query terminates (or fails).
inputDF.writeStream.outputMode("update").format("console").start().awaitTermination()
After reading the first file I get the error below, which doesn't explain anything.
Any ideas?
Traceback (most recent call last):
File "/Users/vahagn/hydra/spark/structured_streaming.py", line 257, in <module>
inputDF.writeStream.outputMode("update").format("console").start().awaitTermination()
File "/Users/vahagn/Downloads/spark-2.3.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/sql/streaming.py", line 106, in awaitTermination
File "/Users/vahagn/Downloads/spark-2.3.0-bin-hadoop2.7/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 1160, in __call__
File "/Users/vahagn/Downloads/spark-2.3.0-bin-hadoop2.7/python/lib/pyspark.zip/pyspark/sql/utils.py", line 75, in deco
pyspark.sql.utils.StreamingQueryException: u'null\n=== Streaming Query ===\nIdentifier: [id = 2f4b442a-38f9-41f1-a3d4-52e0a48427c0, runId = b843f25f-4132-4d52-ae64-f3be5e85a3d9]\nCurrent Committed Offsets: {}\nCurrent Available Offsets: {FileStreamSource[file:/Users/vahagn/stream]: {"logOffset":0}}\n\nCurrent State: ACTIVE\nThread State: RUNNABLE\n\nLogical Plan:\nAggregate [auction_id#0], [auction_id#0, count(1) AS count#7L]\n+- StreamingExecutionRelation FileStreamSource[file:/Users/vahagn/stream], [auction_id#0, dma#1]\n'
I solved the problem by downgrading from Java 9 to Java 8.
Related
I was trying to compute sentiment using the Harvard IV-4 dictionary.
I installed "pysentiment" successfully.
I ran the following:
import pysentiment as ps
# Instantiate the Harvard IV-4 scorer. Per the traceback in this post, the
# constructor builds a Tokenizer, which opens files under the package's
# static/ directory.
hiv4 = ps.HIV4()
# `text` is not defined in this snippet; it must be set before this line runs.
tokens = hiv4.tokenize(text)
# Compute the score for the token list.
score = hiv4.get_score(tokens)
and I got the following error:
Traceback (most recent call last):
File "C:/Users/df/Desk Top/Finalazed/punctuation.py", line 274, in <module>
hiv4 = ps.HIV4()
File "C:\Users\df\AppData\Local\Programs\Python\Python37\lib\site-packages\pysentiment\base.py", line 55, in __init__
self._tokenizer = Tokenizer()
File "C:\Users\df\AppData\Local\Programs\Python\Python37\lib\site-packages\pysentiment\utils.py", line 36, in __init__
self._stopset = self.get_stopset()
File "C:\Users\df\AppData\Local\Programs\Python\Python37\lib\site-packages\pysentiment\utils.py", line 52, in get_stopset
fin = open('%s/%s'%(STATIC_PATH, f), 'rb')
FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\df\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\pysentiment\\static/Currencies.txt'
Could anybody tell me why I am getting this? Thanks.
Copy the pysentiment folder into the given path. The installed pysentiment folder doesn't actually contain the static subfolder. You can check this by displaying the hidden folder "local".
I am getting the error below while restoring a Couchbase database from a server to my local Mac.
Traceback (most recent call last):
File "/Applications/Couchbase Server.app/Contents/Resources/couchbase-core/lib/python/cbrestore", line
12, in <module>
pump_transfer.exit_handler(pump_transfer.Restore().main(sys.argv))
File "/Applications/Couchbase Server.app/Contents/Resources/couchbase-core/lib/python/pump_transfer.py", line 80, in main
rv = pumpStation.run()
File "/Applications/Couchbase Server.app/Contents/Resources/couchbase-core/lib/python/pump.py", line 136, in run
rv = self.transfer_bucket_msgs(source_bucket, source_map, sink_map)
File "/Applications/Couchbase Server.app/Contents/Resources/couchbase-core/lib/python/pump.py", line 233, in transfer_bucket_msgs
source_map)
File "/Applications/Couchbase Server.app/Contents/Resources/couchbase-core/lib/python/pump_bfd.py", line 546, in total_msgs
rv, db, ver = connect_db(x, opts, CBB_VERSION)
ValueError: need more than 2 values to unpack
Any help is appreciated.
Thanks,
Emraan
When I try to get the values for a list of keys using asyncio_redis or aioredis, I get the following error. I know it has something to do with Python sockets, but I am unable to resolve it. I have attached both the code and the error log below. Here keys is a list of large byte arrays, and get_params_redis is called by multiple processes. Any help would be appreciated, thanks!
async def multi_get_key_redis(keys):
    """Fetch the value stored under each key in *keys* from the local Redis.

    A connection pool to ``redis://localhost`` is opened for the duration of
    the call and is always closed again, even when a lookup raises.

    Args:
        keys: Iterable of Redis keys to look up.

    Returns:
        A list holding the result of ``redis.get(key)`` for every key, in
        the same order as *keys*.
    """
    redis = await aioredis.create_redis_pool('redis://localhost')
    try:
        # Lookups are awaited one after another, so results keep key order.
        result = [await redis.get(key) for key in keys]
    finally:
        # Close the pool on both the success and the error path.
        redis.close()
        await redis.wait_closed()
    print(result)
    return result
def get_params_redis(shapes, loop=None):
    """Load one parameter tensor per entry of *shapes* from Redis.

    The Redis keys are the integer positions ``0 .. len(shapes) - 1``. Each
    fetched value is decoded with ``pc._loads``, reshaped to the matching
    shape and wrapped in a ``torch.nn.Parameter``.

    Args:
        shapes: Sequence of target shapes, one per stored parameter.
        loop: Optional event loop to run the Redis lookups on. Defaults to
            ``asyncio.get_event_loop()``.
            NOTE(review): when this runs inside multiprocessing workers,
            each process presumably needs its own loop -- confirm.

    Returns:
        A list of ``torch.nn.Parameter`` objects in the order of *shapes*.
    """
    if loop is None:
        loop = asyncio.get_event_loop()
    keys = list(range(len(shapes)))
    values = loop.run_until_complete(multi_get_key_redis(keys))
    # values[i] belongs to shapes[i]; zip pairs them without a manual counter.
    return [
        torch.nn.Parameter(torch.from_numpy(pc._loads(raw).reshape(shape)))
        for raw, shape in zip(values, shapes)
    ]
Error Log:
Process Process-1:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/Users/srujithpoondla/largescaleml_project/train_redis.py", line 33, in train_redis
train_redis_epoch(epoch, args, model, train_loader, optimizer,shapes_len, loop)
File "/Users/srujithpoondla/largescaleml_project/train_redis.py", line 43, in train_redis_epoch
params = get_params_redis(shapes_len,loop)
File "/Users/srujithpoondla/largescaleml_project/common_functions.py", line 76, in get_params_redis
params = loop.run_until_complete(multi_get_key_redis(keys))
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 454, in run_until_complete
self.run_forever()
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 421, in run_forever
self._run_once()
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 1395, in _run_once
event_list = self._selector.select(timeout)
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/selectors.py", line 577, in select
kev_list = self._kqueue.control(None, max_ev, timeout)
OSError: [Errno 9] Bad file descriptor
I seem to have some problems getting Python to read key events. I wrote this piece of code
to record while I hold the space key down and stop when I release it.
import pyaudio
import wave
import keyboard
# Capture settings.
CHUNK = 1024  # frames requested per stream.read() call
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5  # not used below: recording length is driven by the key
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()
frames = []
try:
    # Query the sample width before the PyAudio instance is terminated.
    sample_width = p.get_sample_size(FORMAT)
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    try:
        # Block until space goes down; without this the loop below ends
        # immediately unless the key is already held when it is reached.
        keyboard.wait('space')
        print("* recording")
        while keyboard.is_pressed('space'):
            frames.append(stream.read(CHUNK))
        print("* done recording")
    finally:
        # Release the input stream even if reading or key polling raised.
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()

# Write the captured frames to the output WAV file.
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
try:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
finally:
    wf.close()
But when running this code, I get this error message.
python sound_record.py
* recording
Traceback (most recent call last):
File "sound_record.py", line 24, in <module>
while keyboard.is_pressed('space'):
File "/usr/local/lib/python2.7/site-packages/keyboard/__init__.py", line 162, in is_pressed
_listener.start_if_necessary()
File "/usr/local/lib/python2.7/site-packages/keyboard/_generic.py", line 36, in start_if_necessary
self.init()
File "/usr/local/lib/python2.7/site-packages/keyboard/__init__.py", line 112, in init
_os_keyboard.init()
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixkeyboard.py", line 110, in init
build_device()
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixkeyboard.py", line 106, in build_device
ensure_root()
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixcommon.py", line 163, in ensure_root
raise ImportError('You must be root to use this library on linux.')
ImportError: You must be root to use this library on linux.
And when I run it using sudo:
sudo !!
sudo python sound_record.py
Password:
* recording
Traceback (most recent call last):
File "sound_record.py", line 24, in <module>
while keyboard.is_pressed('space'):
File "/usr/local/lib/python2.7/site-packages/keyboard/__init__.py", line 162, in is_pressed
_listener.start_if_necessary()
File "/usr/local/lib/python2.7/site-packages/keyboard/_generic.py", line 36, in start_if_necessary
self.init()
File "/usr/local/lib/python2.7/site-packages/keyboard/__init__.py", line 112, in init
_os_keyboard.init()
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixkeyboard.py", line 110, in init
build_device()
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixkeyboard.py", line 107, in build_device
device = aggregate_devices('kbd')
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixcommon.py", line 141, in aggregate_devices
uinput = make_uinput()
File "/usr/local/lib/python2.7/site-packages/keyboard/_nixcommon.py", line 27, in make_uinput
uinput = open("/dev/uinput", 'wb')
IOError: [Errno 1] Operation not permitted: '/dev/uinput'
So why am I getting this error message?
You appear to be using the Python package keyboard, whose description is:
Hook and simulate keyboard events on Windows and Linux
If you want to work with keyboard events on macOS, you'll need to find a package that supports it.
I have the following code, which reads an OAuth2 token from a file and then tries to perform a document-list query to find a specific spreadsheet that I want to copy. However, no matter what I try, the code either errors out or returns an object containing no document data.
I am using gdata.docs.client.DocsClient, which as far as I can tell is version 3 of the API.
def CreateClient():
    """Create a Documents List client, restoring a saved auth token if any.

    Returns:
        A ``gdata.docs.client.DocsClient``. When ``config.CONFIG_FILE``
        exists, the client's ``auth_token`` is taken from the pickled object
        stored there; otherwise the client is returned without a token.
    """
    client = gdata.docs.client.DocsClient(source=config.APP_NAME)
    client.http_client.debug = config.DEBUG
    # Reuse the previously saved token when one is available.
    if os.path.exists(config.CONFIG_FILE):
        # Pickles are binary data, so open with 'rb'; `with` closes the file
        # even if unpickling fails.
        # NOTE: pickle.load can execute code embedded in the file -- only
        # load a CONFIG_FILE that this application wrote itself.
        with open(config.CONFIG_FILE, 'rb') as f:
            tok = pickle.load(f)
        client.auth_token = tok.auth_token
    return client
1st query attempt
def get_doc():
    """Query the document list for resources titled exactly 'RichSheet'.

    Returns:
        The feed returned by ``client.GetResources`` for the query.
    """
    # The flags are passed as the string 'true' rather than the bool True:
    # the query values end up in urllib.quote() while the OAuth signature is
    # built, and a bool there fails with
    # "'bool' object has no attribute 'rstrip'".
    new_api_query = gdata.docs.client.DocsQuery(
        title='RichSheet', title_exact='true', show_collections='true')
    return client.GetResources(q=new_api_query)
This fails with the following stack trace:
Traceback (most recent call last):
File "/Users/richard/PycharmProjects/reportone/make_my_report.py", line 83, in <module>
get_doc()
File "/Users/richard/PycharmProjects/reportone/make_my_report.py", line 57, in get_doc
d = client.GetResources(q = new_api_query)
File "/Users/richard/PycharmProjects/reportone/gdata/docs/client.py", line 151, in get_resources
**kwargs)
File "/Users/richard/PycharmProjects/reportone/gdata/client.py", line 640, in get_feed
**kwargs)
File "/Users/richard/PycharmProjects/reportone/gdata/docs/client.py", line 66, in request
return super(DocsClient, self).request(method=method, uri=uri, **kwargs)
File "/Users/richard/PycharmProjects/reportone/gdata/client.py", line 267, in request
uri=uri, auth_token=auth_token, http_request=http_request, **kwargs)
File "/Users/richard/PycharmProjects/reportone/atom/client.py", line 115, in request
self.auth_token.modify_request(http_request)
File "/Users/richard/PycharmProjects/reportone/gdata/gauth.py", line 1047, in modify_request
token_secret=self.token_secret, verifier=self.verifier)
File "/Users/richard/PycharmProjects/reportone/gdata/gauth.py", line 668, in generate_hmac_signature
next, token, verifier=verifier)
File "/Users/richard/PycharmProjects/reportone/gdata/gauth.py", line 629, in build_oauth_base_string
urllib.quote(params[key], safe='~')))
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 1266, in quote
if not s.rstrip(safe):
AttributeError: 'bool' object has no attribute 'rstrip'
Process finished with exit code 1
Then my second attempt:
def get_doc():
    """Run a full-text query for 'RichSheet' and return the resulting feed.

    Returns:
        The feed returned by ``client.GetResources`` for the query.
    """
    # NOTE(review): DocumentQuery comes from gdata.docs.service, whereas the
    # client is described as a gdata.docs.client.DocsClient. Presumably the
    # client does not apply this query type, which would explain the empty
    # feed -- confirm against the gdata sources before relying on it.
    other = gdata.docs.service.DocumentQuery(text_query='RichSheet')
    # Hand the feed back to the caller instead of discarding it.
    return client.GetResources(q=other)
This returns a ResourceFeed object, but it has no content. I have been through the source code for these functions, but nothing is obvious.
Have I missed something? Or should I go back to version 2 of the API?