Python method comparison - Rock, Paper, Scissors

In my Rock, Paper, Scissors game, the bot player is created with default values, while the Alex player gets values passed in from main.py. When I call the comparison method I get an error.
Player.py:

from secrets import choice
from variants import Variants

class Player:
    name = '',
    choice = ''

    def __init__(self, choise='ROCK', name='bot'):
        self.name = name
        self.choice = choice

    def whoWins(self, bot, alex):
        if bot.choice > alex.choice:
            print('bot, winner')
        if bot.choice < alex.choice:
            print('Alex, winner')
        if bot.choice == alex.choice:
            print('draw')
main.py:

from variants import Variants
from player import Player

bot = Player()
alex = Player(Variants.ROCK, "Alex")
bot.whoWins(bot, alex)
variants.py:

from enum import Enum

class Variants(Enum):
    ROCK = 1,
    PAPER = 2,
    SCISSORS = 3

The problem with Variants is the trailing comma after ROCK and PAPER -- the comma turns the value into a tuple, so
ROCK.value == (1, )
PAPER.value == (2, )
SCISSORS.value == 3
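A quick REPL check makes the difference visible (a minimal demonstration, not part of the original post):

from enum import Enum

class Variants(Enum):
    ROCK = 1,       # trailing comma: the value is the tuple (1,)
    PAPER = 2,
    SCISSORS = 3    # no comma: the value is the int 3

print(Variants.ROCK.value)      # (1,)
print(Variants.SCISSORS.value)  # 3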

Answer:
I had to rewrite the comparison method; here is what I did. main.py remained the same:

from variants import Variants
from player import Player

bot = Player()
alex = Player(Variants.ROCK, "Alex")
bot.whoWins(bot, alex)
variants.py is the same except the trailing commas are gone:

from enum import Enum

class Variants(Enum):
    ROCK = 1
    PAPER = 2
    SCISSORS = 3
Player.py:

from variants import Variants

class Player:
    name = ''
    choice = ''

    def __init__(self, choice=Variants.ROCK, name='bot'):
        self.name = name
        self.choice = choice

    def whoWins(self, bot, alex):
        # Matching choices are a draw (ROCK/ROCK, PAPER/PAPER, SCISSORS/SCISSORS).
        if bot.choice == alex.choice:
            print('draw!')
        # Alex wins: PAPER covers ROCK, SCISSORS cut PAPER, ROCK crushes SCISSORS.
        elif (bot.choice == Variants.ROCK and alex.choice == Variants.PAPER):
            print('Alex, win!')
        elif (bot.choice == Variants.PAPER and alex.choice == Variants.SCISSORS):
            print('Alex, win!')
        elif (bot.choice == Variants.SCISSORS and alex.choice == Variants.ROCK):
            print('Alex, win!')
        # Everything else is the mirror image: the bot wins.
        else:
            print('Bot, win!')
The original error on if bot.choice > alex.choice: (TypeError: '>' not supported between instances of 'method' and 'method') is gone. It came from the old __init__: the parameter was misspelled choise, so self.choice = choice picked up the choice imported from secrets (a bound method) instead of the argument, and the comparison was between two methods.
Everything is OK now! Since both players pick ROCK in main.py, the result is a draw:

PS C:\Users\user\2> python.exe main.py
draw!
PS C:\Users\user\2>
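Side note: plain Enum members support == but not < or >, which is why the rewrite spells out each pairing (an IntEnum would restore ordering comparisons). And once the values are the integers 1-3, the whole chain can collapse into modular arithmetic. A sketch of that idea, not part of the original answer:

from variants import Variants

def who_wins(bot, alex):
    # With ROCK=1, PAPER=2, SCISSORS=3, each choice beats the one just below it,
    # and ROCK beats SCISSORS, so the winner falls out of (a - b) % 3.
    diff = (alex.choice.value - bot.choice.value) % 3
    if diff == 0:
        print('draw!')
    elif diff == 1:
        print('Alex, win!')  # e.g. alex PAPER (2) beats bot ROCK (1)
    else:
        print('Bot, win!')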

Related

applyInPandas() aggregation runs slowly on big delta table

I'm trying to create a gold table notebook in Databricks, however it would take 9 days to fully reprocess the historical data (43GB, 35k parquet files). I tried scaling up the cluster but it doesn't go above 5000 records/second. The bottleneck seems to be the applyInPandas() function. I'm wondering if I could replace pandas with anything else to make the gold notebook execute faster.
The silver table has 60 columns (read_id, reader_id, tracker_timestamp, event_type, ebook_id, page_id, agent_ip, agent_device_type, ...). Each row represents one read event of an ebook, e.g. 'page turn', 'click on image', 'click on link', ... All of the events that occurred in a single session share the same read_id. In the gold table I'm trying to group those events into sessions and count how many times each event occurred in the session. So instead of 100+ rows of data per read session in the silver table, I end up with a single aggregated row in the gold table.
Input is the silver delta table:
import pyspark.sql.functions as F
import pyspark.sql.types as T
import pandas as pd
from pyspark.sql.functions import pandas_udf

input = (spark
    .readStream
    .format("delta")
    .option("withEventTimeOrder", "true")
    .option("maxFilesPerTrigger", 100)
    .load(f"path_to_silver_bucket")
)
I use withWatermark and session_window functions to ensure I end up grouping all of the events from the single read session. (read session automatically ends 30 minutes after the last reader activity)
group = (input
    .withWatermark("tracker_timestamp", "10 minutes")
    .groupBy("read_id", F.session_window(input.tracker_timestamp, "30 minutes")))
In the next step I use the applyInPandas function like so:
sessions = group.applyInPandas(processing_function, schema=processing_function_output_schema)
Definition of the processing_function used in applyInPandas:
def processing_function(df):
    surf_time_ms = df.query('event_type == "surf"')['duration'].sum()
    immerse_time_ms = df.query('event_type == "immersion"')['duration'].sum()
    min_timestamp = df['tracker_timestamp'].min()
    max_timestamp = df['tracker_timestamp'].max()
    shares = len(df.query('event_type == "share"'))
    leads = len(df.query('event_type == "lead_store"'))
    is_read = len(df.query('event_type == "surf"')) > 0
    distinct_pages = df['page_id'].nunique()
    data = {
        "read_id": df['read_id'].values[0],
        "surf_time_ms": surf_time_ms,
        "immerse_time_ms": immerse_time_ms,
        "min_timestamp": min_timestamp,
        "max_timestamp": max_timestamp,
        "shares": shares,
        "leads": leads,
        "is_read": is_read,
        "number_of_events": len(df),
        "distinct_pages": distinct_pages
    }
    for field in not_calculated_string_fields:
        data[field] = df[field].values[0]
    new_df = pd.DataFrame(data=data, index=['read_id'])
    for x in all_events:
        new_df[f"count_{x}"] = df.query(f"type == '{x}'").count()
    for x in duration_events:
        duration = df.query(f"event_type == '{x}'")['duration']
        duration_sum = duration.sum()
        new_df[f"duration_{x}_ms"] = duration_sum
        if duration_sum > 0:
            new_df[f"mean_duration_{x}_ms"] = duration.mean()
        else:
            new_df[f"mean_duration_{x}_ms"] = 0
    return new_df
And finally, I'm writing the calculated row to the gold table like so:
for_partitioning = (sessions
    .withColumn("tenant", F.col("story_tenant"))
    .withColumn("year", F.year(F.col("min_timestamp")))
    .withColumn("month", F.month(F.col("min_timestamp"))))

checkpoint_path = "checkpoint-path"
gold_path = f"gold-bucket"

(for_partitioning
    .writeStream
    .format('delta')
    .partitionBy('year', 'month', 'tenant')
    .option("mergeSchema", "true")
    .option("checkpointLocation", checkpoint_path)
    .outputMode("append")
    .start(gold_path))
Can anybody think of a more efficient way to do a UDF in PySpark than applyInPandas for the above example? I simply cannot afford to wait 9 days to reprocess 43GB of data...
I've tried playing around with different input and output options (e.g. .option("maxFilesPerTrigger", 100)) but the real problem seems to be applyInPandas.
You could rewrite your processing_function into native Spark if you really wanted. Each pandas expression below is followed by its native equivalent:

pandas: "read_id": df['read_id'].values[0]
Spark:  F.first('read_id').alias('read_id')

pandas: "surf_time_ms": df.query('event_type == "surf"')['duration'].sum()
Spark:  F.sum(F.when(F.col('event_type') == 'surf', F.col('duration'))).alias('surf_time_ms')

pandas: "immerse_time_ms": df.query('event_type == "immersion"')['duration'].sum()
Spark:  F.sum(F.when(F.col('event_type') == 'immersion', F.col('duration'))).alias('immerse_time_ms')

pandas: "min_timestamp": df['tracker_timestamp'].min()
Spark:  F.min('tracker_timestamp').alias('min_timestamp')

pandas: "max_timestamp": df['tracker_timestamp'].max()
Spark:  F.max('tracker_timestamp').alias('max_timestamp')

pandas: "shares": len(df.query('event_type == "share"'))
Spark:  F.count(F.when(F.col('event_type') == 'share', F.lit(1))).alias('shares')

pandas: "leads": len(df.query('event_type == "lead_store"'))
Spark:  F.count(F.when(F.col('event_type') == 'lead_store', F.lit(1))).alias('leads')

pandas: "is_read": len(df.query('event_type == "surf"')) > 0
Spark:  (F.count(F.when(F.col('event_type') == 'surf', F.lit(1))) > 0).alias('is_read')

pandas: "number_of_events": len(df)
Spark:  F.count(F.lit(1)).alias('number_of_events')

pandas: "distinct_pages": df['page_id'].nunique()
Spark:  F.countDistinct('page_id').alias('distinct_pages')

pandas: for field in not_calculated_string_fields:
            data[field] = df[field].values[0]
Spark:  *[F.first(field).alias(field) for field in not_calculated_string_fields]
pandas: for x in all_events:
            new_df[f"count_{x}"] = df.query(f"type == '{x}'").count()
This one can probably be skipped: as far as my tests go, the new columns get NaN values, because .count() returns a Series object instead of one simple value.
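If those per-event counts are actually wanted, a native equivalent would follow the same pattern as shares and leads above (a sketch, assuming the query string type == '{x}' was meant to be event_type == '{x}', which looks like a typo):

Spark:  *[F.count(F.when(F.col('event_type') == x, F.lit(1))).alias(f"count_{x}") for x in all_events]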
pandas: for x in duration_events:
            duration = df.query(f"event_type == '{x}'")['duration']
            duration_sum = duration.sum()
            new_df[f"duration_{x}_ms"] = duration_sum
            if duration_sum > 0:
                new_df[f"mean_duration_{x}_ms"] = duration.mean()
            else:
                new_df[f"mean_duration_{x}_ms"] = 0
Spark:  *[F.sum(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"duration_{x}_ms") for x in duration_events]
        *[F.mean(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"mean_duration_{x}_ms") for x in duration_events]
So, instead of

def processing_function(df):
    ...

sessions = group.applyInPandas(processing_function, schema=processing_function_output_schema)

you could use efficient native Spark:

sessions = group.agg(
    F.first('read_id').alias('read_id'),
    F.sum(F.when(F.col('event_type') == 'surf', F.col('duration'))).alias('surf_time_ms'),
    F.sum(F.when(F.col('event_type') == 'immersion', F.col('duration'))).alias('immerse_time_ms'),
    F.min('tracker_timestamp').alias('min_timestamp'),
    F.max('tracker_timestamp').alias('max_timestamp'),
    F.count(F.when(F.col('event_type') == 'share', F.lit(1))).alias('shares'),
    F.count(F.when(F.col('event_type') == 'lead_store', F.lit(1))).alias('leads'),
    (F.count(F.when(F.col('event_type') == 'surf', F.lit(1))) > 0).alias('is_read'),
    F.count(F.lit(1)).alias('number_of_events'),
    F.countDistinct('page_id').alias('distinct_pages'),
    *[F.first(field).alias(field) for field in not_calculated_string_fields],
    # skipped count_{x}
    *[F.sum(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"duration_{x}_ms") for x in duration_events],
    *[F.mean(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"mean_duration_{x}_ms") for x in duration_events],
)

How do I make this flow from def get_burger_choice to def get_burger_price

I am trying to make the program continue from get_burger_choice into price_burger so that it flows through and finishes: first get the type of burger, then get its price from price_burger. The problem is that I don't know how to integrate the two. I am completely stuck at the end of get_burger_choice, after the user answers "no": at that point I would like it to ask whether they want to see the price and then transition to price_burger.
Here is the code I used; the spot where I'm stuck is marked with a comment (a sketch of the handoff follows the code below):
def get_burger_choice():
    SIZE = 3
    burgers = ["Cheesy", "Regular", "Veggie"]
    search = True
    while search == True:
        index = 0
        found = False
        print("What type of burger would you like?")
        searchValue = input()
        while found == False and index <= SIZE - 1:
            if burgers[index].lower() == searchValue.lower():
                found = True
            else:
                index = index + 1
        if found:
            print("That is on our menu")
        else:
            print("That is not in our menu")
        print("Do you want to see another burger? yes/no: ")
        tryagain = input()
        if tryagain.lower() == 'no':
            print("Would you like to see the price? yes or no: ")
            # stuck here: how do I transition to the price logic?
def price_burger(burger_choice, burger_price):
    if burger_choice == 'cheesy':
        burger_price = 3.00
    elif burger_choice == 'regular':
        burger_price = 2.00
    elif burger_choice == 'veggie':
        burger_price = 2.00
    else:
        print("we do not have that, sorry")
    return burger_price

def total_price(burger_price, burger_choice=None):
    print("The burger cost $", burger_price)

def closing(burger_choice):
    if burger_choice == 'cheesy':
        print("Nice selection, This is our best!")
    else:
        print("enjoy!")

def main(burger_price=0):
    choice = "yes"
    while choice != "no":
        burger_choice = get_burger_choice()
        burger_price = price_burger(burger_choice, burger_price)
        total_price(burger_price)
        closing(burger_choice)
        choice = input("\nWould you like to try again? yes or no: ")

main()
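One way to make the handoff work (a sketch of the idea, not the asker's original code): have get_burger_choice return the burger the user settled on, then let the caller ask about the price and pass the choice to price_burger. The version below reuses the function names already defined above; the return value and the price prompt are the new pieces.

def get_burger_choice():
    burgers = ["Cheesy", "Regular", "Veggie"]
    while True:
        searchValue = input("What type of burger would you like? ")
        found = searchValue.lower() in (b.lower() for b in burgers)
        print("That is on our menu" if found else "That is not in our menu")
        tryagain = input("Do you want to see another burger? yes/no: ")
        if tryagain.lower() == 'no':
            # Returning the choice (or None) is what lets the program flow onward.
            return searchValue.lower() if found else None

def main():
    burger_choice = get_burger_choice()
    if burger_choice is not None:
        see_price = input("Would you like to see the price? yes or no: ")
        if see_price.lower() == 'yes':
            price = price_burger(burger_choice, 0)
            total_price(price)
            closing(burger_choice)

main()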

How to do a smart sort in Groovy? [duplicate]

I have a list of version numbers like
Versions = [0.0.10, 0.0.11, 0.0.13, 0.0.14, 0.0.15, 0.0.16, 0.0.17, 0.0.18, 0.0.19, 0.0.20, 0.0.21, 0.0.22, 0.0.23, 0.0.24, 0.0.25, 0.0.26, 0.0.27, 0.0.28, 0.0.29, 0.0.3, 0.0.30, 0.0.33, 0.0.34, 0.0.35, 0.0.36, 0.0.37, 0.0.38, 0.0.39, 0.0.4, 0.0.41, 0.0.42, 0.0.43, 0.0.44, 0.0.45, 0.0.46, 0.0.47, 0.0.48, 0.0.49, 0.0.5, 0.0.5-delivery.5, 0.0.50, 0.0.51, 0.0.52, 0.0.53, 0.0.54, 0.0.55, 0.0.56, 0.0.57, 0.0.58, 0.0.59, 0.0.6, 0.0.60, 0.0.61, 0.0.62, 0.0.63, 0.0.64, 0.0.7, 0.0.8, 0.0.9]
and I need to get the latest version (0.0.64); Versions.sort() and Collections.max(Versions) don't work for me.
So I developed the function below:
def mostRecentVersion(def versions) {
    def lastversion = "0.0.0"
    for (def items : versions) {
        def version = items.tokenize('-')[0]
        def ver = version.tokenize('.')
        def lastver = lastversion.tokenize('.')
        if (lastver[0].toInteger() < ver[0].toInteger()) {
            lastversion = version
        } else if (lastver[0].toInteger() == ver[0].toInteger()) {
            if (lastver[1].toInteger() < ver[1].toInteger()) {
                lastversion = version
            } else if (lastver[1].toInteger() == ver[1].toInteger()) {
                if (lastver[2].toInteger() < ver[2].toInteger()) {
                    lastversion = version
                }
            }
        }
    }
    return lastversion
}
I'm asking if there is something better.
Thank you for the help :)
The idea: build a map with a sortable key and the original version as the value, then sort the map by keys and take only the values.
To create the sortable key for each value:
1. split the version into an array of digit and non-digit substrings
2. left-pad each part with '0' to a minimum length of 3 (assuming no number is longer than 3 digits)
3. join the array back into a string
So, for 0.11.222-dev ->
1. [ '0', '.', '11', '.', '222', '-dev' ]
2. [ '000', '00.', '011', '00.', '222', '-dev' ]
3. '00000.01100.222-dev'
The code:

def mostRecentVersion(versions) {
    return versions.collectEntries {
        [(it =~ /\d+|\D+/).findAll().collect { it.padLeft(3, '0') }.join(), it]
    }.sort().values()[-1]
}
//test cases:
def fullVersions = ['0.0.10', '0.0.11', '0.0.13', '0.0.14', '0.0.15', '0.0.16',
'0.0.17', '0.0.18', '0.0.19', '0.0.20', '0.0.21', '0.0.22', '0.0.23', '0.0.24',
'0.0.25', '0.0.26', '0.0.27', '0.0.28', '0.0.29', '0.0.3', '0.0.30', '0.0.33',
'0.0.34', '0.0.35', '0.0.36', '0.0.37', '0.0.38', '0.0.39', '0.0.4', '0.0.41',
'0.0.42', '0.0.43', '0.0.44', '0.0.45', '0.0.46', '0.0.47', '0.0.48', '0.0.49',
'0.0.5', '0.0.5-delivery.5', '0.0.50', '0.0.51', '0.0.52', '0.0.53', '0.0.54',
'0.0.55', '0.0.56', '0.0.57', '0.0.58', '0.0.59', '0.0.6', '0.0.60', '0.0.61',
'0.0.62', '0.0.63', '0.0.64', '0.0.7', '0.0.8', '0.0.9']
assert mostRecentVersion(fullVersions) == '0.0.64'
assert mostRecentVersion(['0.0.5-delivery.5', '0.0.3', '0.0.5']) == '0.0.5-delivery.5'
assert mostRecentVersion(['0.0.5.5', '0.0.5-delivery.5', '0.0.5']) == '0.0.5.5'
I believe this will work... it also keeps the original version strings around, in case 0.5.5-devel.5 is the latest. It relies on the fact that Groovy uses a LinkedHashMap for the sorted map, so the order is preserved :-)
def mostRecentVersion(def versions) {
    versions.collectEntries {
        [it, it.split(/\./).collect { (it =~ /([0-9]+).*/)[0][1] }*.toInteger()]
    }.sort { a, b ->
        [a.value, b.value].transpose().findResult { x, y -> x <=> y ?: null } ?:
            a.value.size() <=> b.value.size() ?:
            a.key <=> b.key
    }.keySet()[-1]
}
def fullVersions = ['0.0.10', '0.0.11', '0.0.13', '0.0.14', '0.0.15', '0.0.16',
    '0.0.17', '0.0.18', '0.0.19', '0.0.20', '0.0.21', '0.0.22', '0.0.23', '0.0.24',
    '0.0.25', '0.0.26', '0.0.27', '0.0.28', '0.0.29', '0.0.3', '0.0.30', '0.0.33',
    '0.0.34', '0.0.35', '0.0.36', '0.0.37', '0.0.38', '0.0.39', '0.0.4', '0.0.41',
    '0.0.42', '0.0.43', '0.0.44', '0.0.45', '0.0.46', '0.0.47', '0.0.48', '0.0.49',
    '0.0.5', '0.0.5-delivery.5', '0.0.50', '0.0.51', '0.0.52', '0.0.53', '0.0.54',
    '0.0.55', '0.0.56', '0.0.57', '0.0.58', '0.0.59', '0.0.6', '0.0.60', '0.0.61',
    '0.0.62', '0.0.63', '0.0.64', '0.0.7', '0.0.8', '0.0.9']
assert mostRecentVersion(fullVersions) == '0.0.64'
assert mostRecentVersion(['0.0.5-delivery.5', '0.0.3', '0.0.5']) == '0.0.5-delivery.5'
assert mostRecentVersion(['0.0.5.5', '0.0.5-delivery.5', '0.0.5']) == '0.0.5.5'
Edit:
Made a change so that 0.5.5.5 > 0.5.5-devel.5

tk.Entry validate command doesn't restore the previous value when False is returned

I have carefully reviewed answers to "Interactively validating Entry widget content in tkinter", but my script fails to restore the previous value when the validate command returns False. I captured %P and %s and printed them out; they both show the same value.
import tkinter as tk

class Controller:
    def __init__(self):
        i = 10
        j = 20
        # list comprehension
        self.entry_widgets = [[None for col in range(j)] for row in range(i)]
        #print(self.entry_widgets)
        self.values = [["string" + str(row) + str(col) for col in range(10)] for row in range(20)]
        #print(self.values)

class EnterBox(tk.Entry):
    def __init__(self, *args, **kwargs):
        #print(args)
        self._modified = False
        self._save = 0
        self._raise = 1
        self._lower = 2
        frame, i, j, *newargs = args
        self._content = tk.StringVar()
        # tk.Entry.__init__(self, frame, *newargs,
        #                   validate='focusout',
        #                   validatecommand=vcmd,
        #                   **kwargs)
        tk.Entry.__init__(self, frame, *newargs, **kwargs)
        vcmd = (self.register(self._revert), '%P', '%s')
        ct.entry_widgets[i][j] = self
        self.config(textvariable=self._content)
        self.config(validate="focusout")
        self.config(validatecommand=vcmd)
        x = (ct.values[i][j])
        self.insert(0, x)
        #self._content.set(x)
        self.bind("<Return>", lambda event, x=self._save: self._action(event, x))
        self.bind("<Button-2>", lambda event, x=self._save: self._action(event, x))
        self.bind("<FocusIn>", lambda event, x=self._raise: self._action(event, x))
        self.bind("<FocusOut>", lambda event, x=self._lower: self._action(event, x))
        self.bind("<Button-3>", lambda event, x=self._lower: self._action(event, x))
        self.grid(column=i + 1, row=j + 2)

    def _revert(self, P, s):
        print("Hi There")
        print(P)
        print(s)
        return False

    def _action(self, event, action):
        print(str(action) + ' ' + str(event))
        if action == self._save:
            ct.values[i][j] = self._content.get()
            self.config(bg='lightskyblue2')
            self._modified = True
        elif action == self._raise:
            self.config(bg='light pink')
        elif action == self._lower:
            self.config(bg='gray80')
            self._modified = False
        else:
            print('action value is bad action =>' + str(action))

if "__main__" == __name__:
    root = tk.Tk()
    frame = tk.Frame()
    i = j = 0
    ct = Controller()
    root.grid()
    frame.grid()
    check = EnterBox(frame, i, j, width=24)
    check2 = EnterBox(frame, i + 1, j, width=24)
    root.mainloop()
I have tried removing all the other bindings, to no avail.
Interestingly (though it's a separate issue), if I use StringVar.set instead of self.insert (see the commented-out line), the validate command runs once and never again, despite several focus changes. I am using Python 3.8.
The validation isn't designed to restore anything when it happens on focusout; by that point the edits have already been committed to the widget, which is also why your %P and %s print the same value. Validation can only prevent characters from being added at the time they are added. You will have to add code yourself to restore the previous value.
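A minimal sketch of that approach (not the asker's code; the digits-only rule and the last_good bookkeeping are illustrative assumptions): remember the last known-good value per widget, put it back when validation fails, and re-arm validation, since editing the entry from inside the validator switches validation off.

import tkinter as tk

root = tk.Tk()
last_good = {}  # widget path name -> last value known to be valid

def on_focus_in(event):
    # Remember the value as it was when editing started.
    last_good[str(event.widget)] = event.widget.get()

def validate(new_value, widget_name):
    widget = root.nametowidget(widget_name)
    if new_value.isdigit():  # example rule: digits only
        last_good[widget_name] = new_value
        return True
    # Restore the remembered value ourselves -- tkinter won't do it.
    widget.delete(0, tk.END)
    widget.insert(0, last_good.get(widget_name, ""))
    # Editing the entry inside the validator turns validation off,
    # so re-enable it once this callback has returned.
    widget.after_idle(lambda: widget.config(validate="focusout"))
    return False

for _ in range(2):  # two entries so focus can actually move between widgets
    entry = tk.Entry(root)
    entry.bind("<FocusIn>", on_focus_in)
    vcmd = (entry.register(validate), "%P", "%W")
    entry.config(validate="focusout", validatecommand=vcmd)
    entry.pack()

root.mainloop()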

Resample bitstamp bars in pyalgotrade

I'm working with an algo on the bitstamp client that works better with 30-minute bars, rather than treating each trade as a bar.
Is there a "right" way to resample those bars into 30-minute intervals on the fly?
I can do it with no problem with the bitcoincharts broker, but I need the execution from the bitstamp broker, so I was hoping to do it with that one.
This should help:

from pyalgotrade.bitstamp import barfeed
from pyalgotrade.bitstamp import broker
from pyalgotrade import strategy

class Strategy(strategy.BaseStrategy):
    def __init__(self, feed, brk):
        super(Strategy, self).__init__(feed, brk)
        self._instrument = "BTC"
        self._bid = None
        self._ask = None
        # Resample to 60-second bars here; use 30 * 60 for 30-minute bars.
        self._resampledBF = self.resampleBarFeed(60, self.onResampledBars)
        # Subscribe to order book update events to get bid/ask prices to trade.
        feed.getOrderBookUpdateEvent().subscribe(self._onOrderBookUpdate)

    def _onOrderBookUpdate(self, orderBookUpdate):
        bid = orderBookUpdate.getBidPrices()[0]
        ask = orderBookUpdate.getAskPrices()[0]
        if bid != self._bid or ask != self._ask:
            self._bid = bid
            self._ask = ask
            self.info("Order book updated. Best bid: %s. Best ask: %s" % (self._bid, self._ask))

    def onResampledBars(self, dt, bars):
        bar = bars[self._instrument]
        self.info("Resampled - Price: %s. Volume: %s." % (bar.getClose(), bar.getVolume()))

    def onBars(self, bars):
        bar = bars[self._instrument]
        self.info("Price: %s. Volume: %s." % (bar.getClose(), bar.getVolume()))

def main():
    barFeed = barfeed.LiveTradeFeed()
    brk = broker.PaperTradingBroker(1000, barFeed)
    strat = Strategy(barFeed, brk)
    strat.run()

if __name__ == "__main__":
    main()
