I'm really stumped on an issue. On my website, whenever you pan the map, an AJAX call is fired that runs a query against the database. The problem is that the AJAX call takes somewhere between 2 and 10 seconds, which is unacceptable.
Link to website
There are about 500k records in my database. I notice that the more records I add, the slower it gets. That makes sense, right? But why is it exceptionally slow and so inconsistent?
I am using Digital Ocean. When I check my control panel, the CPU/RAM/disk are all operating at very low levels.
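For reference, here is a minimal sketch of the kind of index that would cover the map-bounds filter shown below. It is only illustrative, not my real model: the field types are assumed (plain floats for la/lo), the composite index is an assumption, and Meta.indexes needs Django 1.11+.

# models.py (sketch, not the real model)
from django.db import models

class Listing(models.Model):
    la = models.FloatField()                      # latitude, filtered with la__gt / la__lt
    lo = models.FloatField()                      # longitude, filtered with lo__gt / lo__lt
    transaction_type = models.CharField(max_length=16)
    # ... remaining fields omitted ...

    class Meta:
        indexes = [
            # assumed composite index covering the bounding-box + transaction_type filter
            models.Index(fields=['transaction_type', 'la', 'lo']),
        ]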
The AJAX call code:
def filter_closed_listings(request):
    zoom = int(request.GET.get('zoomLevel'))
    minLat = request.GET.get('minLat')
    maxLat = request.GET.get('maxLat')
    minLng = request.GET.get('minLng')
    maxLng = request.GET.get('maxLng')
    sold = request.GET.get('sold')
    property_type = request.GET.get('pType')
    building_style = request.GET.get('bType')
    showActiveListings = request.GET.get('showActiveListings')
    showSoldListings = request.GET.get('showSoldListings')
    showEndedListings = request.GET.get('showEndedListings')
    bed = request.GET.get('bed')
    bath = request.GET.get('bath')
    low_price = request.GET.get('low')
    high_price = request.GET.get('high')
    includePandL = request.GET.get('includePandL')
    transaction_type = request.GET.get('transactionType')
    ended_time_difference = datetime.date.today() + datetime.timedelta(-int(sold))  # Date check
    print(ended_time_difference)

    # Initial filter with map bounds and date check
    data = Listing.objects.filter(la__gt=minLat).filter(la__lt=maxLat).filter(lo__gt=minLng).filter(lo__lt=maxLng)
    data = data.filter(transaction_type=transaction_type)

    # 0 is 'Any'. Therefore a filter is done if selection is not 'Any'.
    if not property_type == '0':
        if property_type == '1':
            data = data.filter(Q(property_type='Condo Apt') | Q(property_type='Comm Element Condo'))
        elif property_type == '2':
            data = data.filter(property_type='Detached')
        elif property_type == '3':
            data = data.filter(property_type='Semi-Detached')
        elif property_type == '4':
            data = data.filter(Q(property_type='Att/Row/Twnhouse') | Q(property_type='Condo Townhouse'))
        else:
            data = data.exclude(Q(property_type='Condo Apt') | Q(property_type='Comm Element Condo') | Q(property_type='Detached') | Q(property_type='Semi-Detached') | Q(property_type='Att/Row/Twnhouse') | Q(property_type='Condo Townhouse'))

    if showActiveListings == 'n':
        data = data.filter(Q(status='Ter') | Q(status='Exp') | Q(status='Sld') | Q(status='Lsd'))

    if showSoldListings == 'n':
        if transaction_type == 'Sale':
            data = data.exclude(status='Sld')
        else:
            data = data.exclude(status='Lsd')
    else:
        if transaction_type == 'Sale':
            data = data.exclude(Q(status='Sld') & Q(sold_date__lt=ended_time_difference))
        else:
            data = data.exclude(Q(status='Lsd') & Q(sold_date__lt=ended_time_difference))

    if showEndedListings == 'n':
        data = data.exclude(Q(status='Ter') | Q(status='Exp'))
    else:
        data = data.exclude(Q(status='Exp') & Q(expiry_date__lt=ended_time_difference)).exclude(Q(status='Ter') & Q(terminated_date__lt=ended_time_difference))

    if includePandL == 'false':
        data = data.exclude(Q(property_type='Parking Space') | Q(property_type='Locker'))

    # Bedrooms check
    if not bed == 'undefined':
        bed = bed.split(',')
        queries = [Q(bedrooms_ag=b) for b in bed]
        query = queries.pop()
        if bed.pop() == '6':
            query = Q(bedrooms_ag__gte=5)
        for i in queries:
            query |= i
        #print(query)
        data = data.filter(query)

    # Bathrooms check
    if not bath == 'undefined':
        bath = bath.split(',')
        queries = [Q(washrooms=b) for b in bath]
        query = queries.pop()
        if bath.pop() == '6':
            query = Q(washrooms__gte=5)
        for i in queries:
            query |= i
        #print(query)
        data = data.filter(query)

    # Filters low price and high price
    if high_price == '0' and low_price == '0':
        pass
    elif high_price == '0':
        data = data.filter(Q(Q(list_price__gte=low_price) & Q(list_price__lte=999999999) & Q(sold_price__isnull=True)) | Q(Q(sold_price__gte=low_price) & Q(sold_price__lte=999999999) & Q(sold_price__isnull=False)))  # Nested Q object. Use list price if sold price is null.
    else:
        data = data.filter(Q(Q(list_price__gte=low_price) & Q(list_price__lte=high_price) & Q(sold_price__isnull=True)) | Q(Q(sold_price__gte=low_price) & Q(sold_price__lte=high_price) & Q(sold_price__isnull=False)))

    if data.count() > 500:
        return JsonResponse({'over500': True})

    data = data.values('id', 'la', 'lo')

    # Determines lat/lng precision based on zoom level
    for i in data:
        if zoom >= 13:
            i['lo'] = round(i['lo'], 4)
            i['la'] = round(i['la'], 4)
        elif zoom > 9 and zoom < 13:
            i['lo'] = round(i['lo'], 2)
            i['la'] = round(i['la'], 2)
        else:
            i['lo'] = round(i['lo'], 1)
            i['la'] = round(i['la'], 1)

    return JsonResponse({'results': list(data)})  # This uses list() which is bad design. But only 3 fields are passed and highly unlikely to change so this can stay like this.
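For what it's worth, here is a minimal sketch of how to see what the ORM actually sends to the database and how long it takes. QuerySet.explain() needs Django 2.1+, connection.queries is only populated when DEBUG = True, and the coordinates below are placeholders.

from django.db import connection, reset_queries

reset_queries()
qs = Listing.objects.filter(la__gt=43.6, la__lt=43.8, lo__gt=-79.5, lo__lt=-79.3)
print(qs.explain())              # the database's query plan (Django 2.1+)
list(qs.values('id', 'la', 'lo'))  # force evaluation
print(connection.queries[-1])    # raw SQL and its timing, with DEBUG = True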
Related
I am developing an application for placing orders with Python and Qt Designer. I can't manage to place two orders in a row: the first order goes through without any problem, but when I want to place another one without closing the application, this error is displayed: "self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setText(0, str(Id))
AttributeError: 'NoneType' object has no attribute 'setText'".
def AddCommande(self):
    QtWidgets.QTreeWidgetItem(self.ui.treeWidgetcommande)
    Libelle = self.ui.comboBoxproduit.currentText()
    Qte = int(self.ui.lineEditQteproduit.text())
    Info = self.stock.GetProductName(Libelle)[0]
    Id = str(int(Info[0]))
    Pu = Info[1]
    Total = int(Qte) * int(Pu)
    data = (Libelle, Qte, Id, Pu, Total)
    #print(data)
    self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setText(0, str(Id))
    self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setText(1, str(Libelle))
    self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setText(2, str(Qte))
    self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setText(3, str(Pu))
    self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setText(4, str(Total))
    self.Ligne += 1

def ValiderCommande(self):
    Client = self.ui.comboBoxclient.currentText()
    IdClient = self.stock.GetClientIdByName(Client.split(" ")[0])
    PrixTotal = 0
    UniqueId = random.random()
    Date = date.today()
    Data = (IdClient, PrixTotal, Date, UniqueId)
    if self.stock.AddCommande(Data) == 0:
        for i in range(self.Ligne):
            IdCommande = self.stock.GetClientIdByUniqueId(UniqueId)
            Libelle = self.ui.treeWidgetcommande.topLevelItem(i).text(1)
            IdProduit = self.ui.treeWidgetcommande.topLevelItem(i).text(0)
            Pu = self.ui.treeWidgetcommande.topLevelItem(i).text(3)
            Qte = self.ui.treeWidgetcommande.topLevelItem(i).text(2)
            Total = int(self.ui.treeWidgetcommande.topLevelItem(i).text(4))
            InfoData = (IdCommande, Libelle, Qte, Pu, Total)
            data = (Qte, IdProduit)
            if self.stock.AjoutInfoCommande(InfoData) == 0:
                PrixTotal += Total
                self.stock.UpdateQteStock(data)
        if self.stock.UpdateCommande(PrixTotal, IdCommande) == 0:
            self.ui.treeWidgetcommande.clear()
            #self.ui.treeWidgetcommande.topLevelItem(self.Ligne).setHidden(True)
    self.ui.lineEditQteproduit.setText(" ")
After placing an order, I would like to reset my tree widget and be able to place further orders.
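A minimal sketch of one way to do that, assuming self.Ligne is the running row counter used by AddCommande: after clearing the tree, the counter has to go back to zero, otherwise topLevelItem(self.Ligne) returns None on the next order, which matches the AttributeError above.

if self.stock.UpdateCommande(PrixTotal, IdCommande) == 0:
    self.ui.treeWidgetcommande.clear()   # remove the rows of the finished order
    self.Ligne = 0                       # reset the counter so the next AddCommande starts at item 0
    self.ui.lineEditQteproduit.setText("")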
I'm trying to create a gold table notebook in Databricks; however, it would take 9 days to fully reprocess the historical data (43 GB, 35k parquet files). I tried scaling up the cluster, but it doesn't go above 5000 records/second. The bottleneck seems to be the applyInPandas() function. I'm wondering if I could replace pandas with anything else to make the gold notebook execute faster.
The silver table has 60 columns (read_id, reader_id, tracker_timestamp, event_type, ebook_id, page_id, agent_ip, agent_device_type, ...). Each row of data represents a read event of an ebook, e.g. 'page turn', 'click on image', 'click on link', ... All of the events that have occurred in a single session have the same read_id. In the gold table I'm trying to group those events into sessions and calculate the number of times each event has occurred in a single session. So instead of 100+ rows of data for a read session in the silver table, I would end up with just a single aggregated row in the gold table.
Input is the silver delta table:
import pyspark.sql.functions as F
import pyspark.sql.types as T
import pandas as pd
from pyspark.sql.functions import pandas_udf
input = (spark
    .readStream
    .format("delta")
    .option("withEventTimeOrder", "true")
    .option("maxFilesPerTrigger", 100)
    .load(f"path_to_silver_bucket")
)
I use the withWatermark and session_window functions to ensure I end up grouping all of the events from a single read session (a read session automatically ends 30 minutes after the last reader activity):
group = input.withWatermark("tracker_timestamp", "10 minutes").groupBy("read_id", F.session_window(input.tracker_timestamp, "30 minutes"))
In the next step I use the applyInPandas function like so:
sessions = group.applyInPandas(processing_function, schema=processing_function_output_schema)
Definition of the processing_function used in applyInPandas:
def processing_function(df):
    surf_time_ms = df.query('event_type == "surf"')['duration'].sum()
    immerse_time_ms = df.query('event_type == "immersion"')['duration'].sum()
    min_timestamp = df['tracker_timestamp'].min()
    max_timestamp = df['tracker_timestamp'].max()
    shares = len(df.query('event_type == "share"'))
    leads = len(df.query('event_type == "lead_store"'))
    is_read = len(df.query('event_type == "surf"')) > 0
    distinct_pages = df['page_id'].nunique()
    data = {
        "read_id": df['read_id'].values[0],
        "surf_time_ms": surf_time_ms,
        "immerse_time_ms": immerse_time_ms,
        "min_timestamp": min_timestamp,
        "max_timestamp": max_timestamp,
        "shares": shares,
        "leads": leads,
        "is_read": is_read,
        "number_of_events": len(df),
        "distinct_pages": distinct_pages
    }
    for field in not_calculated_string_fields:
        data[field] = df[field].values[0]
    new_df = pd.DataFrame(data=data, index=['read_id'])
    for x in all_events:
        new_df[f"count_{x}"] = df.query(f"type == '{x}'").count()
    for x in duration_events:
        duration = df.query(f"event_type == '{x}'")['duration']
        duration_sum = duration.sum()
        new_df[f"duration_{x}_ms"] = duration_sum
        if duration_sum > 0:
            new_df[f"mean_duration_{x}_ms"] = duration.mean()
        else:
            new_df[f"mean_duration_{x}_ms"] = 0
    return new_df
And finally, I'm writing the calculated row to the gold table like so:
for_partitioning = (sessions
    .withColumn("tenant", F.col("story_tenant"))
    .withColumn("year", F.year(F.col("min_timestamp")))
    .withColumn("month", F.month(F.col("min_timestamp"))))

checkpoint_path = "checkpoint-path"
gold_path = f"gold-bucket"

(for_partitioning
    .writeStream
    .format('delta')
    .partitionBy('year', 'month', 'tenant')
    .option("mergeSchema", "true")
    .option("checkpointLocation", checkpoint_path)
    .outputMode("append")
    .start(gold_path))
Can anybody think of a more efficient way to do a UDF in PySpark than applyInPandas for the above example? I simply cannot afford to wait 9 days to reprocess 43GB of data...
I've tried playing around with different input and output options (e.g. .option("maxFilesPerTrigger", 100)) but the real problem seems to be applyInPandas.
You could rewrite your processing_function into native Spark if you really wanted.
"read_id": df['read_id'].values[0]
F.first('read_id').alias('read_id')
"surf_time_ms": df.query('event_type == "surf"')['duration'].sum()
F.sum(F.when(F.col('event_type') == 'surf', F.col('duration'))).alias('surf_time_ms')
"immerse_time_ms": df.query('event_type == "immersion"')['duration'].sum()
F.sum(F.when(F.col('event_type') == 'immersion', F.col('duration'))).alias('immerse_time_ms')
"min_timestamp": df['tracker_timestamp'].min()
F.min('tracker_timestamp').alias('min_timestamp')
"max_timestamp": df['tracker_timestamp'].max()
F.max('tracker_timestamp').alias('max_timestamp')
"shares": len(df.query('event_type == "share"'))
F.count(F.when(F.col('event_type') == 'share', F.lit(1))).alias('shares')
"leads": len(df.query('event_type == "lead_store"'))
F.count(F.when(F.col('event_type') == 'lead_store', F.lit(1))).alias('leads')
"is_read": len(df.query('event_type == "surf"')) > 0
(F.count(F.when(F.col('event_type') == 'surf', F.lit(1))) > 0).alias('is_read')
"number_of_events": len(df)
F.count(F.lit(1)).alias('number_of_events')
"distinct_pages": df['page_id'].nunique()
F.countDistinct('page_id').alias('distinct_pages')
for field in not_calculated_string_fields:
data[field] = df[field].values[0]
*[F.first(field).alias(field) for field in not_calculated_string_fields]
for x in all_events:
new_df[f"count_{x}"] = df.query(f"type == '{x}'").count()
The above can probably be skipped: as far as my tests go, the new columns get NaN values, because .count() returns a Series object instead of one simple value.
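If those per-event counts are actually wanted, a hedged native equivalent could look like this (assuming all_events is the same list of event-type strings, and that type in the pandas version was meant to be event_type):

*[F.count(F.when(F.col('event_type') == x, F.lit(1))).alias(f"count_{x}") for x in all_events]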
for x in duration_events:
duration = df.query(f"event_type == '{x}'")['duration']
duration_sum = duration.sum()
new_df[f"duration_{x}_ms"] = duration_sum
if duration_sum > 0:
new_df[f"mean_duration_{x}_ms"] = duration.mean()
else:
new_df[f"mean_duration_{x}_ms"] = 0
*[F.sum(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"duration_{x}_ms") for x in duration_events]
*[F.mean(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"mean_duration_{x}_ms") for x in duration_events]
So, instead of
def processing_function(df):
...
...
sessions = group.applyInPandas(processing_function, schema=processing_function_output_schema)
you could use efficient native Spark:
sessions = group.agg(
    F.first('read_id').alias('read_id'),
    F.sum(F.when(F.col('event_type') == 'surf', F.col('duration'))).alias('surf_time_ms'),
    F.sum(F.when(F.col('event_type') == 'immersion', F.col('duration'))).alias('immerse_time_ms'),
    F.min('tracker_timestamp').alias('min_timestamp'),
    F.max('tracker_timestamp').alias('max_timestamp'),
    F.count(F.when(F.col('event_type') == 'share', F.lit(1))).alias('shares'),
    F.count(F.when(F.col('event_type') == 'lead_store', F.lit(1))).alias('leads'),
    (F.count(F.when(F.col('event_type') == 'surf', F.lit(1))) > 0).alias('is_read'),
    F.count(F.lit(1)).alias('number_of_events'),
    F.countDistinct('page_id').alias('distinct_pages'),
    *[F.first(field).alias(field) for field in not_calculated_string_fields],
    # skipped count_{x}
    *[F.sum(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"duration_{x}_ms") for x in duration_events],
    *[F.mean(F.when(F.col('event_type') == x, F.col('duration'))).alias(f"mean_duration_{x}_ms") for x in duration_events],
)
Here is part of some code I created for a project in tkinter, using sqlite3 as the database, in Python. I'm trying to make it so that when a user enters values into the entry fields, only integer values are accepted, and I tried to implement this in the validation function. I've tried using the try/except method, but this still seems to allow all values to be added to the table. How else could I attempt to make this work?
def validation(self):
    try:
        int(self.inc.get()) and int(self.out.get()) == True
    except ValueError:
        self.message['text'] = 'Value must be a number!'

def adding(self):
    if self.validation:
        query = 'INSERT INTO data VALUES (?,?)'
        parameters = (self.inc.get(), self.out.get())
        self.run_query(query, parameters)
        self.message['text'] = 'Record {} added'.format(self.inc.get())
        self.inc.delete(0, END)
        self.out.delete(0, END)
    else:
        self.message['text'] = 'Income or outgoing field is empty'
    self.viewing_records()

def deleting(self):
    self.message['text'] = ''
    try:
        self.tree.item(self.tree.selection())['values'][0]
    except IndexError as e:
        self.message['text'] = 'Please, select record!'
        return
    self.message['text'] = ''
    Income = self.tree.item(self.tree.selection())['text']
    query = 'DELETE FROM data WHERE totalinc = ?'
    self.run_query(query, (Income, ))
    self.message['text'] = 'Record {} deleted.'.format(Income)
    self.viewing_records()

def editing(self):
    self.message['text'] = ''
    try:
        self.tree.item(self.tree.selection())['values'][0]
    except IndexError as e:
        self.message['text'] = 'Please select record'
        return
    name = self.tree.item(self.tree.selection())['text']
    old_out = self.tree.item(self.tree.selection())['values'][0]
    self.edit_wind = Toplevel()
    self.edit_wind.title("Editing")
    Label(self.edit_wind, text='Old income:').grid(row=0, column=1)
    Entry(self.edit_wind, textvariable=StringVar(self.edit_wind, value=name), state='readonly').grid(row=0, column=2)
    Label(self.edit_wind, text='New income:').grid(row=1, column=1)
    new_inc = Entry(self.edit_wind)
    new_inc.grid(row=1, column=2)
    Label(self.edit_wind, text='Old outgoing:').grid(row=2, column=1)
    Entry(self.edit_wind, textvariable=StringVar(self.edit_wind, value=old_out), state='readonly').grid(row=2, column=2)
    Label(self.edit_wind, text='New outgoing: ').grid(row=3, column=1)
    new_out = Entry(self.edit_wind)
    new_out.grid(row=3, column=2)
    Button(self.edit_wind, text='Save changes', command=lambda: self.edit_records(new_inc.get(), name, new_out.get(), old_out)).grid(row=4, column=2, sticky=W)
    self.edit_wind.mainloop()

def edit_records(self, new_inc, name, new_out, old_out):
    query = "UPDATE data SET totalinc = ?, totalout = ? WHERE totalinc = ? AND totalout = ?"
    parameters = (new_inc, new_out, name, old_out)
    self.run_query(query, parameters)
    self.edit_wind.destroy()
    self.message['text'] = 'Record {} changed.'.format(name)
    self.viewing_records()

if __name__ == '__main__':
    wind = Tk()
    application = Product(wind)
    wind.mainloop()
value = '8'
if value.isdigit():
    print(value)
I suggest taking a look at str.isdigit().
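As a minimal sketch of how that could be wired into the posted class (assuming self.inc and self.out are the Entry widgets shown above; note that validation also has to be called with parentheses and should return a boolean, and that isdigit() rejects negative numbers):

def is_int(text):
    # True only if the string consists of digits, so '12' passes while '1.5' and '' do not
    return text.strip().isdigit()

# Inside the class it could then look like:
#     def validation(self):
#         return is_int(self.inc.get()) and is_int(self.out.get())
#
#     def adding(self):
#         if self.validation():   # call it; a bare `if self.validation:` is always truthy
#             ...
#         else:
#             self.message['text'] = 'Income and outgoing must be whole numbers'

print(is_int('42'))    # True
print(is_int('4.2'))   # False
print(is_int(''))      # False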
I have written this function:
def duplicate_sheet1(wb, title=None):
    if title is None:
        title = wb.sheet1.title + ' DUPLICATE'
    wb._sheet_list = [wb.sheet1]
    wb.add_worksheet(title, wb.sheet1.row_count, wb.sheet1.col_count)
    wb._sheet_list = wb._sheet_list[::-1]
    wb._sheet_list[0].update_cells(wb._sheet_list[1]._fetch_cells())
...everything works as expected upon inspection with a debugger except update_cells: when I _fetch_cells for worksheet 0 after running the code, the sheet is empty.
Apparently the list returned by _fetch_cells is not the same as what is expected by update_cells. This may be because _fetch_cells does not include empty cells in the returned list, while update_cells may only work with a 1- or 2-D grid; I am unsure.
Here is the work-around I found; apologies, as the code could probably be improved:
def duplicate_sheet1(wb, title=None):
    if title is None:
        title = wb.sheet1.title + ' DUPLICATE'
    wb._sheet_list = [wb.sheet1]
    wb.add_worksheet(title, wb.sheet1.row_count, wb.sheet1.col_count)
    wb._sheet_list = wb._sheet_list[::-1]
    cell_list = build_cell_list(wb._sheet_list[0], wb._sheet_list[1])
    wb._sheet_list[0].update_cells(cell_list)

def build_cell_list(new_worksheet, old_worksheet):
    fetched = old_worksheet._fetch_cells()
    max_row = fetched[-1].row
    max_col = max([cell.col for cell in fetched])
    cell_list = new_worksheet.range('A1:' + chr(max_col + 64) + str(max_row))
    for cell in cell_list:
        cell.value = next(
            (
                f.value for f in fetched
                if f.col == cell.col and f.row == cell.row
            ),
            '',
        )
    return cell_list
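For completeness, a hedged usage sketch (the spreadsheet title here is made up, and the auth call assumes a reasonably recent gspread with service-account support; gspread.authorize(credentials) works for the older OAuth flow):

import gspread

gc = gspread.service_account()        # or gspread.authorize(credentials)
wb = gc.open('My spreadsheet')        # hypothetical spreadsheet title
duplicate_sheet1(wb)                  # copies sheet1 into '<title> DUPLICATE'
duplicate_sheet1(wb, title='Backup')  # or with an explicit title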
I am using PowerBuilder 11.2 and I have a PBL that creates a main screen. The user enters an order number in the textbox, hits Enter, and it fills in data at the bottom of the screen. I am trying to get the debugger to hit a breakpoint in my function, but it seems to ignore the breakpoint. Is there a way to break into the function? I have a variable I need to evaluate and I can't seem to get into the function while running. Here is the code:
Decimal{2} ld_total_hrs, ld_load_hrs, ld_unload_hrs
long ll_stops_rowcount, ll_row, ll_type, ll_ord_number, ll_rd_rowcount
datetime ldt_1st_stop, ldt_last_stop, ldt_start_time, ldt_end_time, ldt_deliver_time
string ls_dest_id, ls_type, ls_pay_id, ls_ref_number, ls_pay_leg_config
boolean lb_first_drop = TRUE

ld_load_hrs = 0
ld_unload_hrs = 0
SetNull(ldt_deliver_time)

ll_stops_rowcount = dw_trip.RowCount()
If ll_stops_rowcount < 1 then Return 0

For ll_row = 1 to ll_stops_rowcount
    If ll_row = 1 then
        ldt_1st_stop = dw_trip.GetItemDateTime ( 1, "stops_stp_arrivaldate" )
    End if

    If dw_trip.GetItemString(ll_row,"stops_stp_type") = "PUP" then
        ldt_start_time = dw_trip.GetItemDateTime(ll_row,"stops_stp_arrivaldate")
        ldt_end_time = dw_trip.GetItemDateTime(ll_row,"stops_stp_departuredate")
        ld_load_hrs = ld_load_hrs + (f_datetimediff(ldt_start_time,ldt_end_time)/60)/60
    End if

    If dw_trip.GetItemString(ll_row,"stops_stp_type") = "DRP" then
        ldt_start_time = dw_trip.GetItemDateTime(ll_row,"stops_stp_arrivaldate")
        ldt_end_time = dw_trip.GetItemDateTime(ll_row,"stops_stp_departuredate")
        ld_unload_hrs = ld_unload_hrs + (f_datetimediff(ldt_start_time,ldt_end_time)/60)/60

        // get the first drops info for the report if paylegaslane is true else get last drop
        ls_pay_leg_config = is_PayLegConfig
        If is_CompanyOverride = true then
            ls_pay_leg_config = "ByLeg"
        End if

        //TGRIFFIT - PayLegConfig = 'ByLeg' in TMW is equivalent to 'PayLegAsLane = 'Y' in FSS
        If Upper(ls_pay_leg_config) = 'BYLEG' then
            If lb_first_drop Then
                ldt_deliver_time = ldt_end_time
                ls_dest_id = dw_trip.GetItemString(ll_row,"stops_cmp_id")
                ls_ref_number = dw_trip.GetItemString(ll_row,"stops_stp_refnum")
                lb_first_drop = FALSE
            End if
        Else
            ldt_deliver_time = ldt_end_time
            ls_dest_id = dw_trip.GetItemString(ll_row,"stops_cmp_id")
            ls_ref_number = dw_trip.GetItemString(ll_row,"stops_stp_refnum")
        End if
    End if
Next

ldt_last_stop = dw_trip.GetItemDateTime ( ll_stops_rowcount, "stops_stp_departuredate" )
ld_total_hrs = (f_datetimediff(ldt_1st_stop,ldt_last_stop)/60)/60
ll_ord_number = long(dw_triptab.GetItemString(1,"ord_number"))

//If g_messlevel% < 1 Then
If gnv_app.ii_MessLevel < 1 Then
    ids_revdist.Reset()
End if

//Load the datastore that stores all the revenue distribution values
ll_rd_rowcount = ids_revdist.RowCount()
ids_revdist.InsertRow(0)
ll_rd_rowcount ++
ids_revdist.SetItem(ll_rd_rowcount,"mov_number",i_movenum%)
ids_revdist.SetItem(ll_rd_rowcount,"lgh_number",dw_trip.GetItemNumber(1,"stops_lgh_number"))
ids_revdist.SetItem(ll_rd_rowcount,"total_hours",ld_total_hrs)
ids_revdist.SetItem(ll_rd_rowcount,"load_hours",ld_load_hrs)
ids_revdist.SetItem(ll_rd_rowcount,"unload_hours",ld_unload_hrs)
ids_revdist.SetItem(ll_rd_rowcount,"deliver_date",ldt_deliver_time)
ids_revdist.SetItem(ll_rd_rowcount,"dest_code",ls_dest_id)
ids_revdist.SetItem(ll_rd_rowcount,"ref_number",ls_ref_number)

Return ids_revdist.RowCount()
I need to evaluate this line specifically and I set a breakpoint at this line:
ls_pay_leg_config = "ByLeg"
as well as at the following lines. It does not break. I am rusty at PowerBuilder and can't figure this out.
Put the breakpoint at the start of the loop.