Why is Google Translate API giving me so many 403s? - google-api

I've posted the relevant code below. I have a quota of 100 requests/second and a total quota of 50M characters daily (the latter of which I've never hit). I'm including 75 requests in each batch (i.e., in the code below, there are 75 strings in each group).
I'm constantly running into 403s, usually within less than a minute of firing off requests. After that, no amount of backoff works until the next day. This is really debilitating and I'm very unsure why it's happening. So far, their support team hasn't been helpful in diagnosing the issue.
Here's an example error:
"Google Translate Error on checksum 48af8c32261d9cb8911d99168a6f5b21: https://www.googleapis.com/language/translate/v2?q=QUERYSTRING&source=ja&target=en&key=MYKEY&format=text&alt=json returned "User Rate Limit Exceeded">"
def _google_translate_callback(self, request_id, response, err):
    if err:
        print 'Google Translate Error on request_id %s: %s' % (request_id, err)
        print 'Backing off for %d seconds.' % self.backoff
        sleep(self.backoff)
        if self.backoff < 4096:
            self.backoff = self.backoff * 2
        self._translate_array_google_helper()
    else:
        translation = response['translations'][0]['translatedText'] \
            .replace('&quot;', '"') \
            .replace('&#39;', "'")
        self.translations.append((request_id, translation))
        if is_done():
            self.is_translating = False
        else:
            self.current_group += 1
            self._translate_array_google_helper()
def _translate_array_google_helper(self):
    if self.current_group >= len(self.groups):
        self.is_translating = False
        return
    service = self.google_translator.translations()
    group = self.groups[self.current_group]
    batch = self.google_translator.new_batch_http_request(
        callback=self._google_translate_callback
    )
    for text, request_id in group:
        format_ = 'text'
        if is_html(text):
            format_ = 'html'
        batch.add(
            service.list(q=text, format=format_,
                         target=self.to_lang, source=self.from_lang),
            request_id=request_id
        )
    batch.execute()
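For context on the pacing involved: two back-to-back 75-request batches landing in the same second would already exceed a 100 requests/second quota, so batches like the ones above would need to be spaced out on the client side. Below is a minimal, hypothetical sketch of such pacing; MAX_REQUESTS_PER_SECOND, BATCH_SIZE and run_batches_paced are illustrative names, not part of the original code, and this is not presented as the cause of (or a fix for) the 403s.

import time

# Hypothetical pacing constants, based on the quota and batch size described above.
MAX_REQUESTS_PER_SECOND = 100
BATCH_SIZE = 75
MIN_SECONDS_BETWEEN_BATCHES = float(BATCH_SIZE) / MAX_REQUESTS_PER_SECOND

def run_batches_paced(batches):
    # Execute prepared BatchHttpRequest objects, sleeping between them so the
    # averaged request rate stays under the per-user per-second quota.
    last_start = 0.0
    for batch in batches:
        wait = MIN_SECONDS_BETWEEN_BATCHES - (time.time() - last_start)
        if wait > 0:
            time.sleep(wait)
        last_start = time.time()
        batch.execute()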

Related

Google Ads API: How to Send Batch Requests?

I'm using Google Ads API v11 to upload conversions and adjust conversions.
I send hundreds of conversions each day and want to start sending batch requests instead.
I've followed Google's documentation and I upload/adjust conversions exactly the way it describes:
https://developers.google.com/google-ads/api/docs/conversions/upload-clicks
https://developers.google.com/google-ads/api/docs/conversions/upload-adjustments
I could not find any good explanation or example on how to send batch requests:
https://developers.google.com/google-ads/api/reference/rpc/v11/BatchJobService
Below is my code: an example of how I adjust hundreds of conversions.
An explanation of how to do so with batch requests would be much appreciated.
# Adjust the conversion value of an existing conversion, via the Google Ads API
def adjust_offline_conversion(
        client,
        customer_id,
        conversion_action_id,
        gclid,
        conversion_date_time,
        adjustment_date_time,
        restatement_value,
        adjustment_type='RESTATEMENT'):
    # Check that gclid is a valid string, else exit the function
    if type(gclid) is not str:
        return None
    # Check if datetime or string; if string, parse it into a datetime
    if type(conversion_date_time) is str:
        conversion_date_time = datetime.strptime(conversion_date_time, '%Y-%m-%d %H:%M:%S')
    # Add 1 day to the conversion time to avoid this error (as explained by Google:
    # "The Offline Conversion cannot happen before the ad click. Add 1-2 days to your
    # conversion time in your upload, or check that the time zone is properly set.")
    to_datetime_plus_one = conversion_date_time + timedelta(days=1)
    # If the shifted time is in the future, clamp it to now (enough to avoid the original
    # Google error while also avoiding a new one, since Google does not accept future dates)
    to_datetime_plus_one = to_datetime_plus_one if to_datetime_plus_one < datetime.utcnow() else datetime.utcnow()
    # Convert the datetime back to a string and add a time zone suffix
    # (+00:00, i.e. UTC), as required by the Google Ads API
    adjusted_string_date = to_datetime_plus_one.strftime('%Y-%m-%d %H:%M:%S') + "+00:00"
    conversion_adjustment_type_enum = client.enums.ConversionAdjustmentTypeEnum
    # Determine the adjustment type.
    conversion_adjustment_type = conversion_adjustment_type_enum[adjustment_type].value
    # Associate the conversion adjustment with the existing conversion action.
    # The GCLID should have been uploaded before with a conversion.
    conversion_adjustment = client.get_type("ConversionAdjustment")
    conversion_action_service = client.get_service("ConversionActionService")
    conversion_adjustment.conversion_action = (
        conversion_action_service.conversion_action_path(
            customer_id, conversion_action_id
        )
    )
    conversion_adjustment.adjustment_type = conversion_adjustment_type
    conversion_adjustment.adjustment_date_time = adjustment_date_time.strftime('%Y-%m-%d %H:%M:%S') + "+00:00"
    # Set the GCLID and its date
    conversion_adjustment.gclid_date_time_pair.gclid = gclid
    conversion_adjustment.gclid_date_time_pair.conversion_date_time = adjusted_string_date
    # Set the adjusted value for adjustment type RESTATEMENT.
    if conversion_adjustment_type == conversion_adjustment_type_enum.RESTATEMENT.value:
        conversion_adjustment.restatement_value.adjusted_value = float(restatement_value)
    conversion_adjustment_upload_service = client.get_service("ConversionAdjustmentUploadService")
    request = client.get_type("UploadConversionAdjustmentsRequest")
    request.customer_id = customer_id
    request.conversion_adjustments = [conversion_adjustment]
    request.partial_failure = True
    response = (
        conversion_adjustment_upload_service.upload_conversion_adjustments(
            request=request,
        )
    )
    conversion_adjustment_result = response.results[0]
    print(
        f"Uploaded conversion that occurred at "
        f'"{conversion_adjustment_result.adjustment_date_time}" '
        f"from Gclid "
        f'"{conversion_adjustment_result.gclid_date_time_pair.gclid}"'
        f' to "{conversion_adjustment_result.conversion_action}"'
    )

# Iterate over every row (subscriber) and call the "adjust conversion" function for it
df.apply(lambda row: adjust_offline_conversion(client=client,
                                               customer_id=customer_id,
                                               conversion_action_id='xxxxxxx',
                                               gclid=row['click_id'],
                                               conversion_date_time=row['subscription_time'],
                                               adjustment_date_time=datetime.utcnow(),
                                               restatement_value=row['revenue']),
         axis=1)
I managed to solve it in the following way:
Conversion uploads and adjustments are not supported in Batch Processing, as they are not among the listed operations.
However, it is possible to upload multiple conversions in one request, since the conversions[] field (a list) can be populated with several conversions, not only a single conversion as I mistakenly thought.
So if you're uploading or adjusting conversions, you can simply batch them this way:
Instead of uploading one conversion:
request.conversions = [conversion]
Upload several:
request.conversions = [conversion_1, conversion_2, conversion_3...]
The same goes for uploading conversion adjustments:
request.conversion_adjustments = [conversion_adjustment_1, conversion_adjustment_2, conversion_adjustment_3...]
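For illustration, here is a minimal sketch of that batched variant, reusing the services and types from the function above. build_conversion_adjustment is a hypothetical refactor of adjust_offline_conversion that performs the same per-row construction but returns the ConversionAdjustment instead of uploading it:

from datetime import datetime

# Build all adjustments first (one per DataFrame row), then upload them in a single request.
# build_conversion_adjustment is the hypothetical helper described above.
adjustments = [
    build_conversion_adjustment(client=client,
                                customer_id=customer_id,
                                conversion_action_id='xxxxxxx',
                                gclid=row['click_id'],
                                conversion_date_time=row['subscription_time'],
                                adjustment_date_time=datetime.utcnow(),
                                restatement_value=row['revenue'])
    for _, row in df.iterrows()
]

# One request carrying all adjustments, instead of one request per row.
conversion_adjustment_upload_service = client.get_service("ConversionAdjustmentUploadService")
request = client.get_type("UploadConversionAdjustmentsRequest")
request.customer_id = customer_id
request.conversion_adjustments = adjustments
request.partial_failure = True
response = conversion_adjustment_upload_service.upload_conversion_adjustments(request=request)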

Can GA4-API fetch the data from requests made with a combination of minute and region and sessions?

Problem
With UA, I was able to get the number of sessions per region per minute (a combination of minute, region, and sessions), but is this not possible with GA4?
If not, is there any plan to support this in the future?
Detail
I ran GA4 Query Explorer with date, hour, minute, region in Dimensions and sessions in Metrics.
But I got an incompatibility error.
What I tried
I have checked with the GA4 Dimensions & Metrics Explorer and confirmed that the combination of minute and region is not possible (screenshot omitted).
(Updated 2022/05/16 15:35) Checked by code execution
I ran it with Ruby:
require "google/analytics/data/v1beta/analytics_data"
require 'pp'
require 'json'
ENV['GOOGLE_APPLICATION_CREDENTIALS'] = '' # service acount file path
client = ::Google::Analytics::Data::V1beta::AnalyticsData::Client.new
LIMIT_SIZE = 1000
offset = 0
loop do
request = Google::Analytics::Data::V1beta::RunReportRequest.new(
property: "properties/xxxxxxxxx",
date_ranges: [
{ start_date: '2022-04-01', end_date: '2022-04-30'}
],
dimensions: %w(date hour minute region).map { |d| { name: d } },
metrics: %w(sessions).map { |m| { name: m } },
keep_empty_rows: false,
offset: offset,
limit: LIMIT_SIZE
)
ret = client.run_report(request)
dimension_headers = ret.dimension_headers.map(&:name)
metric_headers = ret.metric_headers.map(&:name)
puts (dimension_headers + metric_headers).join(',')
ret.rows.each do |row|
puts (row.dimension_values.map(&:value) + row.metric_values.map(&:value)).join(',')
end
offset += LIMIT_SIZE
break if ret.row_count <= offset
end
The result was an error.
3:The dimensions and metrics are incompatible.. debug_error_string:{"created":"#1652681913.393028000","description":"Error received from peer ipv4:172.217.175.234:443","file":"src/core/lib/surface/call.cc","file_line":953,"grpc_message":"The dimensions and metrics are incompatible.","grpc_status":3}
There is an error in your code: make sure you use the actual API dimension name and not the UI name. The correct name of that dimension is dateHourMinute, not "Date hour and minute":
dimensions: %w(dateHourMinute).map { |d| { name: d } },
The Query Explorer returns this request just fine (results screenshot omitted).
Limited use for the region dimension
As for region: as the error message states, the dimensions and metrics are incompatible. The issue is that dateHourMinute cannot be used together with region. Switch to date or dateHour.
At the time of writing this is a beta API. I have sent a message off to Google to find out whether this is working as intended or whether it may change.
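For completeness, a minimal sketch (using the same Ruby client as above) of a request following that advice, trading minute-level granularity for compatibility with region; the property ID remains a placeholder and client is the same object created in the question:

# Compatible variant: hour-level granularity together with region
# (dateHourMinute + region is rejected, dateHour + region is accepted per the answer above).
request = Google::Analytics::Data::V1beta::RunReportRequest.new(
  property: "properties/xxxxxxxxx",
  date_ranges: [{ start_date: '2022-04-01', end_date: '2022-04-30' }],
  dimensions: %w(dateHour region).map { |d| { name: d } },
  metrics: %w(sessions).map { |m| { name: m } },
  keep_empty_rows: false
)
ret = client.run_report(request)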

Confused about the use of validation set here

In main.py of the px2graph project, the training and validation part looks like this:
splits = [s for s in ['train', 'valid'] if opt.iters[s] > 0]
start_round = opt.last_round - opt.num_rounds

# Main training loop
for round_idx in range(start_round, opt.last_round):
    for split in splits:
        print("Round %d: %s" % (round_idx, split))
        loader.start_epoch(sess, split, train_flag, opt.iters[split] * opt.batchsize)
        flag_val = split == 'train'
        for step in tqdm(range(opt.iters[split]), ascii=True):
            global_step = step + round_idx * opt.iters[split]
            to_run = [sample_idx, summaries[split], loss, accuracy]
            if split == 'train': to_run += [optim]
            # Do image summaries at the end of each round
            do_image_summary = step == opt.iters[split] - 1
            if do_image_summary: to_run[1] = image_summaries[split]
            # Start with lower learning rate to prevent early divergence
            t = 1/(1+np.exp(-(global_step-5000)/1000))
            lr_start = opt.learning_rate / 15
            lr_end = opt.learning_rate
            tmp_lr = (1-t) * lr_start + t * lr_end
            # Run computation graph
            result = sess.run(to_run, feed_dict={train_flag:flag_val, lr:tmp_lr})
            out_loss = result[2]
            out_accuracy = result[3]
            if sum(out_loss) > 1e5:
                print("Loss diverging...exiting before code freezes due to NaN values.")
                print("If this continues you may need to try a lower learning rate, a")
                print("different optimizer, or a larger batch size.")
                return
            time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, global_step, out_loss, out_accuracy))
            # Log data
            if split == 'valid' or (split == 'train' and step % 20 == 0) or do_image_summary:
                writer.add_summary(result[1], global_step)
                writer.flush()

    # Save training snapshot
    saver.save(sess, 'exp/' + opt.exp_id + '/snapshot')
    with open('exp/' + opt.exp_id + '/last_round', 'w') as f:
        f.write('%d\n' % round_idx)
It seems that the author only gets the result of each individual batch of the validation set. I am wondering: if I want to observe whether the model is improving or reaching its best performance, should I use the result on the whole validation set?
If the validation set is small enough, you can compute the loss and accuracy on the whole validation set during training to observe performance. If the validation set is too large, it is better to compute batch-wise validation results over multiple steps and aggregate them.
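To make that concrete, here is a minimal sketch (not the project's own code) of aggregating batch-wise results over a full pass of the 'valid' split, reusing the names from the snippet above (sess, loss, accuracy, train_flag, lr, opt, loader):

import numpy as np

# Assumes loader.start_epoch(sess, 'valid', train_flag, opt.iters['valid'] * opt.batchsize)
# has been called, as in the training loop above.
val_losses, val_accs = [], []
for step in range(opt.iters['valid']):
    # The lr value is irrelevant here since the optimizer op is not run.
    out_loss, out_accuracy = sess.run([loss, accuracy],
                                      feed_dict={train_flag: False, lr: 0.0})
    val_losses.append(np.sum(out_loss))
    val_accs.append(out_accuracy)

# Average over the whole validation pass instead of reporting per-batch numbers.
print("validation: mean loss {:g}, mean acc {:g}".format(np.mean(val_losses), np.mean(val_accs)))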

How to speed up the addition of a new column in pandas, based on comparisons on an existing one

I am working on a large-ish dataframe collection with some machine data in several tables. The goal is to add a column to every table which expresses the row's "class", considering its vicinity to a certain time stamp.
seconds = 1800

for i in range(len(tables)):  # looping over 20 equally structured tables containing machine data
    table = tables[i]
    table['Class'] = 'no event'
    for event in events[i].values:  # looping over 20 equally structured tables containing events
        event_time = event[1]  # get integer time stamp
        start_time = event_time - seconds
        table.loc[(table.Time <= event_time) & (table.Time >= start_time), 'Class'] = 'event soon'
The event_times and the entries in table.Time are integers. The point is to assign the class "event soon" to all rows in a specific time frame before an event (the number of seconds).
The code takes quite long to run, and I am not sure what is to blame or what can be fixed. The number of seconds does not have much impact on the runtime, so the part where the table is actually changed is probably working fine and the nested loops are more likely the culprit. However, I don't see how to get rid of them. Hopefully there is a faster, more pandas-like way to add this class column.
I am working with Python 3.6 and pandas 0.19.2.
You can use NumPy broadcasting to do this vectorised instead of looping.
Dummy data generation
num_tables = 5
seconds = 1800

def gen_table(count):
    for i in range(count):
        times = [(100 + j)**2 for j in range(i, 50 + i)]
        df = pd.DataFrame(data={'Time': times})
        yield df

def gen_events(count, num_tables):
    for i in range(num_tables):
        times = [1E4 + 100 * (i + j)**2 for j in range(count)]
        yield pd.DataFrame(data={'events': times})

tables = list(gen_table(num_tables))      # a list of 5 DataFrames of length 50
events = list(gen_events(5, num_tables))  # a list of 5 DataFrames of length 5
Comparison
For debugging, I added a dict of verification DataFrames. They are not needed for the solution itself; I just used them for debugging.
verification = {}

for i, (table, event_df) in enumerate(zip(tables, events)):
    event_list = event_df['events']
    time_diff = event_list.values - table['Time'].values[:, np.newaxis]  # This is where the magic happens
    events_close = np.any((0 < time_diff) & (time_diff < seconds), axis=1)
    table['Class'] = np.where(events_close, 'event soon', 'no event')
    # The stuff after this line can be deleted since it's only used for the verification
    df = pd.DataFrame(data=time_diff, index=table['Time'], columns=event_list)
    df['event'] = np.any((0 < time_diff) & (time_diff < seconds), axis=1)
    verification[i] = df
newaxis
A good explanation of broadcasting is in Jakevdp's book.
table['Time'].values[:, np.newaxis]
gives a (50, 1) 2-D array:
array([[10000],
       [10201],
       [10404],
       ....
       [21609],
       [21904],
       [22201]], dtype=int64)
Verification
For the first step the verification df looks like this:
events   10000.0  10100.0  10400.0  10900.0  11600.0  event
Time
10000        0.0    100.0    400.0    900.0   1600.0   True
10201     -201.0   -101.0    199.0    699.0   1399.0   True
10404     -404.0   -304.0     -4.0    496.0   1196.0   True
10609     -609.0   -509.0   -209.0    291.0    991.0   True
10816     -816.0   -716.0   -416.0     84.0    784.0   True
11025    -1025.0   -925.0   -625.0   -125.0    575.0   True
11236    -1236.0  -1136.0   -836.0   -336.0    364.0   True
11449    -1449.0  -1349.0  -1049.0   -549.0    151.0   True
11664    -1664.0  -1564.0  -1264.0   -764.0    -64.0  False
11881    -1881.0  -1781.0  -1481.0   -981.0   -281.0  False
12100    -2100.0  -2000.0  -1700.0  -1200.0   -500.0  False
12321    -2321.0  -2221.0  -1921.0  -1421.0   -721.0  False
12544    -2544.0  -2444.0  -2144.0  -1644.0   -944.0  False
....
20449   -10449.0 -10349.0 -10049.0  -9549.0  -8849.0  False
20736   -10736.0 -10636.0 -10336.0  -9836.0  -9136.0  False
21025   -11025.0 -10925.0 -10625.0 -10125.0  -9425.0  False
21316   -11316.0 -11216.0 -10916.0 -10416.0  -9716.0  False
21609   -11609.0 -11509.0 -11209.0 -10709.0 -10009.0  False
21904   -11904.0 -11804.0 -11504.0 -11004.0 -10304.0  False
22201   -12201.0 -12101.0 -11801.0 -11301.0 -10601.0  False
Small optimization of the original approach
You can shave a few lines and some assignments off the original algorithm:
for table, event_df in zip(tables, events):
    table['Class'] = 'no event'
    for event_time in event_df['events']:  # looping over 20 equally structured tables containing events
        start_time = event_time - seconds
        table.loc[table['Time'].between(start_time, event_time), 'Class'] = 'event soon'
You might shave off some more if, instead of the strings 'no event' and 'event soon', you just used booleans.
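For example, a minimal sketch of that boolean variant on top of the broadcasting solution above (the EventSoon column name is just an illustrative choice):

# Boolean class column: True stands for 'event soon', False for 'no event'.
for table, event_df in zip(tables, events):
    time_diff = event_df['events'].values - table['Time'].values[:, np.newaxis]
    table['EventSoon'] = np.any((0 < time_diff) & (time_diff < seconds), axis=1)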

How to return ALL events in a Google Calendar without knowing whether it is a timed or all day event

I'm working on a program in Python that can pull events from all the calendars in my Google account; however, I'd like to make the program as broadly usable (commercial) as possible. It's quite simple to customize the code for myself: I know that all the US Holidays events attached to my calendar are all-day events, so I can set up a simple if statement that checks whether it's the Holiday calendar and specify the events request as such:
def get_main_events(pageToken=None):
    events = gc_source.service.events().list(
        calendarId=calendarId,
        singleEvents=True,
        maxResults=1000,
        orderBy='startTime',
        pageToken=pageToken,
    ).execute()
    return events
So that works for all-day events; afterwards I append the results to a list and filter it to get only the events I want. Getting events from my primary calendar makes it easier to specify the events I want, because they're generally not all-day events, just my work schedule, so I can use:
now = datetime.now()
now_plus_thirtydays = now + timedelta(days=30)

def get_main_events(pageToken=None):
    events = gc_source.service.events().list(
        calendarId=calendarId,
        singleEvents=True,
        maxResults=1000,
        orderBy='startTime',
        timeMin=now.strftime('%Y-%m-%dT%H:%M:%S-00:00'),
        timeMax=now_plus_thirtydays.strftime('%Y-%m-%dT%H:%M:%S-00:00'),
        pageToken=pageToken,
    ).execute()
    return events
The problem I run into with making the program usable for others as well as myself is that the above will ONLY return NON-all-day events from my primary calendar. I'd like to find out if there's a way (and if so, how) to run the events request and return ALL results, whether they're all-day events or timed events that take place during a portion of the day. Part of the issue is that in another part of the code, where I print the results, I would need to use:
print event['start']['date']
for an all-day event, and:
print event['start']['dateTime']
for a non-all-day event.
So, since 'dateTime' won't work on an all-day event, I'd like to figure out a way to evaluate whether an event is all-day or not, i.e. "if said event is an all-day event, use event['start']['date'], else use event['start']['dateTime']".
Through much testing, and by using logging to see what error was happening with:
print event['start']['date']
vs:
print event['start']['dateTime']
I found that I could use the error result to my advantage using 'try' and 'except'.
Here is the resulting fix:
First, the initial part as before, with the actual query to the calendar:
now = datetime.now()
now_plus_thirtydays = now + timedelta(days=30)

def get_calendar_events(pageToken=None):
    events = gc_source.service.events().list(
        calendarId=cal_id[cal_count],
        singleEvents=True,
        orderBy='startTime',
        timeMin=now.strftime('%Y-%m-%dT%H:%M:%S-00:00'),
        timeMax=now_plus_thirtydays.strftime('%Y-%m-%dT%H:%M:%S-00:00'),
        pageToken=pageToken,
    ).execute()
    return events
Then the event handling portion:
# Events Portion
print "Calendar: ", cal_summary[cal_count]
events = get_calendar_events()

while True:
    for event in events['items']:
        try:
            if event['start']['dateTime']:
                dstime = dateutil.parser.parse(event['start']['dateTime'])
                detime = dateutil.parser.parse(event['end']['dateTime'])
                if dstime.strftime('%d/%m/%Y') == detime.strftime('%d/%m/%Y'):
                    print event['summary'] + ": " + dstime.strftime('%d/%m/%Y') + " " + dstime.strftime('%H%M') + "-" + detime.strftime('%H%M')
                    # Making a list for the respective items so they can be iterated through
                    # easier for time comparison and TTS messages
                    if cal_count == 0:
                        us_holiday_list.append((dstime, event['summary']))
                    elif cal_count == 1:
                        birthday_list.append((dstime, event['summary']))
                    else:
                        life_list.append((dstime, event['summary']))
                else:
                    print event['summary'] + ": " + dstime.strftime('%d/%m/%Y') + " # " + dstime.strftime('%H%M') + " to " + detime.strftime('%H%M') + " on " + detime.strftime('%d/%m/%Y')
                    # Making a list for the respective items so they can be iterated through
                    # easier for time comparison and TTS messages
                    if cal_count == 0:
                        us_holiday_list.append((dstime, event['summary']))
                    elif cal_count == 1:
                        birthday_list.append((dstime, event['summary']))
                    else:
                        life_list.append((dstime, event['summary']))
            else:
                return
        except KeyError:
            dstime = dateutil.parser.parse(event['start']['date'])
            detime = dateutil.parser.parse(event['end']['date'])
            print event['summary'] + ": " + dstime.strftime('%d/%m/%Y')
            # Making a list for the respective items so they can be iterated through
            # easier for time comparison and TTS messages
            if cal_count == 0:
                us_holiday_list.append((dstime, event['summary']))
            elif cal_count == 1:
                birthday_list.append((dstime, event['summary']))
            else:
                life_list.append((dstime, event['summary']))

    page_token = events.get('nextPageToken')
    if page_token:
        events = get_calendar_events(page_token)
    else:
        if cal_count == (len(cal_id) - 1):  # If there are no more calendars to process
            break
        else:  # Continue to next calendar
            print "-----"
            cal_count += 1
            print "Retrieving From Calendar: ", cal_summary[cal_count]
            events = get_calendar_events()
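As a side note, a minimal sketch of an alternative to the try/except check, assuming the same event structure used above (all-day events carry only start.date, timed events carry start.dateTime), so dict.get() with a fallback can distinguish them; the printing here is just illustrative:

for event in events['items']:
    start = event['start']
    all_day = 'dateTime' not in start  # all-day events only have 'date'
    dstime = dateutil.parser.parse(start.get('dateTime', start.get('date')))
    detime = dateutil.parser.parse(event['end'].get('dateTime', event['end'].get('date')))
    if all_day:
        print event['summary'] + ": " + dstime.strftime('%d/%m/%Y')
    else:
        print event['summary'] + ": " + dstime.strftime('%d/%m/%Y %H%M') + "-" + detime.strftime('%H%M')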
