Django import export How to export just one object with selective columns - django-import-export

I have a model in which there are only aggregates of the values imported
class AggregateImport(models.Model):
    """Aggregates of the imported values."""

    # Field classes must be instantiated: a bare class reference such as
    # `models.IntegerField` is not registered as a model field.
    Price = models.IntegerField()
    Total_Price = models.IntegerField()
    Collected_Date = models.CharField(max_length=15)
Now the import file only has the price and collected date (40 rows) in it, but I only want to export the total price.

Related

Match individual records during Batch predictions with VertexAI pipeline

I have a custom model in Vertex AI and a table storing the features for the model along with the record_id.
I am building pipeline component for the batch prediction and facing a critical issue.
When I submit the batch_prediction, I should exclude the record_id from the job, but how can I map each prediction back to its record if the result does not contain the record_id?
from google.cloud import bigquery
from google.cloud import aiplatform

aiplatform.init(project=project_id)
client = bigquery.Client(project=project_id)

# Read every column except record_id into a DataFrame.
query = '''
SELECT * except(record_id) FROM `table`
'''
df = client.query(query).to_dataframe()  # drop the record_id and load it to another table

# Load the id-free features into a staging table. The DataFrame built above
# is `df`; the previous argument `X` was never defined.
job = client.load_table_from_dataframe(
    df, "table_wo_id",
)
# Wait for the load job to finish so the table exists before it is read.
job.result()

# The Model constructor identifies the model through `model_name`
# (a model ID or a full resource name).
clf = aiplatform.Model(model_name='custom_model')
clf.batch_predict(
    job_display_name='custom model batch prediction',
    bigquery_source='bq://table_wo_id',
    instances_format='bigquery',
    bigquery_destination_prefix='bq://prediction_result_table',
    predictions_format='bigquery',
    machine_type='n1-standard-4',
    max_replica_count=1,
)
As the example above shows, there is no record_id column in prediction_result_table, so there is no way to map the results back to each record.

Filter a Query Set

The following view works great. I pass in student_id and I get a queryset of headers and attendance days related to that student_id. My challenge is that I cannot figure out how to filter my attendance_days queryset by a specific date or date range.
I have tried variations of:
attendance_days = student.zzabsentdetail_set.order_by('-absent_date').filter('absent_date' is between '2021-02-01' and '2021-02-26')
or
attendance_days = student.zzabsentdetail_set.order_by('-absent_date').filter('absent_date' > '2021-02-01')
I would like to filter my zzabsent_detail queryset by absent date on that line if possible.
Here is my view:
def absent_detail(request, student_id):
    """Render the header rows and absence days for one student."""
    student = Student.objects.get(id=student_id)
    context = {
        'student': student,
        # Header rows in ascending id order.
        'header': student.zzheader_set.order_by('id'),
        # Absence days, most recent first.
        'attendance_days': student.zzabsentdetail_set.order_by('-absent_date'),
    }
    return render(request, 'learning_logs/absent_detail_99.html', context)
You can do this with a __range lookup [Django-doc]:
# First and last day of the window to match on absent_date.
date_window = ('2021-02-01', '2021-02-26')
attendance_days = (
    student.zzabsentdetail_set
    .filter(absent_date__range=date_window)
    .order_by('-absent_date')
)
here the two bounds are inclusive, so items on February 1st, or February 26th will be included as well.
or you can work with the __gt lookup [Django-doc] to express that the value should be greater than a given value:
# Keep only absences strictly after 1 February 2021, newest first.
later_absences = student.zzabsentdetail_set.filter(absent_date__gt='2021-02-01')
attendance_days = later_absences.order_by('-absent_date')
for the last 10 days, you can work with a date object instead of a string, so:
from datetime import timedelta
from django.utils.timezone import now

# Cut-off date ten days before today, held as a date object rather than a string.
from_dt = now().date() - timedelta(days=10)
recent_absences = student.zzabsentdetail_set.filter(absent_date__gt=from_dt)
attendance_days = recent_absences.order_by('-absent_date')

Using nearest neighbour to find postcode to new postcodes found

I have a list of new postcodes and I'm trying to find the nearest postcode from an existing postcode file to attach to the new postcodes. I am using the below code but it seems to have duplicated some rows, please could I have some help resolving this...
My 2 dataframes are:
new_postcode_df which contains 92,590 rows, and columns:
Postcode e.g. "AB101BJ"
Latitude e.g. 57.146051
Longitude e.g. -2.107375
current_postcode_df which contains 1,738,339 rows, and columns:
Postcode e.g. "AB101AB"
Latitude e.g. 57.149606
Longitude e.g. -2.096916
my desired output is output_df
new_postcode e.g. "AB101BJ"
current_postcode e.g. "AB101AB"
My code is below:
# Coordinate pairs as (latitude, longitude), using the column names the two
# DataFrames actually carry.
new_postcode_df_gps = new_postcode_df[["Latitude", "Longitude"]].values
current_postcode_df_gps = current_postcode_df[["Latitude", "Longitude"]].values

# The haversine metric works on [lat, lon] expressed in radians.
new_postcode_df_radians = np.radians(new_postcode_df_gps)
current_postcode_df_radians = np.radians(current_postcode_df_gps)

tree = BallTree(current_postcode_df_radians, leaf_size=15, metric='haversine')
distance, index = tree.query(new_postcode_df_radians, k=1)

# Haversine distances are returned on the unit sphere; scale to metres.
earth_radius = 6371000
distance_in_meters = distance * earth_radius

# `index` holds row *positions* within current_postcode_df, so select with
# .iloc. A label-based lookup only agrees with positions when the frame has
# a unique 0..n-1 index; otherwise it can return the wrong rows or extra
# rows for duplicated labels.
current_postcode_df["Postcode"].iloc[index[:, 0]]
my output is shown in the attached where you can see postcodes beginning with "GY" have been added near the top which should not be the case. Postcodes starting with "AB" should all be at the top.
The new dataframe has increased from 92,590 rows to 92,848 rows.
Image of final output dataframe
Libraries I'm using are:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree
# Small sample frames, written one postcode per row.
new_postcode_df = pd.DataFrame(
    [
        ("AB101BJ", 57.146051, -2.107375),
        ("AB101BL", 57.148655, -2.097433),
        ("AB107FU", 57.119636, -2.147906),
    ],
    columns=["Postcode", "Latitude", "Longitude"],
)
current_postcode_df = pd.DataFrame(
    [
        ("AB101AB", 57.149606, -2.096916),
        ("AB101AF", 57.148707, -2.097806),
        ("AB101AG", 57.149051, -2.097004),
    ],
    columns=["Postcode", "Latitude", "Longitude"],
)
output_df = pd.DataFrame(["AB101RS", "AB129TS", "GY35HG"], columns=["Postcode"])

need checkboxes for a list using Django form

forms.py
from django import forms
from .models import VendorApplication, VendorAppWorkFlow
import MySQLdb as mdb
from django.forms.fields import DateField, ChoiceField, MultipleChoiceField
from django.forms.widgets import RadioSelect, CheckboxSelectMultiple
from django.forms.extras.widgets import SelectDateWidget
# Inspect the share table once at import time to discover its column names.
con = mdb.connect('', '', '', '')
cursor1 = con.cursor()
cursor1.execute("select * from vendorapp_sharetable;")
columns = cursor1.description
num_fields = len(columns)
field_names = [column[0] for column in columns]


class CheckForm(forms.Form):
    # NOTE(review): this loop replaces each entry of the module-level
    # field_names list with a BooleanField; it does not bind any class
    # attribute, so presumably the form ends up with no declared fields.
    # Confirm against the rendered form.
    for index in range(num_fields):
        field_names[index] = forms.BooleanField(
            required=False, label=field_names[index]
        )
I want to add the field_names values in forms.py to a checkbox list without choices; alternatively, how can I iterate over them to build the choices for a CheckboxSelectMultiple widget? As the number of field_names varies in my application, I am having difficulty adding them to choices and using a checkbox widget.
Something like:
field_names = [i[0] for i in cursor1.description]
# (value, label) pairs: the column's position is the submitted value and the
# column name is the label shown next to its checkbox.
choices = list(enumerate(field_names))


class CheckForm(forms.Form):
    """Form offering one checkbox per column name."""

    # Django has no forms.CheckboxMultiSelect. A group of checkboxes is a
    # MultipleChoiceField rendered with the CheckboxSelectMultiple widget.
    fields = forms.MultipleChoiceField(
        choices=choices,
        widget=forms.CheckboxSelectMultiple,
        required=False,
    )

Unable to fetch data from Hbase based on query parameters

How to get data from HBase? I have a table with empId, name, startDate, endDate and other columns. Now I want to get data from an HBase table based upon empId, startDate and endDate. In normal SQL I can use:
select * from tableName where empId=val and date>=startDate and date<=endDate
How can I do this in HBase as it stores data as key value pairs? The key is empId.
Getting filtered rows in the HBase shell is tricky. Since the shell is JRuby-based, you can use Ruby commands in it as well:
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.BinaryComparator
import org.apache.hadoop.hbase.filter.FilterList
import java.text.SimpleDateFormat
import java.lang.Long
# Parse a "yyyy/MM/dd" date string and return its getTime() value, rendered
# as a decimal string, as Java bytes.
# NOTE(review): the filters compare these bytes against stored cell values,
# so this presumably assumes the table stores dates in the same encoding;
# confirm against the writer.
def dateToBytes(val)
Long.toString(
SimpleDateFormat.new("yyyy/MM/dd").parse(val).getTime()).to_java_bytes
end
# table properties: the column family and the qualifiers of the columns to filter on
colfam='c'.to_java_bytes;
col_name='name';
col_start='startDate';
col_end='endDate';
# query params: the name to match and the date window as "yyyy/MM/dd" strings
q_name='name2';
q_start='2012/08/14';
q_end='2012/08/24';
# filters: one SingleColumnValueFilter per condition
# name EQUAL q_name
f_name=SingleColumnValueFilter.new(
colfam, col_name.to_java_bytes,
CompareFilter::CompareOp::EQUAL,
BinaryComparator.new(q_name.to_java_bytes));
# startDate GREATER_OR_EQUAL q_start
f_start=SingleColumnValueFilter.new(
colfam, col_start.to_java_bytes,
CompareFilter::CompareOp::GREATER_OR_EQUAL,
BinaryComparator.new(dateToBytes(q_start)));
# endDate LESS_OR_EQUAL q_end
f_end=SingleColumnValueFilter.new(
colfam, col_end.to_java_bytes,
CompareFilter::CompareOp::LESS_OR_EQUAL,
BinaryComparator.new(dateToBytes(q_end)));
# Combine the three filters into a single list.
# NOTE(review): presumably the default FilterList operator requires every
# filter to pass (logical AND); confirm for the HBase version in use.
filterlist= FilterList.new([f_name, f_start, f_end]);
# get the result: scan the table with the combined filter applied
scan 'mytable', {"FILTER"=>filterlist}
Similarly in Java construct a FilterList :
// Query params
String nameParam = "name2";
String startDateParam = "2012/08/14";
String endDateParam = "2012/08/24";
Filter nameFilter =
new SingleColumnValueFilter(colFam, nameQual, CompareOp.EQUAL,
Bytes.toBytes(nameParam));
//getBytesFromDate(): parses startDateParam and create a byte array out of it
Filter startDateFilter =
new SingleColumnValueFilter(colFam, startDateQual,
CompareOp.GREATER_OR_EQUAL, getBytesFromDate(startDateParam));
Filter endDateFilter =
new SingleColumnValueFilter(colFam, endDateQual,
CompareOp.LESS_OR_EQUAL, getBytesFromDate(endDateParam));
FilterList filters = new FilterList();
filters.addFilter(nameFilter);
filters.addFilter(startDateFilter);
filters.addFilter(endDateFilter);
HTable htable = new HTable(conf, tableName);
Scan scan = new Scan();
scan.setFilter(filters);
ResultScanner rs = htable.getScanner(scan);
//process your result...

Resources