I am trying to send an image through an API in MicroPython, but I still have no solution for how to do it. Please help.
import urequests
import json
URL = 'https://example.com/test'
datas = json.dumps({"auth_key": "43435", "mac": "abcd", "name": "washid"})
filep = 'OBJ.jpg'
files = {'odimg': open(filep, 'rb')}
try:
response = urequests.post(URL,data=datas,files=files)
print(response.json())
except Exception as e:
print(e)
Maybe this template can help you:
import ubinascii
import uos
import urequests
def make_request(data, image=None):
boundary = ubinascii.hexlify(uos.urandom(16)).decode('ascii')
def encode_field(field_name):
return (
b'--%s' % boundary,
b'Content-Disposition: form-data; name="%s"' % field_name,
b'',
b'%s'% data[field_name]
)
def encode_file(field_name):
filename = 'latest.jpeg'
return (
b'--%s' % boundary,
b'Content-Disposition: form-data; name="%s"; filename="%s"' % (
field_name, filename),
b'',
image
)
lines = []
for name in data:
lines.extend(encode_field(name))
if image:
lines.extend(encode_file('file'))
lines.extend((b'--%s--' % boundary, b''))
body = b'\r\n'.join(lines)
headers = {
'content-type': 'multipart/form-data; boundary=' + boundary,
'content-length': str(len(body))}
return body, headers
def upload_image(url, headers, data):
http_response = urequests.post(
url,
headers=headers,
data=data
)
if http_response.status_code == 204:
print('Uploaded request')
else:
        raise UploadError(http_response)  # UploadError: custom exception, not defined in this snippet
http_response.close()
return http_response
You need to declare a header for your request.
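Applied to the code from the question, the two helpers might be wired together like this. This is only a sketch: the endpoint, form fields, and file name are taken from the question, and note that the template above posts the image under the form field 'file', so either the server must expect that name or the encode_file('file') call needs to be changed (e.g. to 'odimg').

import urequests

URL = 'https://example.com/test'
fields = {"auth_key": "43435", "mac": "abcd", "name": "washid"}

# read the image into memory (small files only on a microcontroller)
with open('OBJ.jpg', 'rb') as f:
    image_bytes = f.read()

body, headers = make_request(fields, image=image_bytes)
response = urequests.post(URL, headers=headers, data=body)
print(response.status_code)
response.close()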
I used Jonathan's answer, but had to modify the code a bit (thanks to Akshay for figuring this out). A fixed boundary is used instead of generating a new one every time. Also, there needs to be an additional \r\n at the end of the file. I have used this to upload photos to a Telegram bot from an ESP32-CAM.
def make_request(data, image=None):
boundary = '---011000010111000001101001'
    # boundary fixed instead of generating a new one every time
def encode_field(field_name): # prepares lines that include chat_id
return (
b'--%s' % boundary,
b'Content-Disposition: form-data; name="%s"' % field_name,
b'',
            b'%s' % data[field_name]  # field_name contains chat_id
)
def encode_file(field_name): # prepares lines for the file
filename = 'latest.jpg' # dummy name is assigned to uploaded file
return (
b'--%s' % boundary,
b'Content-Disposition: form-data; name="%s"; filename="%s"' % (
field_name, filename),
b'',
image
)
lines = [] # empty array initiated
for name in data:
lines.extend(encode_field(name)) # adding lines (data)
if image:
lines.extend(encode_file('photo')) # adding lines image
lines.extend((b'--%s--' % boundary, b'')) # ending with boundary
body = b'\r\n'.join(lines) # joining all lines constitues body
    body = body + b'\r\n'  # extra addition at the end of the file
headers = {
'content-type': 'multipart/form-data; boundary=' + boundary
} # removed content length parameter
return body, headers # body contains the assembled upload package
def upload_image(url, headers, data):
http_response = urequests.post(
url,
headers=headers,
data=data
)
print(http_response.status_code) # response status code is the output for request made
if (http_response.status_code == 204 or http_response.status_code == 200):
print('Uploaded request')
else:
print('cant upload')
        # raise UploadError(http_response)  # line commented out
http_response.close()
return http_response
# function below is used to set up the file / photo to upload
def send_my_photo(photo_pathstring): # path and filename combined
    token = 'authentication token or other data'  # this is my bot token
chat_id= 999999999 # my chat_id
url = 'https://api.telegram.org/bot' + token
path = photo_pathstring # this is the local path
myphoto = open(path , 'rb') #myphoto is the photo to send
myphoto_data = myphoto.read() # generate file in bytes
data = { 'chat_id' : 999999999 }
body, headers = make_request(data, myphoto_data) # generate body to upload
url = url + '/sendPhoto'
headers = { 'content-type': "multipart/form-data; boundary=---011000010111000001101001" }
upload_image(url, headers, body) # using function to upload to telegram
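With those functions in place, the upload is triggered by a single call; the path below is only a placeholder for wherever your firmware stores the capture:

send_my_photo('latest.jpg')  # hypothetical local path on the ESP32-CAM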
I want to read a docx/pdf/txt file from a URL, but if the file contains non-English characters the reading breaks. I am able to read all of these file types from a URL, but I am facing a problem with non-English characters: if there is a non-English character in the file, it gets skipped.
So, is there any way to read a file from a URL without skipping non-English words?
Here is my code to read the file on Windows.
import requests
import urllib.request
import PyPDF2
import pythoncom   # needed by docx2pdf (Word COM automation) on Windows
import docx2pdf

request = requests.get("url of file")
resp = request.json()
file_path=(resp["data"]["file_path"])
_id=(resp["data"]["_id"])
number_of_question=(resp["data"]["number_of_question"])
file_type=(resp["data"]["file_type"])
def get_context(total_pages):
page_data = []
for i in range(0,total_pages):
page = read_pdf.pages[i]
parts = []
def visitor_body(text, cm, tm, fontDict, fontSize):
y = tm[5]
if y > 70 and y < 780:
parts.append(text)
page.extract_text(visitor_text=visitor_body)
text_body = "".join(parts).strip()
page_data.append({"page_no":i+1, "page_content":text_body})
return page_data
if file_type=="txt":
response = urllib.request.urlopen(file_path)
html = response.read()
Text = html.decode('utf8')
elif file_type=="pdf":
response = requests.get(file_path)
my_raw_data = response.content
with open("my_pdf.pdf", 'wb') as my_data:
my_data.write(my_raw_data)
open_pdf_file = open("my_pdf.pdf", 'rb')
read_pdf = PyPDF2.PdfReader(open_pdf_file)
total_pages = len(read_pdf.pages)
Text = get_context(total_pages)
print(Text)
elif file_type=="docx":
response = requests.get(file_path)
my_raw_data = response.content
with open("my_doc.docx", "wb") as text_file:
text_file.write(my_raw_data)
pythoncom.CoInitialize()
docx2pdf.convert("my_doc.docx")
open_pdf_file = open("my_doc.pdf", 'rb')
read_pdf = PyPDF2.PdfReader("my_doc.pdf")
total_pages = len(read_pdf.pages)
Text = get_context(total_pages)
else:
print("Invalid File Type")
Here is the output that I am getting:
[{'page_no': 1, 'page_content': ''}]
Example of a non-English file:
https://www.dropbox.com/s/utq3y82qrbenisg/%E0%A4%95%E0%A4%B2-%E0%A4%95%E0%A5%80-%E0%A4%95%E0%A4%B2-%E0%A4%B8%E0%A5%8B%E0%A4%9A%E0%A5%87%E0%A4%82%E0%A4%97%E0%A5%87-.pdf?dl=0
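One quick way to narrow this down (a minimal check, not part of the original code, assuming the PDF has already been downloaded to my_pdf.pdf as above) is to see whether PyPDF2 extracts any text at all without the visitor; if this already prints an empty or mangled string, the problem is the extraction of the non-English text itself rather than the y-coordinate filter in visitor_body:

import PyPDF2

reader = PyPDF2.PdfReader("my_pdf.pdf")
raw_text = reader.pages[0].extract_text()  # plain extraction, no visitor
print(repr(raw_text))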
def get_rstname_links(pref_urls, pref_name):
rstname_links = []
for link in tqdm.tqdm(pref_urls):
HEADERS = {"User-Agent": random.choice(AGENT_LIST)}
try:
html_content = requests.get(
link, proxies=proxies, headers=HEADERS, timeout=100, verify=False
).text
soup = BeautifulSoup(html_content, "html.parser")
hyperlinks = soup.find_all("div", {"class": "rstname"})
for hyperlink in hyperlinks:
links = hyperlink.find_all("a")
for link in links:
href = link.get("href")
print(href)
rstname_links.append(href)
except:
pass
with open(f"{ABSOLUTE_PATH}output/urls/{pref_name}.txt", "w") as f:
for link in rstname_links:
f.write("https://example.com" + link + "\n")
print(f"{pref_name} URL EXTRACTION DONE!")
multiprocessing code:
chunk_size = math.ceil(len(pref_urls) / (mp.cpu_count() * 20))
range_of_chunk = list(
chunks(range(0, len(pref_urls)), chunk_size)
)
processes = []
for chunk in range_of_chunk:
process = multiprocessing.Process(
target=get_rstname_links,
args=(
pref_urls[chunk[0] : chunk[-1]],
pref_name,
),
)
processes.append(process)
process.start()
for idx, proc in enumerate(processes):
proc.join()
Here, after all the processes complete, the files are supposed to be written to disk by the code in the first function, but nothing is being saved.
Any solution to this problem?
Okay, I actually solved the problem like this:
def get_rstname_links(pref_urls):
rstname_links = []
for link in tqdm.tqdm(pref_urls):
HEADERS = {"User-Agent": random.choice(AGENT_LIST)}
try:
html_content = requests.get(
link, proxies=proxies, headers=HEADERS, timeout=100, verify=False
).text
# parse the html content
soup = BeautifulSoup(html_content, "html.parser")
# get all the hyperlinks in the html content list <li>
hyperlinks = soup.find_all("div", {"class": "rstname"})
for hyperlink in hyperlinks:
links = hyperlink.find_all("a")
for link in links:
href = link.get("href")
print(href)
rstname_links.append(href)
except:
pass
return rstname_links
changed multiprocessing code:
def url_download(pref_name):
# parse the arguments
base_links = get_hyperlinks(f"https://example.com/{pref_name}/")
pref_urls = visit_links(base_links)
# rstname_links = get_rstname_links(pref_urls)
pool = Pool(mp.cpu_count() * 50)
rstname_links = list(
tqdm.tqdm(
pool.imap(get_rstname_links, chunks(pref_urls, 100)),
total=math.ceil(len(pref_urls) / 100),
)
)
pool.close()
pool.join()
rstname_links = [item for sublist in rstname_links for item in sublist]
print(f"{pref_name} : {len(rstname_links)}")
return rstname_links
Then, in the main method, write the rstname_links data to disk.
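A sketch of that main-method write, reusing the output path pattern from the first version of get_rstname_links (ABSOLUTE_PATH and pref_name are assumed to be defined as before):

rstname_links = url_download(pref_name)
with open(f"{ABSOLUTE_PATH}output/urls/{pref_name}.txt", "w") as f:
    for link in rstname_links:
        f.write("https://example.com" + link + "\n")
print(f"{pref_name} URL EXTRACTION DONE!")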
I am trying to store dictionaries, built from user input, in a JSON file. The code is:
@client.command()
async def shibaku0(ctx, coin1, coin2, coin3, coin4, coin5, coin6, shibakunumber, oslink):
await ctx.message.delete()
with open('Shibaku0.json', 'r') as f:
coins_data = json.load(f)
coins_data[str(ctx.author.id)]["coins"] = (coin1, coin2, coin3, coin4, coin5, coin6)
shibakunumber[str(ctx.author.id)]["shibakunumber"] = (shibakunumber)
oslink[str(ctx.author.id)]["oslink"] = (oslink)
with open('Shibaku0.json', 'w') as f:
json.dump(coins_data, f)
embed=discord.Embed(title="Shibaku0", url=f'{oslink}', description=f'No. {shibakunumber}')
embed.add_field(name="Coins: ", value=f'{coin1} {coin2} {coin3} {coin4} {coin5} {coin6}', inline=True)
embed.set_footer(text=f"{ctx.author.name}'s Shibaku")
await ctx.send(embed=embed)
I want to store the "coins", "shibakunumber", and "oslink" values, but I am getting this error message when I try to run the code:
TypeError: list indices must be integers or slices, not str
To make a JSON file that works as an associative array (a JSON object / dict), the file needs to contain a single object, i.e. start with {. Each key of that object associates a user id with the data for that user, i.e. a nested object.
Here's a sample:
import json
# sample_data/Obj.json
# {
# "user1": {"coins": "coinstringvalue", "shibakunumber": 1, "oslink": "linkstringvalue"},
# "user2": {"coins": "coinstringvalue2", "shibakunumber": 2, "oslink": "linkstringvalue2"}
# }
mydata = json.load(open('sample_data/Obj.json', 'r'))
print(mydata['user1'])
print(mydata['user2'])
print(mydata['user2']['shibakunumber'])
This gives the output:
{'coins': 'coinstringvalue', 'shibakunumber': 1, 'oslink': 'linkstringvalue'}
{'coins': 'coinstringvalue2', 'shibakunumber': 2, 'oslink': 'linkstringvalue2'}
2
Note that mydata is a dict in Python.
Here's an example of starting with an empty json file.
import json
# sample_data/empty.json
# {}
mydata = json.load(open('sample_data/empty.json', 'r'))
print(mydata)
# author_id = str(ctx.author.id)
author_id = str(1161214)
if author_id not in mydata:
mydata[author_id] = dict()
mydata[author_id]['coins'] = ("coin1", "coin2")
mydata[author_id]['shibakunumber'] = 2
mydata[author_id]['oslink'] = "somelink"
json.dump(mydata, open('sample_data/new_file.json', 'w'))
loaded_data = json.load(open('sample_data/new_file.json', 'r'))
print(loaded_data)
Produces:
mydata {}
loaded_data {'1161214': {'coins': ['coin1', 'coin2'], 'shibakunumber': 2, 'oslink': 'somelink'}}
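Applied to the command from the question, the same nested-dict pattern might look like the sketch below (assuming Shibaku0.json starts out containing {} rather than a list, and that json and the bot client are imported/defined as in the rest of the bot):

@client.command()
async def shibaku0(ctx, coin1, coin2, coin3, coin4, coin5, coin6, shibakunumber, oslink):
    await ctx.message.delete()
    with open('Shibaku0.json', 'r') as f:
        coins_data = json.load(f)  # a dict keyed by author id
    author_id = str(ctx.author.id)
    if author_id not in coins_data:
        coins_data[author_id] = {}
    coins_data[author_id]["coins"] = (coin1, coin2, coin3, coin4, coin5, coin6)
    coins_data[author_id]["shibakunumber"] = shibakunumber
    coins_data[author_id]["oslink"] = oslink
    with open('Shibaku0.json', 'w') as f:
        json.dump(coins_data, f)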
I want to crawl multiple pages and download the images from each of them. However, not all images get downloaded, because they are overwritten inside the for loop.
Below is my code. What is wrong?
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests as rq
for page in range(2,4):
baseUrl = 'https://onepiecetreasurecruise.fr/Artwork/index.php?page=index'
plusUrl = baseUrl + str(page)
html = urlopen(plusUrl).read()
soup = BeautifulSoup(html, 'html.parser')
img = soup.find_all(class_='card-img-top')
listimg = []
for i in img:
listimg.append(i['src'])
n = 1
for index, img_link in enumerate(listimg):
img_data = rq.get(img_link).content
with open('./onepiece/' + str(index+1) + '.png', 'wb+') as f:
f.write(img_data)
n += 1
Here is another way to download all the pictures:
from simplified_scrapy import Spider, SimplifiedDoc, utils, SimplifiedMain
class ImageSpider(Spider):
name = 'onepiecetreasurecruise'
start_urls = ['https://onepiecetreasurecruise.fr/Artwork/index.php?page=index']
# refresh_urls = True
concurrencyPer1s = 0.5 # set download speed
imgPath = 'images/'
def __init__(self):
Spider.__init__(self, self.name) # necessary
utils.createDir(self.imgPath) # create image dir
def afterResponse(self, response, url, error=None, extra=None):
try: # save images
flag = utils.saveResponseAsFile(response, self.imgPath, 'image')
if flag: return None
except Exception as err:
print(err)
return Spider.afterResponse(self, response, url, error, extra)
def extract(self, url, html, models, modelNames):
doc = SimplifiedDoc(html)
# image urls
urls = doc.body.getElements('p', value='card-text').a
if (urls):
for u in urls:
u['header']={'Referer': url['url']}
self.saveUrl(urls)
# next page urls
u = doc.body.getElementByText('Suivant',tag='a')
if (u):
u['href'] = utils.absoluteUrl(url.url,u.href)
self.saveUrl(u)
return True
SimplifiedMain.startThread(ImageSpider()) # start download
I fixed the indents in your code. This works for me. It downloads 30 images.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests as rq
listimg = [] # all images
for page in range(2,4):
baseUrl = 'https://onepiecetreasurecruise.fr/Artwork/index.php?page=index'
plusUrl = baseUrl + str(page)
html = urlopen(plusUrl).read()
soup = BeautifulSoup(html, 'html.parser')
img = soup.find_all(class_='card-img-top')
for i in img:
listimg.append(i['src'])
n = 1
for index, img_link in enumerate(listimg):
img_data = rq.get(img_link).content
with open('./onepiece/' + str(index+1) + '.png', 'wb+') as f:
f.write(img_data)
n += 1
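If you would rather keep everything inside the page loop, another option (a sketch, not what the fix above does) is to derive each filename from the image URL itself, so images from later pages cannot overwrite images from earlier pages:

import os

for img_link in listimg:
    img_data = rq.get(img_link).content
    filename = os.path.basename(img_link.split('?')[0])  # last path segment of the URL
    with open('./onepiece/' + filename, 'wb') as f:
        f.write(img_data)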
We are interested in the BERT vectors for each token. By BERT vector we mean the word vector for a specific token in BERT's output layer. So we would like to find out which token produces which BERT vector. We wrote some code, but we are not sure if it is correct or how to test it.
So in the code we process a sentence with BERT. We construct a list of position ids and hand them to the model. Afterwards we use the same position ids to map the tokens to the output layer. Then there is some code that calculates the character offsets of each vector in the input sentence.
Is this the correct way to use position_ids to map each token to its output vector?
from transformers import BertModel, BertConfig, BertTokenizer
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
def sentence_to_vector(input_sentence):
tokens_encoded = tokenizer.encode(input_sentence, add_special_tokens=True)
input_ids = torch.tensor(tokens_encoded).unsqueeze(0) # Batch size 1
seq_length = input_ids.size(1)
# code to construct position_ids from here:
# https://github.com/huggingface/transformers/blob/8da280ebbeca5ebd7561fd05af78c65df9161f92/pytorch_pretrained_bert/modeling.py#L188:L189
position_ids = torch.arange(seq_length, dtype=torch.long, device=input_ids.device)
position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
outputs = model(input_ids, position_ids=position_ids)
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
# from the BertModel documentation (example at the bottom):
# The last hidden-state is the first element of the output tuple
# https://huggingface.co/transformers/model_doc/bert.html#transformers.BertModel
#ttv = {} # token to vector
#for i in position_ids[0]:
# ttv[tokens[i]] = outputs[0][0][position_ids[0][i]]
data = []
last_offset = 0
for i in range(0, len(position_ids[0])):
token = tokens[position_ids[0][i]]
vector = outputs[0][0][position_ids[0][i]]
pos_begin = None
pos_end = None
if not token == "[CLS]" and not token == "[SEP]":
pos_begin = input_sentence.find(token, last_offset)
pos_end = pos_begin + len(token)
last_offset = pos_end
data.append({
"token": token,
"pos_begin": pos_begin,
"pos_end": pos_end,
"vector": vector
})
return data
input_sentence = "do the chicken dance!"
data = sentence_to_vector(input_sentence)
for token in data:
print(token["token"] + "\t" + str(token["pos_begin"]) + "\t" + str(token["pos_end"]) + "\t" + str(token["vector"][0:3]) + "..." )
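One way to test whether the explicit position_ids change anything (a minimal check, not part of the original code) is to compare the model output with and without them; if the outputs match, the sequential position_ids above are just the default behaviour, and the i-th output vector belongs to the i-th token:

import torch

with torch.no_grad():
    ids = torch.tensor(tokenizer.encode("do the chicken dance!", add_special_tokens=True)).unsqueeze(0)
    pos = torch.arange(ids.size(1), dtype=torch.long).unsqueeze(0)
    out_with = model(ids, position_ids=pos)[0]   # last hidden state with explicit position_ids
    out_without = model(ids)[0]                  # last hidden state with default position_ids
print(torch.allclose(out_with, out_without))  # expected: True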