I have to read the latest image in a folder using Python. How can I do this?
Another similar way, with some pragmatic (non-foolproof) image validation added:
import os

def get_latest_image(dirpath, valid_extensions=('jpg','jpeg','png')):
    """
    Get the latest image file in the given directory
    """

    # get filepaths of all files and dirs in the given dir
    valid_files = [os.path.join(dirpath, filename) for filename in os.listdir(dirpath)]

    # filter out directories, no-extension, and wrong extension files
    valid_files = [f for f in valid_files if '.' in f and \
        f.rsplit('.',1)[-1] in valid_extensions and os.path.isfile(f)]

    if not valid_files:
        raise ValueError("No valid images in %s" % dirpath)

    return max(valid_files, key=os.path.getmtime)
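A similar sketch using pathlib (Python 3.4+), in case that style is preferred; the function name and suffix tuple here are just illustrative:

from pathlib import Path

def get_latest_image_pathlib(dirpath, valid_suffixes=('.jpg', '.jpeg', '.png')):
    # keep only regular files whose suffix matches, case-insensitively
    candidates = [p for p in Path(dirpath).iterdir()
                  if p.is_file() and p.suffix.lower() in valid_suffixes]
    if not candidates:
        raise ValueError("No valid images in %s" % dirpath)
    # newest file by modification time
    return max(candidates, key=lambda p: p.stat().st_mtime)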
Walk over the filenames, get their modification time and keep track of the latest modification time you found:
import os
import glob

ts = 0
found = None
for file_name in glob.glob('/path/to/your/interesting/directory/*'):
    fts = os.path.getmtime(file_name)
    if fts > ts:
        ts = fts
        found = file_name

print(found)
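The same idea can be written as a single max() call over the glob results (a sketch; the directory path is a placeholder, and an empty directory would raise ValueError):

import os
import glob

# newest entry in the directory by modification time (raises ValueError if the directory is empty)
found = max(glob.glob('/path/to/your/interesting/directory/*'), key=os.path.getmtime)
print(found)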
I am trying to download a batch of files from the following website to my local folder, but the download is very slow. The average size of each file is in the range of 30-60 MB. Is there a better way to improve this code so that I can download them faster?
import requests
from os import mkdir
from os.path import isdir
from bs4 import BeautifulSoup
from os import chdir, getcwd
url = "https://opendata.dwd.de/climate_environment/CDC/grids_germany/hourly/radolan/historical/asc/"
years = [str(year) for year in range(2005,2021)]
links = [url + i + "/" for i in years]
Creating an empty list to store a list of lists:
t_links = []

def get_tarlinks():
    for i in links:
        # create response object
        r = requests.get(i)
        # create BeautifulSoup object
        soup = BeautifulSoup(r.content, 'html5lib')
        # find all links on the webpage
        a_links = soup.find_all('a')
        # keep only the links ending with .tar
        tar_links = [i + link['href'] for link in a_links if link['href'].endswith('.tar')]
        t_links.append(tar_links)
    return t_links

t_links = get_tarlinks()
src_path = "D:/Sandeep/Thesis/Data/"
Download files to local machine:
for i in t_links:
    for j in i:
        year, filename = j.split('/')[10:]
        r = requests.get(j, allow_redirects=True)
        if isdir(src_path + year) == False:
            mkdir(src_path + year)
            chdir(src_path + year)
            open(filename, "wb").write(r.content)
        else:
            open(filename, "wb").write(r.content)
Note: Please check for the indentation when you copy this code to your IDE. Thanks!
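One way this could be sped up (not part of the original post) is to stream each response to disk and fetch several files in parallel with a thread pool; a rough sketch that assumes the same t_links list of lists and src_path as above:

import os
from concurrent.futures import ThreadPoolExecutor
import requests

def download_one(url):
    # derive the year folder and file name from the URL, as in the original code
    year, filename = url.split('/')[10:]
    target_dir = os.path.join(src_path, year)
    os.makedirs(target_dir, exist_ok=True)
    # stream the response so large files are not held in memory all at once
    with requests.get(url, stream=True, allow_redirects=True) as r:
        r.raise_for_status()
        with open(os.path.join(target_dir, filename), "wb") as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)

# flatten the list of lists and download a few files at a time
all_urls = [url for sublist in t_links for url in sublist]
with ThreadPoolExecutor(max_workers=4) as pool:
    # list() forces completion and re-raises any error from the workers
    list(pool.map(download_one, all_urls))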
I have a list of URLs in a text file. I want the images to be downloaded to a particular folder; how can I do it? Is there any add-on available in Chrome, or any other program, to download images from URLs?
Create a folder on your machine.
Place your text file of image URLs in the folder.
cd to that folder.
Use wget -i images.txt
You will find all your downloaded files in the folder.
On Windows 10/11 this is fairly trivial using
for /F "eol=;" %f in (filelist.txt) do curl -O %f
Note that including eol=; allows us to mask individual exclusions by adding ; at the start of those lines in filelist.txt that we do not want this time. If using the above in a batch file GetFileList.cmd, double the percent signs (%%f).
Windows 7 has an FTP command, but it can often throw up a firewall dialog requiring a User Authorization response.
If you are running Windows 7 and want to download a list of URLs without installing wget.exe or another dependency such as curl.exe (which would be simplest, as in the first command above), the shortest compatible way is a PowerShell command (not my favorite for speed, but if needs must).
The file with URLs is filelist.txt, and IWR (Invoke-WebRequest) is the PowerShell near-equivalent of wget.
The first command, which sets the security protocol, ensures we are using the modern TLS 1.2 protocol.
-OutF ... Split-Path ... means the filenames will be the same as the remote filenames but saved in the CWD (current working directory); for scripting you can cd /d folder first if necessary.
PS> [Net.ServicePointManager]::SecurityProtocol = "Tls12" ; GC filelist.txt | % {IWR $_ -OutF $(Split-Path $_ -Leaf)}
To run as a CMD use a slightly different set of quotes around 'Tls12'
PowerShell -C "& {[Net.ServicePointManager]::SecurityProtocol = 'Tls12' ; GC filelist.txt | % {IWR $_ -OutF $(Split-Path $_ -Leaf)}}"
This needs to be made into a function with error handling, but it repeatedly downloads images for image classification projects:
import pandas as pd
import requests

urls = pd.read_csv('cat_urls.csv') #save the url list as a dataframe
rows = []
for index, i in urls.iterrows():
    rows.append(i[-1])

counter = 0
for i in rows:
    file_name = 'cat' + str(counter) + '.jpg'
    print(file_name)
    response = requests.get(i)
    file = open(file_name, "wb")
    file.write(response.content)
    file.close()
    counter += 1
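A possible sketch of the same logic wrapped into a function with basic error handling, as the note above suggests; the CSV name, last-column assumption, and 'cat' prefix are carried over from the original:

import pandas as pd
import requests

def download_images(csv_path='cat_urls.csv', prefix='cat'):
    """Download every URL in the last column of csv_path; returns the number of failures."""
    urls = pd.read_csv(csv_path).iloc[:, -1]
    errors = 0
    for counter, url in enumerate(urls):
        file_name = prefix + str(counter) + '.jpg'
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
        except requests.RequestException:
            # skip this URL and keep going
            errors += 1
            continue
        with open(file_name, 'wb') as f:
            f.write(response.content)
    return errors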
import os
import time
import sys
import urllib
from progressbar import ProgressBar
def get_raw_html(url):
    version = (3,0)
    curr_version = sys.version_info
    if curr_version >= version: #If the Current Version of Python is 3.0 or above
        import urllib.request #urllib library for Extracting web pages
        try:
            headers = {}
            headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
            request = urllib.request.Request(url, headers = headers)
            resp = urllib.request.urlopen(request)
            respData = str(resp.read())
            return respData
        except Exception as e:
            print(str(e))
    else: #If the Current Version of Python is 2.x
        import urllib2
        import ssl
        try:
            headers = {}
            headers['User-Agent'] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.27 Safari/537.17"
            request = urllib2.Request(url, headers = headers)
            try:
                response = urllib2.urlopen(request)
            except urllib2.URLError: # Handling SSL certificate failed
                context = ssl._create_unverified_context()
                response = urllib2.urlopen(request, context=context)
            raw_html = response.read()
            return raw_html
        except:
            return "Page Not found"
def next_link(s):
    start_line = s.find('rg_di')
    if start_line == -1: #If no links are found then give an error!
        end_quote = 0
        link = "no_links"
        return link, end_quote
    else:
        start_line = s.find('"class="rg_meta"')
        start_content = s.find('"ou"',start_line+1)
        end_content = s.find(',"ow"',start_content+1)
        content_raw = str(s[start_content+6:end_content-1])
        return content_raw, end_content
def all_links(page):
    links = []
    while True:
        link, end_content = next_link(page)
        if link == "no_links":
            break
        else:
            links.append(link) #Append all the links in the list named 'Links'
            #time.sleep(0.1) #Timer could be used to slow down the request for image downloads
            page = page[end_content:]
    return links
def download_images(links, search_keyword):
    choice = input("Do you want to save the links? [y]/[n]: ")
    if choice=='y' or choice=='Y':
        #write all the links into a text file.
        f = open('links.txt', 'a') #Open the text file called links.txt
        for link in links:
            f.write(str(link))
            f.write("\n")
        f.close() #Close the file

    num = input("Enter number of images to download (max 100): ")
    counter = 1
    errors = 0

    search_keyword = search_keyword.replace("%20","_")
    directory = search_keyword+'/'
    if not os.path.isdir(directory):
        os.makedirs(directory)

    pbar = ProgressBar()
    for link in pbar(links):
        if counter<=int(num):
            file_extension = link.split(".")[-1]
            filename = directory + str(counter) + "." + file_extension
            #print ("Downloading image: " + str(counter)+'/'+str(num))
            try:
                urllib.request.urlretrieve(link, filename)
            except IOError:
                errors+=1
                #print ("\nIOError on Image" + str(counter))
            except urllib.error.HTTPError as e:
                errors+=1
                #print ("\nHTTPError on Image"+ str(counter))
            except urllib.error.URLError as e:
                errors+=1
                #print ("\nURLError on Image" + str(counter))
        counter+=1
    return errors
def search():
    version = (3,0)
    curr_version = sys.version_info
    if curr_version >= version: #If the Current Version of Python is 3.0 or above
        import urllib.request #urllib library for Extracting web pages
    else:
        import urllib2 #If current version of python is 2.x

    search_keyword = input("Enter the search query: ")

    #Download Image Links
    links = []
    search_keyword = search_keyword.replace(" ","%20")
    url = 'https://www.google.com/search?q=' + search_keyword + '&espv=2&biw=1366&bih=667&site=webhp&source=lnms&tbm=isch&sa=X&ei=XosDVaCXD8TasATItgE&ved=0CAcQ_AUoAg'
    raw_html = (get_raw_html(url))
    links = links + (all_links(raw_html))
    print ("Total Image Links = "+str(len(links)))
    print ("\n")
    errors = download_images(links, search_keyword)
    print ("Download Complete.\n"+ str(errors) +" errors while downloading.")

search()
In this Python project I make a search on unsplash.com, which brings me a list of URLs; then I save a number of them (pre-defined by the user) to a pre-defined folder. Check it out.
On Windows, install wget - https://sourceforge.net/projects/gnuwin32/files/wget/1.11.4-1/
and add C:\Program Files (x86)\GnuWin32\bin to your environment path.
Create a folder with a txt file listing all the images you want to download.
In the location bar at the top of File Explorer, type cmd.
When the command prompt opens, enter the following:
wget -i images.txt --no-check-certificate
I need to import multiple images (10,000) into MATLAB (R2013b) from a subdirectory of the predefined MATLAB directory.
I don't know the exact names of the images.
I tried this:
file = dir('C:\Users\user\Documents\MATLAB\train');
NF = length(file);
for k = 1 : NF
    img = imread(fullfile('C:\Users\user\Documents\MATLAB\train', file(k).name));
end
But it throws this error even though I ran it with Admin privileges:
Error using imread (line 347)
Can't open file "C:\Users\user\Documents\MATLAB\train\." for reading;
you may not have read permission.
The "dir" command returns the virtual directory elements "." (the directory itself) and ".." (its parent), as your error message shows.
A simple fix is to use a more specific dir call, based on your image types, perhaps:
file = dir('C:\Users\user\Documents\MATLAB\train\*.jpg');
Check the output of dir. The first two "files" are . and .., which is similar to the behaviour of the windows dir command.
file = dir('C:\Users\user\Documents\MATLAB\train');
NF = length(file);
for k = 3 : NF
    img = imread(fullfile('C:\Users\user\Documents\MATLAB\train', file(k).name));
end
In R2013b you would have to do
file = dir('C:\Users\user\Documents\MATLAB\train\*.jpg');
If you have R2014b with the Computer Vision System Toolbox then you can use imageSet:
images = imageSet('C:\Users\user\Documents\MATLAB\train\');
This will create an object containing paths to all image files in the train directory, regardless of format. Then you can read the i-th image like this:
im = read(images, i);
I have downloaded a set of HTML files and saved the paths I saved them to in a .txt file, with each path on a new line. I want to look at the first file in the list and then iterate through the whole list, opening each file and extracting data before going on to the next file.
My code works fine with a single path put in directly (for the first file) as:
path = r'C:\path\to\file.html'
and works if I iterate through the text file using:
file_list_fp = r'C:\path\to\file_with_pathlist.txt'
with open(file_list_fp, 'r') as file_list:
    for filepath in file_list:
        pathend = filepath.find('\n')
        path = filepath[:pathend]
        q = open(path, 'r').read()
but it fails when I try getting a single path using either:
with open(file_list_fp, 'r') as file_list:
    path_n = file_list.readline()
    end = path_n.find('\n')
    path_bad1 = path_n[:end]
or:
with open(file_list_fp, 'r') as file_list:
    path_bad2 = file_list.readline().split('\n')[0]
With these two my code exits just after that point. I can't figure out why. Any pointers very welcome. (I'm using Python 3.3.1 on Windows.)
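For reference, a minimal sketch (not from the original post) that reads just the first path and opens it, using strip() to drop the trailing newline and any stray whitespace:

file_list_fp = r'C:\path\to\file_with_pathlist.txt'

with open(file_list_fp, 'r') as file_list:
    # read only the first line and drop the newline / surrounding whitespace
    path = file_list.readline().strip()

with open(path, 'r') as f:
    q = f.read()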
I'm just getting to grips with pandas (which is awesome), and what I need to do is read compressed genomics-type files from FTP sites into a pandas DataFrame.
This is what I tried and got a ton of errors:
from pandas.io.parsers import *
chr1 = 'ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/chr_rpts/chr_1.txt.gz'
CHR1 = read_csv(chr1, sep='\t', compression = 'gzip', skiprows = 10)
print type(CHR1)
print CHR1.head(10)
Ideally I'd like to do something like this:
from pandas.io.data import *
AAPL = DataReader('AAPL', 'yahoo', start = '01/01/2006')
The interesting part of this question is how to stream a (gz) file from FTP, which is discussed here; it's claimed that the following will work in Python 3.2 (but won't in 2.x, nor will it be backported), and on my system this is the case:
import urllib.request as ur
from gzip import GzipFile
import pandas as pd

req = ur.Request(chr1) # gz file on ftp (ensure it starts with 'ftp://')
z_f = ur.urlopen(req)

# this line *may* work (but I haven't been able to confirm it)
# df = pd.read_csv(z_f, sep='\t', compression='gzip', skiprows=10)

# this works (*)
f = GzipFile(fileobj=z_f, mode="r")
df = pd.read_csv(f, sep='\t', skiprows=10)
(*) Here f is "file-like", in the sense that we can perform a readline (read it line-by-line), rather than having to download/open the entire file.
Note: I couldn't get the ftplib library to readline; it wasn't clear whether it ought to.
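As an aside, more recent pandas versions can usually read the gzipped file straight from the URL, so something close to the question's one-liner may now work directly (a sketch, not verified against this particular FTP server):

import pandas as pd

chr1 = 'ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/chr_rpts/chr_1.txt.gz'
# newer pandas can fetch the URL and decompress based on the .gz extension
CHR1 = pd.read_csv(chr1, sep='\t', compression='gzip', skiprows=10)
print(CHR1.head(10))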