Scraping data from infinite scrolling page using scrapy via ajax get request - ajax
I'm new to web scraping and I want to scrape the information for all the events listed on the website page "Events in Oslo".
I've written some sample code to scrape the data, which goes as follows:
'''A Python script to scrape data from 10times.com'''
import scrapy
import requests
class EventFinder(scrapy.Spider):
    '''Spider that scrapes the events shown on the first screen of the listing.

    Only the events present in the initially served HTML of ``start_urls``
    are visited; events that the site loads later through Ajax requests
    while scrolling are not reached by this spider.
    '''
    name = 'EventSpider'  # name of the spider
    start_urls = ['https://10times.com/oslo-no?datefrom=2020-08-01&dateto=2021-07-31']
    custom_settings = {
        'FEED_URI': 'tmp/event_details.csv'
        # All the scraped data will be stored in event_details.csv under temp folder
    }

    # NOTE: the original snippet contained a dangling ``def fetch(url)`` stub
    # here (no colon, no body). It was a syntax error and was never called,
    # so it has been removed.

    def parse(self, response):
        '''Follow the link of every event found on the listing page.

        Yields one request per event link; each response is handled by
        ``parse_links``.
        '''
        event_url = response.css(".mb-0 .text-decoration-none::attr(href)")
        for link in event_url:
            yield response.follow(link.get(), callback=self.parse_links)

    def parse_links(self, response):
        '''Scrape one event page and yield its details as a dict (one feed row).

        Each field is extracted as a list of matching text nodes and the
        lists are combined with ``zip``. ``zip`` stops at the shortest list,
        so a page on which any one selector matches nothing yields no row.
        '''
        event_name = response.css("h1::text").extract()
        event_date = response.css(".mb-0 span::text").extract()
        event_timings = response.css("#hvrout1 td:nth-child(1)::text").extract()
        event_location = response.css("#map_dirr span , #map_dirr h3").css("::text").extract()
        event_type = response.css("#hvrout2::text").extract()
        event_tags = response.css("#hvrout2 a::text").extract()
        for item in zip(event_name, event_date, event_timings, event_location, event_type, event_tags):
            scraped_info = {
                'Event Name': item[0],
                'Date': item[1],
                'Timings': item[2],
                'Location': item[3],
                'Event Type': item[4],
                'Event Tags': item[5],
            }
            yield scraped_info
The code I've written is able to scrape the data for all the events that are listed on the first page, but as we scroll down, the page loads more data dynamically via Ajax GET requests, and my code is not able to scrape that data. I've watched some videos and read some articles too, but I was not able to figure out how I can scrape data that is generated dynamically on scrolling. Any help on this will be appreciated.
'''A Python script to scrape data from 10times.com'''
import scrapy
import requests
class EventFinder(scrapy.Spider):
    '''Spider that walks the listing's "scroll" Ajax pages and scrapes each event.

    The listing page loads further events through Ajax GET requests as the
    user scrolls. This spider requests those Ajax pages directly
    (``url`` + page number), follows every event link found on each page and
    keeps requesting the next page until a page contains no event links.
    '''
    name = 'EventSpider'  # name of the spider

    # The human-facing listing is
    # https://10times.com/oslo-no?datefrom=2020-08-01&dateto=2021-07-31
    # The endpoint below is requested instead; the page number is appended.
    url = 'https://10times.com/ajax?for=scroll&path=/oslo-no&datefrom=2020-08-01&dateto=2021-07-31&ajax=1&page='
    page = 1  # first Ajax page to request
    start_urls = [url + str(page)]

    # Optional safety cap on the last page number to request; None = no cap.
    max_pages = None

    custom_settings = {
        # All the scraped data will be stored in event_details.csv under temp folder
        'FEED_URI': 'tmp/event_details.csv',
        # State the serialisation format explicitly so the .csv file really
        # contains CSV. NOTE(review): FEED_URI/FEED_FORMAT are the legacy
        # feed settings; confirm against the Scrapy version in use.
        'FEED_FORMAT': 'csv',
    }

    def parse(self, response):
        '''Follow every event link on one Ajax page, then request the next page.

        The page number travels with each request in ``Request.meta`` rather
        than in shared spider state, so each response knows which page it
        is. The start request carries no such entry and falls back to
        ``self.page``.
        '''
        links = response.css(".mb-0 .text-decoration-none::attr(href)").getall()
        if not links:
            # Assumes an exhausted listing returns a page without event
            # links -- TODO confirm against the live endpoint.
            return

        for href in links:
            yield response.follow(href, callback=self.parse_links)

        next_page = response.meta.get('page', self.page) + 1
        if self.max_pages is not None and next_page > self.max_pages:
            return
        yield scrapy.Request(
            self.url + str(next_page),
            callback=self.parse,
            meta={'page': next_page},
        )

    def parse_links(self, response):
        '''Scrape one event page and yield its details as a dict (one feed row).

        Every field takes the first matching text node, or an empty string
        when the selector matches nothing, so an event page that lacks one
        field still produces a row instead of being dropped.
        '''
        def first_text(query):
            # First match of a CSS query, '' when there is none.
            return response.css(query).get(default='')

        location = response.css("#map_dirr span , #map_dirr h3").css("::text").get(default='')

        yield {
            'Event Name': first_text("h1::text"),
            'Date': first_text(".mb-0 span::text"),
            'Timings': first_text("#hvrout1 td:nth-child(1)::text"),
            'Location': location,
            'Event Type': first_text("#hvrout2::text"),
            'Event Tags': first_text("#hvrout2 a::text"),
        }
Output:
{'Event Name': 'Nasjonale Konferanse Om Hjerneslag', 'Date': '18 - 19 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Kongressenter Folkets Hus AS', 'Event Type': ' Trade Show', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'Education Fair in Oslo', 'Date': '17 - 18 Feb 2021', 'Timings': ' 10:00 AM - 07:00 PM (General)\n ', 'Location': '\n Oslo Spektrum', 'Event Type': ' Trade Show', 'Event Tags': 'Education & Training'}
{'Event Name': 'EAAE Deans Summit', 'Date': '22 - 23 Apr 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'NAFEMS Physics Based Digital Twins', 'Date': '23 - 24 Mar 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': 'Oslo Life Science Conference', 'Date': '15 - 18 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'Science & Research'}
{'Event Name': 'European Academy of Paediatric Dentistry Interim seminar', 'Date': '23 - 24 Apr 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Kongressenter Folkets Hus AS', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'GLOBVAC Conference', 'Date': '20 - 21 Apr 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Clarion Hotel The Hub', 'Event Type': ' Conference', 'Event Tags': 'Wellness, Health & Fitness'}
{'Event Name': 'European Conference on Community Psychology', 'Date': '03 - 04 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Red Cross Conference Center', 'Event Type': ' Conference', 'Event Tags': 'Wellness, Health & Fitness'}
{'Event Name': 'Baltic Nordic Acoustics Meeting', 'Date': '03 - 05 May 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Clarion Hotel Oslo', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'The European Port House Conference', 'Date': '27 - 28 May 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Petroleum Systems Conference', 'Date': '02 - 03 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Human Factors AS', 'Event Type': ' Conference', 'Event Tags': 'Power & Energy'}
{'Event Name': 'Oslo Yoga Festival', 'Date': '29 - 31 Jan 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Sagene samfunnshus', 'Event Type': ' Trade Show', 'Event Tags': 'Wellness, Health & Fitness'}
{'Event Name': 'NUGA Conference', 'Date': '28 - 30 Jan 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Clarion Hotel The Hub', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'Anti-Corruption Nordics', 'Date': '26 - 28 Jan 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'Software', 'Date': '10 - 11 Feb 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'International Joint Conference on Metallurgical and Materials Engineering', 'Date': '18 - 20 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': 'International Conference on Frontiers of Chemical Materials and Process', 'Date': '18 - 20 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Science & Research'}
{'Event Name': 'International Conference on Material Engineering and Advanced Manufacturing Technology', 'Date': '18 - 20 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': '600Minutes Executive IT', 'Date': ' 02 Dec 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Science & Research'}
{'Event Name': "IDC's Multicloud Conference", 'Date': ' 18 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Gamle Logen - Selskapslokaler Oslo', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'European Intelligence and Security Informatics Conference', 'Date': '10 - 11 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'Security & Defense'}
{'Event Name': 'Digitalization of Automation Systems', 'Date': '25 - 26 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Meet Ullevaal', 'Event Type': ' Conference', 'Event Tags': 'Industrial Engineering'}
{'Event Name': 'Annual Privacy Forum', 'Date': '17 - 18 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'International Association of Lighting Designers Enlighten Europe', 'Date': '18 - 20 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Radisson Blu Scandinavia Hotel, Oslo', 'Event Type': ' Conference', 'Event Tags': 'Building & Construction'}
{'Event Name': 'Tedx Oslo', 'Date': ' 12 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Oslo World Music Festival', 'Date': '27 Oct - 01 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Visitor Centre', 'Event Type': ' Trade Show', 'Event Tags': 'Entertainment & Media'}
{'Event Name': 'Nordic Educational Meeting', 'Date': '10 - 11 Nov 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Radisson Blu Scandinavia Hotel, Oslo', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'Nordic Place Branding Conference', 'Date': ' 26 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Rådhuset', 'Event Type': ' Conference', 'Event Tags': 'Banking & Finance'}
{'Event Name': 'Specsavers Clinical Conference', 'Date': ' 13 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Radisson Blu Scandinavia Hotel, Oslo', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'IDC Future of Work conference', 'Date': ' 28 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Gamle Logen - Selskapslokaler Oslo', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'CMO Executive Forum NO', 'Date': ' 27 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Ekebergrestauranten', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': "EARMA's Annual Conference", 'Date': '29 Sep - 01 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Oslo Kongressenter Folkets Hus AS', 'Event Type': ' Conference', 'Event Tags': 'Banking & Finance'}
{'Event Name': 'EOCCS Learning Community Symposium', 'Date': '24 - 25 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'BI Norwegian Business School', 'Event Type': ' Conference', 'Event Tags': 'Education & Training'}
{'Event Name': 'CHFR Symposium', 'Date': '23 - 25 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Scandic Holmenkollen Park', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'Nordic and Baltic Stata Conference', 'Date': ' 24 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Cancer Registry Norway', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': '600Minutes CFO', 'Date': ' 13 Oct 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Clarion Hotel Oslo', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Healthy Buildings Europe', 'Date': '21 - 23 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Inland Norway University of Applied Sciences', 'Event Type': ' Conference', 'Event Tags': 'Building & Construction'}
{'Event Name': 'Access MBA Tour Oslo', 'Date': ' 24 Sep 2020', 'Timings': ' 04:30 PM - 09:30 PM', 'Location': 'Venue to be announced', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
{'Event Name': 'Oslo Urban Arena', 'Date': '10 - 11 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Samfunnssalen Event & Konferanse', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'World Congress on Cancer', 'Date': '14 - 16 Sep 2020', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n Soria Moria hotell og konferansesenter', 'Event Type': ' Conference', 'Event Tags': 'Medical & Pharma'}
{'Event Name': 'International Conference on Defects in Semiconductors', 'Date': '26 - 30 Jul 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'Electric & Electronics'}
{'Event Name': 'International Conference on Ict Systems Security and Privacy Protection', 'Date': '22 - 24 Jun 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': '\n University of Oslo', 'Event Type': ' Conference', 'Event Tags': 'IT & Technology'}
{'Event Name': 'International Conference on Intelligent Information Systems', 'Date': '17 - 18 Jul 2021', 'Timings': ' 09:00 AM-06:00 PM (expected)', 'Location': 'Scandic KNA Hotel', 'Event Type': ' Conference', 'Event Tags': 'Business Services'}
Related
Text processing: find a match in the middle of the line and then print that one and line before
I would like to search all of the lines for the year 2019 and beyond, but only after the "To:" string. Anything between "From:" and "To:" is not relevant. I have tried using grep with the -A and -B options, but grep on AIX doesn't have those options. I've also tried something like the command below, but I can't figure out how to print the line before the match, or how to search for a pattern in the middle of the line. awk '$13 >= 2019 {print $0}' file.txt In the end I would like to search for "2019" on every line, after the "To:" string. For example, the output would look something like this: certificate4 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2019 1:32:16 PM CEST
Guessing at what the OP has in mind: if a line includes the string 'To:' and the line also has a number in field #15 that is >= 2019 then print the previous line and the current line And some assumptions: first line in file could match consecutive lines could match the 'To:/>=2019' search: for all lines of interest the 'To' comes before field #15 Sample data based solely on the one line provided by OP: $ cat -n print15.dat 1 certificate1 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2019 1:32:16 PM CEST 2 this is line two 3 this is line three 4 certificate4 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2019 1:32:16 PM CEST 5 this is line five 6 this is line six 7 certificate7 - From: Friday, October 16, 2020 1:22:18 PM CEST To: Wednesday, October 16, 2017 1:32:16 PM CEST 8 this is line eight 9 this is line nine 10 certificate10 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2020 1:32:16 PM CEST 11 this is line eleven 12 certificate12 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2023 1:32:16 PM CEST 13 certificate13 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2024 1:32:16 PM CEST 14 this is line fourteen Applying the described logic we see that lines 1, 4, 10, 12 and 13 match 'To:./>=2019'. 
One awk solution: $ awk '/To:/ && $15 >= 2019 { printf "\n#############\n" if (length(prevline) > 0) { print prevline } print $0 printf "#############\n" } { prevline=$0 } ' print15.dat Explanation: /To:/ && $15 >= 2019 : matches any line with the patterns 'To:' and field #15 >= 2019 (granted, this doesn't enforce that 'To:' comes before field #15) print/######## : simple header/trailer to visibly distinguish between sets of matching rows if/length/print : if prevline is non empty then print it print $0 : print current line (that matches 'To:' and $15>=2019) prevline=$0 : set our 'prevline' variable to the current line (to be used as 'previous line' for the next line we process) And the output: ############# certificate1 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2019 1:32:16 PM CEST ############# ############# this is line three certificate4 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2019 1:32:16 PM CEST ############# ############# this is line nine certificate10 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2020 1:32:16 PM CEST ############# ############# this is line eleven certificate12 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2023 1:32:16 PM CEST ############# ############# certificate12 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2023 1:32:16 PM CEST certificate13 - From: Friday, October 16, 2009 1:22:18 PM CEST To: Wednesday, October 16, 2024 1:32:16 PM CEST #############
How to fetch 5th and 6th day of every month in Unix?
Is it possible to fetch 5th and/or 6th day of every month in Unix? I've tried this but it does not give me the desired output: echo $((($(date +%-d)-1)/5))
Use this: for month in {1..12}; do date -d "$month/5/2017" +"%c - is a %A"; done Output: Thu 05 Jan 2017 12:00:00 AM CET - is a Thursday Sun 05 Feb 2017 12:00:00 AM CET - is a Sunday Sun 05 Mar 2017 12:00:00 AM CET - is a Sunday Wed 05 Apr 2017 12:00:00 AM CEST - is a Wednesday Fri 05 May 2017 12:00:00 AM CEST - is a Friday Mon 05 Jun 2017 12:00:00 AM CEST - is a Monday Wed 05 Jul 2017 12:00:00 AM CEST - is a Wednesday Sat 05 Aug 2017 12:00:00 AM CEST - is a Saturday Tue 05 Sep 2017 12:00:00 AM CEST - is a Tuesday Thu 05 Oct 2017 12:00:00 AM CEST - is a Thursday Sun 05 Nov 2017 12:00:00 AM CET - is a Sunday Tue 05 Dec 2017 12:00:00 AM CET - is a Tuesday For the 6th of the month, it's done in a similar way.
I am not sure if this is what you are looking for: I am assuming you want to get the 5th (or 6th) day of the current month, whatever today's date is. If so, you can make use of the current date and get the 5th day of the current month like below: dd=`date '+%d'` if(( $dd > 5 )); then (( diff = dd - 5 )) myDate=`date -d "-$diff days"` else (( diff = 5 - dd )) myDate=`date -d "+$diff days"` fi echo $myDate For the 6th day you can do similarly. The above code should work on Linux distros.
Listing missing months in an array of dates
I have a list of transactions in an array. => [Wed, 23 Oct 2013, Mon, 18 Nov 2013, Fri, 22 Nov 2013, Mon, 13 Jan 2014, Tue, 28 Jan 2014, Mon, 03 Feb 2014, Mon, 10 Feb 2014, Tue, 18 Feb 2014, Fri, 07 Mar 2014, Mon, 31 Mar 2014, Mon, 07 Apr 2014, Tue, 10 Jun 2014, Mon, 30 Jun 2014, Mon, 22 Sep 2014, Mon, 06 Oct 2014, Fri, 14 Nov 2014, Tue, 18 Nov 2014, Fri, 26 Dec 2014, Thu, 15 Jan 2015, Mon, 23 Mar 2015, Mon, 20 Apr 2015] I need to compare the dates of each transaction and list any months that are missing in the list of months and year. Here is what I have now... #find_transactions = (#user.transactions.find_all { |t| (t.name 'name' }) #trans_dates = #find_transactions.map(&:date).sort!.map { |s| Date.strptime(s, '%Y-%m') }.each_cons(2).map{ |d1,d2| d1.next_month == d2 } This method currently gives me a true or false if each month is there but I need to actually have the method print a list of months that are missing. I would like to have it print the month and year together. Here is the response this method gives me... => [true, false, true, false, false, true, false, true, false, false, false, true, true] I want a response like this... => [March 2015, December 2014, September 2014] Thanks in advance!
Edit: For array already being composed of date objects you can do: require 'date' dates = [Wed, 23 Oct 2013, Mon, 18 Nov 2013, Fri, 22 Nov 2013, Mon, 13 Jan 2014, Tue, 28 Jan 2014, Mon, 03 Feb 2014, Mon, 10 Feb 2014, Tue, 18 Feb 2014, Fri, 07 Mar 2014, Mon, 31 Mar 2014, Mon, 07 Apr 2014, Tue, 10 Jun 2014, Mon, 30 Jun 2014, Mon, 22 Sep 2014, Mon, 06 Oct 2014, Fri, 14 Nov 2014, Tue, 18 Nov 2014, Fri, 26 Dec 2014, Thu, 15 Jan 2015, Mon, 23 Mar 2015, Mon, 20 Apr 2015] all_dates = [] dates.first.upto(dates.last) {|x| all_dates << x.strftime('%b %Y') if x.day == 1 || x == dates.first} d = dates.map {|x| x.strftime('%b %Y')}.uniq p (all_dates - d) #=> ["Dec 2013", "May 2014", "Jul 2014", "Aug 2014", "Feb 2015"] Edit: Below methods are for an array of date strings You can try this: require 'date' dates = ["Wed, 23 Oct 2013", "Mon, 18 Nov 2013", "Fri, 22 Nov 2013", "Mon, 13 Jan 2014", "Tue, 28 Jan 2014", "Mon, 03 Feb 2014", "Mon, 10 Feb 2014", "Tue, 18 Feb 2014", "Fri, 07 Mar 2014", "Mon, 31 Mar 2014", "Mon, 07 Apr 2014", "Tue, 10 Jun 2014", "Mon, 30 Jun 2014", "Mon, 22 Sep 2014", "Mon, 06 Oct 2014", "Fri, 14 Nov 2014", "Tue, 18 Nov 2014", "Fri, 26 Dec 2014", "Thu, 15 Jan 2015", "Mon, 23 Mar 2015", "Mon, 20 Apr 2015"] all_dates = [] d = dates.map {|x| Date.parse(x[8..-1])}.uniq counter = d.first until counter == d.last all_dates << counter counter = counter.next_month end p (all_dates - d).map {|x| x.strftime('%b %Y')} #=> ["Dec 2013", "May 2014", "Jul 2014", "Aug 2014", "Feb 2015"] Another (more concise) way would be: require 'date' dates = ["Wed, 23 Oct 2013", "Mon, 18 Nov 2013", "Fri, 22 Nov 2013", "Mon, 13 Jan 2014", "Tue, 28 Jan 2014", "Mon, 03 Feb 2014", "Mon, 10 Feb 2014", "Tue, 18 Feb 2014", "Fri, 07 Mar 2014", "Mon, 31 Mar 2014", "Mon, 07 Apr 2014", "Tue, 10 Jun 2014", "Mon, 30 Jun 2014", "Mon, 22 Sep 2014", "Mon, 06 Oct 2014", "Fri, 14 Nov 2014", "Tue, 18 Nov 2014", "Fri, 26 Dec 2014", "Thu, 15 Jan 2015", "Mon, 23 Mar 2015", "Mon, 20 Apr 2015"] all_dates = [] d = 
dates.map {|x| Date.parse(x[8..-1])}.uniq d.first.upto(d.last) {|x| all_dates << x if x.day == 1} p (all_dates - d).map {|x| x.strftime('%b %Y')} #=> ["Dec 2013", "May 2014", "Jul 2014", "Aug 2014", "Feb 2015"]
This is one way you could do that. Code require 'date' def missing_months(dates) a = dates.map { |s| d = Date.strptime(s, '%a, %d %b %Y'); d - d.day + 1 } (all_months_in_range(*a.minmax) -a).map { |d| d.strftime('%b %Y') } end def all_months_in_range(f,l) (12*(l.year-f.year)+l.month-f.month+1).times.map do |i| y,m = (f.month+i).divmod(12) y += f.year (m=12; y-=1) if m ==0 Date.new(y,m) end end Example dates = ['Wed, 23 Oct 2013', 'Mon, 18 Nov 2013', 'Fri, 22 Nov 2013', 'Fri, 14 Nov 2014', 'Tue, 18 Nov 2014', 'Fri, 26 Dec 2014', 'Mon, 13 Jan 2014', 'Tue, 28 Jan 2014', 'Mon, 03 Feb 2014', 'Mon, 31 Mar 2014', 'Mon, 07 Apr 2014', 'Tue, 10 Jun 2014', 'Mon, 30 Jun 2014', 'Mon, 22 Sep 2014', 'Mon, 06 Oct 2014', 'Mon, 10 Feb 2014', 'Tue, 18 Feb 2014', 'Fri, 07 Mar 2014', 'Thu, 15 Jan 2015', 'Mon, 23 Mar 2015', 'Mon, 20 Apr 2015'] missing_months(dates) #=> ["Dec 2013", "May 2014", "Jul 2014", "Aug 2014", "Feb 2015"] Notice that the dates needn't be sorted. Explanation For the example above: a = dates.map { |s| d = Date.strptime(s, '%a, %d %b %Y'); d - d.day + 1 } #=> [#<Date: 2013-10-01 ((2456567j,0s,0n),+0s,2299161j)>, # #<Date: 2013-11-01 ((2456598j,0s,0n),+0s,2299161j)>, # ... # #<Date: 2015-04-01 ((2457114j,0s,0n),+0s,2299161j)>] Notice that each of these dates is on the first of the month. Next, obtain the first and last of these dates: f,l = a.minmax f #=> [#<Date: 2013-10-01 ((2456567j,0s,0n),+0s,2299161j)>, l #=> #<Date: 2015-04-01 ((2457114j,0s,0n),+0s,2299161j)>] Now pass f and l to all_months_in_range to create an array that contains a date object for the first day of each month between f and l. b = all_months_in_range(f,l) #=> [#<Date: 2013-10-01 ((2456567j,0s,0n),+0s,2299161j)>, # #<Date: 2013-11-01 ((2456598j,0s,0n),+0s,2299161j)>, # ... # #<Date: 2015-04-01 ((2457114j,0s,0n),+0s,2299161j)>] b.size #=> 19 I will skip an explanation of this helper method, as it is quite straightforward. 
Compute that difference between arrays b and a to obtain the missing beginning-of-month dates: c = b-a #=> [#<Date: 2013-12-01 ((2456628j,0s,0n),+0s,2299161j)>, # #<Date: 2014-05-01 ((2456779j,0s,0n),+0s,2299161j)>, # #<Date: 2014-07-01 ((2456840j,0s,0n),+0s,2299161j)>, # #<Date: 2014-08-01 ((2456871j,0s,0n),+0s,2299161j)>, # #<Date: 2015-02-01 ((2457055j,0s,0n),+0s,2299161j)>] Lastly, convert these dates to the desired format: c.map { |d| d.strftime('%b %Y') } #=> ["Dec 2013", "May 2014", "Jul 2014", "Aug 2014", "Feb 2015"] Addendum: after reading #Sid's answer, I see I could have saved myself some trouble in my helper method by using Date#next_month: def all_months_in_range(f,l) (12*(l.year-f.year)+l.month-f.month+1).times.map { |i| f.next_month(i) } end
This isn't very elegant, but it worked. I started with your original code, #SupremeA, and built off of that. require 'date' dates = ['Wed, 23 Oct 2013', 'Mon, 18 Nov 2013', 'Fri, 22 Nov 2013', 'Mon, 13 Jan 2014', 'Tue, 28 Jan 2014', 'Mon, 03 Feb 2014', 'Mon, 10 Feb 2014', 'Tue, 18 Feb 2014', 'Fri, 07 Mar 2014', 'Mon, 31 Mar 2014', 'Mon, 07 Apr 2014', 'Tue, 10 Jun 2014', 'Mon, 30 Jun 2014', 'Mon, 22 Sep 2014', 'Mon, 06 Oct 2014', 'Fri, 14 Nov 2014', 'Tue, 18 Nov 2014', 'Fri, 26 Dec 2014', 'Thu, 15 Jan 2015', 'Mon, 23 Mar 2015', 'Mon, 20 Apr 2015'] new_dates = [] dates.each { |d| new_dates.push(Date.parse(d).strftime('%B %Y')) } sorted_dates = new_dates.map { |s| Date.strptime(s, '%B %Y') }.sort.uniq missing_months = [] sorted_dates.each_cons(2) do |d1,d2| d = d1 while d.next_month != d2 missing_months.push(d.next_month.strftime('%B %Y')) d = d >> 1 end end p missing_months => ["December 2013", "May 2014", "July 2014", "August 2014", "February 2015"]
Count occasions when my birthday falls on a weekend
I want to calculate how many times my birthday is on a weekend. declare v_count number; v_birthday date := '22-07-1993'; v_sysdate date := sysdate; begin --1) first i have to know all the dates when it was my birthday till sysdate. --2) then i have to convert it to a char(?) and look if its in the weekend (saturday or sunday) --3) if yes, count have to be increased by one, if not, go to the next birthday till sysdate. --4) show count in dbms.output_put_line(v_count); -- ("for i_counter in 1..10 loop" and "while i_counter <=10 loop" end; I think I have to use a LOOP, but I only know LOOPs with numbers, not dates.
This code will count the number of days that JULY 22 will occur on a weekend (SATURDAY or SUNDAY) between 1993 and 2014 inclusive: set serveroutput on size 100000 declare V_DATE DATE; I NUMBER; WEEKEND_COUNT NUMBER; begin WEEKEND_COUNT := 0; FOR I IN 1993 .. 2014 LOOP V_DATE := TO_DATE('07/22/' || TO_CHAR(I), 'MM/DD/YYYY'); DBMS_OUTPUT.PUT_LINE( 'DATE: ' || V_DATE || ' ' || 'DAY_OF_WEEK: ' || TO_CHAR(V_DATE, 'DY')); IF TO_CHAR(V_DATE, 'DY') = 'SAT' OR TO_CHAR(V_DATE, 'DY') = 'SUN' THEN WEEKEND_COUNT := WEEKEND_COUNT +1; END IF; END LOOP; DBMS_OUTPUT.PUT_LINE ('NUMBER OF BIRTHDAYS ON WEEKENDS: ' || WEEKEND_COUNT); end; The output is: DATE: 22-JUL-93 DAY_OF_WEEK: THU DATE: 22-JUL-94 DAY_OF_WEEK: FRI DATE: 22-JUL-95 DAY_OF_WEEK: SAT DATE: 22-JUL-96 DAY_OF_WEEK: MON DATE: 22-JUL-97 DAY_OF_WEEK: TUE DATE: 22-JUL-98 DAY_OF_WEEK: WED DATE: 22-JUL-99 DAY_OF_WEEK: THU DATE: 22-JUL-00 DAY_OF_WEEK: SAT DATE: 22-JUL-01 DAY_OF_WEEK: SUN DATE: 22-JUL-02 DAY_OF_WEEK: MON DATE: 22-JUL-03 DAY_OF_WEEK: TUE DATE: 22-JUL-04 DAY_OF_WEEK: THU DATE: 22-JUL-05 DAY_OF_WEEK: FRI DATE: 22-JUL-06 DAY_OF_WEEK: SAT DATE: 22-JUL-07 DAY_OF_WEEK: SUN DATE: 22-JUL-08 DAY_OF_WEEK: TUE DATE: 22-JUL-09 DAY_OF_WEEK: WED DATE: 22-JUL-10 DAY_OF_WEEK: THU DATE: 22-JUL-11 DAY_OF_WEEK: FRI DATE: 22-JUL-12 DAY_OF_WEEK: SUN DATE: 22-JUL-13 DAY_OF_WEEK: MON DATE: 22-JUL-14 DAY_OF_WEEK: TUE NUMBER OF BIRTHDAYS ON WEEKENDS: 6
How do you get DateTime.parse to return a time in your time zone?
I need this require 'date' DateTime.parse "Mon, Dec 27 6:30pm" to return a DateTime for 6:30pm in the EDT timezone, but it returns one in UTC. How can I get an EST DateTime or convert the UTC one into an EDT DateTime with a 6:30pm value?
OK I'm going to offer an answer to my own question require 'time' ENV["TZ"] = "US/Eastern" Time.parse("Mon, Dec 27 6:30pm").to_datetime => #<DateTime: 2011-12-27T18:30:00-05:00 (117884327/48,-5/24,2299161)>
In Rails, this worked nicely for me DateTime.parse "Mon, Dec 27 6:30pm #{Time.zone}" It won't work in vanilla Ruby though.
Final answer ;-) require 'date' estHoursOffset = -5 estOffset = Rational(estHoursOffset, 24) date = (DateTime.parse("Mon, Dec 27 6:30pm") - (estHoursOffset/24.0)).new_offset(estOffset) (or -4 for EDT)
DateTime#change() You can try using change() after parsing it to alter the timezone offset: DateTime.parse( "Mon, Dec 27 6:30pm" ).change( offset: '-0400' ) # => Wed, 27 Dec 2017 18:30:00 -0400 You can also just use the hours: DateTime.parse( "Mon, Dec 27 6:30pm" ).change( offset: '-4' ) # => Wed, 27 Dec 2017 18:30:00 -0400 But, be careful, you cannot use an integer: DateTime.parse( "Mon, Dec 27 6:30pm" ).change( offset: -4 ) # => Wed, 27 Dec 2017 18:30:00 +0000 If you need to determine the correct offset to use based on a time zone you can do something like this: offset = ( Time.zone_offset('EDT') / 1.hour ).to_s # => "-4" DateTime.parse( "Mon, Dec 27 6:30pm" ).change( offset: offset ) # => Wed, 27 Dec 2017 18:30:00 -0400 You can also use change() to manually set other parts of the DateTime as well, like setting the hour to noon: DateTime.parse( "Mon, Dec 27 6:30pm" ).change( offset: '-4', hour: 12 ) # => Wed, 27 Dec 2017 12:00:00 -0400 Be careful with that one because you can see that it's cleared the minutes as well. Here's the docs for the change() method: http://api.rubyonrails.org/v5.1/classes/DateTime.html#method-i-change
If you're using Rails' ActiveSupport: "Mon, Dec 27 6:30pm".in_time_zone(-4.hours).to_datetime # => Mon, 27 Dec 2021 18:30:00 -0400 Time.find_zone(-4.hours).parse("Mon, Dec 27 6:30pm").to_datetime # => Mon, 27 Dec 2021 18:30:00 -0400 If you want to use the local daylight saving time (DST) rules, you could use: "Mon, Dec 27 6:30pm".in_time_zone("Eastern Time (US & Canada)") # => Mon, 27 Dec 2021 18:30:00 EST -05:00 Time.find_zone("Eastern Time (US & Canada)").parse("Mon, Dec 27 6:30pm") # => Mon, 27 Dec 2021 18:30:00 EST -05:00 Time.find_zone("Eastern Time (US & Canada)").parse("Mon, Dec 27 6:30pm").to_datetime # => Mon, 27 Dec 2021 18:30:00 -0500 Time.find_zone("Eastern Time (US & Canada)").parse("Mon, Jun 27 6:30pm") # => Sun, 27 Jun 2021 18:30:00 EDT -04:00 Time.find_zone("Eastern Time (US & Canada)").parse("Mon, Jun 27 6:30pm").to_datetime # => Sun, 27 Jun 2021 18:30:00 -0400 Time.find_zone("EST5EDT").parse("Mon, Jun 27 6:30pm").to_datetime # => Sun, 27 Jun 2021 18:30:00 -0400 Notice the date in June, above, is automatically set to EDT (-0400) because this date is in DST, contrary to the December date. To force EST regardless if date is within DST or not: Time.find_zone("EST").parse("Mon, Jun 27 6:30pm") # => Sun, 27 Jun 2021 18:30:00 EST -05:00 Time.find_zone("EST").parse("Mon, Jun 27 6:30pm").to_datetime # => Sun, 27 Jun 2021 18:30:00 -0500