Python Selenium Scrape The Whole Table
The purpose of this code is to scrape a data table from some links and then turn it into a pandas data frame. The problem is that this code scrapes only the first 7 rows, which a…
Solution 1:
Check out the script below to get the whole table from that webpage. I've used a hard-coded delay in my script, which is not good practice. However, you can always define an Explicit Wait
to make the code more robust:
# Scrape the event-history table after expanding it once with the
# "show more" link, then print every row as a list of cell texts.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

url = 'https://www.investing.com/economic-calendar/investing.com-eur-usd-index-1155'

driver = webdriver.Chrome()
try:
    driver.get(url)

    # Trigger the click from JavaScript instead of WebDriver's native click.
    show_more = driver.find_element(By.XPATH, '//*[contains(@id,"showMoreHistory")]/a')
    driver.execute_script("arguments[0].click();", show_more)

    # Hard-coded pause so the additional rows have time to load; an
    # explicit wait would be more robust.
    time.sleep(2)

    for row in driver.find_elements(By.XPATH, '//*[contains(@id,"eventHistoryTable")]//tr'):
        # Collect both header (th) and data (td) cells of the row.
        data = [cell.text for cell in row.find_elements(By.XPATH, ".//*[self::td or self::th]")]
        print(data)
finally:
    # Always close the browser, even if locating or clicking fails.
    driver.quit()
To get all the data by exhausting the "show more" button, along with defining an Explicit Wait, you can try the script below:
# Scrape the whole event-history table: keep clicking the "show more" link
# until it can no longer be found or clicked, then print every row as a
# list of cell texts.
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

url = 'https://www.investing.com/economic-calendar/investing.com-eur-usd-index-1155'

driver = webdriver.Chrome()
try:
    driver.get(url)
    wait = WebDriverWait(driver, 10)

    while True:
        try:
            show_more = wait.until(EC.visibility_of_element_located(
                (By.XPATH, '//*[contains(@id,"showMoreHistory")]/a')))
            # Trigger the click from JavaScript instead of WebDriver's native click.
            driver.execute_script("arguments[0].click();", show_more)
        except WebDriverException:
            # The wait timed out (or the click failed): treat the table as
            # fully expanded and stop clicking.
            break

    rows = wait.until(EC.visibility_of_all_elements_located(
        (By.XPATH, '//*[contains(@id,"eventHistoryTable")]//tr')))
    for row in rows:
        # Collect both header (th) and data (td) cells of the row.
        data = [cell.text for cell in row.find_elements(By.XPATH, ".//*[self::td or self::th]")]
        print(data)
finally:
    # Always close the browser, even if a wait or lookup fails.
    driver.quit()
Solution 2:
As per your question and the url https://www.investing.com/economic-calendar/investing.com-eur-usd-index-1155
to scrape the whole table you can use the following solution:
Code Block:
# -*- coding: UTF-8 -*-
# Scrape the whole event-history table: click "show more" until no new rows
# arrive within the timeout, then print the text of every collected row.
# Uses the Selenium 4 constructor style (Service object + options=).
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

URL = "https://www.investing.com/economic-calendar/investing.com-eur-usd-index-1155"
TABLE_SELECTOR = "table.genTbl.openTbl.ecHistoryTbl#eventHistoryTable1155"
HEADER_SELECTOR = TABLE_SELECTOR + " tr>th.left.symbol"
ROW_SELECTOR = TABLE_SELECTOR + " tr[event_attr_id='1155']"
SHOW_MORE_SELECTOR = "div#showMoreHistory1155>a"
TIMEOUT = 20  # seconds, used for every explicit wait below

options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_argument('disable-infobars')
driver = webdriver.Chrome(
    service=Service(executable_path=r'C:\WebDrivers\chromedriver.exe'),
    options=options,
)
try:
    driver.get(URL)

    # Scroll the table header into view before interacting with the table.
    table_header = WebDriverWait(driver, TIMEOUT).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, HEADER_SELECTOR)))
    driver.execute_script("arguments[0].scrollIntoView(true);", table_header)

    # Start from the rows that are already visible, so they are still
    # printed if the very first "show more" click times out.
    table_rows = WebDriverWait(driver, TIMEOUT).until(
        EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ROW_SELECTOR)))
    row_count = len(table_rows)

    while True:
        try:
            WebDriverWait(driver, TIMEOUT).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, SHOW_MORE_SELECTOR))).click()
            # Wait until the click has actually added rows to the table.
            WebDriverWait(driver, TIMEOUT).until(
                lambda d: len(d.find_elements(By.CSS_SELECTOR, ROW_SELECTOR)) > row_count)
        except TimeoutException:
            # Either the link is no longer clickable or no new rows
            # appeared: the table is fully expanded.
            break
        table_rows = driver.find_elements(By.CSS_SELECTOR, ROW_SELECTOR)
        row_count = len(table_rows)

    for row in table_rows:
        print(row.text)
finally:
    # Always close the browser, even if a wait fails.
    driver.quit()
Console Output:
Sep24,2018 01:30Sep17,2018 01:3053.1%55.3%Sep10,2018 01:3055.3%49.0%Sep03,2018 01:3049.0%43.3%Aug27,2018 01:3043.3%49.7%Aug20,2018 01:3049.7%52.5%Aug13,2018 01:3052.5%59.9%Aug06,2018 01:3059.9%62.6%Jul30,2018 01:3062.6%52.8%Jul23,2018 01:3052.8%52.7%Jul16,2018 01:3052.7%46.2%Jul10,2018 01:3046.2%55.3%Jul02,2018 01:3055.3%53.1%Jun25,2018 01:3053.1%66.2%Jun18,2018 01:3066.2%65.2%Jun11,2018 01:3065.2%61.2%Jun04,2018 01:3061.2%63.9%May28,2018 01:3063.9%67.0%May21,2018 01:3067.0%63.2%May14,2018 01:3063.2%61.3%May07,2018 01:3061.3%57.6%Apr30,2018 01:3057.6%64.8%Apr23,2018 01:3064.8%65.2%Apr16,2018 01:3065.2%60.4%Apr09,2018 01:3060.4%63.3%Apr02,2018 01:3063.3%62.1%Mar26,2018 01:3062.1%65.7%Mar19,2018 02:3065.7%56.0%Mar12,2018 02:3056.0%62.3%Mar05,2018 02:3062.3%59.1%Feb26,2018 02:3059.1%52.8%Feb19,2018 02:3052.8%55.8%Feb12,2018 02:3055.8%51.7%Feb05,2018 02:3051.7%56.8%Jan29,2018 02:3056.8%52.2%Jan22,2018 02:3052.2%56.1%Jan15,2018 02:3056.1%60.2%Jan08,2018 02:3060.2%54.6%Jan01,2018 02:3054.6%48.4%Dec25,2017 02:3048.4%66.4%Dec18,2017 02:3066.4%58.9%Dec11,2017 02:3058.9%53.8%Dec04,2017 02:3053.8%55.9%Nov28,2017 02:3055.9%53.7%Nov20,2017 02:3053.7%58.6%Nov14,2017 02:3058.6%52.8%Nov06,2017 02:3052.8%57.6%Oct30,2017 01:3057.6%54.7%Oct23,2017 01:3054.7%58.9%Oct16,2017 01:3058.9%57.3%Oct09,2017 01:3057.3%64.0%Oct02,2017 01:3064.0%47.5%Sep25,2017 01:3047.5%52.2%Sep18,2017 01:3052.2%55.5%Sep11,2017 01:3055.5%54.3%Sep04,2017 01:3054.3%54.2%Aug28,2017 01:3054.2%51.4%Aug21,2017 01:3051.4%57.4%Aug14,2017 01:3057.4%51.2%Aug07,2017 01:3051.2%51.3%Jul31,2017 01:3051.3%52.8%Jul24,2017 01:3052.8%53.3%Jul17,2017 01:3053.3%54.1%Jul10,2017 01:3054.1%51.9%Jul03,2017 01:3051.9%40.6%Jun26,2017 01:3040.6%52.6%Jun19,2017 01:3052.6%51.0%Jun12,2017 01:3051.0%52.1%Jun05,2017 01:3052.1%59.1%May29,2017 01:3059.1%46.9%May22,2017 01:3046.9%53.0%May15,2017 01:3053.0%44.9%May08,2017 01:3044.9%37.0%May01,2017 01:3037.0%43.0%Apr24,2017 01:3043.0%52.4%Apr10,2017 01:3052.4%55.1%Apr03,2017 
01:3055.1%43.5%Mar27,2017 02:3043.5%36.0%Mar20,2017 02:3036.0%32.3%Mar13,2017 02:3032.3%42.8%Mar06,2017 02:3042.8%39.1%Feb27,2017 02:3039.1%41.7%Feb20,2017 02:3041.7%43.2%Feb13,2017 02:3043.2%36.6%Feb06,2017 02:3036.6%39.7%Jan30,2017 02:3039.7%33.5%Jan23,2017 02:3033.5%36.8%Jan16,2017 03:3036.8%37.0%Jan09,2017 02:3037.0%41.6%Jan02,2017 02:3041.6%35.8%Dec26,2016 02:3035.8%42.3%Dec19,2016 02:3042.3%39.7%Dec12,2016 04:1539.7%33.8%Dec05,2016 02:3033.8%37.1%Nov29,2016 02:3037.1%41.9%Nov21,2016 02:3041.9%39.1%Nov15,2016 02:0039.1%20.5%Nov07,2016 02:3020.5%27.4%Oct31,2016 02:3027.4%33.4%Oct25,2016 02:3033.4%30.8%Oct18,2016 02:3030.8%26.6%Oct10,2016 02:3026.6%28.6%Oct05,2016 02:0028.6%26.2%Sep26,2016 02:3026.2%34.8%Sep19,2016 02:3034.8%21.2%Sep13,2016 02:3021.2%27.0%Sep05,2016 02:3027.0%32.7%Aug29,2016 02:3032.7%23.9%Aug22,2016 02:3023.9%28.8%Aug15,2016 02:3028.8%30.8%Aug08,2016 02:3030.8%20.3%Aug01,2016 02:3020.3%30.2%Jul25,2016 02:3030.2%29.5%Jul18,2016 02:3029.5%26.2%Jul11,2016 02:3026.2%27.5%Jul04,2016 02:3027.5%26.8%Jun27,2016 02:3026.8%35.1%Jun20,2016 02:3035.1%22.8%Jun13,2016 02:3022.8%32.5%Jun06,2016 02:3032.5%35.6%May30,2016 02:3035.6%39.5%May23,2016 02:3039.5%37.8%May16,2016 03:3037.8%39.5%May09,2016 02:3039.5%30.3%May02,2016 02:3030.3%32.9%Apr25,2016 02:3032.9%29.6%Apr18,2016 06:0029.6%30.5%Apr11,2016 02:3030.5%22.7%Apr04,2016 03:3022.7%32.1%Mar28,2016 03:3032.1%23.2%Mar21,2016 03:3023.2%26.7%Mar14,2016 03:3026.7%22.6%Mar07,2016 03:3022.6%33.7%Feb29,2016 03:3033.7%34.8%Feb22,2016 03:3034.8%33.3%Feb15,2016 03:3033.3%33.3%Feb08,2016 03:3033.3%34.3%Feb01,2016 03:3034.3%33.2%Jan25,2016 03:3033.2%27.0%Jan18,2016 03:3027.0%27.2%Jan11,2016 03:3027.2%30.0%Jan05,2016 03:3030.0%24.0%Dec29,2015 03:3024.0%33.3%Dec21,2015 03:3033.3%31.2%Dec14,2015 04:3031.2%27.1%Dec07,2015 03:0027.1%29.8%Dec01,2015 03:0029.8%27.5%Nov23,2015 03:0027.5%33.1%Nov17,2015 04:0033.1%26.8%Nov09,2015 02:3026.8%24.3%Nov02,2015 01:3024.3%36.4%Oct26,2015 01:3036.4%28.6%Oct19,2015 
01:3028.6%25.5%Oct11,2015 04:3025.5%29.6%Oct06,2015 01:0029.6%28.5%Sep28,2015 01:3028.5%29.1%Sep21,2015 01:3029.1%21.2%Sep14,2015 01:3021.2%29.8%Sep07,2015 01:3029.8%36.3%Aug31,2015 01:3036.3%35.6%Aug24,2015 01:3035.6%26.4%Aug17,2015 01:3026.4%24.8%Aug10,2015 01:3024.8%29.7%Aug03,2015 01:3029.7%24.8%Jul27,2015 01:3024.8%30.7%Jul20,2015 01:3030.7%27.9%Jul13,2015 01:3027.9%27.4%Jul07,2015 01:3027.4%26.8%Jun29,2015 01:3026.8%33.1%Jun22,2015 01:3033.1%33.6%Jun15,2015 03:3033.6%28.9%Jun08,2015 01:3028.9%23.0%Jun01,2015 01:3023.0%34.0%May25,2015 04:0034.0%28.9%May18,2015 01:3028.9%28.8%May11,2015 01:3028.8%28.3%May04,2015 02:0028.3%23.7%Apr27,2015 01:3023.7%27.2%Apr20,2015 01:3027.2%33.7%Apr13,2015 02:0033.7%23.2%Apr06,2015 02:0023.2%19.8%Mar30,2015 02:3019.8%24.1%Mar23,2015 02:3024.1%27.2%Mar16,2015 03:0027.2%35.6%Mar09,2015 02:3035.6%34.4%Mar02,2015 02:3034.4%30.2%Feb23,2015 02:3030.2%26.6%Feb16,2015 03:3026.6%23.8%Feb09,2015 02:3023.8%26.4%Feb02,2015 02:3026.4%23.9%Jan26,2015 02:3023.9%28.9%Jan19,2015 02:3028.9%35.5%Jan12,2015 02:3035.5%38.1%Jan06,2015 03:3038.1%40.6%Jan01,2015 02:3040.6%45.2%Dec22,2014 02:0045.2%39.8%Dec15,2014 02:0039.8%41.7%Dec07,2014 21:0041.7%33.8%Dec02,2014 03:0033.8%38.6%Nov24,2014 01:3038.6%39.2%Nov17,2014 01:0039.2%33.1%Nov10,2014 01:0033.1%35.4%Nov04,2014 03:0035.4%37.3%Oct27,2014 02:0037.3%33.7%Oct19,2014 22:0033.7%36.2%Oct13,2014 01:0036.2%44.5%Oct06,2014 01:0044.5%41.3%Sep29,2014 01:0041.3%50.3%Sep21,2014 22:3550.3%39.5%Sep15,2014 00:4539.5%39.9%Sep08,2014 01:0039.9%42.8%Sep01,2014 02:3542.8%41.9%Aug25,2014 01:0041.9%38.9%Aug18,2014 01:0038.9%34.0%Aug11,2014 01:0034.0%38.2%Aug04,2014 01:0038.2%38.4%Jul28,2014 01:0038.4%42.3%Jul21,2014 01:0042.3%37.2%Jul14,2014 01:0037.2%39.6%Jul07,2014 01:0039.6%39.8%Jun30,2014 01:0039.8%36.1%Jun23,2014 00:3036.1%37.6%Jun16,2014 00:3037.6%36.5%Jun09,2014 00:3036.5%44.1%Jun01,2014 22:0044.1%49.4%May26,2014 00:3049.4%41.0%May19,2014 00:0041.0%55.0%May12,2014 00:0055.0%41.1%May04,2014 
06:0041.1%43.5%Apr27,2014 06:0043.5%40.3%Apr06,2014 06:0040.3%
Post a Comment for "Python Selenium Scrape The Whole Table"