|
|
|
from selenium import webdriver |
|
from selenium.webdriver.common.by import By |
|
from selenium.webdriver.support.ui import WebDriverWait |
|
import time |
|
|
|
|
|
""" |
|
This script uses Selenium to scrape data from a paginated table on a webpage.

It initializes a Chrome webdriver and loads the webpage defined by the url variable.
It then clicks the modal pop-up that appears when the webpage is loaded.

The script then enters a loop that scrapes each page of the table:
it locates the data table's scroll body, collects the table rows, and, for each
data row, extracts the name, region, and description cells and appends them
to the dai_values list as a dictionary.

After each page, the script finds the 'Next' button and checks whether it is disabled.
If the button is disabled, the loop ends.
Otherwise the script scrolls the button into view and clicks it to move on to the
next page, pausing for 2 seconds between pages.

Finally, the script quits the webdriver.
|
""" |
|
|
|
|
|
# Scrape every page of the paginated inventory table at `url` into
# `dai_values`, producing one dict per three-cell table row.
url = 'https://www.hdrn.ca/en/inventory/'

# Collected rows, each {'name': ..., 'region': ..., 'description': ...}.
dai_values = []

driver = webdriver.Chrome()

# Everything that can fail runs inside try/finally so the browser process is
# always shut down, even when a lookup or click raises.
try:
    driver.get(url)

    wait = WebDriverWait(driver, 2)

    # Click the modal as soon as it is present in the DOM. Polling (up to the
    # 2-second timeout) avoids a NoSuchElementException when the pop-up is
    # rendered slightly after the page load returns.
    wait.until(lambda d: d.find_element(By.ID, 'myModal')).click()

    while True:
        # The last 'dataTables_scrollBody' container on the page is the one
        # whose rows are scraped.
        data_tables_scroll = driver.find_elements(
            By.CLASS_NAME, 'dataTables_scrollBody'
        )[-1]
        table = data_tables_scroll.find_elements(By.TAG_NAME, 'tr')

        for row in table:
            row_values = row.find_elements(By.TAG_NAME, 'td')

            # The unpacking below needs exactly three cells; skip header rows
            # (no <td>) and any row with a different cell count instead of
            # raising ValueError.
            if len(row_values) != 3:
                continue

            name, region, description = row_values
            dai_values.append({
                'name': name.text,
                'region': region.text,
                'description': description.text,
            })

        next_button = driver.find_elements(By.ID, 'thelist_next')

        # Stop when there is no 'Next' control at all, or when its class list
        # marks it as disabled (last page reached).
        if not next_button:
            break
        if 'disabled' in (next_button[0].get_attribute('class') or ''):
            break

        # Bring the button into the viewport before clicking it. Note that
        # WebDriver itself has no click() method; only the element is clicked.
        driver.execute_script("arguments[0].scrollIntoView();", next_button[0])
        next_button[0].click()

        # Give the table time to re-render before the next iteration reads it.
        time.sleep(2)
finally:
    driver.quit()
|
|