-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Description
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Scrape apartment listing cards from bina.az by scrolling the listing page
# step by step, then write the unique cards that were seen to a CSV file.

BASE_URL = "https://bina.az/alqi-satqi/menziller"
TARGET = 2000  # stop once this many unique listings have been collected
SCROLL_STEP = 600  # pixels added to the scroll offset on every iteration
SCROLL_PAUSE = 2  # seconds to sleep after each scroll before re-reading the page
BOTTOM_MARGIN = 800  # offset + margin >= page height counts as "at the bottom"
MAX_BOTTOM_HITS = 5  # consecutive "at the bottom" checks before giving up
PAGE_LOAD_TIMEOUT = 30  # seconds
FIRST_CARDS_TIMEOUT = 15  # seconds to wait for the first cards to appear
CARD_SELECTOR = 'div[data-cy="item-card"]'
LINK_SELECTOR = 'a[data-cy="item-card-link"]'
OUTPUT_CSV = "bina_scroll.csv"
COLUMNS = ["link", "aria_label", "card_text"]


def normalize_link(href):
    """Return *href* without its query string; ``None`` or empty gives ``""``."""
    return (href or "").split("?")[0]


def add_unique(rows, seen, record, limit):
    """Append *record* to *rows* unless it was already seen or *rows* is full.

    Args:
        rows: list of collected records, extended in place.
        seen: set of keys of the records already in *rows*, updated in place.
        record: dict with the keys ``link``, ``aria_label`` and ``card_text``.
        limit: maximum number of records *rows* may hold.

    Returns:
        True if the record was appended, False otherwise.
    """
    if len(rows) >= limit:
        return False
    # The link identifies a listing; when a card has no link, fall back to
    # its visible content so distinct link-less cards are still kept.
    key = record["link"] or (record["aria_label"], record["card_text"])
    if key in seen:
        return False
    seen.add(key)
    rows.append(record)
    return True


def create_driver():
    """Start Firefox with images and pop-ups disabled and return the driver."""
    options = Options()
    # No headless argument is added, so Firefox opens a visible window.
    options.set_preference("permissions.default.image", 2)  # do not load images
    options.set_preference("dom.disable_open_during_load", True)  # block pop-ups
    driver = webdriver.Firefox(options=options)
    driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT)
    # No implicit wait is configured: Selenium's documentation advises against
    # combining implicit and explicit waits, and an implicit wait would also
    # delay every failed per-card link lookup in read_card().
    return driver


def read_card(card):
    """Extract the link, aria-label and visible text of one listing card.

    Raises:
        NoSuchElementException: the card contains no link element.
        StaleElementReferenceException: the card left the DOM while being read.
    """
    link = card.find_element(By.CSS_SELECTOR, LINK_SELECTOR)
    return {
        "link": normalize_link(link.get_attribute("href")),
        "aria_label": link.get_attribute("aria-label") or "",
        "card_text": card.text.strip(),
    }


def collect_rows(driver, target=TARGET):
    """Scroll the loaded page and return up to *target* unique card records.

    Every pass re-reads all cards currently in the DOM, so cards already
    collected on an earlier pass are filtered out through a ``seen`` set.
    Scrolling stops when *target* is reached or when the scroll offset has
    been within ``BOTTOM_MARGIN`` pixels of the page height for
    ``MAX_BOTTOM_HITS`` consecutive passes.
    """
    rows = []
    seen = set()
    scroll_y = 0
    bottom_hits = 0

    while len(rows) < target:
        try:
            cards = driver.find_elements(By.CSS_SELECTOR, CARD_SELECTOR)
        except WebDriverException:
            # Best effort: treat a failed lookup as "no cards on this pass".
            cards = []
        print(f"Scroll pozisiyası: {scroll_y}, kart sayı: {len(cards)}")

        for card in cards:
            try:
                record = read_card(card)
            except (NoSuchElementException, StaleElementReferenceException):
                # Card has no link or was re-rendered mid-read; skip it.
                continue
            add_unique(rows, seen, record, target)
            if len(rows) >= target:
                return rows

        scroll_y += SCROLL_STEP
        driver.execute_script(f"window.scrollTo(0, {scroll_y});")
        time.sleep(SCROLL_PAUSE)

        page_height = driver.execute_script("return document.body.scrollHeight")
        if scroll_y + BOTTOM_MARGIN >= page_height:
            bottom_hits += 1
            if bottom_hits >= MAX_BOTTOM_HITS:
                print("Səhifənin sonuna çatdı, yeni kontent gəlmir.")
                break
        else:
            # The page grew past the scroll offset, so keep going.
            bottom_hits = 0

    return rows


def main():
    """Open the listing page, collect the cards and save them to OUTPUT_CSV.

    Raises:
        SystemExit: with status 1 when no card appears within
            ``FIRST_CARDS_TIMEOUT`` seconds.
    """
    driver = create_driver()
    try:
        driver.get(BASE_URL)
        try:
            WebDriverWait(driver, FIRST_CARDS_TIMEOUT).until(
                EC.presence_of_all_elements_located(
                    (By.CSS_SELECTOR, CARD_SELECTOR)
                )
            )
        except TimeoutException:
            print("İlk kartlar tapılmadı, çıxıram.")
            raise SystemExit(1) from None
        rows = collect_rows(driver, TARGET)
    finally:
        # Always close the browser, including on errors and early exit.
        driver.quit()

    # Explicit columns keep the CSV header even when nothing was collected.
    df = pd.DataFrame(rows, columns=COLUMNS)
    df.to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")
    print(f"Hazırdır: {OUTPUT_CSV}")
    print("Sətir sayı:", len(df))


if __name__ == "__main__":
    main()