diff --git a/naver_review/config.sample.py b/naver_review/config.sample.py index 8703c00..df2ea42 100644 --- a/naver_review/config.sample.py +++ b/naver_review/config.sample.py @@ -1,3 +1,5 @@ # config.py -PLACE_ID = "네이버 플레이스 ID" # 네이버 플레이스 ID -MAX_REVIEWS = 30 # 수집할 최대 리뷰 수 +PLACE_IDS = ["플레이스 ID 1", "플레이스 ID 2"] # 여러 플레이스 ID 가능 +MAX_REVIEWS = 100 # 각 플레이스당 최대 수집 수 +START_DATE = "2025-07-01" # 필터링 시작일 +END_DATE = "2025-07-03" # 필터링 종료일 diff --git a/naver_review/main.py b/naver_review/main.py index 879bb51..d10b0b1 100644 --- a/naver_review/main.py +++ b/naver_review/main.py @@ -3,11 +3,11 @@ from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -import config # config.py에서 PLACE_ID, MAX_REVIEWS를 불러옴 +from datetime import datetime +import config def setup_driver(): chrome_options = Options() - # 헤드리스 모드 필요하면 아래 주석 해제 chrome_options.add_argument("--headless=new") chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--disable-dev-shm-usage") @@ -15,42 +15,80 @@ def setup_driver(): "--user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 13_5 like Mac OS X) " "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Mobile/15E148 Safari/604.1" ) - print("[INFO] Chrome WebDriver 실행 중...") return webdriver.Chrome(options=chrome_options) -def crawl_naver_place_reviews(place_id, max_reviews=50): +def click_more(driver): + try: + wait = WebDriverWait(driver, 5) + more_btn = wait.until(EC.element_to_be_clickable(( + By.XPATH, '//div[contains(@class, "place_section_content")]/following-sibling::div//a[.//span[text()="더보기"]]'))) + driver.execute_script("arguments[0].click();", more_btn) + print("[INFO] '더보기' 버튼 클릭됨") + except Exception: + print("[INFO] '더보기' 버튼 없음 또는 클릭 실패") + +def parse_date(date_str): + try: + return datetime.strptime(date_str.replace(".", "-"), "%Y-%m-%d") + except Exception: + return None + +def is_within_range(date_str, start, end): + d = parse_date(date_str) + if not d: + return False + return start <= d <= end + +def get_place_name(driver): + try: + title_div = driver.find_element(By.ID, "_title") + name = title_div.find_element(By.XPATH, './div[1]/span').text.strip() + return name + except Exception as e: + print(f"[WARN] 업체명 추출 실패: {e}") + return "업체명 없음" + +def crawl_naver_place_reviews(place_id, max_reviews, start_date, end_date): url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent" driver = setup_driver() - print(f"[INFO] 리뷰 페이지 접속: {url}") driver.get(url) wait = WebDriverWait(driver, 10) wait.until(EC.presence_of_element_located((By.ID, "_review_list"))) + place_name = get_place_name(driver) + click_more(driver) + ul = driver.find_element(By.ID, "_review_list") items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]') reviews = [] - for item in items: try: # 작성자 + writer = "익명" try: - writer = item.find_element(By.XPATH, './div[1]/a[2]/div[1]/span/span').text.strip() + writer = item.find_element(By.XPATH, './div[1]/a[2]/div/span/span').text.strip() except: - writer = "익명" + pass - # 날짜 (요청하신 정확한 위치) + # 작성일 + date = "" try: - date = item.find_element(By.XPATH, './div[7]/div[2]/div/span[1]/span[2]').text.strip() + date_elem = item.find_element(By.XPATH, './div[7]/div[2]/div/span[1]/span[2]') + date = date_elem.text.strip().replace(" ", "").replace("년", "-").replace("월", "-").replace("일", "") except: - date = "날짜 없음" + continue + + if not is_within_range(date, start_date, end_date): + continue # 리뷰 본문 + text = "" try: text = item.find_element(By.XPATH, './div[5]/a').text.strip() except: - text = "" + continue if text: reviews.append({ @@ -62,20 +100,19 @@ def crawl_naver_place_reviews(place_id, max_reviews=50): if len(reviews) >= max_reviews: break except Exception as e: - print(f"[WARN] 리뷰 추출 실패: {e}") - - - - + print(f"[WARN] 리뷰 파싱 실패: {e}") driver.quit() - print(f"[DONE] 총 {len(reviews)}개 리뷰 수집 완료") - return reviews + print(f"[DONE] {place_name} ({place_id}) → 총 {len(reviews)}개 리뷰 수집") + return place_name, reviews if __name__ == "__main__": - result = crawl_naver_place_reviews(config.PLACE_ID, config.MAX_REVIEWS) + start_dt = datetime.strptime(config.START_DATE, "%Y-%m-%d") + end_dt = datetime.strptime(config.END_DATE, "%Y-%m-%d") - print("\n==== [리뷰 목록] ====") - for i, r in enumerate(result, 1): - print(f"{i}. 작성자: {r['writer']}, 날짜: {r['date']}") - print(f" 내용: {r['text']}\n") + for place_id in config.PLACE_IDS: + place_name, result = crawl_naver_place_reviews(place_id, config.MAX_REVIEWS, start_dt, end_dt) + print(f"\n==== [업체명: {place_name}] ====") + for i, r in enumerate(result, 1): + print(f"{i}. 작성자: {r['writer']}, 날짜: {r['date']}") + print(f" 내용: {r['text']}\n")