diff --git a/lib/naver_review_crawler.py b/lib/naver_review_crawler.py index 2ce6b36..a11f03b 100644 --- a/lib/naver_review_crawler.py +++ b/lib/naver_review_crawler.py @@ -1,3 +1,4 @@ +# naver_review_crawler.py import os, sys from datetime import datetime from selenium.webdriver.common.by import By @@ -7,9 +8,9 @@ from selenium.webdriver.support import expected_conditions as EC sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) from conf.config import ( - PLACE_IDS, START_DATE, END_DATE, DEBUG, + PLACE_IDS, START_DATE, END_DATE, DEBUG, MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID - ) +) from lib.send_message import MessageSender from lib.lib import ( create_mobile_driver, @@ -21,7 +22,13 @@ from lib.lib import ( send_failure_message ) -class NaverReviewMapCollector: + +def debug(msg): + if DEBUG: + print(f"[DEBUG] {msg}") + + +class NaverMapReviewCollector: def __init__(self): self.driver = None self.total_reviews = 0 @@ -30,40 +37,46 @@ class NaverReviewMapCollector: def extract_reviews(self): reviews = [] - WebDriverWait(self.driver, 10).until( - EC.presence_of_element_located((By.ID, "_review_list")) - ) - ul = self.driver.find_element(By.ID, "_review_list") - items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]') - for item in items: - try: - writer = "익명" - try: - writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip() - except: - pass + try: + WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.ID, "_review_list")) + ) + ul = self.driver.find_element(By.ID, "_review_list") + items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]') - date_obj = None + for item in items: try: - date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip() - date_obj = parse_korean_date(date_text) - except: - continue + writer = "익명" + try: + writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip() + except Exception: + pass - text = "" - try: - text = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML") - except: - continue + try: + date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip() + date_obj = parse_korean_date(date_text) + except Exception: + continue + + if not (self.start_date <= date_obj <= self.end_date): + continue + + try: + text_html = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML") + content = clean_html_text(text_html) + except Exception: + continue - if date_obj and (self.start_date <= date_obj <= self.end_date): reviews.append({ "작성자": writer, "작성일": date_obj, - "내용": clean_html_text(text) + "내용": content }) - except Exception as e: - print(f"[WARN] 리뷰 추출 실패: {e}") + + except Exception as e: + debug(f"[WARN] 리뷰 항목 처리 중 오류: {e}") + except Exception as e: + debug(f"[ERROR] 리뷰 리스트 접근 실패: {e}") return reviews def send_to_message(self): @@ -91,9 +104,10 @@ class NaverReviewMapCollector: lines.append("---") message = "\n".join(lines) + if not MESSAGE_PLATFORMS: - print("[WARN] 메시지 전송 플랫폼 없음") - print(f"[DEBUG] {message}") + print("[WARN] 메시지 전송 플랫폼이 지정되지 않음") + debug(message) return sender = MessageSender( @@ -103,8 +117,8 @@ class NaverReviewMapCollector: ) if DEBUG: - print("[DEBUG] 디버그 모드로 메시지 미전송") - print(message) + debug("디버그 모드로 메시지 전송 생략") + debug(message) else: sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False) @@ -114,10 +128,15 @@ class NaverReviewMapCollector: for place_id in PLACE_IDS: url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent" print(f"[INFO] 접근: {url}") - self.driver.get(url) - shop_name = extract_shop_name(self.driver) + try: + self.driver.get(url) + shop_name = extract_shop_name(self.driver) + except Exception as e: + print(f"[ERROR] {place_id} 매장 접근 오류: {e}") + continue + all_reviews = [] - seen = set() + seen = set() # (작성자, 작성일, 내용) 기준으로 중복 제거 while True: new_reviews = self.extract_reviews() @@ -135,6 +154,7 @@ class NaverReviewMapCollector: break all_reviews.extend(filtered) + if not click_more(self.driver): break @@ -154,6 +174,7 @@ class NaverReviewMapCollector: else: self.send_to_message() + if __name__ == "__main__": - collector = NaverReviewMapCollector() + collector = NaverMapReviewCollector() collector.run()