From bbb17ef362acafe4a97784e4ed61d75f8e2c92cb Mon Sep 17 00:00:00 2001
From: KWON
Date: Fri, 11 Jul 2025 09:27:48 +0900
Subject: [PATCH] Remove recursion from run(), narrow except clauses, and
 refactor review parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove recursion: run() no longer re-invokes itself; retries are handled
  by a loop instead.
- Narrow exception handling: bare `except:` replaced with `except Exception:`.
- Harden review date parsing: guard strptime calls against parse errors.
- Factor out duplicated logic: written-date and review-body extraction moved
  into helper methods.
- Add a debug() helper to unify debug logging.
- Clearer handling of message send failures: log the message and still
  attempt delivery.
---
 lib/biz_crawler.py | 145 ++++++++++++++++++++++++---------------------
 1 file changed, 79 insertions(+), 66 deletions(-)

diff --git a/lib/biz_crawler.py b/lib/biz_crawler.py
index a4b9a2f..67b31d4 100644
--- a/lib/biz_crawler.py
+++ b/lib/biz_crawler.py
@@ -1,3 +1,4 @@
+# biz_crawler.py
 import os, sys
 import time
 from datetime import datetime
@@ -22,6 +23,10 @@ from lib.lib import (
     clean_html_text
 )
 
+def debug(msg):
+    if DEBUG:
+        print(f"[DEBUG] {msg}")
+
 class NaverReviewCollector:
     def __init__(self, headless=HEADLESS):
         self.headless = headless
@@ -41,7 +46,7 @@ class NaverReviewCollector:
         try:
             modal = wait.until(EC.presence_of_element_located((By.ID, "modal-root")))
             modal.find_element(By.XPATH, './/button').click()
-        except:
+        except Exception:
             pass
 
         try:
@@ -49,7 +54,6 @@ class NaverReviewCollector:
             self.driver.find_element(By.ID, 'pw').send_keys(NAVER_PW)
             self.driver.find_element(By.XPATH, '//button[@type="submit"]').click()
         except Exception:
-            self.driver.quit()
             return False
 
         time.sleep(3)
@@ -70,9 +74,30 @@ class NaverReviewCollector:
                 EC.presence_of_element_located((By.XPATH, '//*[starts-with(@class, "Header_btn_select_")]'))
             )
             return el.text.strip()
-        except:
+        except Exception:
             return "알수없음"
 
+    def extract_written_date(self, spans, li):
+        labels = [s.text.strip() for s in spans]
+        try:
+            if "작성일" in labels:
+                idx = labels.index("작성일")
+                return spans[idx + 1].find_element(By.TAG_NAME, "time").text.strip()
+            elif "예약자" in labels:
+                return li.find_element(By.XPATH, ".//div[3]/div[1]/span[2]/time").text.strip()
+        except Exception:
+            return None
+
+    def extract_review_text(self, li):
+        for i in range(4, 7):
+            try:
+                el = li.find_element(By.XPATH, f"./div[{i}]/a")
+                if el and el.text.strip():
+                    return clean_html_text(el.get_attribute("innerHTML"))
+            except Exception:
+                continue
+        return None
+
     def extract_reviews(self):
         reviews = []
         try:
@@ -81,59 +106,44 @@ class NaverReviewCollector:
             )
             lis = self.driver.find_elements(By.XPATH, "//ul[starts-with(@class, 'Review_columns_list')]/li")
             for li in lis:
-                if "Review_banner__" in li.get_attribute("class"):
-                    continue
-
                 try:
+                    if "Review_banner__" in li.get_attribute("class"):
+                        continue
+
                     author = li.find_element(By.XPATH, ".//div[1]/a[2]/div/span/span").text.strip()
                     visit_text = li.find_element(By.XPATH, ".//div[2]/div[1]/span[2]/time").text.strip()
-                    visit_date = datetime.strptime(visit_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d").strftime("%Y-%m-%d")
+                    visit_date = datetime.strptime(
+                        visit_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d"
+                    ).strftime("%Y-%m-%d")
 
                     spans = li.find_elements(By.XPATH, ".//div[2]/div[2]/span")
-                    labels = [s.text.strip() for s in spans]
-                    written_text = None
-
-                    if "작성일" in labels:
-                        idx = labels.index("작성일")
-                        written_text = spans[idx + 1].find_element(By.TAG_NAME, "time").text.strip()
-                    elif "예약자" in labels:
-                        try:
-                            written_text = li.find_element(By.XPATH, ".//div[3]/div[1]/span[2]/time").text.strip()
-                        except:
-                            continue
-
+                    written_text = self.extract_written_date(spans, li)
                     if not written_text:
                         continue
 
-                    written_date = datetime.strptime(written_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d").date()
+                    try:
+                        written_date = datetime.strptime(
+                            written_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d"
+                        ).date()
+                    except ValueError:
+                        continue
+
                     if not (self.start_date <= written_date <= self.end_date):
                         continue
 
-                    content_el = None
-                    for i in range(4, 7):
-                        try:
-                            el = li.find_element(By.XPATH, f"./div[{i}]/a")
-                            if el and el.text.strip():
-                                content_el = el
-                                break
-                        except:
-                            continue
-                    if content_el is None:
+                    text = self.extract_review_text(li)
+                    if not text:
                         continue
 
-                    html = content_el.get_attribute("innerHTML")
-                    text = clean_html_text(html)
-
                     reviews.append({
                         "작성자": author,
                         "방문일": visit_date,
                         "작성일": written_date,
                         "내용": text
                     })
-
-                except:
+                except Exception:
                     continue
-        except:
+        except Exception:
             pass
         return reviews
 
@@ -163,9 +173,10 @@ class NaverReviewCollector:
             lines.append("---")
 
         message = "\n".join(lines)
+
         if not MESSAGE_PLATFORMS:
             print("[WARN] 메시지 전송 플랫폼이 지정되지 않음. 미전송")
-            print(f"[DEBUG] {message}")
+            debug(message)
             return
 
         sender = MessageSender(
@@ -175,38 +186,40 @@ class NaverReviewCollector:
         )
 
         if DEBUG:
-            print(f"[DEBUG] message platform : {MESSAGE_PLATFORMS}")
-            print("[DEBUG] 디버그 모드 메시지 미전송")
-            print(f"[DEBUG] {message}")
+            debug(f"메시지 플랫폼: {MESSAGE_PLATFORMS}")
+            debug("디버그 모드: 메시지 전송 생략")
+            debug(message)
         else:
             sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
 
     def run(self):
-        self.create_driver()
-        self.driver.get("https://naver.com")
-        time.sleep(1)
+        while True:
+            self.create_driver()
+            self.driver.get("https://naver.com")
+            time.sleep(1)
 
-        if os.path.exists(COOKIE_FILE):
-            try:
-                load_cookies(self.driver, COOKIE_FILE)
-                self.driver.get("https://naver.com")
-                time.sleep(1)
-            except:
-                os.remove(COOKIE_FILE)
+            if os.path.exists(COOKIE_FILE):
+                try:
+                    load_cookies(self.driver, COOKIE_FILE)
+                    self.driver.get("https://naver.com")
+                    time.sleep(1)
+                except Exception:
+                    os.remove(COOKIE_FILE)
+                    self.driver.quit()
+                    self.headless = False
+                    continue
+            else:
+                if self.headless:
+                    self.driver.quit()
+                    self.headless = False
+                    continue
+                if not self.perform_login():
+                    self.driver.quit()
+                    return
                 self.driver.quit()
-                NaverReviewCollector(headless=False).run()
-                return
-        else:
-            if self.headless:
-                self.driver.quit()
-                NaverReviewCollector(headless=False).run()
-                return
-            if not self.perform_login():
-                self.driver.quit()
-                return
-            self.driver.quit()
-            NaverReviewCollector(headless=self.headless).run()
-            return
+                continue
+
+            break  # 쿠키 로딩 또는 로그인 성공 시 루프 종료
 
         for biz_id in BIZ_ID:
            place_name = self.access_review_page(biz_id)
@@ -215,8 +228,8 @@ class NaverReviewCollector:
                 print("[WARN] 세션 만료 또는 쿠키 무효. 로그인 재진행")
                 os.remove(COOKIE_FILE)
                 self.driver.quit()
-                NaverReviewCollector(headless=False).run()
-                return
+                self.headless = False
+                return self.run()
 
             try:
                 reviews = self.extract_reviews()
@@ -232,7 +245,7 @@ class NaverReviewCollector:
         if not self.reviews_by_place:
             sender = MessageSender(
                 mattermost_url=MATTERMOST_URL,
-                mattermost_bot_token=MATTERMOST_BOT_TOKEN,
+                mattermost_token=MATTERMOST_BOT_TOKEN,
                 mattermost_channel_id=MATTERMOST_CHANNEL_ID,
             )
             send_failure_message(sender, MESSAGE_PLATFORMS)
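
Note for reviewers (not part of the patch): the central change is replacing run()'s self re-invocation with a single retry loop. A minimal standalone sketch of that control flow, using a hypothetical SessionStarter with stand-in cookie/login methods instead of the real Selenium driver:

    class SessionStarter:
        """Hypothetical stand-in for NaverReviewCollector's login bootstrap."""

        def __init__(self, headless=True):
            self.headless = headless
            self._cookies_ok = False  # pretend there is no valid cookie file yet

        def load_saved_cookies(self):
            return self._cookies_ok

        def perform_login(self):
            self._cookies_ok = True   # pretend login succeeded and saved cookies
            return True

        def run(self):
            # Loop-based retry: adjust state and `continue` instead of calling
            # run() recursively, mirroring the structure introduced in the patch.
            while True:
                if self.load_saved_cookies():
                    break                  # session ready; leave the retry loop
                if self.headless:
                    self.headless = False  # retry once with a visible browser
                    continue
                if not self.perform_login():
                    return False           # give up rather than recurse
                continue                   # login stored cookies; loop retries

            return True

    if __name__ == "__main__":
        print(SessionStarter().run())  # True after retrying twice

The point of the pattern is that retry state lives in instance attributes and the loop, so a failed attempt no longer grows the call stack the way NaverReviewCollector(...).run() did.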
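A similar sketch of the hardened date handling: the normalization chain from the diff (split off the parenthesized weekday, turn ". " into "-", drop the trailing dot) with strptime wrapped so a malformed string is skipped instead of raising. The sample input format is an assumption inferred from that replace() chain, not taken from Naver's actual markup:

    from datetime import datetime

    def parse_review_date(raw):
        # Mirrors the guarded parsing in extract_reviews(): normalize the string,
        # then treat any ValueError from strptime as "no usable date".
        cleaned = raw.split("(")[0].replace(". ", "-").replace(".", "")
        try:
            return datetime.strptime(cleaned, "%Y-%m-%d").date()
        except ValueError:
            return None

    print(parse_review_date("2025. 7. 11.(금)"))  # 2025-07-11
    print(parse_review_date("날짜 없음"))          # None, so the review is skipped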