재귀 제거: run() 내부의 재호출을 제거하고, 루프 기반 재시도 구조로 개선

except 범위 명확화: except: → except Exception:
리뷰 날짜 파싱 안전화: strptime 사용 시 발생할 수 있는 예외(ValueError)에 대비
중복 로직 함수화: 작성일 추출, 본문 추출 등을 함수로 분리
디버그 로그 함수 추가: debug() 함수로 로깅 통일
메시지 전송 실패 시 명확한 알림: 실패 시에도 로그 출력 후 전송 시도
This commit is contained in:
2025-07-11 09:27:48 +09:00
parent c3488e7bc9
commit bbb17ef362

View File

@ -1,3 +1,4 @@
# biz_crawler.py
import os, sys
import time
from datetime import datetime
@ -22,6 +23,10 @@ from lib.lib import (
clean_html_text
)
def debug(msg):
    """Print ``msg`` with a ``[DEBUG]`` prefix when the DEBUG flag is truthy.

    DEBUG is a module-level name defined outside this block; when it is
    falsy the call does nothing.
    """
    if not DEBUG:
        return
    print(f"[DEBUG] {msg}")
class NaverReviewCollector:
def __init__(self, headless=HEADLESS):
self.headless = headless
@ -41,7 +46,7 @@ class NaverReviewCollector:
try:
modal = wait.until(EC.presence_of_element_located((By.ID, "modal-root")))
modal.find_element(By.XPATH, './/button').click()
except:
except Exception:
pass
try:
@ -49,7 +54,6 @@ class NaverReviewCollector:
self.driver.find_element(By.ID, 'pw').send_keys(NAVER_PW)
self.driver.find_element(By.XPATH, '//button[@type="submit"]').click()
except Exception:
self.driver.quit()
return False
time.sleep(3)
@ -70,9 +74,30 @@ class NaverReviewCollector:
EC.presence_of_element_located((By.XPATH, '//*[starts-with(@class, "Header_btn_select_")]'))
)
return el.text.strip()
except:
except Exception:
return "알수없음"
def extract_written_date(self, spans, li):
    """Return the raw written-date text for one review item, or None.

    Args:
        spans: Sequence of element-like objects exposing ``.text`` and
            ``.find_element``; their stripped texts are used as labels.
        li: Element-like object for the review list item, used as the
            fallback search root.

    Returns:
        The stripped text of the located ``<time>`` element, or None when
        neither label is present or any lookup fails.
    """
    try:
        # Reading ``.text`` can itself raise (e.g. the element went away),
        # so the label scan lives inside the try to keep the
        # "None on any failure" contract uniform.
        labels = [s.text.strip() for s in spans]
        if "작성일" in labels:
            # The value is taken from the span right after the label span;
            # an IndexError for a trailing label is handled below.
            value_span = spans[labels.index("작성일") + 1]
            return value_span.find_element(By.TAG_NAME, "time").text.strip()
        if "예약자" in labels:
            # Fallback layout: the date is looked up at a fixed position
            # relative to the list item.
            return li.find_element(
                By.XPATH, ".//div[3]/div[1]/span[2]/time"
            ).text.strip()
    except Exception:
        return None
    # Neither label present.
    return None
def extract_review_text(self, li):
    """Return the cleaned review body for one list item, or None.

    Probes the anchors at ``./div[4]/a``, ``./div[5]/a`` and ``./div[6]/a``
    in order. The first one whose visible text is non-empty has its
    ``innerHTML`` passed through ``clean_html_text`` and returned. Any
    exception while probing a candidate moves on to the next one.
    """
    for position in (4, 5, 6):
        try:
            anchor = li.find_element(By.XPATH, f"./div[{position}]/a")
            if not anchor or not anchor.text.strip():
                continue
            return clean_html_text(anchor.get_attribute("innerHTML"))
        except Exception:
            continue
    return None
def extract_reviews(self):
reviews = []
try:
@ -81,48 +106,34 @@ class NaverReviewCollector:
)
lis = self.driver.find_elements(By.XPATH, "//ul[starts-with(@class, 'Review_columns_list')]/li")
for li in lis:
try:
if "Review_banner__" in li.get_attribute("class"):
continue
try:
author = li.find_element(By.XPATH, ".//div[1]/a[2]/div/span/span").text.strip()
visit_text = li.find_element(By.XPATH, ".//div[2]/div[1]/span[2]/time").text.strip()
visit_date = datetime.strptime(visit_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d").strftime("%Y-%m-%d")
visit_date = datetime.strptime(
visit_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d"
).strftime("%Y-%m-%d")
spans = li.find_elements(By.XPATH, ".//div[2]/div[2]/span")
labels = [s.text.strip() for s in spans]
written_text = None
if "작성일" in labels:
idx = labels.index("작성일")
written_text = spans[idx + 1].find_element(By.TAG_NAME, "time").text.strip()
elif "예약자" in labels:
try:
written_text = li.find_element(By.XPATH, ".//div[3]/div[1]/span[2]/time").text.strip()
except:
continue
written_text = self.extract_written_date(spans, li)
if not written_text:
continue
written_date = datetime.strptime(written_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d").date()
try:
written_date = datetime.strptime(
written_text.split("(")[0].replace(". ", "-").replace(".", ""), "%Y-%m-%d"
).date()
except ValueError:
continue
if not (self.start_date <= written_date <= self.end_date):
continue
content_el = None
for i in range(4, 7):
try:
el = li.find_element(By.XPATH, f"./div[{i}]/a")
if el and el.text.strip():
content_el = el
break
except:
text = self.extract_review_text(li)
if not text:
continue
if content_el is None:
continue
html = content_el.get_attribute("innerHTML")
text = clean_html_text(html)
reviews.append({
"작성자": author,
@ -130,10 +141,9 @@ class NaverReviewCollector:
"작성일": written_date,
"내용": text
})
except:
except Exception:
continue
except:
except Exception:
pass
return reviews
@ -163,9 +173,10 @@ class NaverReviewCollector:
lines.append("---")
message = "\n".join(lines)
if not MESSAGE_PLATFORMS:
print("[WARN] 메시지 전송 플랫폼이 지정되지 않음. 미전송")
print(f"[DEBUG] {message}")
debug(message)
return
sender = MessageSender(
@ -175,13 +186,14 @@ class NaverReviewCollector:
)
if DEBUG:
print(f"[DEBUG] message platform : {MESSAGE_PLATFORMS}")
print("[DEBUG] 디버그 모드 메시지 전송")
print(f"[DEBUG] {message}")
debug(f"메시지 플랫폼: {MESSAGE_PLATFORMS}")
debug("디버그 모드: 메시지 전송 생략")
debug(message)
else:
sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
def run(self):
while True:
self.create_driver()
self.driver.get("https://naver.com")
time.sleep(1)
@ -191,22 +203,23 @@ class NaverReviewCollector:
load_cookies(self.driver, COOKIE_FILE)
self.driver.get("https://naver.com")
time.sleep(1)
except:
except Exception:
os.remove(COOKIE_FILE)
self.driver.quit()
NaverReviewCollector(headless=False).run()
return
self.headless = False
continue
else:
if self.headless:
self.driver.quit()
NaverReviewCollector(headless=False).run()
return
self.headless = False
continue
if not self.perform_login():
self.driver.quit()
return
self.driver.quit()
NaverReviewCollector(headless=self.headless).run()
return
continue
break # 쿠키 로딩 또는 로그인 성공 시 루프 종료
for biz_id in BIZ_ID:
place_name = self.access_review_page(biz_id)
@ -215,8 +228,8 @@ class NaverReviewCollector:
print("[WARN] 세션 만료 또는 쿠키 무효. 로그인 재진행")
os.remove(COOKIE_FILE)
self.driver.quit()
NaverReviewCollector(headless=False).run()
return
self.headless = False
return self.run()
try:
reviews = self.extract_reviews()
@ -232,7 +245,7 @@ class NaverReviewCollector:
if not self.reviews_by_place:
sender = MessageSender(
mattermost_url=MATTERMOST_URL,
mattermost_bot_token=MATTERMOST_BOT_TOKEN,
mattermost_token=MATTERMOST_BOT_TOKEN,
mattermost_channel_id=MATTERMOST_CHANNEL_ID,
)
send_failure_message(sender, MESSAGE_PLATFORMS)