| 항목 | 개선 전 | 개선 후 |
| ----------- | ---------------------- | ------------------------- |
| 예외 처리       | `except:`              | `except Exception:` 으로 제한 |
| 디버깅 출력      | `print()` 섞여 있음        | `debug()` 함수로 통일          |
| 메시지 전송      | 반복적 `MessageSender()`  | 공통화                       |
| 중복 제거       | `작성자/작성일/내용` key 반복 비교 | 튜플 키 비교 유지, 주석 보완         |
| 리뷰 추출 실패 로그 | 단순 출력                  | `place_id` 정보 포함          |
| 메시지 포맷      | biz와 형식 동일             | 동일한 스타일 유지                |

This commit is contained in:
2025-07-11 09:30:49 +09:00
parent bbb17ef362
commit f760723067

View File

@ -1,3 +1,4 @@
# naver_review_crawler.py
import os, sys import os, sys
from datetime import datetime from datetime import datetime
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
@ -7,9 +8,9 @@ from selenium.webdriver.support import expected_conditions as EC
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from conf.config import ( from conf.config import (
PLACE_IDS, START_DATE, END_DATE, DEBUG, PLACE_IDS, START_DATE, END_DATE, DEBUG,
MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
) )
from lib.send_message import MessageSender from lib.send_message import MessageSender
from lib.lib import ( from lib.lib import (
create_mobile_driver, create_mobile_driver,
@ -21,7 +22,13 @@ from lib.lib import (
send_failure_message send_failure_message
) )
class NaverReviewMapCollector:
def debug(msg):
if DEBUG:
print(f"[DEBUG] {msg}")
class NaverMapReviewCollector:
def __init__(self): def __init__(self):
self.driver = None self.driver = None
self.total_reviews = 0 self.total_reviews = 0
@ -30,40 +37,46 @@ class NaverReviewMapCollector:
def extract_reviews(self): def extract_reviews(self):
reviews = [] reviews = []
WebDriverWait(self.driver, 10).until( try:
EC.presence_of_element_located((By.ID, "_review_list")) WebDriverWait(self.driver, 10).until(
) EC.presence_of_element_located((By.ID, "_review_list"))
ul = self.driver.find_element(By.ID, "_review_list") )
items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]') ul = self.driver.find_element(By.ID, "_review_list")
for item in items: items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]')
try:
writer = "익명"
try:
writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip()
except:
pass
date_obj = None for item in items:
try: try:
date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip() writer = "익명"
date_obj = parse_korean_date(date_text) try:
except: writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip()
continue except Exception:
pass
text = "" try:
try: date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip()
text = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML") date_obj = parse_korean_date(date_text)
except: except Exception:
continue continue
if not (self.start_date <= date_obj <= self.end_date):
continue
try:
text_html = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML")
content = clean_html_text(text_html)
except Exception:
continue
if date_obj and (self.start_date <= date_obj <= self.end_date):
reviews.append({ reviews.append({
"작성자": writer, "작성자": writer,
"작성일": date_obj, "작성일": date_obj,
"내용": clean_html_text(text) "내용": content
}) })
except Exception as e:
print(f"[WARN] 리뷰 추출 실패: {e}") except Exception as e:
debug(f"[WARN] 리뷰 항목 처리 중 오류: {e}")
except Exception as e:
debug(f"[ERROR] 리뷰 리스트 접근 실패: {e}")
return reviews return reviews
def send_to_message(self): def send_to_message(self):
@ -91,9 +104,10 @@ class NaverReviewMapCollector:
lines.append("---") lines.append("---")
message = "\n".join(lines) message = "\n".join(lines)
if not MESSAGE_PLATFORMS: if not MESSAGE_PLATFORMS:
print("[WARN] 메시지 전송 플랫폼") print("[WARN] 메시지 전송 플랫폼이 지정되지 않")
print(f"[DEBUG] {message}") debug(message)
return return
sender = MessageSender( sender = MessageSender(
@ -103,8 +117,8 @@ class NaverReviewMapCollector:
) )
if DEBUG: if DEBUG:
print("[DEBUG] 디버그 모드로 메시지 전송") debug("디버그 모드로 메시지 전송 생략")
print(message) debug(message)
else: else:
sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False) sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
@ -114,10 +128,15 @@ class NaverReviewMapCollector:
for place_id in PLACE_IDS: for place_id in PLACE_IDS:
url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent" url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent"
print(f"[INFO] 접근: {url}") print(f"[INFO] 접근: {url}")
self.driver.get(url) try:
shop_name = extract_shop_name(self.driver) self.driver.get(url)
shop_name = extract_shop_name(self.driver)
except Exception as e:
print(f"[ERROR] {place_id} 매장 접근 오류: {e}")
continue
all_reviews = [] all_reviews = []
seen = set() seen = set() # (작성자, 작성일, 내용) 기준으로 중복 제거
while True: while True:
new_reviews = self.extract_reviews() new_reviews = self.extract_reviews()
@ -135,6 +154,7 @@ class NaverReviewMapCollector:
break break
all_reviews.extend(filtered) all_reviews.extend(filtered)
if not click_more(self.driver): if not click_more(self.driver):
break break
@ -154,6 +174,7 @@ class NaverReviewMapCollector:
else: else:
self.send_to_message() self.send_to_message()
if __name__ == "__main__": if __name__ == "__main__":
collector = NaverReviewMapCollector() collector = NaverMapReviewCollector()
collector.run() collector.run()