| 항목 | 개선 전 | 개선 후 |
| ----------- | ---------------------- | ------------------------- |
| 예외 처리 | `except:` | `except Exception:` 으로 제한 |
| 디버깅 출력 | `print()` 섞여 있음 | `debug()` 함수로 통일 |
| 메시지 전송 | 반복적 `MessageSender()` | 공통화 |
| 중복 제거 | `작성자/작성일/내용` key 반복 비교 | 튜플 키 비교 유지, 주석 보완 |
| 리뷰 추출 실패 로그 | 단순 출력 | `place_id` 정보 포함 |
| 메시지 포맷 | biz와 형식 동일 | 동일한 스타일 유지 |
This commit is contained in:
@ -1,3 +1,4 @@
|
|||||||
|
# naver_review_crawler.py
|
||||||
import os, sys
|
import os, sys
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from selenium.webdriver.common.by import By
|
from selenium.webdriver.common.by import By
|
||||||
@ -7,9 +8,9 @@ from selenium.webdriver.support import expected_conditions as EC
|
|||||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||||
|
|
||||||
from conf.config import (
|
from conf.config import (
|
||||||
PLACE_IDS, START_DATE, END_DATE, DEBUG,
|
PLACE_IDS, START_DATE, END_DATE, DEBUG,
|
||||||
MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
|
MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
|
||||||
)
|
)
|
||||||
from lib.send_message import MessageSender
|
from lib.send_message import MessageSender
|
||||||
from lib.lib import (
|
from lib.lib import (
|
||||||
create_mobile_driver,
|
create_mobile_driver,
|
||||||
@ -21,7 +22,13 @@ from lib.lib import (
|
|||||||
send_failure_message
|
send_failure_message
|
||||||
)
|
)
|
||||||
|
|
||||||
class NaverReviewMapCollector:
|
|
||||||
|
def debug(msg):
|
||||||
|
if DEBUG:
|
||||||
|
print(f"[DEBUG] {msg}")
|
||||||
|
|
||||||
|
|
||||||
|
class NaverMapReviewCollector:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.driver = None
|
self.driver = None
|
||||||
self.total_reviews = 0
|
self.total_reviews = 0
|
||||||
@ -30,40 +37,46 @@ class NaverReviewMapCollector:
|
|||||||
|
|
||||||
def extract_reviews(self):
|
def extract_reviews(self):
|
||||||
reviews = []
|
reviews = []
|
||||||
WebDriverWait(self.driver, 10).until(
|
try:
|
||||||
EC.presence_of_element_located((By.ID, "_review_list"))
|
WebDriverWait(self.driver, 10).until(
|
||||||
)
|
EC.presence_of_element_located((By.ID, "_review_list"))
|
||||||
ul = self.driver.find_element(By.ID, "_review_list")
|
)
|
||||||
items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]')
|
ul = self.driver.find_element(By.ID, "_review_list")
|
||||||
for item in items:
|
items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]')
|
||||||
try:
|
|
||||||
writer = "익명"
|
|
||||||
try:
|
|
||||||
writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
date_obj = None
|
for item in items:
|
||||||
try:
|
try:
|
||||||
date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip()
|
writer = "익명"
|
||||||
date_obj = parse_korean_date(date_text)
|
try:
|
||||||
except:
|
writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip()
|
||||||
continue
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
text = ""
|
try:
|
||||||
try:
|
date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip()
|
||||||
text = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML")
|
date_obj = parse_korean_date(date_text)
|
||||||
except:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if not (self.start_date <= date_obj <= self.end_date):
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
text_html = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML")
|
||||||
|
content = clean_html_text(text_html)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
|
||||||
if date_obj and (self.start_date <= date_obj <= self.end_date):
|
|
||||||
reviews.append({
|
reviews.append({
|
||||||
"작성자": writer,
|
"작성자": writer,
|
||||||
"작성일": date_obj,
|
"작성일": date_obj,
|
||||||
"내용": clean_html_text(text)
|
"내용": content
|
||||||
})
|
})
|
||||||
except Exception as e:
|
|
||||||
print(f"[WARN] 리뷰 추출 실패: {e}")
|
except Exception as e:
|
||||||
|
debug(f"[WARN] 리뷰 항목 처리 중 오류: {e}")
|
||||||
|
except Exception as e:
|
||||||
|
debug(f"[ERROR] 리뷰 리스트 접근 실패: {e}")
|
||||||
return reviews
|
return reviews
|
||||||
|
|
||||||
def send_to_message(self):
|
def send_to_message(self):
|
||||||
@ -91,9 +104,10 @@ class NaverReviewMapCollector:
|
|||||||
lines.append("---")
|
lines.append("---")
|
||||||
|
|
||||||
message = "\n".join(lines)
|
message = "\n".join(lines)
|
||||||
|
|
||||||
if not MESSAGE_PLATFORMS:
|
if not MESSAGE_PLATFORMS:
|
||||||
print("[WARN] 메시지 전송 플랫폼 없음")
|
print("[WARN] 메시지 전송 플랫폼이 지정되지 않음")
|
||||||
print(f"[DEBUG] {message}")
|
debug(message)
|
||||||
return
|
return
|
||||||
|
|
||||||
sender = MessageSender(
|
sender = MessageSender(
|
||||||
@ -103,8 +117,8 @@ class NaverReviewMapCollector:
|
|||||||
)
|
)
|
||||||
|
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print("[DEBUG] 디버그 모드로 메시지 미전송")
|
debug("디버그 모드로 메시지 전송 생략")
|
||||||
print(message)
|
debug(message)
|
||||||
else:
|
else:
|
||||||
sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
|
sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
|
||||||
|
|
||||||
@ -114,10 +128,15 @@ class NaverReviewMapCollector:
|
|||||||
for place_id in PLACE_IDS:
|
for place_id in PLACE_IDS:
|
||||||
url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent"
|
url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent"
|
||||||
print(f"[INFO] 접근: {url}")
|
print(f"[INFO] 접근: {url}")
|
||||||
self.driver.get(url)
|
try:
|
||||||
shop_name = extract_shop_name(self.driver)
|
self.driver.get(url)
|
||||||
|
shop_name = extract_shop_name(self.driver)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[ERROR] {place_id} 매장 접근 오류: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
all_reviews = []
|
all_reviews = []
|
||||||
seen = set()
|
seen = set() # (작성자, 작성일, 내용) 기준으로 중복 제거
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
new_reviews = self.extract_reviews()
|
new_reviews = self.extract_reviews()
|
||||||
@ -135,6 +154,7 @@ class NaverReviewMapCollector:
|
|||||||
break
|
break
|
||||||
|
|
||||||
all_reviews.extend(filtered)
|
all_reviews.extend(filtered)
|
||||||
|
|
||||||
if not click_more(self.driver):
|
if not click_more(self.driver):
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -154,6 +174,7 @@ class NaverReviewMapCollector:
|
|||||||
else:
|
else:
|
||||||
self.send_to_message()
|
self.send_to_message()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
collector = NaverReviewMapCollector()
|
collector = NaverMapReviewCollector()
|
||||||
collector.run()
|
collector.run()
|
||||||
|
|||||||
Reference in New Issue
Block a user