환경 변수로 네이버 비즈니스 프로필 리뷰를 가져올지, 네이버 지도 리뷰를 가져올지 선택하도록 하고 공통 코드를 분리함

This commit is contained in:
2025-07-10 15:07:49 +09:00
parent 906102fa90
commit ab3306fdd6
6 changed files with 328 additions and 87 deletions

View File

@ -1,9 +1,6 @@
import os, sys
import re
import pickle
import time
from datetime import datetime, timedelta
import undetected_chromedriver as uc
from datetime import datetime
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
@ -15,59 +12,26 @@ from conf.config import (
COOKIE_FILE, DEBUG,
MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
)
from lib.send_message import MessageSender
def get_start_end_dates():
    """Return the ``(start, end)`` dates of the review collection window.

    When ``DEBUG`` is truthy, both dates are parsed from the ``CFG_START``
    and ``CFG_END`` strings using the ``%Y-%m-%d`` format. Otherwise the
    window is derived from today's date: on a Monday it runs from three days
    ago through yesterday (Friday to Sunday); on any other weekday both ends
    are yesterday.

    Returns:
        tuple: ``(start, end)`` as two ``datetime.date`` objects.
    """
    if DEBUG:
        # Fixed window from configuration: convert each string to a date.
        return tuple(
            datetime.strptime(raw, "%Y-%m-%d").date()
            for raw in (CFG_START, CFG_END)
        )

    now = datetime.today()
    yesterday = now - timedelta(days=1)
    # weekday() == 0 means Monday, so reach back to Friday to cover the weekend.
    first_day = now - timedelta(days=3) if now.weekday() == 0 else yesterday
    return first_day.date(), yesterday.date()
from lib.lib import (
create_mobile_driver,
save_cookies,
load_cookies,
get_start_end_dates,
send_failure_message,
clean_html_text
)
class NaverReviewCollector:
def __init__(self, headless=HEADLESS):
self.headless = headless
self.driver = None
self.total_reviews = 0
self.start_date, self.end_date = get_start_end_dates()
self.reviews_by_place = {}
self.start_date, self.end_date = get_start_end_dates(DEBUG, CFG_START, CFG_END)
self.reviews_by_place = {}
def create_driver(self):
options = uc.ChromeOptions()
options.add_argument('--window-size=375,812')
if self.headless:
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
options.add_argument("--user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
"AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15A372 Safari/604.1")
self.driver = uc.Chrome(options=options)
self.driver.set_window_size(375, 812)
def save_cookies(self):
    """Pickle the driver's current cookies into ``COOKIE_FILE``.

    Each cookie dict returned by ``self.driver.get_cookies()`` is adjusted in
    place before it is written: the ``sameSite`` key is dropped, and an
    ``expiry`` key, when present, is renamed to ``expires``.
    """
    jar = self.driver.get_cookies()
    for entry in jar:
        entry.pop("sameSite", None)
        try:
            # Move the value over only when the cookie carries an expiry.
            entry["expires"] = entry.pop("expiry")
        except KeyError:
            pass

    with open(COOKIE_FILE, "wb") as fh:
        pickle.dump(jar, fh)
def load_cookies(self):
    """Add every cookie pickled in ``COOKIE_FILE`` to ``self.driver``.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file, so ``COOKIE_FILE`` should only ever be a file this program wrote.
    """
    with open(COOKIE_FILE, "rb") as fh:
        stored = pickle.load(fh)

    add_cookie = self.driver.add_cookie
    for entry in stored:
        add_cookie(entry)
self.driver = create_mobile_driver(self.headless)
def perform_login(self):
wait = WebDriverWait(self.driver, 20)
@ -92,7 +56,7 @@ class NaverReviewCollector:
if "captcha" in self.driver.page_source.lower():
input("CAPTCHA 수동 입력 후 Enter: ")
self.save_cookies()
save_cookies(self.driver, COOKIE_FILE)
return True
def is_login_required(self):
@ -158,15 +122,13 @@ class NaverReviewCollector:
continue
html = content_el.get_attribute("innerHTML")
html = re.sub(r'<br\s*/?>', '\n', html)
html = re.sub(r'<span class="pui__blind">.*?<\/span>', '', html, flags=re.DOTALL)
html = re.sub(r'<.*?>', '', html)
text = clean_html_text(html)
reviews.append({
"작성자": author,
"방문일": visit_date,
"작성일": written_date,
"내용": html.strip()
"내용": text
})
except:
@ -174,44 +136,35 @@ class NaverReviewCollector:
except:
pass
return reviews
# 메시지 보내기
def send_to_message(self):
today_str = datetime.today().strftime("%Y년 %m월 %d")
now_str = datetime.now().strftime("%H:%M:%S") # 현재 시각
now_str = datetime.now().strftime("%H:%M:%S")
lines = [f"##### {today_str} 네이버 리뷰 크롤링 결과", ""]
lines.append(f"**수집 시간 :** {now_str}") # 수집 시간 추가
lines.append(f"**수집 시간 :** {now_str}")
lines.append(f"**총 리뷰 수 :** {self.total_reviews}")
lines.append("")
for place_name, reviews in self.reviews_by_place.items():
lines.append(f"- {place_name}: {len(reviews)}")
lines.append("")
lines.append("---")
lines.append("")
lines.append("\n---\n")
for idx, (place_name, reviews) in enumerate(self.reviews_by_place.items(), start=1):
lines.append(f"**{idx}. {place_name}** ")
lines.append("")
if not reviews:
lines.append("- 리뷰 없음")
lines.append("")
lines.append("- 리뷰 없음\n")
else:
for r in reviews:
lines.append(f"- **작성일** : {r['작성일']} ")
lines.append(f" **방문일** : {r['방문일']} ")
lines.append(f" **작성자** : {r['작성자']} ")
lines.append(f" **내용** : {r['내용']}")
lines.append("")
lines.append(f" **내용** : {r['내용']}\n")
lines.append("---")
lines.append("")
message = "\n".join(lines)
if not MESSAGE_PLATFORMS:
print("[WARN] 메시지 전송 플랫폼이 지정되지 않아 메시지를 발송하지 않습니다.")
print("[WARN] 메시지 전송 플랫폼이 지정되지 않음. 미전송")
print(f"[DEBUG] {message}")
return
@ -221,13 +174,12 @@ class NaverReviewCollector:
mattermost_channel_id=MATTERMOST_CHANNEL_ID,
)
if not DEBUG:
sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
else:
if DEBUG:
print(f"[DEBUG] message platform : {MESSAGE_PLATFORMS}")
print("[DEBUG] 디버그 모드에서는 메시지를 발송하지 않습니다.")
print("[DEBUG] 디버그 모드 메시지 미전송")
print(f"[DEBUG] {message}")
else:
sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
def run(self):
self.create_driver()
@ -236,7 +188,7 @@ class NaverReviewCollector:
if os.path.exists(COOKIE_FILE):
try:
self.load_cookies()
load_cookies(self.driver, COOKIE_FILE)
self.driver.get("https://naver.com")
time.sleep(1)
except:
@ -259,7 +211,6 @@ class NaverReviewCollector:
for biz_id in BIZ_ID:
place_name = self.access_review_page(biz_id)
print(f"\n=== [{place_name}({biz_id})] 리뷰 수집 시작 ===")
if self.is_login_required():
print("[WARN] 세션 만료 또는 쿠키 무효. 로그인 재진행")
os.remove(COOKIE_FILE)
@ -271,33 +222,24 @@ class NaverReviewCollector:
reviews = self.extract_reviews()
print(f"[RESULT] 리뷰 {len(reviews)}개 수집됨")
self.total_reviews += len(reviews)
# 플레이스별 리뷰 저장
self.reviews_by_place[place_name] = reviews
except Exception as e:
print(f"[ERROR] {biz_id} 처리 중 오류:", e)
self.reviews_by_place[place_name] = []
self.driver.quit()
# 리뷰 수집 시도 자체가 실패했을 경우 (빈 딕셔너리)
if not self.reviews_by_place:
from lib.send_message import MessageSender
from conf.config import MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
sender = MessageSender(
mattermost_url=MATTERMOST_URL,
mattermost_bot_token=MATTERMOST_BOT_TOKEN,
mattermost_channel_id=MATTERMOST_CHANNEL_ID,
)
for platform in MESSAGE_PLATFORMS:
sender.send("# ❌ 리뷰 수집 실패: 플레이스 접근 또는 파싱 오류", platform=platform, use_webhook=False)
send_failure_message(sender, MESSAGE_PLATFORMS)
else:
self.send_to_message()
if __name__ == "__main__":
    # Script entry point: build a collector with the configured HEADLESS
    # setting and run it once.
    NaverReviewCollector(headless=HEADLESS).run()