diff --git a/conf/__init__.py b/conf/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/__init__.py b/lib/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/biz_crawler.py b/lib/biz_crawler.py
index 345564b..a4b9a2f 100644
--- a/lib/biz_crawler.py
+++ b/lib/biz_crawler.py
@@ -1,9 +1,6 @@
import os, sys
-import re
-import pickle
import time
-from datetime import datetime, timedelta
-import undetected_chromedriver as uc
+from datetime import datetime
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
@@ -15,59 +12,26 @@ from conf.config import (
COOKIE_FILE, DEBUG,
MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
)
-
from lib.send_message import MessageSender
-
-def get_start_end_dates():
- if DEBUG:
- # 문자열 → datetime.date 변환
- start = datetime.strptime(CFG_START, "%Y-%m-%d").date()
- end = datetime.strptime(CFG_END, "%Y-%m-%d").date()
- return start, end
-
- today = datetime.today()
- weekday = today.weekday() # 0 = Monday
- if weekday == 0: # 월요일 → 금~일
- start = today - timedelta(days=3)
- end = today - timedelta(days=1)
- else: # 그 외 요일 → 어제
- start = end = today - timedelta(days=1)
- return start.date(), end.date()
+from lib.lib import (
+ create_mobile_driver,
+ save_cookies,
+ load_cookies,
+ get_start_end_dates,
+ send_failure_message,
+ clean_html_text
+)
class NaverReviewCollector:
def __init__(self, headless=HEADLESS):
self.headless = headless
self.driver = None
self.total_reviews = 0
- self.start_date, self.end_date = get_start_end_dates()
- self.reviews_by_place = {}
+ self.start_date, self.end_date = get_start_end_dates(DEBUG, CFG_START, CFG_END)
+ self.reviews_by_place = {}
def create_driver(self):
- options = uc.ChromeOptions()
- options.add_argument('--window-size=375,812')
- if self.headless:
- options.add_argument("--headless=new")
- options.add_argument("--disable-gpu")
-
- options.add_argument("--user-agent=Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
- "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15A372 Safari/604.1")
- self.driver = uc.Chrome(options=options)
- self.driver.set_window_size(375, 812)
-
- def save_cookies(self):
- cookies = self.driver.get_cookies()
- for c in cookies:
- c.pop("sameSite", None)
- if "expiry" in c:
- c["expires"] = c.pop("expiry")
- with open(COOKIE_FILE, "wb") as f:
- pickle.dump(cookies, f)
-
- def load_cookies(self):
- with open(COOKIE_FILE, "rb") as f:
- cookies = pickle.load(f)
- for cookie in cookies:
- self.driver.add_cookie(cookie)
+ self.driver = create_mobile_driver(self.headless)
def perform_login(self):
wait = WebDriverWait(self.driver, 20)
@@ -92,7 +56,7 @@ class NaverReviewCollector:
if "captcha" in self.driver.page_source.lower():
input("CAPTCHA 수동 입력 후 Enter: ")
- self.save_cookies()
+ save_cookies(self.driver, COOKIE_FILE)
return True
def is_login_required(self):
@@ -158,15 +122,13 @@ class NaverReviewCollector:
continue
html = content_el.get_attribute("innerHTML")
- html = re.sub(r'
', '\n', html)
- html = re.sub(r'.*?<\/span>', '', html, flags=re.DOTALL)
- html = re.sub(r'<.*?>', '', html)
+ text = clean_html_text(html)
reviews.append({
"작성자": author,
"방문일": visit_date,
"작성일": written_date,
- "내용": html.strip()
+ "내용": text
})
except:
@@ -174,44 +136,35 @@ class NaverReviewCollector:
except:
pass
return reviews
-
- # 메시지 보내기
+
def send_to_message(self):
today_str = datetime.today().strftime("%Y년 %m월 %d일")
- now_str = datetime.now().strftime("%H:%M:%S") # 현재 시각
+ now_str = datetime.now().strftime("%H:%M:%S")
lines = [f"##### {today_str} 네이버 리뷰 크롤링 결과", ""]
- lines.append(f"**수집 시간 :** {now_str}") # 수집 시간 추가
+ lines.append(f"**수집 시간 :** {now_str}")
lines.append(f"**총 리뷰 수 :** {self.total_reviews}")
lines.append("")
for place_name, reviews in self.reviews_by_place.items():
lines.append(f"- {place_name}: {len(reviews)}건 ")
- lines.append("")
- lines.append("---")
- lines.append("")
+ lines.append("\n---\n")
for idx, (place_name, reviews) in enumerate(self.reviews_by_place.items(), start=1):
lines.append(f"**{idx}. {place_name}** ")
lines.append("")
-
if not reviews:
- lines.append("- 리뷰 없음")
- lines.append("")
+ lines.append("- 리뷰 없음\n")
else:
for r in reviews:
lines.append(f"- **작성일** : {r['작성일']} ")
lines.append(f" **방문일** : {r['방문일']} ")
lines.append(f" **작성자** : {r['작성자']} ")
- lines.append(f" **내용** : {r['내용']}")
- lines.append("")
-
+ lines.append(f" **내용** : {r['내용']}\n")
lines.append("---")
- lines.append("")
message = "\n".join(lines)
-
if not MESSAGE_PLATFORMS:
- print("[WARN] 메시지 전송 플랫폼이 지정되지 않아 메시지를 발송하지 않습니다.")
+ print("[WARN] 메시지 전송 플랫폼이 지정되지 않음. 미전송")
print(f"[DEBUG] {message}")
return
@@ -221,13 +174,12 @@ class NaverReviewCollector:
mattermost_channel_id=MATTERMOST_CHANNEL_ID,
)
- if not DEBUG:
- sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
- else:
+ if DEBUG:
print(f"[DEBUG] message platform : {MESSAGE_PLATFORMS}")
- print("[DEBUG] 디버그 모드에서는 메시지를 발송하지 않습니다.")
+ print("[DEBUG] 디버그 모드 메시지 미전송")
print(f"[DEBUG] {message}")
-
+ else:
+ sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
def run(self):
self.create_driver()
@@ -236,7 +188,7 @@ class NaverReviewCollector:
if os.path.exists(COOKIE_FILE):
try:
- self.load_cookies()
+ load_cookies(self.driver, COOKIE_FILE)
self.driver.get("https://naver.com")
time.sleep(1)
except:
@@ -259,7 +211,6 @@ class NaverReviewCollector:
for biz_id in BIZ_ID:
place_name = self.access_review_page(biz_id)
print(f"\n=== [{place_name}({biz_id})] 리뷰 수집 시작 ===")
-
if self.is_login_required():
print("[WARN] 세션 만료 또는 쿠키 무효. 로그인 재진행")
os.remove(COOKIE_FILE)
@@ -271,33 +222,24 @@ class NaverReviewCollector:
reviews = self.extract_reviews()
print(f"[RESULT] 리뷰 {len(reviews)}개 수집됨")
self.total_reviews += len(reviews)
-
- # 플레이스별 리뷰 저장
self.reviews_by_place[place_name] = reviews
-
except Exception as e:
print(f"[ERROR] {biz_id} 처리 중 오류:", e)
self.reviews_by_place[place_name] = []
self.driver.quit()
- # 리뷰 수집 시도 자체가 실패했을 경우 (빈 딕셔너리)
if not self.reviews_by_place:
- from lib.send_message import MessageSender
- from conf.config import MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
-
sender = MessageSender(
mattermost_url=MATTERMOST_URL,
mattermost_bot_token=MATTERMOST_BOT_TOKEN,
mattermost_channel_id=MATTERMOST_CHANNEL_ID,
)
-
- for platform in MESSAGE_PLATFORMS:
- sender.send("# ❌ 리뷰 수집 실패: 플레이스 접근 또는 파싱 오류", platform=platform, use_webhook=False)
-
+ send_failure_message(sender, MESSAGE_PLATFORMS)
else:
self.send_to_message()
+
if __name__ == "__main__":
collector = NaverReviewCollector(headless=HEADLESS)
collector.run()
diff --git a/lib/lib.py b/lib/lib.py
new file mode 100644
index 0000000..a12e0b3
--- /dev/null
+++ b/lib/lib.py
@@ -0,0 +1,112 @@
+# lib/lib.py
+
+import os
+import sys
+import time
+import pickle
+import re
+from datetime import datetime, timedelta
+import undetected_chromedriver as uc
+from selenium.webdriver.common.by import By
+
+# 공통 설정 경로 추가 (필요 시)
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+# ─────────────────────────────────────────────
+# ✅ 드라이버 생성 함수
+# ─────────────────────────────────────────────
+def create_mobile_driver(headless=True):
+    """Start an undetected-chromedriver Chrome session that mimics a phone.
+
+    The browser gets a 375x812 window and an iPhone Safari user agent. When
+    ``headless`` is truthy, the ``--headless=new`` and ``--disable-gpu``
+    switches are added as well.
+
+    Returns:
+        The ``uc.Chrome`` driver instance.
+    """
+    mobile_user_agent = (
+        "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) "
+        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15A372 Safari/604.1"
+    )
+
+    # Collect the switches first; they are applied in this exact order.
+    arguments = ['--window-size=375,812']
+    if headless:
+        arguments += ['--headless=new', '--disable-gpu']
+    arguments.append("--user-agent=" + mobile_user_agent)
+
+    chrome_options = uc.ChromeOptions()
+    for argument in arguments:
+        chrome_options.add_argument(argument)
+
+    browser = uc.Chrome(options=chrome_options)
+    browser.set_window_size(375, 812)
+    return browser
+
+# ─────────────────────────────────────────────
+# ✅ 쿠키 저장 및 로드
+# ─────────────────────────────────────────────
+def save_cookies(driver, cookie_file):
+    """Pickle the driver's current cookies into ``cookie_file``.
+
+    Each cookie dict returned by ``driver.get_cookies()`` is adjusted before
+    saving: the ``sameSite`` key is dropped and an ``expiry`` key, when
+    present, is renamed to ``expires``.
+    """
+    jar = driver.get_cookies()
+    for entry in jar:
+        entry.pop("sameSite", None)
+        try:
+            entry["expires"] = entry.pop("expiry")
+        except KeyError:
+            # Session cookies carry no expiry; nothing to rename.
+            pass
+    with open(cookie_file, "wb") as handle:
+        pickle.dump(jar, handle)
+
+def load_cookies(driver, cookie_file):
+    """Unpickle cookies from ``cookie_file`` and add each one to ``driver``.
+
+    NOTE(review): ``pickle.load`` executes whatever the file contains, so
+    ``cookie_file`` should only ever be a file this program wrote itself.
+    """
+    with open(cookie_file, "rb") as handle:
+        stored = pickle.load(handle)
+    for entry in stored:
+        driver.add_cookie(entry)
+
+# ─────────────────────────────────────────────
+# ✅ 날짜 계산 유틸리티
+# ─────────────────────────────────────────────
+def get_start_end_dates(debug, cfg_start, cfg_end):
+    """Return the ``(start, end)`` dates of the review window to collect.
+
+    In debug mode both bounds are parsed from the ``YYYY-MM-DD`` strings
+    ``cfg_start`` and ``cfg_end``. Otherwise the window is yesterday only,
+    except on Mondays, when it spans from three days ago through yesterday.
+    """
+    if debug:
+        fmt = "%Y-%m-%d"
+        first_day, last_day = (
+            datetime.strptime(raw, fmt).date() for raw in (cfg_start, cfg_end)
+        )
+        return first_day, last_day
+
+    now = datetime.today()
+    # weekday() == 0 is Monday: reach back over the weekend.
+    days_back = 3 if now.weekday() == 0 else 1
+    first_day = (now - timedelta(days=days_back)).date()
+    last_day = (now - timedelta(days=1)).date()
+    return first_day, last_day
+
+# ─────────────────────────────────────────────
+# ✅ Send a failure notification when review collection fails
+# ─────────────────────────────────────────────
+def send_failure_message(sender, platforms):
+    """Notify the given platforms that review collection failed.
+
+    Args:
+        sender: Object exposing ``send(message, platforms=..., use_webhook=...)``;
+            the collectors pass a ``MessageSender``.
+        platforms: Platform names to notify. Nothing is sent when it is empty.
+    """
+    if not platforms:
+        return
+    # The collectors' regular result path calls ``send`` with the plural
+    # ``platforms`` keyword, so the failure path uses the same call shape
+    # instead of a per-platform ``platform=`` keyword.
+    sender.send(
+        "# ❌ 리뷰 수집 실패: 플레이스 접근 또는 파싱 오류",
+        platforms=platforms,
+        use_webhook=False,
+    )
+
+# ─────────────────────────────────────────────
+# ✅ HTML 본문 정리 (리뷰 등)
+# ─────────────────────────────────────────────
+def clean_html_text(html):
+    """Convert a review's ``innerHTML`` into plain text.
+
+    ``<br>`` tags become newlines, ``<span>...</span>`` elements are removed
+    together with their contents, every remaining tag is stripped, and the
+    result is trimmed. ``None`` or empty input yields ``""``.
+    """
+    if not html:
+        return ""
+    text = re.sub(r'<br\s*/?>', '\n', html)
+    # NOTE(review): the tag patterns in this function were reconstructed;
+    # confirm that dropping every <span> element (not only a specific class)
+    # is the intended behaviour.
+    text = re.sub(r'<span\b[^>]*>.*?</span>', '', text, flags=re.DOTALL)
+    text = re.sub(r'<[^>]+>', '', text)
+    return text.strip()
+
+def parse_korean_date(date_str):
+    """Parse a date such as ``"2024년 5월 3일 금요일"`` into a ``date``.
+
+    Only the first three whitespace-separated tokens (year, month, day) are
+    used, so a trailing weekday is ignored.
+
+    Returns:
+        The parsed ``datetime.date``, or ``None`` (after printing a warning)
+        when the input is not text in that format.
+    """
+    try:
+        # split() without an argument collapses runs of whitespace, so
+        # doubled spaces do not produce empty tokens.
+        tokens = date_str.split()[:3]
+        return datetime.strptime(" ".join(tokens), "%Y년 %m월 %d일").date()
+    except (AttributeError, TypeError, ValueError) as e:
+        print(f"[WARN] 날짜 파싱 실패: {date_str} ({e})")
+        return None
+
+
+def click_more(driver):
+    """Click the link right after the ``place_section_content`` element.
+
+    The target is the first ``<a>`` inside the next sibling ``div`` of that
+    element (presumably the "show more" button — verify against the page).
+    The click is issued through JavaScript and followed by a two-second sleep.
+
+    Returns:
+        True when the click was issued, False when any step raised.
+    """
+    try:
+        section = driver.find_element(By.CLASS_NAME, "place_section_content")
+        sibling = section.find_element(By.XPATH, "./following-sibling::div[1]")
+        link = sibling.find_element(By.TAG_NAME, "a")
+        driver.execute_script("arguments[0].click();", link)
+        time.sleep(2)  # wait after the click
+    except Exception:
+        return False
+    return True
+
+def extract_shop_name(driver):
+    """Read the shop name from the current page.
+
+    The name is the stripped text of the first ``<span>`` under the element
+    with id ``_title`` inside ``div[role="main"]``. When the lookup raises, a
+    warning is printed and the placeholder ``"업체명 없음"`` is returned.
+    """
+    try:
+        title_el = (
+            driver.find_element(By.CSS_SELECTOR, 'div[role="main"]')
+            .find_element(By.ID, "_title")
+        )
+        return title_el.find_element(By.TAG_NAME, "span").text.strip()
+    except Exception as e:
+        print(f"[WARN] 업체명 추출 실패: {e}")
+        return "업체명 없음"
diff --git a/lib/naver_review_crawler.py b/lib/naver_review_crawler.py
new file mode 100644
index 0000000..2ce6b36
--- /dev/null
+++ b/lib/naver_review_crawler.py
@@ -0,0 +1,159 @@
+import os, sys
+from datetime import datetime
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from conf.config import (
+ PLACE_IDS, START_DATE, END_DATE, DEBUG,
+ MESSAGE_PLATFORMS, MATTERMOST_URL, MATTERMOST_BOT_TOKEN, MATTERMOST_CHANNEL_ID
+ )
+from lib.send_message import MessageSender
+from lib.lib import (
+ create_mobile_driver,
+ get_start_end_dates,
+ parse_korean_date,
+ click_more,
+ extract_shop_name,
+ clean_html_text,
+ send_failure_message
+)
+
+class NaverReviewMapCollector:
+    """Collect visitor reviews from Naver Map place pages and report them.
+
+    For every id in ``PLACE_IDS`` the mobile review page is opened, reviews
+    dated within ``[start_date, end_date]`` are gathered, and a Markdown
+    summary is sent through ``MessageSender`` (or printed in debug mode).
+    """
+
+    def __init__(self):
+        self.driver = None
+        self.total_reviews = 0
+        self.start_date, self.end_date = get_start_end_dates(DEBUG, START_DATE, END_DATE)
+        # Maps shop name -> list of review dicts collected for that shop.
+        self.reviews_by_place = {}
+
+    def _build_sender(self):
+        """Create the MessageSender from the Mattermost settings."""
+        # The keyword is ``mattermost_bot_token`` so that this call agrees
+        # with how lib/biz_crawler.py constructs MessageSender.
+        return MessageSender(
+            mattermost_url=MATTERMOST_URL,
+            mattermost_bot_token=MATTERMOST_BOT_TOKEN,
+            mattermost_channel_id=MATTERMOST_CHANNEL_ID,
+        )
+
+    def extract_reviews(self):
+        """Return the in-range reviews currently rendered on the page.
+
+        Waits up to 10 seconds for the ``_review_list`` element, then reads
+        every ``li`` whose class contains ``place_apply_pui``. Items without a
+        readable date or body are skipped; a missing author becomes "익명".
+
+        Returns:
+            A list of dicts with the keys "작성자", "작성일" and "내용".
+        """
+        reviews = []
+        WebDriverWait(self.driver, 10).until(
+            EC.presence_of_element_located((By.ID, "_review_list"))
+        )
+        ul = self.driver.find_element(By.ID, "_review_list")
+        items = ul.find_elements(By.XPATH, './/li[contains(@class, "place_apply_pui")]')
+        for item in items:
+            try:
+                try:
+                    writer = item.find_element(By.XPATH, "./div[1]/a[2]/div/span/span").text.strip()
+                except Exception:
+                    writer = "익명"
+
+                # Date and body are mandatory; skip the item if either is missing.
+                try:
+                    date_text = item.find_element(By.XPATH, "./div[7]/div[2]/div/span[1]/span[2]").text.strip()
+                    text = item.find_element(By.XPATH, "./div[5]/a").get_attribute("innerHTML")
+                except Exception:
+                    continue
+
+                date_obj = parse_korean_date(date_text)
+                if date_obj and (self.start_date <= date_obj <= self.end_date):
+                    reviews.append({
+                        "작성자": writer,
+                        "작성일": date_obj,
+                        "내용": clean_html_text(text)
+                    })
+            except Exception as e:
+                print(f"[WARN] 리뷰 추출 실패: {e}")
+        return reviews
+
+    def send_to_message(self):
+        """Build the Markdown report and send it, or print it in debug mode."""
+        today_str = datetime.today().strftime("%Y년 %m월 %d일")
+        now_str = datetime.now().strftime("%H:%M:%S")
+        lines = [f"##### {today_str} 네이버 지도 리뷰 크롤링 결과", ""]
+        lines.append(f"**수집 시간 :** {now_str}")
+        lines.append(f"**총 리뷰 수 :** {self.total_reviews}")
+        lines.append("")
+
+        # Per-place counts first, then the detailed listing.
+        for place_name, reviews in self.reviews_by_place.items():
+            lines.append(f"- {place_name}: {len(reviews)}건 ")
+        lines.append("\n---\n")
+
+        for idx, (place_name, reviews) in enumerate(self.reviews_by_place.items(), start=1):
+            lines.append(f"**{idx}. {place_name}** ")
+            lines.append("")
+            if not reviews:
+                lines.append("- 리뷰 없음\n")
+            else:
+                for r in reviews:
+                    lines.append(f"- **작성일** : {r['작성일']} ")
+                    lines.append(f" **작성자** : {r['작성자']} ")
+                    lines.append(f" **내용** : {r['내용']}\n")
+            lines.append("---")
+
+        message = "\n".join(lines)
+        if not MESSAGE_PLATFORMS:
+            print("[WARN] 메시지 전송 플랫폼 없음")
+            print(f"[DEBUG] {message}")
+            return
+
+        sender = self._build_sender()
+
+        if DEBUG:
+            print("[DEBUG] 디버그 모드로 메시지 미전송")
+            print(message)
+        else:
+            sender.send(message, platforms=MESSAGE_PLATFORMS, use_webhook=False)
+
+    def _collect_place(self, place_id):
+        """Open one place's review page and gather its in-range reviews.
+
+        Returns:
+            A ``(shop_name, reviews)`` tuple.
+        """
+        url = f"https://m.place.naver.com/place/{place_id}/review/visitor?reviewSort=recent"
+        print(f"[INFO] 접근: {url}")
+        self.driver.get(url)
+        shop_name = extract_shop_name(self.driver)
+        all_reviews = []
+        seen = set()
+
+        # NOTE(review): paging stops as soon as a pass yields no new in-range
+        # review, so older in-range reviews hidden behind a first page of
+        # out-of-range items would be missed — confirm this is acceptable.
+        while True:
+            fresh = []
+            for r in self.extract_reviews():
+                key = (r["작성자"], r["작성일"], r["내용"])
+                if key not in seen:
+                    seen.add(key)
+                    fresh.append(r)
+
+            if not fresh:
+                break
+
+            all_reviews.extend(fresh)
+            if not click_more(self.driver):
+                break
+
+        return shop_name, all_reviews
+
+    def run(self):
+        """Crawl every configured place, then send the report or a failure notice."""
+        self.driver = create_mobile_driver()
+        try:
+            for place_id in PLACE_IDS:
+                try:
+                    shop_name, place_reviews = self._collect_place(place_id)
+                except Exception as e:
+                    # One broken place page must not abort the remaining places.
+                    print(f"[ERROR] {place_id} 처리 중 오류: {e}")
+                    continue
+
+                print(f"[DONE] {shop_name}: {len(place_reviews)}건 수집")
+                self.total_reviews += len(place_reviews)
+                # Two places can resolve to the same name (for example the
+                # extraction fallback); keep both entries instead of overwriting.
+                if shop_name in self.reviews_by_place:
+                    shop_name = f"{shop_name} ({place_id})"
+                self.reviews_by_place[shop_name] = place_reviews
+        finally:
+            # Always release the browser, even when the crawl raised.
+            self.driver.quit()
+
+        if not self.reviews_by_place:
+            send_failure_message(self._build_sender(), MESSAGE_PLATFORMS)
+        else:
+            self.send_to_message()
+
+# Script entry point: build the map collector and run one crawl.
+if __name__ == "__main__":
+    NaverReviewMapCollector().run()
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..3c59c2c
--- /dev/null
+++ b/run.py
@@ -0,0 +1,28 @@
+import os
+import sys
+from dotenv import load_dotenv
+
+
+# Load environment variables from the .env file.
+load_dotenv()
+
+# Add the lib/ directory (resolved relative to this file) to the import path.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'lib')))
+
+# Execution mode, read from the MODE environment variable ("biz" or "map").
+mode = os.getenv("MODE", "").strip().lower()
+
+if mode == "biz":
+    from lib.biz_crawler import NaverReviewCollector
+    print("[INFO] 비즈니스 리뷰 수집기 실행")
+    collector = NaverReviewCollector()
+    collector.run()
+
+elif mode == "map":
+    # lib/naver_review_crawler.py defines NaverReviewMapCollector; importing
+    # any other name raises ImportError.
+    from lib.naver_review_crawler import NaverReviewMapCollector
+    print("[INFO] 지도 리뷰 수집기 실행")
+    collector = NaverReviewMapCollector()
+    collector.run()
+
+else:
+    print("[ERROR] .env 파일에서 MODE 값을 설정해주세요. (biz 또는 map)")