diff --git a/lib/common.py b/lib/common.py
index 2ef9416..41b8417 100644
--- a/lib/common.py
+++ b/lib/common.py
@@ -1,6 +1,8 @@
 # common.py
 import os, yaml
 import logging
+import time
+import glob
 
 def load_config():
     """
@@ -19,3 +21,28 @@ def get_logger(name):
     logger.addHandler(handler)
     logger.setLevel(logging.INFO)
     return logger
+
+def wait_download_complete(download_dir, ext, timeout=60, exclude=()):
+    """Wait until a file with extension *ext* shows up in *download_dir*.
+
+    The directory is checked about once per second.  Paths listed in
+    *exclude* (for example files that were already there before the
+    download started) are skipped.  When several files match, the most
+    recently modified one is returned.
+
+    Raises:
+        TimeoutError: no matching file appeared within *timeout* seconds.
+    """
+    pattern = os.path.join(download_dir, f"*.{ext.strip('.')}")
+    excluded = {os.path.abspath(path) for path in exclude}
+    deadline = time.monotonic() + timeout
+    while True:
+        files = [
+            path for path in glob.glob(pattern)
+            if os.path.abspath(path) not in excluded
+        ]
+        if files:
+            return max(files, key=os.path.getmtime)
+        if time.monotonic() >= deadline:
+            raise TimeoutError("다운로드 대기 시간 초과")
+        time.sleep(1)
diff --git a/lib/cookie_util.py b/lib/cookie_util.py
new file mode 100644
index 0000000..e77cc17
--- /dev/null
+++ b/lib/cookie_util.py
@@ -0,0 +1,30 @@
+import json
+import os
+
+COOKIE_PATH = os.path.join(os.path.dirname(__file__), '..', 'conf', 'cookies.json')
+
+def save_cookies(driver):
+    """Write the cookies of the current browser session to COOKIE_PATH."""
+    cookies = driver.get_cookies()
+    # The conf directory may not exist yet on a fresh checkout.
+    os.makedirs(os.path.dirname(COOKIE_PATH), exist_ok=True)
+    with open(COOKIE_PATH, 'w', encoding='utf-8') as f:
+        json.dump(cookies, f, indent=2)
+
+def load_cookies(driver, url='https://asp.upsolution.co.kr/'):
+    """Open *url* and add the cookies stored in COOKIE_PATH to *driver*.
+
+    Raises:
+        FileNotFoundError: the cookie file has not been saved yet.
+    """
+    if not os.path.exists(COOKIE_PATH):
+        raise FileNotFoundError("쿠키 파일이 존재하지 않습니다. 먼저 수동 로그인 후 쿠키를 저장해주세요.")
+
+    # Cookies can only be added for the domain of the page that is open.
+    driver.get(url)
+    with open(COOKIE_PATH, 'r', encoding='utf-8') as f:
+        cookies = json.load(f)
+    for cookie in cookies:
+        # The domain field can cause problems, so remove it.
+        cookie.pop('domain', None)
+        driver.add_cookie(cookie)
diff --git a/lib/save_cookie_once.py b/lib/save_cookie_once.py
new file mode 100644
index 0000000..ad49d77
--- /dev/null
+++ b/lib/save_cookie_once.py
@@ -0,0 +1,28 @@
+# Usage: python lib/save_cookie_once.py
+
+import os, sys
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+import time
+
+# Make the project root importable when this file is run as a script.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+from lib.cookie_util import save_cookies
+
+def main():
+    """Open the site, wait for a manual login, then save the cookies."""
+    options = Options()
+    driver = webdriver.Chrome(options=options)
+    try:
+        driver.get('https://asp.upsolution.co.kr/')
+        print("브라우저가 열렸습니다. 로그인 완료 후 Enter를 누르세요...")
+        input()
+        save_cookies(driver)
+        print("✅ 쿠키 저장 완료: conf/cookies.json")
+    finally:
+        # Close the browser even if saving the cookies fails.
+        driver.quit()
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/upsolution_crwaler.py b/lib/upsolution_crwaler.py
new file mode 100644
index 0000000..6f3e55f
--- /dev/null
+++ b/lib/upsolution_crwaler.py
@@ -0,0 +1,161 @@
+import os, sys
+import time
+import shutil
+import glob
+from datetime import datetime, timedelta
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+
+# Make the project root importable when this file is run as a script.
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+# Path settings
+BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+DOWNLOAD_DIR = os.path.join(BASE_DIR, 'data')
+
+from lib.common import load_config, get_logger, wait_download_complete
+
+
+# 💡 Set the date range manually here
+START_DATE_STR = '2021-05-11'
+END_DATE_STR = '2021-05-31'
+
+# Convert to datetime objects
+start_date = datetime.strptime(START_DATE_STR, '%Y-%m-%d')
+end_date = datetime.strptime(END_DATE_STR, '%Y-%m-%d')
+
+# Queries reporting more items than this get a narrower date range
+MAX_ITEMS = 20000
+
+# Logger setup
+logger = get_logger('upsolution')
+
+def login(driver, config):
+    """Open the login page and submit the credentials in config['upsolution']."""
+    logger.info("사이트 접속 및 로그인 시도")
+    driver.get("https://asp.upsolution.co.kr/")
+
+    driver.find_element(By.XPATH, '/html/body/form/div/div/div[2]/div[2]/input').send_keys(config['upsolution']['id'])
+    driver.find_element(By.XPATH, '/html/body/form/div/div/div[2]/div[3]/input').send_keys(config['upsolution']['code'])
+    driver.find_element(By.XPATH, '/html/body/form/div/div/div[2]/div[4]/input').send_keys(config['upsolution']['pw'])
+
+    login_btn_xpath = '/html/body/form/div/div/div[2]/div[6]/div/button'
+
+    # until() returns the element it waited for, so no second lookup is needed.
+    login_btn = WebDriverWait(driver, 10).until(
+        EC.element_to_be_clickable((By.XPATH, login_btn_xpath))
+    )
+    login_btn.click()
+
+    logger.info("로그인 버튼 클릭 완료")
+
+def set_date_range(driver, start_date, end_date):
+    """Open the sales report page, enter the date range and run the query."""
+    logger.info(f"날짜 설정: {start_date.strftime('%Y-%m-%d')} ~ {end_date.strftime('%Y-%m-%d')}")
+    driver.get("https://asp.upsolution.co.kr/SalesReport/SalesByReceiptDetail/100482")
+
+    start_input = WebDriverWait(driver, 10).until(
+        EC.presence_of_element_located((By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[1]/span/input'))
+    )
+    end_input = driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[2]/span/input')
+
+    start_input.clear()
+    start_input.send_keys(start_date.strftime('%Y-%m-%d'))
+
+    end_input.clear()
+    end_input.send_keys(end_date.strftime('%Y-%m-%d'))
+
+    # Click the search button
+    driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[1]').click()
+    logger.info("조회 버튼 클릭")
+    time.sleep(2)
+
+def get_item_count(driver):
+    """Return the item count shown on the page, or 0 if it cannot be read."""
+    try:
+        count_text = driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[3]/div/div[4]/span').text
+        count = int(count_text.split('of')[1].split('items')[0].strip().replace(',', ''))
+    except Exception:
+        # Best effort; unlike a bare except this lets Ctrl+C through.
+        logger.warning("아이템 수를 가져오지 못함, 0으로 처리")
+        return 0
+    logger.info(f"아이템 수: {count}")
+    return count
+
+def download_excel(driver):
+    """Click the Excel download button of the report page."""
+    logger.info("엑셀 다운로드 버튼 클릭")
+    driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[2]').click()
+
+def move_and_rename_file(download_dir, start_date, end_date, exclude=()):
+    """Rename the downloaded .xls file to sales_<start>_<end>.xls.
+
+    Paths in *exclude* are not considered to be the new download.
+    """
+    downloaded_file = wait_download_complete(download_dir, ".xls", exclude=exclude)
+    new_name = f"sales_{start_date.strftime('%Y%m%d')}_{end_date.strftime('%Y%m%d')}.xls"
+    target_path = os.path.join(download_dir, new_name)
+    shutil.move(downloaded_file, target_path)
+    logger.info(f"[DONE] 파일 저장 완료: {target_path}")
+
+def download_range(driver, start, end):
+    """Download the current query result and name it after start/end."""
+    # Files from earlier downloads stay in the same folder with the same
+    # extension; list them so they are not picked up as the new download.
+    existing = glob.glob(os.path.join(DOWNLOAD_DIR, '*.xls'))
+    download_excel(driver)
+    time.sleep(5)
+    move_and_rename_file(DOWNLOAD_DIR, start, end, exclude=existing)
+
+def main():
+    """Log in and download the sales report for start_date..end_date.
+
+    A query that reports more than MAX_ITEMS items is retried with a
+    narrower range; the remaining days are then fetched as further chunks.
+    """
+    config = load_config()
+    chrome_options = Options()
+    chrome_options.add_experimental_option('prefs', {
+        "download.default_directory": DOWNLOAD_DIR,
+        "download.prompt_for_download": False,
+        "safebrowsing.enabled": True
+    })
+
+    driver = webdriver.Chrome(options=chrome_options)
+    try:
+        login(driver, config)
+
+        start = start_date
+        # NOTE(review): assumes the end date of a query is inclusive, so
+        # the next chunk starts on the following day - confirm on the site.
+        while start <= end_date:
+            end = end_date
+            while True:
+                set_date_range(driver, start, end)
+                count = get_item_count(driver)
+                if count <= MAX_ITEMS:
+                    break
+
+                diff_days = (end - start).days
+                logger.warning(f"{count}건 초과 → 날짜 범위 축소 필요 ({diff_days}일)")
+                if diff_days > 10:
+                    end = start + timedelta(days=10)
+                elif diff_days > 5:
+                    end = start + timedelta(days=5)
+                elif diff_days > 1:
+                    end = start + timedelta(days=1)
+                else:
+                    logger.info("이미 1일 범위, 바로 다운로드 시도")
+                    break
+
+            download_range(driver, start, end)
+            start = end + timedelta(days=1)
+    finally:
+        driver.quit()
+        logger.info("브라우저 종료")
+
+if __name__ == "__main__":
+    main()