upsolution pos 데이터 크롤링 - 클라우드플레어 캡차로 인해 실패

This commit is contained in:
2025-07-29 11:33:16 +09:00
parent d539ffa626
commit 39046f20a5
4 changed files with 188 additions and 0 deletions

View File

@ -1,6 +1,8 @@
# common.py
import os, yaml
import logging
import time
import glob
def load_config():
"""
@ -19,3 +21,11 @@ def get_logger(name):
logger.addHandler(handler)
logger.setLevel(logging.INFO)
return logger
def wait_download_complete(download_dir, ext, timeout=60):
    """Poll a directory until a file with the given extension shows up.

    The directory is checked once per second, at most ``timeout`` times.

    Args:
        download_dir: Directory to watch.
        ext: File extension to look for; leading/trailing dots are ignored,
            so ``"xls"`` and ``".xls"`` are equivalent.
        timeout: Maximum number of one-second polls before giving up.

    Returns:
        Path of the most recently modified matching file.

    Raises:
        TimeoutError: If no matching file appears within ``timeout`` polls.
    """
    # The pattern does not change between polls, so build it once.
    pattern = os.path.join(download_dir, f"*.{ext.strip('.')}")
    for _ in range(timeout):
        files = glob.glob(pattern)
        if files:
            # glob order is arbitrary; prefer the newest file so that an
            # older file with the same extension is not mistaken for the
            # download that just finished.
            return max(files, key=os.path.getmtime)
        time.sleep(1)
    raise TimeoutError("다운로드 대기 시간 초과")

21
lib/cookie_util.py Normal file
View File

@ -0,0 +1,21 @@
import json
import os
# Location of the saved cookie file: ../conf/cookies.json relative to this module's directory.
COOKIE_PATH = os.path.join(os.path.dirname(__file__), os.pardir, 'conf', 'cookies.json')
def save_cookies(driver):
    """Write the driver's current cookies to COOKIE_PATH as indented JSON.

    Args:
        driver: Object exposing ``get_cookies()``; the visible caller passes
            a Selenium Chrome WebDriver.
    """
    # Fetch the cookies before opening the file for writing.
    session_cookies = driver.get_cookies()
    with open(COOKIE_PATH, 'w', encoding='utf-8') as out:
        out.write(json.dumps(session_cookies, indent=2))
def load_cookies(driver, url='https://asp.upsolution.co.kr/'):
    """Load the cookies stored in COOKIE_PATH into the given driver.

    Args:
        driver: Object exposing ``get(url)`` and ``add_cookie(cookie)``.
        url: Page opened before the cookies are added.

    Raises:
        FileNotFoundError: If COOKIE_PATH does not exist.
    """
    cookie_file_missing = not os.path.exists(COOKIE_PATH)
    if cookie_file_missing:
        raise FileNotFoundError("쿠키 파일이 존재하지 않습니다. 먼저 수동 로그인 후 쿠키를 저장해주세요.")

    # The site is opened first; presumably the browser only accepts cookies
    # for the page it is currently on (confirm against the Selenium docs).
    driver.get(url)

    with open(COOKIE_PATH, 'r', encoding='utf-8') as src:
        stored_cookies = json.loads(src.read())

    for entry in stored_cookies:
        # The domain field can cause problems, so it is removed.
        entry.pop('domain', None)
        driver.add_cookie(entry)

21
lib/save_cookie_once.py Normal file
View File

@ -0,0 +1,21 @@
# 실행: python lib/save_cookie_once.py
import os, sys
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from lib.cookie_util import save_cookies
# Open a browser, wait for the user to log in manually, then save the
# session cookies via save_cookies().
options = Options()
driver = webdriver.Chrome(options=options)
try:
    driver.get('https://asp.upsolution.co.kr/')
    print("브라우저가 열렸습니다. 로그인 완료 후 Enter를 누르세요...")
    input()
    save_cookies(driver)
    print("✅ 쿠키 저장 완료: conf/cookies.json")
finally:
    # Always close the browser, even if the user aborts at the prompt or
    # saving the cookies fails; otherwise the Chrome process is left running.
    driver.quit()

136
lib/upsolution_crwaler.py Normal file
View File

@ -0,0 +1,136 @@
import os, sys
import time
import shutil
from datetime import datetime, timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
# Path configuration: BASE_DIR is the directory one level above this file's
# directory; downloads go into its "data" subdirectory.
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
DOWNLOAD_DIR = os.path.join(BASE_DIR, 'data')
from lib.common import load_config, get_logger, wait_download_complete
# Set the date range to crawl manually here (YYYY-MM-DD).
START_DATE_STR = '2021-05-11'
END_DATE_STR = '2021-05-31'

# Convert both bounds to datetime objects.
start_date, end_date = (
    datetime.strptime(raw_date, '%Y-%m-%d')
    for raw_date in (START_DATE_STR, END_DATE_STR)
)

# Logger setup.
logger = get_logger('upsolution')
def login(driver, config):
    """Open the site and submit the login form.

    Args:
        driver: Selenium WebDriver controlling the browser.
        config: Mapping whose ``'upsolution'`` entry provides the ``'id'``,
            ``'code'`` and ``'pw'`` values typed into the form.
    """
    logger.info("사이트 접속 및 로그인 시도")
    driver.get("https://asp.upsolution.co.kr/")

    # (input XPath, config key) pairs, filled in this order.
    form_fields = (
        ('/html/body/form/div/div/div[2]/div[2]/input', 'id'),
        ('/html/body/form/div/div/div[2]/div[3]/input', 'code'),
        ('/html/body/form/div/div/div[2]/div[4]/input', 'pw'),
    )
    for field_xpath, config_key in form_fields:
        driver.find_element(By.XPATH, field_xpath).send_keys(config['upsolution'][config_key])

    # Wait up to 10 seconds for the login button to become clickable, then click it.
    login_btn_xpath = '/html/body/form/div/div/div[2]/div[6]/div/button'
    button_locator = (By.XPATH, login_btn_xpath)
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button_locator))
    driver.find_element(*button_locator).click()
    logger.info("로그인 버튼 클릭 완료")
def set_date_range(driver, start_date, end_date):
    """Open the sales report page, fill in the date range and click search.

    Args:
        driver: Selenium WebDriver controlling the browser.
        start_date: Object with ``strftime``; first date typed into the form.
        end_date: Object with ``strftime``; second date typed into the form.
    """
    start_text = start_date.strftime('%Y-%m-%d')
    end_text = end_date.strftime('%Y-%m-%d')
    logger.info(f"날짜 설정: {start_text} ~ {end_text}")

    driver.get("https://asp.upsolution.co.kr/SalesReport/SalesByReceiptDetail/100482")

    start_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[1]/span/input'
    end_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[2]/span/input'
    search_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[1]'

    # Wait up to 10 seconds for the start-date input to appear in the DOM.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, start_xpath))
    )
    start_input = driver.find_element(By.XPATH, start_xpath)
    end_input = driver.find_element(By.XPATH, end_xpath)

    # Replace whatever each input currently holds with the requested date.
    for date_input, date_text in ((start_input, start_text), (end_input, end_text)):
        date_input.clear()
        date_input.send_keys(date_text)

    # Click search.
    driver.find_element(By.XPATH, search_xpath).click()
    logger.info("조회 버튼 클릭")
    # Fixed pause after the click; presumably gives the result grid time to refresh.
    time.sleep(2)
def get_item_count(driver):
    """Read the total item count from the pager label on the report page.

    The label text is parsed by taking what sits between ``of`` and
    ``items`` and removing thousands separators.

    Args:
        driver: Selenium WebDriver positioned on the report page.

    Returns:
        The parsed item count, or 0 when the label cannot be found or parsed.
    """
    try:
        count_text = driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[3]/div/div[4]/span').text
        count = int(count_text.split('of')[1].split('items')[0].strip().replace(',', ''))
    except Exception:
        # Best effort: any lookup or parse failure is treated as "no items".
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
        # SystemExit still propagate.
        logger.warning("아이템 수를 가져오지 못함, 0으로 처리")
        return 0
    logger.info(f"아이템 수: {count}")
    return count
def download_excel(driver):
    """Click the Excel download link on the report page.

    Args:
        driver: Selenium WebDriver positioned on the report page.
    """
    logger.info("엑셀 다운로드 버튼 클릭")
    excel_link_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[2]'
    excel_link = driver.find_element(By.XPATH, excel_link_xpath)
    excel_link.click()
def move_and_rename_file(download_dir, start_date, end_date):
    """Wait for an .xls file in ``download_dir`` and rename it after the date range.

    The file is moved to ``sales_<start>_<end>.xls`` (dates as YYYYMMDD)
    inside the same directory.

    Args:
        download_dir: Directory where the download lands and where the
            renamed file is stored.
        start_date: Object with ``strftime``; used for the first date stamp.
        end_date: Object with ``strftime``; used for the second date stamp.
    """
    source_path = wait_download_complete(download_dir, ".xls")
    start_stamp = start_date.strftime('%Y%m%d')
    end_stamp = end_date.strftime('%Y%m%d')
    target_path = os.path.join(download_dir, f"sales_{start_stamp}_{end_stamp}.xls")
    shutil.move(source_path, target_path)
    logger.info(f"[DONE] 파일 저장 완료: {target_path}")
def main():
    """Log in, find a date range small enough to export, and download it.

    Starting from the module-level ``start_date``/``end_date``, the end date
    is pulled in to 10, 5 and then 1 day(s) after the start for as long as
    the reported item count exceeds 20000. One Excel file is then
    downloaded and renamed, and the browser is closed.
    """
    config = load_config()

    # Make Chrome save downloads into DOWNLOAD_DIR without prompting.
    download_prefs = {
        "download.default_directory": DOWNLOAD_DIR,
        "download.prompt_for_download": False,
        "safebrowsing.enabled": True
    }
    chrome_options = Options()
    chrome_options.add_experimental_option('prefs', download_prefs)
    driver = webdriver.Chrome(options=chrome_options)

    try:
        login(driver, config)
        start, end = start_date, end_date

        while True:
            set_date_range(driver, start, end)
            count = get_item_count(driver)

            if count > 20000:
                diff_days = (end - start).days
                logger.warning(f"{count}건 초과 → 날짜 범위 축소 필요 ({diff_days}일)")

                # Largest window (in days) that is still smaller than the current one.
                narrower = next((days for days in (10, 5, 1) if diff_days > days), None)
                if narrower is not None:
                    end = start + timedelta(days=narrower)
                    continue

                # Cannot shrink further: download anyway.
                logger.info("이미 1일 범위, 바로 다운로드 시도")

            download_excel(driver)
            # Fixed pause before looking for the downloaded file.
            time.sleep(5)
            move_and_rename_file(DOWNLOAD_DIR, start, end)
            break
    finally:
        driver.quit()
        logger.info("브라우저 종료")
if __name__ == "__main__":
main()