upsolution pos 데이터 크롤링 - 클라우드플레어 캡차로 인해 실패
This commit is contained in:
@ -1,6 +1,8 @@
|
||||
# common.py
|
||||
import os, yaml
|
||||
import logging
|
||||
import time
|
||||
import glob
|
||||
|
||||
def load_config():
|
||||
"""
|
||||
@ -19,3 +21,11 @@ def get_logger(name):
|
||||
logger.addHandler(handler)
|
||||
logger.setLevel(logging.INFO)
|
||||
return logger
|
||||
|
||||
def wait_download_complete(download_dir, ext, timeout=60):
    """Poll ``download_dir`` until a file with extension ``ext`` shows up.

    The directory is checked roughly once per second, at most ``timeout``
    times. When several files match, the most recently modified one is
    returned: ``glob`` gives no ordering guarantee, and older files with the
    same extension may already be sitting in the directory.

    Args:
        download_dir: Directory to watch.
        ext: Extension to look for, with or without dots (".xls" or "xls").
        timeout: Maximum number of one-second polling rounds.

    Returns:
        Path of the newest matching file.

    Raises:
        TimeoutError: If no matching file appeared within ``timeout`` rounds.
    """
    pattern = os.path.join(download_dir, f"*.{ext.strip('.')}")
    for _ in range(timeout):
        matches = glob.glob(pattern)
        if matches:
            # Newest by modification time, not whatever glob lists first.
            return max(matches, key=os.path.getmtime)
        time.sleep(1)
    raise TimeoutError("다운로드 대기 시간 초과")
|
||||
21
lib/cookie_util.py
Normal file
21
lib/cookie_util.py
Normal file
@ -0,0 +1,21 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
# JSON file that save_cookies() writes and load_cookies() reads:
# <directory of this module>/../conf/cookies.json
COOKIE_PATH = os.path.join(os.path.dirname(__file__), '..', 'conf', 'cookies.json')
|
||||
|
||||
def save_cookies(driver):
    """Dump the browser session's cookies to ``COOKIE_PATH`` as JSON.

    Args:
        driver: Object exposing ``get_cookies()`` (a Selenium WebDriver in
            this project); its return value is serialized as-is.

    Raises:
        OSError: If the cookie file cannot be written.
    """
    cookies = driver.get_cookies()
    # open(..., 'w') does not create missing parent directories, so make sure
    # the conf directory exists before writing.
    os.makedirs(os.path.dirname(COOKIE_PATH), exist_ok=True)
    with open(COOKIE_PATH, 'w', encoding='utf-8') as f:
        json.dump(cookies, f, indent=2)
|
||||
|
||||
def load_cookies(driver, url='https://asp.upsolution.co.kr/'):
    """Restore the cookies stored in ``COOKIE_PATH`` into a browser session.

    Args:
        driver: Object exposing ``get(url)`` and ``add_cookie(cookie)``
            (a Selenium WebDriver in this project).
        url: Page opened before the cookies are added.

    Raises:
        FileNotFoundError: If the cookie file has not been created yet.
    """
    cookie_file_missing = not os.path.exists(COOKIE_PATH)
    if cookie_file_missing:
        raise FileNotFoundError("쿠키 파일이 존재하지 않습니다. 먼저 수동 로그인 후 쿠키를 저장해주세요.")

    # The target page is opened first, then the cookies are injected.
    driver.get(url)

    with open(COOKIE_PATH, 'r', encoding='utf-8') as cookie_file:
        stored_cookies = json.load(cookie_file)

    for entry in stored_cookies:
        # The 'domain' field can cause problems, so it is left out.
        without_domain = {key: value for key, value in entry.items() if key != 'domain'}
        driver.add_cookie(without_domain)
|
||||
21
lib/save_cookie_once.py
Normal file
21
lib/save_cookie_once.py
Normal file
@ -0,0 +1,21 @@
|
||||
# 실행: python lib/save_cookie_once.py
|
||||
|
||||
import os, sys
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
import time
|
||||
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
from lib.cookie_util import save_cookies
|
||||
|
||||
# One-off helper: open Chrome, let the operator log in by hand, then persist
# the session cookies through save_cookies().
options = Options()
driver = webdriver.Chrome(options=options)

try:
    driver.get('https://asp.upsolution.co.kr/')
    print("브라우저가 열렸습니다. 로그인 완료 후 Enter를 누르세요...")
    # Block until the operator confirms that the manual login is finished.
    input()
    save_cookies(driver)
    print("✅ 쿠키 저장 완료: conf/cookies.json")
finally:
    # Close the browser even if navigation, the prompt or the save step fails,
    # so no orphaned Chrome process is left behind.
    driver.quit()
|
||||
136
lib/upsolution_crwaler.py
Normal file
136
lib/upsolution_crwaler.py
Normal file
@ -0,0 +1,136 @@
|
||||
import os, sys
|
||||
import time
|
||||
import shutil
|
||||
from datetime import datetime, timedelta
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
|
||||
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
# Path setup: BASE_DIR is the parent of this module's directory, and
# DOWNLOAD_DIR is its "data" sub-directory.
BASE_DIR = os.path.abspath(
    os.path.join(os.path.dirname(__file__), '..')
)
DOWNLOAD_DIR = os.path.join(BASE_DIR, 'data')
|
||||
|
||||
from lib.common import load_config, get_logger, wait_download_complete
|
||||
|
||||
|
||||
# Date range to crawl: edit these two values by hand before running.
START_DATE_STR = '2021-05-11'
END_DATE_STR = '2021-05-31'

# datetime counterparts of the two strings above.
start_date, end_date = (
    datetime.strptime(raw_date, '%Y-%m-%d')
    for raw_date in (START_DATE_STR, END_DATE_STR)
)
|
||||
|
||||
# 로거 설정
|
||||
# Module-level logger named 'upsolution'; every function below logs through it.
logger = get_logger('upsolution')
|
||||
|
||||
def login(driver, config):
    """Open the login page, fill in the three credential fields and submit.

    Args:
        driver: Selenium WebDriver driving the browser.
        config: Configuration whose ``'upsolution'`` entry provides the
            ``'id'``, ``'code'`` and ``'pw'`` values typed into the form.

    Raises:
        selenium.common.exceptions.TimeoutException: If a form field or the
            login button does not become available within 10 seconds.
    """
    logger.info("사이트 접속 및 로그인 시도")
    driver.get("https://asp.upsolution.co.kr/")

    credentials = config['upsolution']
    wait = WebDriverWait(driver, 10)

    # (input XPath, config key) pairs, filled in the order they appear on the form.
    form_fields = (
        ('/html/body/form/div/div/div[2]/div[2]/input', 'id'),
        ('/html/body/form/div/div/div[2]/div[3]/input', 'code'),
        ('/html/body/form/div/div/div[2]/div[4]/input', 'pw'),
    )
    for field_xpath, config_key in form_fields:
        # Wait for the field rather than assuming the form is already rendered
        # the moment get() returns.
        field = wait.until(EC.presence_of_element_located((By.XPATH, field_xpath)))
        field.send_keys(credentials[config_key])

    login_btn_xpath = '/html/body/form/div/div/div[2]/div[6]/div/button'

    # until() returns the element the condition located; clicking it directly
    # avoids a second lookup between the wait and the click.
    login_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, login_btn_xpath))
    )
    login_button.click()

    logger.info("로그인 버튼 클릭 완료")
|
||||
|
||||
def set_date_range(driver, start_date, end_date):
    """Open the receipt-detail sales report and run a query for a date range.

    Args:
        driver: Selenium WebDriver driving the browser.
        start_date: Value with ``strftime`` typed into the first date input.
        end_date: Value with ``strftime`` typed into the second date input.
    """
    start_text = start_date.strftime('%Y-%m-%d')
    end_text = end_date.strftime('%Y-%m-%d')
    logger.info(f"날짜 설정: {start_text} ~ {end_text}")

    driver.get("https://asp.upsolution.co.kr/SalesReport/SalesByReceiptDetail/100482")

    start_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[1]/span/input'
    end_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[2]/span/input'
    search_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[1]'

    # Wait up to 10 seconds for the first date input to be present.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, start_xpath))
    )

    start_input = driver.find_element(By.XPATH, start_xpath)
    end_input = driver.find_element(By.XPATH, end_xpath)

    # Replace whatever each input currently holds with the requested date.
    for date_input, date_text in ((start_input, start_text), (end_input, end_text)):
        date_input.clear()
        date_input.send_keys(date_text)

    # Click the search button.
    search_button = driver.find_element(By.XPATH, search_xpath)
    search_button.click()
    logger.info("조회 버튼 클릭")

    # Fixed pause after the click before the caller reads the page.
    time.sleep(2)
|
||||
|
||||
def get_item_count(driver):
    """Read the total item count from the result grid's pager label.

    The number between the words "of" and "items" in the label text is
    parsed; thousands separators (commas) are removed first.

    Args:
        driver: Selenium WebDriver currently showing the report page.

    Returns:
        The parsed count, or 0 when the label cannot be found or parsed
        (best effort; the failure is logged with its traceback).
    """
    try:
        count_text = driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[3]/div/div[4]/span').text
        count = int(count_text.split('of')[1].split('items')[0].strip().replace(',', ''))
    except Exception:
        # Exception instead of a bare except: KeyboardInterrupt and SystemExit
        # still propagate, and exc_info records why the lookup failed.
        logger.warning("아이템 수를 가져오지 못함, 0으로 처리", exc_info=True)
        return 0

    logger.info(f"아이템 수: {count}")
    return count
|
||||
|
||||
def download_excel(driver):
    """Click the second link of the report toolbar (the Excel download button).

    Args:
        driver: Selenium WebDriver currently showing the report page.
    """
    logger.info("엑셀 다운로드 버튼 클릭")
    excel_button = driver.find_element(
        By.XPATH,
        '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[2]',
    )
    excel_button.click()
|
||||
|
||||
def move_and_rename_file(download_dir, start_date, end_date):
    """Wait for an ``.xls`` download and rename it after the queried range.

    The file is renamed in place to ``sales_<start>_<end>.xls`` (dates as
    YYYYMMDD) inside ``download_dir``.

    NOTE(review): renamed files stay in ``download_dir`` and also end in
    ``.xls``, so a later wait on the same directory may match them; confirm
    this cannot pick up a stale file.

    Args:
        download_dir: Directory the browser downloads into.
        start_date: First date of the range, used in the file name.
        end_date: Last date of the range, used in the file name.
    """
    source_path = wait_download_complete(download_dir, ".xls")

    start_stamp = start_date.strftime('%Y%m%d')
    end_stamp = end_date.strftime('%Y%m%d')
    target_path = os.path.join(download_dir, f"sales_{start_stamp}_{end_stamp}.xls")

    shutil.move(source_path, target_path)
    logger.info(f"[DONE] 파일 저장 완료: {target_path}")
|
||||
|
||||
def main():
    """Log in and download the sales report for the configured date range.

    The module-level ``start_date`` .. ``end_date`` range is fetched in
    consecutive windows. Whenever the site reports more than 20,000 items for
    a window, the window is shortened (to 10, then 5, then 1 day past its
    start) and queried again. Once the window fits, or cannot be shortened
    any further, the Excel export is downloaded and renamed, and the next
    window starts on the day after the previous one ended, so the whole
    configured range is covered rather than only its first window.

    NOTE(review): advancing by ``end + 1 day`` assumes the site's date filter
    is inclusive on both ends; confirm against the report page.
    """
    max_items = 20000  # threshold above which the date window is narrowed

    config = load_config()

    # Make sure the download directory exists before Chrome and the
    # rename step use it.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_experimental_option('prefs', {
        "download.default_directory": DOWNLOAD_DIR,
        "download.prompt_for_download": False,
        "safebrowsing.enabled": True
    })

    driver = webdriver.Chrome(options=chrome_options)
    try:
        login(driver, config)

        start = start_date
        while start <= end_date:
            end = end_date

            # Narrow [start, end] until the site reports an acceptable count.
            while True:
                set_date_range(driver, start, end)
                count = get_item_count(driver)

                if count > max_items:
                    diff_days = (end - start).days
                    logger.warning(f"{count}건 초과 → 날짜 범위 축소 필요 ({diff_days}일)")

                    # Largest step (10, 5 or 1 days) still shorter than the window.
                    shrink_to = next((days for days in (10, 5, 1) if diff_days > days), None)
                    if shrink_to is not None:
                        end = start + timedelta(days=shrink_to)
                        continue

                    logger.info("이미 1일 범위, 바로 다운로드 시도")

                download_excel(driver)
                time.sleep(5)
                move_and_rename_file(DOWNLOAD_DIR, start, end)
                break

            # Continue with the remainder of the configured range.
            start = end + timedelta(days=1)
    finally:
        driver.quit()
        logger.info("브라우저 종료")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user