# File listing header: static/lib/upsolution_crwaler.py (136 lines, 5.1 KiB, Python)

import os, sys
import time
import shutil
from datetime import datetime, timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Add the parent of this file's directory to the import path so that the
# `lib.common` import below can be resolved.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
# Path setup
# BASE_DIR is the parent of the directory containing this file; DOWNLOAD_DIR
# (BASE_DIR/data) is passed to Chrome as its download directory in main().
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
DOWNLOAD_DIR = os.path.join(BASE_DIR, 'data')
from lib.common import load_config, get_logger, wait_download_complete
# Set the date range manually here (YYYY-MM-DD strings).
START_DATE_STR = '2021-05-11'
END_DATE_STR = '2021-05-31'
# Convert to datetime objects
start_date = datetime.strptime(START_DATE_STR, '%Y-%m-%d')
end_date = datetime.strptime(END_DATE_STR, '%Y-%m-%d')
# Logger setup
logger = get_logger('upsolution')
def login(driver, config):
    """Open the Upsolution ASP site and submit the login form.

    Args:
        driver: Selenium WebDriver used to drive the browser.
        config: Mapping whose ``'upsolution'`` entry supplies the ``'id'``,
            ``'code'`` and ``'pw'`` values typed into the form.
    """
    logger.info("사이트 접속 및 로그인 시도")
    driver.get("https://asp.upsolution.co.kr/")

    # Form inputs in the order they are filled in, each paired with the
    # config key whose value is typed into it.
    form_fields = (
        ('/html/body/form/div/div/div[2]/div[2]/input', 'id'),
        ('/html/body/form/div/div/div[2]/div[3]/input', 'code'),
        ('/html/body/form/div/div/div[2]/div[4]/input', 'pw'),
    )
    for field_xpath, config_key in form_fields:
        field = driver.find_element(By.XPATH, field_xpath)
        field.send_keys(config['upsolution'][config_key])

    # Wait (up to 10 seconds) until the login button is clickable, then click it.
    button_locator = (By.XPATH, '/html/body/form/div/div/div[2]/div[6]/div/button')
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button_locator))
    driver.find_element(*button_locator).click()
    logger.info("로그인 버튼 클릭 완료")
def set_date_range(driver, start_date, end_date):
    """Open the receipt-detail sales report and query it for a date range.

    Args:
        driver: Selenium WebDriver, already logged in.
        start_date: First day of the range; formatted as ``YYYY-MM-DD``.
        end_date: Last day of the range; formatted as ``YYYY-MM-DD``.
    """
    start_text = start_date.strftime('%Y-%m-%d')
    end_text = end_date.strftime('%Y-%m-%d')
    logger.info(f"날짜 설정: {start_text} ~ {end_text}")

    driver.get("https://asp.upsolution.co.kr/SalesReport/SalesByReceiptDetail/100482")

    start_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[1]/span/input'
    end_xpath = '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/dl[1]/dd/span[2]/span/input'

    # Only the start-date input is awaited (up to 10 seconds); the end-date
    # input is looked up immediately afterwards.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, start_xpath))
    )
    start_input = driver.find_element(By.XPATH, start_xpath)
    end_input = driver.find_element(By.XPATH, end_xpath)

    # Replace whatever each input currently holds with the new date.
    for date_input, date_text in ((start_input, start_text), (end_input, end_text)):
        date_input.clear()
        date_input.send_keys(date_text)

    # Click the search button
    search_button = driver.find_element(
        By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[1]'
    )
    search_button.click()
    logger.info("조회 버튼 클릭")
    # Fixed pause after the click, presumably to let the results load.
    time.sleep(2)
def get_item_count(driver):
    """Read the total number of items from the report's pager text.

    The pager text is parsed as ``"... of <N> items"``, where ``<N>`` may
    contain thousands separators (commas).

    Args:
        driver: Selenium WebDriver currently showing the report results.

    Returns:
        The item count as an int, or 0 when the pager element cannot be read
        or its text does not have the expected form (best-effort fallback).
    """
    # Imported locally so this function does not depend on changes to the
    # file-level import block.
    from selenium.common.exceptions import WebDriverException

    try:
        count_text = driver.find_element(By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[3]/div/div[4]/span').text
        count = int(count_text.split('of')[1].split('items')[0].strip().replace(',', ''))
    except (WebDriverException, IndexError, ValueError):
        # Only lookup and parse failures fall back to 0. KeyboardInterrupt,
        # SystemExit and programming errors are no longer swallowed, as they
        # were with the previous bare ``except:``.
        logger.warning("아이템 수를 가져오지 못함, 0으로 처리")
        return 0
    logger.info(f"아이템 수: {count}")
    return count
def download_excel(driver):
    """Click the Excel download link on the report page.

    Args:
        driver: Selenium WebDriver currently showing the report results.
    """
    logger.info("엑셀 다운로드 버튼 클릭")
    excel_link = driver.find_element(
        By.XPATH, '/html/body/div[6]/div/div/div[11]/div[2]/div[2]/div/div/div/a[2]'
    )
    excel_link.click()
def move_and_rename_file(download_dir, start_date, end_date):
    """Rename the downloaded report to ``sales_<start>_<end>.xls``.

    Args:
        download_dir: Directory the file was downloaded to; the renamed file
            stays in this directory.
        start_date: Start of the exported range (``YYYYMMDD`` in the name).
        end_date: End of the exported range (``YYYYMMDD`` in the name).
    """
    # NOTE(review): wait_download_complete comes from lib.common; presumably it
    # blocks until a ".xls" download finishes and returns its path. Confirm.
    source_path = wait_download_complete(download_dir, ".xls")

    stamp_from, stamp_to = (day.strftime('%Y%m%d') for day in (start_date, end_date))
    target_path = os.path.join(download_dir, f"sales_{stamp_from}_{stamp_to}.xls")

    shutil.move(source_path, target_path)
    logger.info(f"[DONE] 파일 저장 완료: {target_path}")
def main():
    """Log in, shrink the date range until it is exportable, and download it.

    Starting from the module-level ``start_date``/``end_date``, the report is
    queried and its item count checked. While the count exceeds 20000 the end
    date is pulled in to ``start + 10``, ``start + 5`` and finally
    ``start + 1`` days; once the count fits, or the range cannot be shrunk
    further, the Excel export is downloaded and renamed.

    Only ONE range is downloaded per run. If the range had to be narrowed, the
    remainder of the requested period is not fetched; this is reported with a
    warning so the operator can rerun with a later start date.

    The browser is always closed, even when a step raises.
    """
    config = load_config()

    # Make sure the download target exists before Chrome is pointed at it.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_experimental_option('prefs', {
        "download.default_directory": DOWNLOAD_DIR,
        "download.prompt_for_download": False,
        "safebrowsing.enabled": True,
    })
    driver = webdriver.Chrome(options=chrome_options)
    try:
        login(driver, config)
        start = start_date
        end = end_date
        while True:
            set_date_range(driver, start, end)
            count = get_item_count(driver)
            # NOTE(review): 20000 is presumably the site's export limit. Confirm.
            if count > 20000:
                diff_days = (end - start).days
                logger.warning(f"{count}건 초과 → 날짜 범위 축소 필요 ({diff_days}일)")
                # Pick the largest step (10, 5, 1 days) that still shortens the range.
                shrink_to = next((days for days in (10, 5, 1) if diff_days > days), None)
                if shrink_to is not None:
                    end = start + timedelta(days=shrink_to)
                    continue
                # The range cannot be narrowed any further: download it anyway.
                logger.info("이미 1일 범위, 바로 다운로드 시도")
            download_excel(driver)
            # Fixed pause before looking for the downloaded file.
            time.sleep(5)
            move_and_rename_file(DOWNLOAD_DIR, start, end)
            break

        if end < end_date:
            # The range was narrowed, so part of the requested period was not
            # exported; say so explicitly instead of dropping it silently.
            logger.warning(
                f"날짜 범위가 축소되어 {end.strftime('%Y-%m-%d')} 이후 ~ "
                f"{end_date.strftime('%Y-%m-%d')} 구간은 다운로드되지 않음"
            )
    finally:
        driver.quit()
        logger.info("브라우저 종료")
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()