데이터가 정상적으로 들어가지 않는 부분을 수정하고, 영수증 번호·품명·수량이 동일한 경우 중복값으로 인식하여 덮어쓰도록 수정함 (일부 데이터에 중복이 존재함)

This commit is contained in:
2025-07-29 15:49:08 +09:00
parent bf44f13a51
commit ac54673983
2 changed files with 50 additions and 48 deletions

View File

@ -38,76 +38,69 @@ def load_excel_data(filepath: str):
df = df.dropna(subset=required_cols)
return df
def _to_int(value, default=0):
    """Convert an optional numeric cell to ``int``.

    Blank spreadsheet cells reach us as NaN/None, for which ``int()`` raises;
    those map to *default* instead. Any other value goes through ``int()``
    unchanged, so genuinely malformed input still raises and the caller
    reports the row as an error.
    """
    if value is None or pd.isna(value):
        return default
    return int(value)


def process_file(filepath: str, engine, session, table):
    """Upsert every row of one sales spreadsheet into *table*, then archive it.

    Rows are loaded with ``load_excel_data`` and written one by one with a
    MySQL ``INSERT ... ON DUPLICATE KEY UPDATE``. A failing row is logged and
    counted but does not stop the file; everything is committed once at the
    end, after which the file is moved into ``FINISH_DIR``.

    Args:
        filepath: Path of the spreadsheet to import.
        engine: Not used by this function; kept for caller compatibility.
        session: SQLAlchemy session used to execute, commit and roll back.
        table: SQLAlchemy ``Table`` that receives the rows.

    Returns:
        None. File-level failures are logged and rolled back, not raised.
    """
    # Columns that are never overwritten when a duplicate key is hit.
    key_cols = ('sale_date', 'shop_name', 'pos_no', 'bill_no', 'product_cd')
    try:
        df = load_excel_data(filepath)
        total = len(df)
        logger.info(f"[LOAD] {os.path.basename(filepath)} - {total}")

        # Loop-invariant: names of the columns refreshed on a duplicate key.
        update_names = [col.name for col in table.columns
                        if col.name not in key_cols]

        inserted, updated, errors = 0, 0, 0
        # Count rows by position, not by index label: the loader drops
        # incomplete rows, so labels have gaps (and need not be integers).
        for position, (_, row) in enumerate(df.iterrows(), start=1):
            data = None  # lets the except block tell whether the dict was built
            try:
                data = {
                    "sale_date": pd.to_datetime(row["매출일시"]),
                    # Identifying columns stay strict: a missing or malformed
                    # value must fail the row rather than be defaulted.
                    "shop_name": str(row["매장명"]).strip(),
                    "pos_no": str(row["포스"]).strip(),
                    "bill_no": str(row["영수증 번호"]).strip(),
                    "product_cd": str(row["품목"]).strip(),
                    "product_name": str(row["품목명"]).strip(),
                    "qty": int(row["수량"]),
                    "ca01": nan_to_none(row.get("대분류", None)),
                    "ca02": nan_to_none(row.get("중분류", None)),
                    "ca03": nan_to_none(row.get("소분류", None)),
                    "barcode": nan_to_none(row.get("바코드", None)),
                    # Optional amounts: an absent column or a blank cell
                    # both mean 0.
                    "amt": _to_int(row.get("단가", 0)),
                    "tot_sale_amt": _to_int(row.get("주문 금액", 0)),
                    "dc_amt": _to_int(row.get("할인 금액", 0)),
                    "dcm_sale_amt": _to_int(row.get("공급가액", 0)),
                    "vat_amt": _to_int(row.get("세금", 0)),
                    "net_amt": _to_int(row.get("결제 금액", 0)),
                    "cash_receipt": _to_int(row.get("현금영수증", 0)),
                    "card": _to_int(row.get("카드", 0)),
                }

                stmt = mysql_insert(table).values(**data)
                upsert_stmt = stmt.on_duplicate_key_update(
                    {name: stmt.inserted[name] for name in update_names}
                )
                result = session.execute(upsert_stmt)

                # MySQL reports 1 affected row for an insert and 2 for an
                # update; 0 (existing row already identical) is not counted.
                if result.rowcount == 1:
                    inserted += 1
                elif result.rowcount == 2:
                    updated += 1

            except Exception as e:
                # Best effort per row: log, count, and carry on with the file.
                if data is not None:
                    logger.warning(f"[ERROR:ROW] {e} / 데이터: {data}")
                else:
                    logger.warning(f"[ERROR:ROW] {e} / 데이터가 생성되지 않음")
                errors += 1

            if position % 1000 == 0:
                logger.info(f"[PROGRESS] {position} / {total} 처리 중...")

        session.commit()
        logger.info(f"[DONE] 삽입: {inserted}, 업데이트: {updated}, 오류: {errors}")

        # Archive only after a successful commit so a failed import can be retried.
        destination = os.path.join(FINISH_DIR, os.path.basename(filepath))
        shutil.move(filepath, destination)
        logger.info(f"[MOVE] 완료: {destination}")

    except SQLAlchemyError as e:
        logger.error(f"[FAIL] DB 처리 중 오류 발생 - 롤백: {e}")
        session.rollback()
    except Exception as e:
        logger.error(f"[FAIL] 파일 처리 중 오류 발생 - {e}")
        session.rollback()