데이터가 정상적으로 들어가지 않는 부분 수정, 영수증번호와 품명, 수량이 동일한 경우 중복값으로 인식시키고 덮어씌우도록 수정함(일부 데이터가 중복 데이터가 존재)

This commit is contained in:
2025-07-29 15:49:08 +09:00
parent bf44f13a51
commit ac54673983
2 changed files with 50 additions and 48 deletions

View File

@ -1,7 +1,7 @@
# db_schema.py # db_schema.py
import os import os
import yaml import yaml
from sqlalchemy import Table, Column, Date, Integer, String, Float, Text, MetaData, UniqueConstraint, DateTime, Time, PrimaryKeyConstraint from sqlalchemy import Table, Column, Date, Integer, String, Float, Text, MetaData, UniqueConstraint, DateTime, Time, PrimaryKeyConstraint, Index
from sqlalchemy.sql import func from sqlalchemy.sql import func
BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) BASE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
@ -218,8 +218,8 @@ pos_ups_billdata = Table(
get_full_table_name('pos_ups_billdata'), metadata, get_full_table_name('pos_ups_billdata'), metadata,
Column('sale_date', DateTime, nullable=False), Column('sale_date', DateTime, nullable=False),
Column('shop_name', String(100), nullable=False), Column('shop_name', String(100), nullable=False),
Column('pos_no', Integer, nullable=False), Column('pos_no', String(20), nullable=False),
Column('bill_no', Integer, nullable=False), Column('bill_no', String(20), nullable=False),
Column('product_cd', String(20), nullable=False), Column('product_cd', String(20), nullable=False),
Column('ca01', String(50)), Column('ca01', String(50)),
Column('ca02', String(50)), Column('ca02', String(50)),
@ -235,9 +235,18 @@ pos_ups_billdata = Table(
Column('vat_amt', Integer), Column('vat_amt', Integer),
Column('cash_receipt', Integer), Column('cash_receipt', Integer),
Column('card', Integer), Column('card', Integer),
PrimaryKeyConstraint('sale_date', 'shop_name', 'pos_no', 'bill_no', 'product_cd') # PrimaryKeyConstraint 생략
mysql_engine='InnoDB',
mysql_charset='utf8mb4'
) )
# 인덱스 추가
Index('idx_sale_shop_pos_product', pos_ups_billdata.c.sale_date, pos_ups_billdata.c.shop_name, pos_ups_billdata.c.pos_no, pos_ups_billdata.c.product_cd)
Index('idx_category', pos_ups_billdata.c.ca01, pos_ups_billdata.c.ca02, pos_ups_billdata.c.ca03)
Index('idx_product_barcode', pos_ups_billdata.c.product_name, pos_ups_billdata.c.barcode)
pos_shop_name = Table( pos_shop_name = Table(
get_full_table_name('pos_shop_name'), metadata, get_full_table_name('pos_shop_name'), metadata,
Column('shop_cd', String(20), primary_key=True, nullable=False), Column('shop_cd', String(20), primary_key=True, nullable=False),

View File

@ -38,76 +38,69 @@ def load_excel_data(filepath: str):
df = df.dropna(subset=required_cols) df = df.dropna(subset=required_cols)
return df return df
def process_file(filepath: str, engine, session, table):
batch_size = 1000
inserted, updated, errors = 0, 0, 0
count = 0
def process_file(filepath: str, engine, session, table):
try: try:
df = load_excel_data(filepath) df = load_excel_data(filepath)
logger.info(f"[LOAD] {os.path.basename(filepath)} - {len(df)}") logger.info(f"[LOAD] {os.path.basename(filepath)} - {len(df)}")
for _, row in df.iterrows(): inserted, updated, errors = 0, 0, 0
try:
def safe_int(val, default=0):
try:
return int(val)
except Exception:
return default
for idx, row in df.iterrows():
data = None # 미리 초기화
try:
data = { data = {
"sale_date": pd.to_datetime(row["매출일시"]), "sale_date": pd.to_datetime(row["매출일시"]),
"shop_name": str(row.get("매장명", "")).strip(), "shop_name": str(row["매장명"]).strip(),
"pos_no": safe_int(row.get("포스번호")), "pos_no": str(row["포스"]).strip(),
"bill_no": safe_int(row.get("영수증 번호")), "bill_no": str(row["영수증 번호"]).strip(),
"product_cd": str(row.get("품목", "")).strip(), "product_cd": str(row["품목"]).strip(),
"ca01": nan_to_none(row.get("대분류")), "product_name": str(row["품목명"]).strip(),
"ca02": nan_to_none(row.get("중분류")), "qty": int(row["수량"]),
"ca03": nan_to_none(row.get("소분류")),
"product_name": str(row.get("품목명", "")).strip(), "ca01": nan_to_none(row.get("대분류", None)),
"barcode": nan_to_none(row.get("바코드")), "ca02": nan_to_none(row.get("중분류", None)),
"amt": safe_int(row.get("단가")), "ca03": nan_to_none(row.get("소분류", None)),
"qty": safe_int(row.get("수량")), "barcode": nan_to_none(row.get("바코드", None)),
"tot_sale_amt": safe_int(row.get("주문 금액")), "amt": int(row.get("단가", 0)),
"dc_amt": safe_int(row.get("할인 금액")), "tot_sale_amt": int(row.get("주문 금액", 0)),
"dcm_sale_amt": safe_int(row.get("공급가액")), "dc_amt": int(row.get("할인 금액", 0)),
"net_amt": safe_int(row.get("세금")), "dcm_sale_amt": int(row.get("공급가액", 0)),
"vat_amt": safe_int(row.get("부가세")), "vat_amt": int(row.get("세금", 0)),
"cash_receipt": safe_int(row.get("현금영수증")), "net_amt": int(row.get("결제 금액", 0)),
"card": safe_int(row.get("카드")), "cash_receipt": int(row.get("현금영수증", 0)),
"card": int(row.get("카드", 0)),
} }
stmt = mysql_insert(table).values(**data) stmt = mysql_insert(table).values(**data)
update_stmt = stmt.on_duplicate_key_update({ update_cols = {col.name: stmt.inserted[col.name] for col in table.columns
col.name: stmt.inserted[col.name] if col.name not in ['sale_date', 'shop_name', 'pos_no', 'bill_no', 'product_cd']}
for col in table.columns upsert_stmt = stmt.on_duplicate_key_update(update_cols)
if col.name not in ['sale_date', 'shop_name', 'pos_no', 'bill_no', 'product_cd']
}) result = session.execute(upsert_stmt)
result = session.execute(update_stmt)
if result.rowcount == 1: if result.rowcount == 1:
inserted += 1 inserted += 1
elif result.rowcount == 2: elif result.rowcount == 2:
updated += 1 updated += 1
count += 1
if count % batch_size == 0:
session.commit()
logger.info(f"[COMMIT] {count}건 처리 완료")
except Exception as e: except Exception as e:
logger.warning(f"[ERROR:ROW] {e}") if data is not None:
logger.warning(f"[ERROR:ROW] {e} / 데이터: {data}")
else:
logger.warning(f"[ERROR:ROW] {e} / 데이터가 생성되지 않음")
errors += 1 errors += 1
if (idx + 1) % 1000 == 0:
logger.info(f"[PROGRESS] {idx + 1} / {len(df)} 처리 중...")
session.commit() session.commit()
logger.info(f"[DONE] 삽입: {inserted}, 업데이트: {updated}, 오류: {errors}") logger.info(f"[DONE] 삽입: {inserted}, 업데이트: {updated}, 오류: {errors}")
shutil.move(filepath, os.path.join(FINISH_DIR, os.path.basename(filepath))) shutil.move(filepath, os.path.join(FINISH_DIR, os.path.basename(filepath)))
logger.info(f"[MOVE] 완료: {os.path.join(FINISH_DIR, os.path.basename(filepath))}") logger.info(f"[MOVE] 완료: {os.path.join(FINISH_DIR, os.path.basename(filepath))}")
except SQLAlchemyError as e:
logger.error(f"[FAIL] DB 처리 중 오류 발생 - 롤백: {e}")
session.rollback()
except Exception as e: except Exception as e:
logger.error(f"[FAIL] 파일 처리 중 오류 발생 - {e}") logger.error(f"[FAIL] 파일 처리 중 오류 발생 - {e}")
session.rollback() session.rollback()