From 387aa2398f92670d6cb966687f5537c4531298ef Mon Sep 17 00:00:00 2001
From: KWON
Date: Wed, 9 Jul 2025 17:35:00 +0900
Subject: [PATCH] =?UTF-8?q?=EA=B0=80=EC=A4=91=EC=B9=98=20=EB=B0=98?=
 =?UTF-8?q?=EC=98=81=ED=95=98=EC=97=AC=20=EA=B3=84=EC=82=B0=ED=95=98?=
 =?UTF-8?q?=EB=8F=84=EB=A1=9D=20=EC=88=98=EC=A0=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the '---' line and the diff is ignored by git-am):
- future['pm25'] is no longer multiplied by pm25_weight a second time: the
  training frame it is copied from is already scaled by pm25_weight at the
  top of train_and_predict_prophet, so the extra factor squared the weight
  for future rows only.
- NOTE(review): if the regressors are registered with Prophet's default
  standardize='auto', constant per-column scaling may have little or no
  effect on the fit -- confirm against the add_regressor calls, which are
  outside this diff.
- NOTE(review): blank context lines and indentation were reconstructed from
  a whitespace-collapsed copy of this patch; verify it applies cleanly.

 lib/weekly_visitor_forecast_prophet.py | 57 +++++++++++++++++---------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/lib/weekly_visitor_forecast_prophet.py b/lib/weekly_visitor_forecast_prophet.py
index fab4c8b..e3d45ca 100644
--- a/lib/weekly_visitor_forecast_prophet.py
+++ b/lib/weekly_visitor_forecast_prophet.py
@@ -23,14 +23,22 @@ ga4 = db_schema.ga4_by_date
 weather = db_schema.weather
 air = db_schema.air
 
+# Load config
 config = load_config()
 serviceKey = config['DATA_API']['serviceKey']
+weight_cfg = config.get('FORECAST_WEIGHT', {})
+
 VISITOR_CA = tuple(config['POS']['VISITOR_CA'])
-
+visitor_forecast_multiplier = weight_cfg.get('visitor_forecast_multiplier', 1.0)
+minTa_weight = weight_cfg.get('minTa', 1.0)
+maxTa_weight = weight_cfg.get('maxTa', 1.0)
+sumRn_weight = weight_cfg.get('sumRn', 1.0)
+avgRhm_weight = weight_cfg.get('avgRhm', 1.0)
+pm25_weight = weight_cfg.get('pm25', 1.0)
+is_holiday_weight = weight_cfg.get('is_holiday', 1.0)
 
 
 # --- 데이터 로딩 및 전처리 ---
-
 def get_date_range(start_date, end_date):
     return pd.date_range(start_date, end_date).to_pydatetime().tolist()
 
@@ -140,7 +148,19 @@ def prepare_prophet_df(df):
     return prophet_df
 
 def train_and_predict_prophet(prophet_df, forecast_days=7):
+    # Apply weights: scale each weather/environment regressor column of the training data
+    prophet_df = prophet_df.copy()
+    prophet_df['minTa'] *= minTa_weight
+    prophet_df['maxTa'] *= maxTa_weight
+    prophet_df['sumRn'] *= sumRn_weight
+    prophet_df['avgRhm'] *= avgRhm_weight
+    prophet_df['pm25'] *= pm25_weight
+    prophet_df['is_holiday'] *= is_holiday_weight
+
+    # Call the existing fix_zero_visitors_weighted helper (as needed)
     prophet_df = fix_zero_visitors_weighted(prophet_df)
+
+    # Fill missing values
     prophet_df.fillna({
         'minTa': 0,
         'maxTa': 0,
@@ -160,22 +180,22 @@ def train_and_predict_prophet(prophet_df, forecast_days=7):
     m.fit(prophet_df)
 
     future = m.make_future_dataframe(periods=forecast_days)
-    future_dates = future['ds'].dt.strftime('%Y%m%d').tolist()
-    weekly_precip = get_weekly_precip(serviceKey) # {'YYYYMMDD': {'sumRn': x, 'minTa': y, 'maxTa': z, 'avgRhm': w}, ...}
+    # Fetch weather-forecast values for the future rows and apply the weights
+    weekly_precip = get_weekly_precip(serviceKey)
 
-    # 미래 데이터에 강수량 및 기온/습도 반영
     sumRn_list = []
     minTa_list = []
     maxTa_list = []
     avgRhm_list = []
 
-    for dt_str in future_dates:
+    for dt in future['ds']:
+        dt_str = dt.strftime('%Y%m%d')
         day_forecast = weekly_precip.get(dt_str, None)
         if day_forecast:
-            sumRn_list.append(float(day_forecast.get('sumRn', 0)))
-            minTa_list.append(float(day_forecast.get('minTa', 0)))
-            maxTa_list.append(float(day_forecast.get('maxTa', 0)))
-            avgRhm_list.append(float(day_forecast.get('avgRhm', 0)))
+            sumRn_list.append(float(day_forecast.get('sumRn', 0)) * sumRn_weight)
+            minTa_list.append(float(day_forecast.get('minTa', 0)) * minTa_weight)
+            maxTa_list.append(float(day_forecast.get('maxTa', 0)) * maxTa_weight)
+            avgRhm_list.append(float(day_forecast.get('avgRhm', 0)) * avgRhm_weight)
         else:
             sumRn_list.append(0)
             minTa_list.append(0)
@@ -187,16 +207,21 @@ def train_and_predict_prophet(prophet_df, forecast_days=7):
     future['maxTa'] = maxTa_list
     future['avgRhm'] = avgRhm_list
 
-    # pm25는 과거 마지막 데이터 복사
+    # pm25: reuse the last training row's value, which is already scaled by pm25_weight above
     last_known = prophet_df.iloc[-1]
     future['pm25'] = last_known['pm25']
 
-    # is_holiday 계산
-    future['is_holiday'] = future['ds'].apply(lambda d: 1 if is_korean_holiday(d.date()) else 0)
+    # Scale the holiday flag by its weight as well
+    future['is_holiday'] = future['ds'].apply(lambda d: 1 if is_korean_holiday(d.date()) else 0) * is_holiday_weight
 
     forecast = m.predict(future)
 
-    # 예측 결과 저장
+    # Scale the final visitor forecast by the multiplier
+    forecast['yhat'] = (forecast['yhat'] * visitor_forecast_multiplier).round().astype(int)
+    forecast['yhat_lower'] = (forecast['yhat_lower'] * visitor_forecast_multiplier).round().astype(int)
+    forecast['yhat_upper'] = (forecast['yhat_upper'] * visitor_forecast_multiplier).round().astype(int)
+
+    # Save to CSV and return
     output_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data', 'prophet_result.csv'))
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
 
@@ -204,13 +229,9 @@ def train_and_predict_prophet(prophet_df, forecast_days=7):
     df_to_save.columns = ['date', 'visitor_forecast']
     df_to_save['date'] = df_to_save['date'].dt.strftime("%Y-%m-%d")
 
-    # 오늘 날짜 이후 데이터만 필터링
     today_str = date.today().strftime("%Y-%m-%d")
     df_to_save = df_to_save[df_to_save['date'] >= today_str]
 
-    # visitor_forecast를 정수로 변환
-    df_to_save['visitor_forecast'] = df_to_save['visitor_forecast'].round().astype(int)
-
     df_to_save.to_csv(output_path, index=False)
 
     return forecast