feat: initial commit - unified FGTools from static, weather, mattermost-noti

2025-12-31 09:56:37 +09:00
commit 4ff5dba4b1
29 changed files with 5786 additions and 0 deletions
--- a/services/analytics/__init__.py
+++ b/services/analytics/__init__.py
@ -0,0 +1,18 @@
+# ===================================================================
+# services/analytics/__init__.py
+# 분석 서비스 패키지 초기화
+# ===================================================================
+# GA4, 대기질, 방문객 예측 등 분석 관련 서비스를 제공합니다.
+# ===================================================================
+
+from .ga4 import GA4Client, GA4DataCollector
+from .air_quality import AirQualityCollector, get_air_quality
+from .visitor_forecast import VisitorForecaster
+
+# Public API of the package: exactly the names imported from the submodules above.
+__all__ = [
+    'GA4Client',
+    'GA4DataCollector',
+    'AirQualityCollector',
+    'get_air_quality',
+    'VisitorForecaster',
+]
--- a/services/analytics/air_quality.py
+++ b/services/analytics/air_quality.py
@ -0,0 +1,426 @@
+# ===================================================================
+# services/analytics/air_quality.py
+# 대기질 데이터 수집 서비스 모듈
+# ===================================================================
+# 한국환경공단 API를 통해 대기질(미세먼지) 데이터를 수집합니다.
+# 측정소별 PM2.5, PM10, SO2, CO, NO2, O3 데이터를 저장합니다.
+# ===================================================================
+"""
+대기질 데이터 수집 서비스 모듈
+
+한국환경공단 공공데이터 API를 통해 대기질 데이터를 수집합니다.
+측정소별 일평균 대기오염물질 농도를 조회할 수 있습니다.
+
+사용 예시:
+    from services.analytics.air_quality import AirQualityCollector, get_air_quality
+    
+    # 간단한 데이터 조회
+    data = get_air_quality(service_key, '운정', '20240101', '20240131')
+    
+    # 자동 데이터 수집 및 저장
+    collector = AirQualityCollector(engine, table)
+    collector.run()
+"""
+
+import os
+import json
+import traceback
+from datetime import datetime, timedelta, date
+from typing import Dict, List, Optional, Any
+
+from sqlalchemy import select, func, and_, Table
+from sqlalchemy.dialects.mysql import insert as mysql_insert
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.engine import Engine, Connection
+
+from core.logging_utils import get_logger
+from core.http_client import create_retry_session
+from core.config import get_config
+
+# Module-level logger named after this module.
+logger = get_logger(__name__)
+
+# Endpoint requested by get_air_quality() and AirQualityCollector.find_optimal_num_rows().
+# NOTE(review): this is a plain-HTTP URL, so the serviceKey query parameter is sent
+# unencrypted — confirm whether the HTTPS endpoint can be used instead.
+AIR_QUALITY_API_URL = "http://apis.data.go.kr/B552584/ArpltnStatsSvc/getMsrstnAcctoRDyrg"
+
+
+def get_air_quality(
+    service_key: str,
+    station_name: str,
+    start_date: str,
+    end_date: str,
+    num_of_rows: int = 100,
+    page_no: int = 1
+) -> List[Dict]:
+    """
+    Fetch one page of air-quality records for a monitoring station.
+
+    Sends a GET request to AIR_QUALITY_API_URL and returns the list found at
+    ``response.body.items`` in the JSON payload.
+
+    Args:
+        service_key: API service key, sent as the ``serviceKey`` parameter.
+        station_name: Station name, sent as ``msrstnName`` (e.g. '운정').
+        start_date: First day of the range, sent as ``inqBginDt`` (YYYYMMDD).
+        end_date: Last day of the range, sent as ``inqEndDt`` (YYYYMMDD).
+        num_of_rows: Rows per page, sent as ``numOfRows``.
+        page_no: Page number, sent as ``pageNo``.
+
+    Returns:
+        The list of item dicts. An empty list is returned when the payload
+        carries no items and also when the request or JSON decoding fails
+        (the failure is logged, never raised). Only the requested page is
+        returned; further pages are not fetched.
+
+    Item fields read by AirQualityCollector.parse_item_to_record (units as
+    stated by the original module documentation; not validated here):
+        - msurDt: measurement date (YYYY-MM-DD)
+        - pm25Value: PM2.5 concentration (㎍/㎥)
+        - pm10Value: PM10 concentration (㎍/㎥)
+        - so2Value: sulfur dioxide (ppm)
+        - coValue: carbon monoxide (ppm)
+        - no2Value: nitrogen dioxide (ppm)
+        - o3Value: ozone (ppm)
+    """
+    params = {
+        'serviceKey': service_key,
+        'returnType': 'json',
+        'numOfRows': str(num_of_rows),
+        'pageNo': str(page_no),
+        'inqBginDt': start_date,
+        'inqEndDt': end_date,
+        'msrstnName': station_name,
+    }
+
+    session = create_retry_session(retries=3)
+
+    try:
+        response = session.get(AIR_QUALITY_API_URL, params=params, timeout=20)
+        response.raise_for_status()
+        data = response.json()
+
+        # Any level may be absent or JSON null. Treat all of those as
+        # "no items" so that a null field is not mis-reported below as a
+        # failed API request (len(None) / None.get would raise).
+        body = ((data or {}).get('response') or {}).get('body') or {}
+        items = body.get('items') or []
+        logger.debug(f"대기질 데이터 조회: {station_name}, {len(items)}건")
+        return items
+
+    except Exception as e:
+        # Deliberate best-effort boundary: callers expect a list, not an exception.
+        logger.error(f"대기질 API 요청 실패: {e}")
+        traceback.print_exc()
+        return []
+    finally:
+        session.close()
+
+
+class AirQualityCollector:
+    """
+    Collect air-quality records per station and store them in a database table.
+
+    For every configured station the collector resumes from the day after the
+    latest stored date (or from ``start_date`` when nothing is stored) and
+    fetches data up to ``yesterday`` in chunks.
+
+    The instance owns an HTTP session; call ``close()`` (or use the instance
+    as a context manager) to release it when done.
+
+    Attributes:
+        api_key: API service key.
+        station_list: Station names to process.
+        engine: SQLAlchemy engine.
+        table: Target table; the code reads its ``date`` and ``station`` columns.
+        start_date: First date to collect when a station has no stored rows.
+        force_update: When True rows are upserted, otherwise existing rows are skipped.
+        debug: When True records are only logged, never written.
+        yesterday: The day before the instance was created; upper bound of collection.
+        session: HTTP session used by find_optimal_num_rows.
+    """
+
+    # Cache file location, relative to the directory three levels above this file
+    CACHE_FILE = 'cache/air_num_rows.json'
+
+    def __init__(
+        self,
+        engine: Engine,
+        table: Table,
+        api_key: Optional[str] = None,
+        station_list: Optional[List[str]] = None,
+        start_date: Optional[str] = None,
+        force_update: bool = False,
+        debug: bool = False
+    ):
+        """
+        Args:
+            engine: SQLAlchemy engine.
+            table: Target table.
+            api_key: API key; when falsy, ``service_key`` from the config is used.
+            station_list: Station names; when falsy, ``air_stations`` from the
+                config is used (default ['운정']).
+            start_date: Collection start date (YYYYMMDD); when falsy,
+                ``start_date`` from the config is used (default '20170101').
+            force_update: Overwrite existing rows.
+            debug: Debug mode (log instead of writing).
+        """
+        config = get_config()
+
+        self.api_key = api_key or config.data_api.get('service_key', '')
+        self.station_list = station_list or config.data_api.get('air_stations', ['운정'])
+
+        raw_start = start_date or config.data_api.get('start_date', '20170101')
+        self.start_date = datetime.strptime(raw_start, '%Y%m%d').date()
+
+        self.engine = engine
+        self.table = table
+        self.force_update = force_update
+        self.debug = debug
+
+        self.yesterday = (datetime.now() - timedelta(days=1)).date()
+        self.session = create_retry_session(retries=3)
+
+        # numOfRows probe results persisted between runs
+        self._num_rows_cache = self._load_cache()
+
+    def close(self) -> None:
+        """Close the HTTP session created in __init__."""
+        self.session.close()
+
+    def __enter__(self):
+        """Return the collector itself so it can be used in a ``with`` block."""
+        return self
+
+    def __exit__(self, exc_type, exc, tb) -> None:
+        """Close the HTTP session on leaving the ``with`` block."""
+        self.close()
+
+    def _cache_path(self) -> str:
+        """Return the absolute path of the cache file."""
+        base_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+        return os.path.join(base_dir, self.CACHE_FILE)
+
+    def _load_cache(self) -> Dict:
+        """
+        Load the numOfRows cache from disk.
+
+        Returns:
+            The cached mapping, or an empty dict when the file is missing,
+            unreadable, not valid JSON, or not a JSON object. Problems are
+            logged as warnings; loading is best-effort and never raises for
+            those cases.
+        """
+        cache_path = self._cache_path()
+        if not os.path.exists(cache_path):
+            return {}
+
+        try:
+            with open(cache_path, 'r', encoding='utf-8') as f:
+                cached = json.load(f)
+        except (OSError, ValueError) as e:
+            # ValueError covers JSON and text-decoding errors
+            logger.warning(f"캐시 로드 실패: {e}")
+            return {}
+
+        if not isinstance(cached, dict):
+            # The rest of the class indexes the cache by key
+            logger.warning(f"캐시 형식 오류, 무시: {cache_path}")
+            return {}
+        return cached
+
+    def _save_cache(self):
+        """Write the numOfRows cache to disk; failures are logged, not raised."""
+        try:
+            cache_path = self._cache_path()
+            os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+            with open(cache_path, 'w', encoding='utf-8') as f:
+                json.dump(self._num_rows_cache, f, ensure_ascii=False, indent=2)
+        except Exception as e:
+            logger.warning(f"캐시 저장 실패: {e}")
+
+    def _cached_num_rows(self, cache_key: str) -> Optional[int]:
+        """
+        Return the cached numOfRows for ``cache_key`` if it is usable.
+
+        A missing entry, a value that cannot be converted to int, or a value
+        below 1 yields None so that the caller probes the API again. A
+        non-positive value must never be returned because run() uses it as
+        the chunk length in days.
+        """
+        if cache_key not in self._num_rows_cache:
+            return None
+        try:
+            value = int(self._num_rows_cache[cache_key])
+        except (TypeError, ValueError):
+            return None
+        return value if value >= 1 else None
+
+    def get_latest_date(self, conn: Connection, station: str) -> Optional[date]:
+        """
+        Return the most recent stored date for a station.
+
+        Args:
+            conn: Database connection.
+            station: Station name.
+
+        Returns:
+            The maximum ``date`` stored for the station, or None when there
+            are no rows. None is also returned when the query fails (the
+            error is logged).
+        """
+        try:
+            stmt = select(func.max(self.table.c.date)).where(
+                self.table.c.station == station
+            )
+            return conn.execute(stmt).scalar()
+        except Exception as e:
+            logger.error(f"최근 날짜 조회 실패: {e}")
+            return None
+
+    @staticmethod
+    def _safe_float(val) -> Optional[float]:
+        """
+        Convert an API value to float.
+
+        None and the empty string mean "no value". Anything float() rejects
+        (e.g. '-') also yields None. A numeric zero is a valid reading and is
+        returned as 0.0 rather than being treated as missing.
+        """
+        if val is None or val == '':
+            return None
+        try:
+            return float(val)
+        except (ValueError, TypeError):
+            return None
+
+    def parse_item_to_record(self, item: Dict, station: str) -> Optional[Dict]:
+        """
+        Convert one API item into a database record.
+
+        Args:
+            item: API response item.
+            station: Station name stored in the record.
+
+        Returns:
+            A dict with keys date, station, pm25, pm10, so2, co, no2, o3, or
+            None when ``msurDt`` is missing or is not a YYYY-MM-DD date.
+        """
+        raw_date = item.get('msurDt')
+        try:
+            item_date = datetime.strptime(raw_date, '%Y-%m-%d').date()
+        except (TypeError, ValueError) as e:
+            logger.warning(f"날짜 파싱 오류: {raw_date} - {e}")
+            return None
+
+        to_float = self._safe_float
+        return {
+            'date': item_date,
+            'station': station,
+            'pm25': to_float(item.get('pm25Value')),
+            'pm10': to_float(item.get('pm10Value')),
+            'so2': to_float(item.get('so2Value')),
+            'co': to_float(item.get('coValue')),
+            'no2': to_float(item.get('no2Value')),
+            'o3': to_float(item.get('o3Value')),
+        }
+
+    def save_items_to_db(
+        self,
+        items: List[Dict],
+        conn: Connection,
+        station: str
+    ) -> int:
+        """
+        Store API items in the database.
+
+        In debug mode records are only logged. With ``force_update`` each
+        record is upserted; otherwise a record is inserted only when no row
+        with the same date and station exists. Failures on individual records
+        are logged and do not stop the loop.
+
+        Args:
+            items: API items to store.
+            conn: Database connection.
+            station: Station name.
+
+        Returns:
+            Number of records written (inserted or upserted).
+        """
+        saved_count = 0
+
+        for item in items:
+            data = self.parse_item_to_record(item, station)
+            if not data:
+                continue
+
+            item_date = data['date']
+
+            if self.debug:
+                logger.debug(f"[DEBUG] {item_date} [{station}] 저장 시도: {data}")
+                continue
+
+            try:
+                if self.force_update:
+                    # Upsert: insert, or overwrite the existing row on key clash
+                    stmt = mysql_insert(self.table).values(**data)
+                    stmt = stmt.on_duplicate_key_update(**data)
+                    conn.execute(stmt)
+                    logger.info(f"{item_date} [{station}] 저장/업데이트 완료")
+                else:
+                    # Insert only when the (date, station) pair is not stored yet
+                    sel = select(self.table.c.date).where(
+                        and_(
+                            self.table.c.date == item_date,
+                            self.table.c.station == station
+                        )
+                    )
+                    if conn.execute(sel).fetchone():
+                        logger.debug(f"{item_date} [{station}] 이미 존재, 생략")
+                        continue
+
+                    conn.execute(self.table.insert().values(**data))
+                    logger.info(f"{item_date} [{station}] 저장 완료")
+
+                saved_count += 1
+
+            except IntegrityError as e:
+                logger.error(f"중복 오류: {e}")
+            except Exception as e:
+                logger.error(f"저장 실패: {e}")
+                traceback.print_exc()
+
+        return saved_count
+
+    def find_optimal_num_rows(self, station_name: str, date_str: str) -> int:
+        """
+        Find a ``numOfRows`` value the API currently accepts.
+
+        Probes the API with 1000, 900, ..., 100 rows and returns the first
+        value for which the request succeeds and the body parses as JSON.
+        The result is cached on disk under the key "<station>:<date>".
+
+        NOTE(review): because the key contains the date, a later run that
+        starts from a different date will probe again and add another cache
+        entry — confirm whether keying by station alone was intended.
+
+        Args:
+            station_name: Station name.
+            date_str: Date used for the probe request (YYYYMMDD).
+
+        Returns:
+            The usable numOfRows value; 100 when every probe fails. A usable
+            cached value is returned as is.
+        """
+        cache_key = f"{station_name}:{date_str}"
+        cached_val = self._cached_num_rows(cache_key)
+        if cached_val is not None:
+            logger.debug(f"캐시된 numOfRows 사용: {cached_val}")
+            return cached_val
+
+        # Step down from the largest page size until a request succeeds
+        for num_rows in range(1000, 99, -100):
+            try:
+                params = {
+                    'serviceKey': self.api_key,
+                    'returnType': 'json',
+                    'numOfRows': str(num_rows),
+                    'pageNo': '1',
+                    'inqBginDt': date_str,
+                    'inqEndDt': date_str,
+                    'msrstnName': station_name,
+                }
+                response = self.session.get(AIR_QUALITY_API_URL, params=params, timeout=20)
+                response.raise_for_status()
+                response.json()  # only checks that the body is valid JSON
+            except Exception as e:
+                logger.warning(f"numOfRows={num_rows} 실패: {e}, 재시도...")
+                continue
+
+            # Success: remember the value for later runs
+            self._num_rows_cache[cache_key] = num_rows
+            self._save_cache()
+
+            logger.debug(f"numOfRows 최대값: {num_rows}")
+            return num_rows
+
+        logger.warning("기본값 100 사용")
+        return 100
+
+    def run(self) -> int:
+        """
+        Collect and store data for every station in ``station_list``.
+
+        All stations are processed inside a single ``engine.begin()``
+        transaction. The date range of each station is split into chunks of
+        ``numOfRows`` days — this presumably relies on the API returning one
+        row per day; verify against the API specification.
+
+        Returns:
+            Total number of records written.
+        """
+        total_saved = 0
+
+        with self.engine.begin() as conn:
+            for station_name in self.station_list:
+                logger.info(f"측정소 처리 시작: {station_name}")
+
+                # Resume after the latest stored date, or start from the configured date
+                latest_date = self.get_latest_date(conn, station_name)
+                if latest_date:
+                    start_date = latest_date + timedelta(days=1)
+                else:
+                    start_date = self.start_date
+
+                if start_date > self.yesterday:
+                    logger.info(f"{station_name}: 최신 데이터 존재 ({latest_date})")
+                    continue
+
+                optimal_rows = self.find_optimal_num_rows(
+                    station_name,
+                    start_date.strftime('%Y%m%d')
+                )
+                # The chunk must cover at least one day or the loop below never advances
+                chunk_days = max(1, optimal_rows)
+
+                current_start = start_date
+                while current_start <= self.yesterday:
+                    current_end = min(
+                        current_start + timedelta(days=chunk_days - 1),
+                        self.yesterday
+                    )
+
+                    logger.info(f"{station_name}: {current_start} ~ {current_end} 수집")
+
+                    items = get_air_quality(
+                        self.api_key,
+                        station_name,
+                        current_start.strftime('%Y%m%d'),
+                        current_end.strftime('%Y%m%d'),
+                        num_of_rows=optimal_rows
+                    )
+
+                    if items:
+                        total_saved += self.save_items_to_db(items, conn, station_name)
+                    else:
+                        # An empty result is also what a failed request returns.
+                        # Later runs resume after the newest stored date, so a
+                        # skipped chunk would stay missing — make it visible.
+                        logger.warning(
+                            f"{station_name}: {current_start} ~ {current_end} 수집된 데이터 없음"
+                        )
+
+                    current_start = current_end + timedelta(days=1)
+
+        logger.info(f"대기질 데이터 수집 완료: 총 {total_saved}건 저장")
+        return total_saved
--- a/services/analytics/ga4.py
+++ b/services/analytics/ga4.py
@ -0,0 +1,401 @@
+# ===================================================================
+# services/analytics/ga4.py
+# Google Analytics 4 데이터 수집 서비스 모듈
+# ===================================================================
+# GA4 API를 통해 웹사이트 방문자 데이터를 수집하고 DB에 저장합니다.
+# Data is collected sequentially in date-range chunks (this module does not run requests in parallel).
+# ===================================================================
+"""
+Google Analytics 4 데이터 수집 서비스 모듈
+
+GA4 Data API를 사용하여 웹사이트 분석 데이터를 수집합니다.
+일별 세션, 사용자, 이벤트 등 다양한 메트릭을 조회할 수 있습니다.
+
+사용 예시:
+    from services.analytics.ga4 import GA4Client, GA4DataCollector
+    
+    # 간단한 데이터 조회
+    client = GA4Client(property_id, service_account_file)
+    data = client.get_daily_sessions('2024-01-01', '2024-01-31')
+    
+    # 자동 데이터 수집 및 저장
+    collector = GA4DataCollector(engine, table)
+    collector.collect_and_save()
+"""
+
+import os
+import traceback
+from datetime import datetime, timedelta, date
+from typing import Dict, List, Optional, Tuple, Any
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+from dateutil.parser import parse as parse_date
+from sqlalchemy.dialects.mysql import insert as mysql_insert
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy import select, func, Table
+
+from core.logging_utils import get_logger
+from core.config import get_config
+
+# Module-level logger named after this module.
+logger = get_logger(__name__)
+
+# Optional dependency: import the GA4 client library if it is installed.
+# GA4_AVAILABLE records the outcome; GA4Client.__init__ raises ImportError
+# when it is False, so importing this module never fails on a missing package.
+try:
+    from google.analytics.data import BetaAnalyticsDataClient
+    from google.analytics.data_v1beta.types import (
+        DateRange, Dimension, Metric, RunReportRequest
+    )
+    GA4_AVAILABLE = True
+except ImportError:
+    GA4_AVAILABLE = False
+    logger.warning("google-analytics-data 패키지가 설치되지 않았습니다.")
+
+
+class GA4Client:
+    """
+    Thin client for the Google Analytics 4 Data API.
+
+    Attributes:
+        property_id: GA4 property ID, used as "properties/<property_id>".
+        client: BetaAnalyticsDataClient instance.
+        max_rows: Default ``limit`` sent with each report request.
+    """
+
+    def __init__(
+        self,
+        property_id: int,
+        service_account_file: Optional[str] = None,
+        max_rows: int = 10000
+    ):
+        """
+        Args:
+            property_id: GA4 property ID.
+            service_account_file: Path to the service-account JSON file. When
+                given, it is exported through the GOOGLE_APPLICATION_CREDENTIALS
+                environment variable, which affects the whole process.
+            max_rows: Default row limit per request.
+
+        Raises:
+            ImportError: The google-analytics-data package is not installed.
+            Exception: Whatever BetaAnalyticsDataClient() raises on failure
+                (logged, then re-raised).
+        """
+        if not GA4_AVAILABLE:
+            raise ImportError(
+                "GA4 기능을 사용하려면 google-analytics-data 패키지를 설치하세요: "
+                "pip install google-analytics-data"
+            )
+
+        self.property_id = property_id
+        self.max_rows = max_rows
+
+        # The client library picks the credentials up from this variable
+        if service_account_file:
+            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_file
+            logger.info(f"GA4 클라이언트 초기화 - 인증파일: {service_account_file}")
+
+        try:
+            self.client = BetaAnalyticsDataClient()
+            logger.info("GA4 클라이언트 초기화 완료")
+        except Exception as e:
+            logger.error(f"GA4 클라이언트 초기화 실패: {e}")
+            traceback.print_exc()
+            raise
+
+    def run_report(
+        self,
+        start_date: str,
+        end_date: str,
+        dimensions: List[str],
+        metrics: List[str],
+        limit: Optional[int] = None
+    ) -> Optional[Any]:
+        """
+        Run a single GA4 report request.
+
+        No pagination is performed: at most ``limit`` rows are returned. When
+        the response reports more matching rows than it contains, a warning
+        is logged so the truncation does not go unnoticed.
+
+        Args:
+            start_date: Start date (YYYY-MM-DD).
+            end_date: End date (YYYY-MM-DD).
+            dimensions: Dimension names (e.g. ['date', 'city']).
+            metrics: Metric names (e.g. ['sessions', 'activeUsers']).
+            limit: Row limit; ``max_rows`` is used when None.
+
+        Returns:
+            The RunReportResponse, or None when the request fails (the error
+            is logged, not raised).
+        """
+        if limit is None:
+            limit = self.max_rows
+
+        logger.debug(f"GA4 리포트 요청: {start_date} ~ {end_date}, dims={dimensions}, metrics={metrics}")
+
+        try:
+            request = RunReportRequest(
+                property=f"properties/{self.property_id}",
+                dimensions=[Dimension(name=d) for d in dimensions],
+                metrics=[Metric(name=m) for m in metrics],
+                date_ranges=[DateRange(start_date=start_date, end_date=end_date)],
+                limit=limit,
+            )
+            response = self.client.run_report(request)
+            received = len(response.rows)
+            logger.info(f"GA4 리포트 응답: {received}건")
+
+            # row_count is the total number of rows matching the query,
+            # independent of how many were returned in this response
+            total_rows = getattr(response, 'row_count', 0) or 0
+            if total_rows > received:
+                logger.warning(
+                    f"GA4 리포트 결과 일부만 수신: 전체 {total_rows}건 중 {received}건 (limit={limit})"
+                )
+            return response
+
+        except Exception as e:
+            logger.error(f"GA4 리포트 요청 실패: {e}")
+            traceback.print_exc()
+            return None
+
+    def get_daily_sessions(
+        self,
+        start_date: str,
+        end_date: str
+    ) -> List[Dict]:
+        """
+        Return sessions and active users per day.
+
+        Args:
+            start_date: Start date (YYYY-MM-DD).
+            end_date: End date (YYYY-MM-DD).
+
+        Returns:
+            One dict per response row:
+            [{'date': date, 'sessions': int, 'activeUsers': int}, ...].
+            Empty when the report request failed.
+        """
+        response = self.run_report(
+            start_date=start_date,
+            end_date=end_date,
+            dimensions=['date'],
+            metrics=['sessions', 'activeUsers']
+        )
+
+        if response is None:
+            return []
+
+        # The 'date' dimension is parsed as YYYYMMDD
+        return [
+            {
+                'date': datetime.strptime(row.dimension_values[0].value, "%Y%m%d").date(),
+                'sessions': int(row.metric_values[0].value),
+                'activeUsers': int(row.metric_values[1].value),
+            }
+            for row in response.rows
+        ]
+
+    def detect_max_rows(self) -> int:
+        """
+        Probe the API and return the number of rows one request delivered.
+
+        Requests date x sessions for 2024-01-01..2024-12-31 with limit=100000
+        and returns the number of rows in the response.
+
+        NOTE(review): with 'date' as the only dimension the probe presumably
+        yields at most one row per day, so the result reflects the data in
+        that year rather than an API limit — confirm the intent before
+        relying on it as a maximum.
+
+        Returns:
+            Row count of the probe response; 10000 when the probe fails.
+        """
+        try:
+            request = RunReportRequest(
+                property=f"properties/{self.property_id}",
+                dimensions=[Dimension(name="date")],
+                metrics=[Metric(name="sessions")],
+                date_ranges=[DateRange(start_date="2024-01-01", end_date="2024-12-31")],
+                limit=100000
+            )
+            response = self.client.run_report(request)
+            n_rows = len(response.rows)
+            logger.info(f"최대 행 수 감지: {n_rows}")
+            return n_rows
+        except Exception as e:
+            logger.warning(f"최대 행 수 감지 실패: {e}")
+            return 10000
+
+
+class GA4DataCollector:
+    """
+    Collect GA4 report data and store it in a database table.
+
+    Attributes:
+        client: GA4Client instance.
+        engine: SQLAlchemy engine.
+        table: Target table; the code reads its ``date`` column.
+        force_update: When True collection restarts from the configured start date.
+        debug: When True records are only logged, never written.
+        config_start_date: Earliest date to collect (config ``start_date``).
+        config_end_date: Latest date to collect (config ``end_date``).
+    """
+
+    def __init__(
+        self,
+        engine,
+        table: Table,
+        property_id: Optional[int] = None,
+        service_account_file: Optional[str] = None,
+        force_update: bool = False,
+        debug: bool = False
+    ):
+        """
+        Args:
+            engine: SQLAlchemy engine.
+            table: Target table.
+            property_id: GA4 property ID; read from the config when None.
+            service_account_file: Service-account file; read from the config when None.
+            force_update: Re-collect from the configured start date.
+            debug: Debug mode (log instead of writing).
+        """
+        config = get_config()
+
+        if property_id is None:
+            property_id = config.ga4.get('property_id')
+        if service_account_file is None:
+            service_account_file = config.ga4.get('service_account_file')
+
+        self.client = GA4Client(property_id, service_account_file)
+        self.engine = engine
+        self.table = table
+        self.force_update = force_update
+        self.debug = debug
+
+        # Collection window from the config (YYYYMMDD strings)
+        self.config_start_date = datetime.strptime(
+            config.ga4.get('start_date', '20170101'), '%Y%m%d'
+        ).date()
+        self.config_end_date = datetime.strptime(
+            config.ga4.get('end_date', '20991231'), '%Y%m%d'
+        ).date()
+
+    def get_latest_date_from_db(self) -> Optional[date]:
+        """Return the maximum ``date`` stored in the table, or None when it is empty."""
+        with self.engine.connect() as conn:
+            stmt = select(func.max(self.table.c.date))
+            result = conn.execute(stmt).scalar()
+            logger.info(f"DB 기준 마지막 저장 날짜: {result}")
+            return result
+
+    def determine_date_range(self) -> Tuple[date, date]:
+        """
+        Decide which date range to collect.
+
+        The end is yesterday, capped by the configured end date. The start is
+        the configured start date when ``force_update`` is set or the table is
+        empty, otherwise the day after the latest stored date.
+
+        Returns:
+            (start, end) tuple; start may be later than end when nothing is left to collect.
+        """
+        yesterday = datetime.now().date() - timedelta(days=1)
+        actual_end = min(yesterday, self.config_end_date)
+
+        if self.force_update:
+            return self.config_start_date, actual_end
+
+        latest_db_date = self.get_latest_date_from_db()
+        if latest_db_date is None:
+            return self.config_start_date, actual_end
+        return latest_db_date + timedelta(days=1), actual_end
+
+    @staticmethod
+    def _to_number(raw):
+        """
+        Convert a metric value to int, falling back to float.
+
+        Fractional metric values cannot be parsed by int(); they are returned
+        as float instead of being dropped. None is returned when the value is
+        not numeric at all.
+        """
+        for convert in (int, float):
+            try:
+                return convert(raw)
+            except (TypeError, ValueError):
+                continue
+        return None
+
+    def _row_to_record(
+        self,
+        row,
+        dimension_names: List[str],
+        metric_names: List[str]
+    ) -> Optional[Dict]:
+        """
+        Build one DB record from a response row.
+
+        Returns None when a dimension is missing or unparsable, so that a row
+        is never written without one of its dimension columns.
+        """
+        record = {}
+
+        for i, dim_name in enumerate(dimension_names):
+            try:
+                val = row.dimension_values[i].value
+                if dim_name == "date":
+                    # 8 characters are parsed as YYYYMMDD; other formats go through dateutil
+                    if len(val) == 8:
+                        val = datetime.strptime(val, "%Y%m%d").date()
+                    else:
+                        val = parse_date(val).date()
+            except (IndexError, ValueError) as e:
+                logger.warning(f"차원 처리 오류 ({dim_name}): {e}")
+                return None
+            record[dim_name] = val
+
+        for i, met_name in enumerate(metric_names):
+            try:
+                raw = row.metric_values[i].value
+            except IndexError:
+                raw = None
+            record[met_name] = self._to_number(raw)
+
+        return record
+
+    def save_response_to_db(
+        self,
+        response,
+        dimension_names: List[str],
+        metric_names: List[str]
+    ) -> int:
+        """
+        Upsert the rows of a GA4 response into the table.
+
+        All rows are written inside one ``engine.begin()`` transaction. In
+        debug mode records are only logged. Rows with an unusable dimension
+        are skipped; failures on individual rows are logged and do not stop
+        the loop.
+
+        Args:
+            response: GA4 RunReportResponse, or None.
+            dimension_names: Dimension names, in request order.
+            metric_names: Metric names, in request order.
+
+        Returns:
+            Number of rows written; 0 when ``response`` is None.
+        """
+        if response is None:
+            return 0
+
+        saved_count = 0
+
+        with self.engine.begin() as conn:
+            for row in response.rows:
+                data = self._row_to_record(row, dimension_names, metric_names)
+                if data is None:
+                    continue
+
+                if self.debug:
+                    logger.debug(f"[DEBUG] 저장할 데이터: {data}")
+                    continue
+
+                try:
+                    stmt = mysql_insert(self.table).values(**data)
+                    stmt = stmt.on_duplicate_key_update(**data)
+                    conn.execute(stmt)
+                    saved_count += 1
+                except IntegrityError as e:
+                    logger.error(f"중복 오류: {e}")
+                except Exception as e:
+                    logger.error(f"저장 실패: {e}")
+                    traceback.print_exc()
+
+        return saved_count
+
+    def collect_and_save(
+        self,
+        dimensions: Optional[List[str]] = None,
+        metrics: Optional[List[str]] = None,
+        chunk_days: int = 30
+    ) -> int:
+        """
+        Collect report data chunk by chunk and store it.
+
+        Args:
+            dimensions: Dimension names; ['date'] when None.
+            metrics: Metric names; ['sessions', 'activeUsers'] when None.
+            chunk_days: Number of days per request; must be at least 1.
+
+        Returns:
+            Total number of rows written.
+
+        Raises:
+            ValueError: ``chunk_days`` is smaller than 1 (the chunk loop
+                could never advance).
+        """
+        if chunk_days < 1:
+            raise ValueError(f"chunk_days must be >= 1, got {chunk_days}")
+
+        # Defaults are created per call so no list is shared between calls
+        dimensions = ['date'] if dimensions is None else list(dimensions)
+        metrics = ['sessions', 'activeUsers'] if metrics is None else list(metrics)
+
+        start_date, end_date = self.determine_date_range()
+
+        if start_date > end_date:
+            logger.info("최신 데이터가 이미 존재합니다.")
+            return 0
+
+        logger.info(f"GA4 데이터 수집 시작: {start_date} ~ {end_date}")
+
+        total_saved = 0
+        current_start = start_date
+
+        while current_start <= end_date:
+            current_end = min(current_start + timedelta(days=chunk_days - 1), end_date)
+
+            logger.info(f"청크 처리: {current_start} ~ {current_end}")
+
+            response = self.client.run_report(
+                start_date=current_start.strftime("%Y-%m-%d"),
+                end_date=current_end.strftime("%Y-%m-%d"),
+                dimensions=dimensions,
+                metrics=metrics
+            )
+
+            # run_report returns None on failure; an empty response simply saves 0 rows
+            if response is not None:
+                total_saved += self.save_response_to_db(response, dimensions, metrics)
+
+            current_start = current_end + timedelta(days=1)
+
+        logger.info(f"GA4 데이터 수집 완료: 총 {total_saved}건 저장")
+        return total_saved
--- a/services/analytics/visitor_forecast.py
+++ b/services/analytics/visitor_forecast.py
@ -0,0 +1,300 @@
+# ===================================================================
+# services/analytics/visitor_forecast.py
+# Visitor forecast service module
+# ===================================================================
+# Predicts visitor counts from weather, holidays, and historical data.
+# Provides a simple weight-based model.
+# NOTE(review): the original header also advertised a Prophet time-series
+# model, but no such implementation is visible in this module - confirm.
+# ===================================================================
+"""
+Visitor forecast service module.
+
+Adjusts a baseline visitor count (e.g. a historical average) using
+weather conditions and holiday/weekend status to estimate future
+visitor counts.
+
+Usage example:
+    from services.analytics.visitor_forecast import VisitorForecaster
+
+    # With no arguments the weights are loaded from the configuration.
+    forecaster = VisitorForecaster()
+    predictions = forecaster.predict_weekly(
+        base_visitors=1000,
+        weekly_weather={'20240101': {'min_temp': 2, 'max_temp': 9}},
+    )
+"""
+
+from datetime import datetime, timedelta, date
+from typing import Dict, List, Optional, Tuple, Any
+
+from core.logging_utils import get_logger
+from core.config import get_config
+
+logger = get_logger(__name__)
+
+
+class VisitorForecaster:
+    """
+    Weight-based visitor forecaster.
+
+    Starts from a baseline visitor count and adjusts it with a weather
+    penalty and a holiday/weekend bonus.
+
+    Attributes:
+        weights: Per-factor weights. The keys read by this class are
+            'min_temp', 'max_temp', 'precipitation', 'humidity', 'pm25'
+            and 'holiday'.
+        visitor_multiplier: Scaling factor applied to the combined
+            adjustment before it is applied to the baseline.
+    """
+
+    # Canonical weather key -> (alternate key, default used when neither
+    # key yields a value).
+    _WEATHER_FIELDS = {
+        'min_temp': ('minTa', 15),
+        'max_temp': ('maxTa', 25),
+        'precipitation': ('sumRn', 0),
+        'humidity': ('avgRhm', 50),
+    }
+
+    def __init__(
+        self,
+        weights: Optional[Dict] = None,
+        visitor_multiplier: float = 0.5
+    ):
+        """
+        Args:
+            weights: Forecast weights. When None, the weights are read from
+                ``get_config().forecast_weight`` with built-in fallbacks.
+            visitor_multiplier: Scaling factor for the adjustment. It is
+                only honoured when ``weights`` is given; when ``weights``
+                is None the configured 'visitor_multiplier' (fallback 0.5)
+                is used instead and this argument is ignored.
+        """
+        if weights is None:
+            config = get_config()
+            forecast_config = config.forecast_weight
+
+            self.weights = {
+                'min_temp': forecast_config.get('min_temp', 1.0),
+                'max_temp': forecast_config.get('max_temp', 1.0),
+                'precipitation': forecast_config.get('precipitation', 10.0),
+                'humidity': forecast_config.get('humidity', 1.0),
+                'pm25': forecast_config.get('pm25', 1.0),
+                'holiday': forecast_config.get('holiday', 20),
+            }
+            self.visitor_multiplier = forecast_config.get('visitor_multiplier', 0.5)
+        else:
+            self.weights = weights
+            self.visitor_multiplier = visitor_multiplier
+
+    @classmethod
+    def _extract_weather(cls, weather_data: Dict) -> Dict[str, Any]:
+        """Map a raw weather dict onto the canonical field names.
+
+        For each field the canonical key is tried first, then its alternate
+        key, then the default. A value of None is treated like a missing
+        key so that a null field does not break the numeric comparisons
+        performed later. 'pm25' has no alternate key or default and stays
+        None when absent.
+        """
+        resolved: Dict[str, Any] = {}
+        for key, (alias, default) in cls._WEATHER_FIELDS.items():
+            value = weather_data.get(key)
+            if value is None:
+                value = weather_data.get(alias)
+            if value is None:
+                value = default
+            resolved[key] = value
+        resolved['pm25'] = weather_data.get('pm25')
+        return resolved
+
+    def _weather_factor_impacts(
+        self,
+        min_temp: float,
+        max_temp: float,
+        precipitation: float,
+        humidity: float,
+        pm25: Optional[float] = None
+    ) -> Dict[str, float]:
+        """Return the penalty contributed by each weather factor.
+
+        This is the single source of truth for the tiered penalty rules;
+        both the total impact and the factor analysis are derived from it.
+        The 'pm25' entry is present only when ``pm25`` is not None. A
+        weight is looked up only when its rule actually fires.
+        """
+        impacts: Dict[str, float] = {
+            'min_temp': 0.0,
+            'max_temp': 0.0,
+            'precipitation': 0.0,
+            'humidity': 0.0,
+        }
+
+        # Low temperature: full weight below freezing, a reduced weight
+        # between 0 and 10 degrees, nothing from 10 degrees upward.
+        if min_temp < 0:
+            impacts['min_temp'] = abs(min_temp) * self.weights['min_temp']
+        elif min_temp < 10:
+            impacts['min_temp'] = (10 - min_temp) * self.weights['min_temp'] * 0.3
+
+        # High temperature: full weight above 35 degrees, half weight
+        # between 30 and 35, nothing up to 30.
+        if max_temp > 35:
+            impacts['max_temp'] = (max_temp - 35) * self.weights['max_temp']
+        elif max_temp > 30:
+            impacts['max_temp'] = (max_temp - 30) * self.weights['max_temp'] * 0.5
+
+        # Any precipitation is penalised proportionally to its amount.
+        if precipitation > 0:
+            impacts['precipitation'] = precipitation * self.weights['precipitation']
+
+        # Only humidity above 80 counts, at a tenth of the weight.
+        if humidity > 80:
+            impacts['humidity'] = (humidity - 80) * self.weights['humidity'] * 0.1
+
+        # Fine dust: two tiers, above 75 and above 35.
+        if pm25 is not None:
+            impacts['pm25'] = 0.0
+            if pm25 > 75:
+                impacts['pm25'] = (pm25 - 75) * self.weights['pm25'] * 0.5
+            elif pm25 > 35:
+                impacts['pm25'] = (pm25 - 35) * self.weights['pm25'] * 0.2
+
+        return impacts
+
+    def calculate_weather_impact(
+        self,
+        min_temp: float,
+        max_temp: float,
+        precipitation: float,
+        humidity: float,
+        pm25: Optional[float] = None
+    ) -> float:
+        """
+        Compute the total weather penalty.
+
+        The result is the sum of the per-factor penalties; a higher value
+        means a stronger reduction of the forecast.
+
+        Args:
+            min_temp: Minimum temperature (deg C).
+            max_temp: Maximum temperature (deg C).
+            precipitation: Precipitation (mm).
+            humidity: Humidity (%).
+            pm25: Fine dust (PM2.5) concentration; None skips this factor.
+
+        Returns:
+            Weather impact score (higher is more negative for visitors).
+        """
+        factors = self._weather_factor_impacts(
+            min_temp, max_temp, precipitation, humidity, pm25
+        )
+        # Plain left-to-right accumulation keeps the float result identical
+        # to adding the penalties one by one.
+        total = 0.0
+        for penalty in factors.values():
+            total += penalty
+        return total
+
+    def calculate_holiday_impact(self, is_holiday: bool, is_weekend: bool) -> float:
+        """
+        Compute the holiday/weekend bonus.
+
+        Args:
+            is_holiday: Whether the day is a public holiday.
+            is_weekend: Whether the day falls on a weekend.
+
+        Returns:
+            The full 'holiday' weight for a holiday, 70% of it for a
+            weekend that is not a holiday, otherwise 0.0.
+        """
+        if is_holiday:
+            return self.weights['holiday']
+        elif is_weekend:
+            return self.weights['holiday'] * 0.7
+        else:
+            return 0.0
+
+    def predict_visitors(
+        self,
+        base_visitors: float,
+        weather_data: Dict,
+        is_holiday: bool = False,
+        is_weekend: bool = False
+    ) -> float:
+        """
+        Predict the visitor count for a single day.
+
+        Args:
+            base_visitors: Baseline visitor count (e.g. a past average).
+            weather_data: Weather dictionary with the keys
+                - min_temp (or minTa): minimum temperature, default 15
+                - max_temp (or maxTa): maximum temperature, default 25
+                - precipitation (or sumRn): precipitation, default 0
+                - humidity (or avgRhm): humidity, default 50
+                - pm25: fine dust, optional
+                Missing or None values fall back to the alternate key and
+                then to the default.
+            is_holiday: Whether the day is a public holiday.
+            is_weekend: Whether the day falls on a weekend.
+
+        Returns:
+            Predicted visitor count, never below 0.
+        """
+        fields = self._extract_weather(weather_data)
+
+        weather_impact = self.calculate_weather_impact(
+            fields['min_temp'],
+            fields['max_temp'],
+            fields['precipitation'],
+            fields['humidity'],
+            fields['pm25']
+        )
+        holiday_impact = self.calculate_holiday_impact(is_holiday, is_weekend)
+
+        # Weather lowers the forecast, holidays raise it.
+        adjustment = holiday_impact - weather_impact
+
+        # The adjustment is a percentage of the baseline, scaled by the
+        # multiplier.
+        predicted = base_visitors * (1 + adjustment / 100 * self.visitor_multiplier)
+
+        # A large penalty must not produce a negative visitor count.
+        return max(0, predicted)
+
+    def predict_weekly(
+        self,
+        base_visitors: float,
+        weekly_weather: Dict[str, Dict],
+        holidays: Optional[List[date]] = None
+    ) -> Dict[str, float]:
+        """
+        Predict visitor counts for several days.
+
+        Args:
+            base_visitors: Baseline visitor count.
+            weekly_weather: Per-day weather data {YYYYMMDD: weather_data}.
+            holidays: Dates to treat as public holidays.
+
+        Returns:
+            Per-day forecast {YYYYMMDD: visitors}. Successful forecasts are
+            rounded; a day whose forecast fails is logged and falls back to
+            ``base_visitors`` unchanged.
+        """
+        # A set gives constant-time membership tests inside the loop.
+        holiday_dates = frozenset(holidays) if holidays else frozenset()
+
+        predictions = {}
+
+        for date_str, weather in weekly_weather.items():
+            try:
+                dt = datetime.strptime(date_str, '%Y%m%d').date()
+                is_holiday = dt in holiday_dates
+                is_weekend = dt.weekday() >= 5  # Saturday=5, Sunday=6
+
+                predicted = self.predict_visitors(
+                    base_visitors,
+                    weather,
+                    is_holiday,
+                    is_weekend
+                )
+
+                predictions[date_str] = round(predicted)
+
+                logger.debug(
+                    f"{date_str}: 기준={base_visitors}, "
+                    f"휴일={is_holiday}, 주말={is_weekend}, "
+                    f"예측={predicted:.0f}"
+                )
+
+            except Exception as e:
+                # Best effort: one bad day must not abort the whole batch.
+                logger.warning(f"예측 실패 ({date_str}): {e}")
+                predictions[date_str] = base_visitors
+
+        return predictions
+
+    def analyze_prediction_factors(
+        self,
+        weather_data: Dict,
+        is_holiday: bool = False,
+        is_weekend: bool = False
+    ) -> Dict[str, Any]:
+        """
+        Break a forecast down into its contributing factors.
+
+        The per-factor weather impacts come from the same rules as
+        ``calculate_weather_impact``, so they add up to
+        'total_weather_impact'.
+
+        Args:
+            weather_data: Weather dictionary (same keys as accepted by
+                ``predict_visitors``).
+            is_holiday: Whether the day is a public holiday.
+            is_weekend: Whether the day falls on a weekend.
+
+        Returns:
+            Dictionary with
+                - 'weather': {factor: {'value', 'impact'}} for min_temp,
+                  max_temp, precipitation, humidity, and pm25 when given
+                - 'holiday': {'is_holiday', 'is_weekend', 'impact'}
+                - 'total_weather_impact' and 'total_holiday_impact'
+        """
+        fields = self._extract_weather(weather_data)
+        factors = self._weather_factor_impacts(
+            fields['min_temp'],
+            fields['max_temp'],
+            fields['precipitation'],
+            fields['humidity'],
+            fields['pm25']
+        )
+        holiday_impact = self.calculate_holiday_impact(is_holiday, is_weekend)
+
+        return {
+            'weather': {
+                name: {'value': fields[name], 'impact': penalty}
+                for name, penalty in factors.items()
+            },
+            'holiday': {
+                'is_holiday': is_holiday,
+                'is_weekend': is_weekend,
+                'impact': holiday_impact
+            },
+            'total_weather_impact': self.calculate_weather_impact(
+                fields['min_temp'],
+                fields['max_temp'],
+                fields['precipitation'],
+                fields['humidity'],
+                fields['pm25']
+            ),
+            'total_holiday_impact': holiday_impact
+        }