"""
新闻信息搜索和分析模块
使用Web Search工具获取热搜话题的背景信息
"""
from typing import List, Dict, Optional
from datetime import datetime
import json


class NewsAnalyzer:
    """新闻和背景信息分析器"""

    def __init__(self):
        """Create an analyzer with its default configuration."""
        # Presumably the number of search queries to issue per topic
        # (NOTE(review): confirm how this setting is meant to be consumed).
        self.search_queries_per_topic = 3

    async def search_topic_background(self, topic_title: str, web_search_func) -> Dict:
        """
        搜索话题背景信息

        Args:
            topic_title: 热搜话题标题
            web_search_func: Web Search工具函数

        Returns:
            话题背景信息字典
        """
        background_info = {
            'topic': topic_title,
            'search_time': datetime.now().isoformat(),
            'news_sources': [],
            'key_events': [],
            'sentiment_analysis': {},
            'market_context': {},
            'timeline': []
        }

        # 构建多个搜索查询以获取全面信息
        search_queries = self._build_search_queries(topic_title)

        for query in search_queries:
            try:
                # 使用Web Search工具进行搜索
                search_results = await self._perform_web_search(query, web_search_func)

                if search_results:
                    # 分析搜索结果
                    analyzed_results = self._analyze_search_results(search_results, query)
                    background_info['news_sources'].extend(analyzed_results['sources'])
                    background_info['key_events'].extend(analyzed_results['events'])

            except Exception as e:
                print(f"搜索查询 '{query}' 失败: {e}")
                continue

        # 整理和分析收集到的信息
        background_info = self._consolidate_background_info(background_info)

        return background_info

    def _build_search_queries(self, topic_title: str) -> List[str]:
        """
        Build the list of search queries for a topic.

        Three query templates are available (latest news, background/causes,
        market impact). The list is capped at ``self.search_queries_per_topic``
        when that attribute is a non-negative integer; with the default value
        of 3 all templates are returned.

        Args:
            topic_title: Topic title to embed in each query.

        Returns:
            List of query strings, in template order.
        """
        queries = [
            f"{topic_title} 最新消息 新闻",
            f"{topic_title} 背景原因 分析",
            f"{topic_title} 市场影响 产业链",
        ]

        # Honour the per-topic limit configured in __init__. getattr keeps the
        # method usable even when the attribute has not been set.
        limit = getattr(self, 'search_queries_per_topic', None)
        if isinstance(limit, int) and not isinstance(limit, bool) and limit >= 0:
            return queries[:limit]
        return queries

    async def _perform_web_search(self, query: str, web_search_func) -> Optional[List[Dict]]:
        """
        执行Web搜索

        Args:
            query: 搜索查询
            web_search_func: Web Search工具函数

        Returns:
            搜索结果列表
        """
        try:
            # 调用Web Search工具
            # 这里需要根据实际的Web Search工具接口进行调整
            results = web_search_func(query)
            return results
        except Exception as e:
            print(f"Web搜索执行失败: {e}")
            return None

    def _analyze_search_results(self, search_results: List[Dict], query: str) -> Dict:
        """
        Turn raw search hits into source records and key events.

        Args:
            search_results: Search hits to analyse.
            query: The query that produced the hits; used to label the result.

        Returns:
            Dict with ``sources`` (list of source records), ``events`` (list of
            extracted events) and ``query_type`` (label from ``_classify_query``).
        """
        query_kind = self._classify_query(query)
        found_sources: List[Dict] = []
        found_events: List[Dict] = []

        for hit in search_results:
            # A source record is only kept when extraction succeeded.
            details = self._extract_source_info(hit)
            if details:
                found_sources.append(details)

            # Each hit contributes zero or more events.
            found_events += self._extract_key_events(hit)

        return {
            'sources': found_sources,
            'events': found_events,
            'query_type': query_kind,
        }

    def _classify_query(self, query: str) -> str:
        """
        Label a search query by the marker words it contains.

        Args:
            query: Search query string.

        Returns:
            ``'latest_news'``, ``'background'`` or ``'market'`` for the first
            rule whose marker appears in the query, otherwise ``'general'``.
        """
        # Rules are checked in order, so earlier labels take precedence.
        rules = (
            ('latest_news', ('最新消息', '新闻')),
            ('background', ('背景', '原因')),
            ('market', ('市场', '产业')),
        )
        for label, markers in rules:
            if any(marker in query for marker in markers):
                return label
        return 'general'

    def _extract_source_info(self, search_result: Dict) -> Optional[Dict]:
        """
        Build a source record from a single search hit.

        Args:
            search_result: One search hit.

        Returns:
            Dict with ``title``, ``url``, ``source``, ``publish_time`` and
            ``summary`` (each defaulting to ``''`` when missing) plus
            ``relevance_score``; ``None`` if extraction raised.
        """
        copied_fields = ('title', 'url', 'source', 'publish_time', 'summary')
        try:
            record = {name: search_result.get(name, '') for name in copied_fields}
            record['relevance_score'] = self._calculate_relevance(search_result)
        except Exception as e:
            # Malformed hits are reported and skipped rather than propagated.
            print(f"提取来源信息失败: {e}")
            return None
        return record

    def _extract_key_events(self, search_result: Dict) -> List[Dict]:
        """
        Derive key events from a single search hit.

        The extraction is deliberately simple: a hit with a non-empty title
        yields exactly one event built from its title, summary and publish time.

        Args:
            search_result: One search hit.

        Returns:
            List containing at most one event dict; empty when the hit has no
            title or extraction raised.
        """
        try:
            headline = search_result.get('title', '')
            abstract = search_result.get('summary', '')

            if not headline:
                return []

            # Simple heuristic; NLP-based extraction could replace this later.
            return [{
                'event_title': headline,
                'event_description': abstract,
                'event_time': search_result.get('publish_time', ''),
                'event_type': self._classify_event_type(headline),
            }]
        except Exception as e:
            # Malformed hits are reported and contribute no events.
            print(f"提取关键事件失败: {e}")
            return []

    def _classify_event_type(self, title: str) -> str:
        """
        Classify an event by keywords found in its title.

        Args:
            title: Event title.

        Returns:
            The first category (in declaration order) with a keyword contained
            in the title, or ``'general'`` when none matches.
        """
        # Declaration order matters: the first matching category wins.
        categories = {
            'policy': ['政策', '法规', '规定', '通知'],
            'business': ['公司', '企业', '品牌', '产品'],
            'technology': ['技术', '研发', '创新', '科技'],
            'social': ['社会', '民生', '教育', '健康'],
            'entertainment': ['娱乐', '明星', '影视', '音乐'],
        }

        matches = (
            label
            for label, words in categories.items()
            if any(word in title for word in words)
        )
        return next(matches, 'general')

    def _calculate_relevance(self, search_result: Dict) -> float:
        """
        Compute a simple length-based relevance score for a search hit.

        The score is ``(len(title) + len(summary)) / 200`` capped at 1.0. A
        hit without a title scores 0.0. ``title`` / ``summary`` values that are
        missing or ``None`` are treated as empty strings, so hits with null
        fields are scored instead of raising.

        Args:
            search_result: One search hit.

        Returns:
            Relevance score in the range 0-1.
        """
        # `.get(key, '')` would still return None when the key is present with
        # a null value, so normalise falsy values to '' explicitly.
        title = search_result.get('title') or ''
        summary = search_result.get('summary') or ''

        if not title:
            return 0.0

        # Heuristic only: longer title + summary is taken as more informative.
        return min(1.0, (len(title) + len(summary)) / 200)

    def _consolidate_background_info(self, background_info: Dict) -> Dict:
        """
        Post-process the collected data into its final form.

        The dict is updated in place: sources are deduplicated and ordered by
        descending relevance, events are ordered, and the sentiment, market
        context and timeline sections are derived from the result.

        Args:
            background_info: Raw background information as collected.

        Returns:
            The same dict object, with all sections filled in.
        """
        # Deduplication returns a fresh list, so sorting it in place is safe.
        ranked_sources = self._deduplicate_sources(background_info['news_sources'])
        ranked_sources.sort(key=lambda item: item['relevance_score'], reverse=True)
        background_info['news_sources'] = ranked_sources

        ordered_events = self._organize_events(background_info['key_events'])
        background_info['key_events'] = ordered_events

        # The derived sections read the already-cleaned sources and events.
        sentiment = self._generate_sentiment_analysis(background_info)
        background_info['sentiment_analysis'] = sentiment

        market = self._generate_market_context(background_info)
        background_info['market_context'] = market

        background_info['timeline'] = self._build_timeline(background_info['key_events'])

        return background_info

    def _deduplicate_sources(self, sources: List[Dict]) -> List[Dict]:
        """
        Remove duplicate news sources, keyed by URL.

        The first source seen for each URL is kept and the original order is
        preserved. Sources with a missing or empty URL are left out.

        Args:
            sources: News source records.

        Returns:
            New list of unique source records.
        """
        # Dicts keep insertion order, so this doubles as the ordered result.
        first_by_url = {}
        for entry in sources:
            link = entry.get('url', '')
            if not link:
                continue
            # setdefault keeps the earliest entry for a given URL.
            first_by_url.setdefault(link, entry)

        return list(first_by_url.values())

    def _organize_events(self, events: List[Dict]) -> List[Dict]:
        """
        Order key events, newest first.

        Events are sorted in descending order by ``event_time`` and then by
        ``event_type``. Missing or ``None`` values are treated as empty strings
        so that events without a timestamp sort last instead of making the
        comparison between ``None`` and ``str`` raise ``TypeError``.

        Args:
            events: Event dicts.

        Returns:
            New sorted list; the input list is not modified.
        """
        def sort_key(event: Dict):
            # `or ''` also covers keys that are present with a None value.
            return (event.get('event_time') or '', event.get('event_type') or '')

        return sorted(events, key=sort_key, reverse=True)

    def _generate_sentiment_analysis(self, background_info: Dict) -> Dict:
        """
        Produce a keyword-based sentiment summary of the key events.

        Each event title is counted as positive if it contains a positive
        keyword, otherwise as negative if it contains a negative keyword,
        otherwise as neutral. The score is
        ``(positive - negative) / total + 0.5`` clamped to 0-1, or 0.5 when
        there are no events.

        Args:
            background_info: Background information holding ``key_events``.

        Returns:
            Dict with ``sentiment_score``, ``positive_events``,
            ``negative_events`` and ``neutral_events``.
        """
        upbeat_words = ['成功', '突破', '增长', '创新', '发展']
        downbeat_words = ['失败', '危机', '下降', '问题', '挑战']

        def mentions(text, words):
            # True when any of the keywords occurs in the text.
            return any(word in text for word in words)

        event_count = len(background_info['key_events'])
        upbeat = 0
        downbeat = 0
        for item in background_info['key_events']:
            headline = item.get('event_title', '')
            # Positive keywords take precedence over negative ones.
            if mentions(headline, upbeat_words):
                upbeat += 1
                continue
            if mentions(headline, downbeat_words):
                downbeat += 1

        if event_count:
            raw_score = (upbeat - downbeat) / event_count + 0.5
        else:
            raw_score = 0.5

        return {
            'sentiment_score': max(0, min(1, raw_score)),  # clamp to 0-1
            'positive_events': upbeat,
            'negative_events': downbeat,
            'neutral_events': event_count - upbeat - downbeat,
        }

    def _generate_market_context(self, background_info: Dict) -> Dict:
        """
        Summarise event and source statistics as a market context.

        Args:
            background_info: Background information holding ``key_events`` and
                ``news_sources``.

        Returns:
            Dict with ``event_type_distribution`` (count per event type),
            ``total_news_sources``, ``high_quality_sources`` (sources whose
            relevance score exceeds 0.7) and ``market_activity_level``
            (``'high'`` for more than 5 events, ``'medium'`` for more than 2,
            otherwise ``'low'``).
        """
        # Tally how many events fall into each type.
        distribution = {}
        for item in background_info['key_events']:
            kind = item.get('event_type', 'general')
            if kind in distribution:
                distribution[kind] += 1
            else:
                distribution[kind] = 1

        # Sources above the relevance threshold count as high quality.
        strong_sources = sum(
            1
            for entry in background_info['news_sources']
            if entry.get('relevance_score', 0) > 0.7
        )

        event_count = len(background_info['key_events'])
        if event_count > 5:
            activity = 'high'
        elif event_count > 2:
            activity = 'medium'
        else:
            activity = 'low'

        return {
            'event_type_distribution': distribution,
            'total_news_sources': len(background_info['news_sources']),
            'high_quality_sources': strong_sources,
            'market_activity_level': activity,
        }

    def _build_timeline(self, events: List[Dict]) -> List[Dict]:
        """
        Build an event timeline, newest first.

        Events are sorted by ``event_time`` in descending order. Missing or
        ``None`` timestamps are treated as empty strings, so undated events
        end up last (in their original relative order) instead of causing a
        ``TypeError`` when compared with string timestamps.

        Args:
            events: Event dicts.

        Returns:
            New list of the same events in timeline order.
        """
        return sorted(
            events,
            # `or ''` also covers keys that are present with a None value.
            key=lambda event: event.get('event_time') or '',
            reverse=True,
        )


# Module-level shared analyzer instance, used by analyze_topic_background below.
news_analyzer = NewsAnalyzer()


async def analyze_topic_background(topic_title: str, web_search_func) -> Dict:
    """
    Analyse a topic's background using the shared module-level analyzer.

    Convenience wrapper that delegates to
    ``news_analyzer.search_topic_background``.

    Args:
        topic_title: Title of the topic.
        web_search_func: Web Search tool callable.

    Returns:
        The background information dict produced by the analyzer.
    """
    pending = news_analyzer.search_topic_background(topic_title, web_search_func)
    return await pending