Skip to content

新闻模块好像不太能用,改了一下 #48

@HmmySunshine

Description

@HmmySunshine

"""
修复股票新闻数据获取功能的补丁

此模块提供了修复版的stock_news函数,以适应东方财富网站API的变更
"""

import re
import pandas as pd
import json
import requests
import time
from datetime import datetime

def _unwrap_jsonp(text):
    """Return the JSON payload inside a JSONP response such as ``cb({...});``.

    A body that already starts with ``{`` or ``[`` is treated as plain JSON
    and returned unchanged (apart from surrounding whitespace). When no
    ``(...)`` wrapper can be found the stripped text is returned as-is so
    that ``json.loads`` reports the malformed payload.
    """
    body = text.strip()
    if body[:1] in ("{", "["):
        return body
    opening = body.find("(")
    closing = body.rfind(")")
    if opening == -1 or closing < opening:
        return body
    # Slice between the first "(" and the last ")". This tolerates a
    # trailing ";" or newline, which the previous
    # ``str.strip(chars)[:-1]`` approach did not (``str.strip`` removes a
    # *set of characters*, not a prefix).
    return body[opening + 1:closing]


def _strip_html_tags(value):
    """Remove ``<...>`` tags from *value* if it is a string; else return it unchanged."""
    if isinstance(value, str):
        return re.sub(r'<[^>]+>', '', value)
    return value


def stock_news(stock, timeout=10):
    """Fetch news for a single stock from the Eastmoney search API.

    Sends one JSONP request to ``search-api-web.eastmoney.com`` asking for
    the first page (up to 100 items) of ``cmsArticleWebOld`` results for
    the given keyword, unwraps the JSONP envelope and converts the items
    into a DataFrame.

    Args:
        stock: Stock code or stock name, used as the search keyword.
        timeout: Seconds to wait for the HTTP request before giving up
            (passed straight to ``requests.get``). Defaults to 10.

    Returns:
        A ``pandas.DataFrame`` holding whichever of the columns ``date``,
        ``stock``, ``title``, ``content``, ``url`` and ``mediaName`` are
        present (HTML tags removed from ``title`` and ``content``), or
        ``None`` when the request fails, the response cannot be parsed, or
        the API reports an error. Failures are printed, not raised.
    """
    url = "https://search-api-web.eastmoney.com/search/jsonp"

    # Build a jQuery-style JSONP callback name from the current time.
    timestamp = int(time.time() * 1000)
    cb = f"jQuery{str(int(time.time() * 10000000000000))[:16]}_{timestamp}"

    # Search request body; the API expects it JSON-encoded in the "param"
    # query argument.
    new_param = {
        "uid": "",  # may be left empty
        "keyword": stock,
        "type": ["cmsArticleWebOld"],
        "client": "web",
        "clientType": "web",
        "clientVersion": "curr",
        "param": {
            "cmsArticleWebOld": {
                "searchScope": "default",
                "sort": "default",
                "pageIndex": 1,
                "pageSize": 100,
                "preTag": "<em>",
                "postTag": "</em>",
            }
        },
    }

    params = {
        "cb": cb,
        "param": json.dumps(new_param),
        "_": timestamp + 1,
    }

    try:
        print(f"获取 {stock} 的新闻数据...")
        # A timeout keeps a stalled connection from blocking the caller forever.
        res = requests.get(url, params=params, timeout=timeout)
        res.raise_for_status()

        data_text = res.text
        data_json = json.loads(_unwrap_jsonp(data_text))

        # A non-zero "code" means the API rejected the request.
        if data_json.get("code") != 0:
            print(f"API 返回错误: {data_json.get('msg', '未知错误')}")
            return None

        # "result" may be missing or null; both mean there is nothing to read.
        result = data_json.get("result") or {}
        if "cmsArticleWebOld" not in result:
            print("在响应中找不到新闻数据")
            return None

        df = pd.DataFrame(result["cmsArticleWebOld"])

        # The API wraps keyword hits in <em> tags (preTag/postTag above);
        # remove those and any other markup from the text columns.
        for column in ("title", "content"):
            if column in df.columns:
                df[column] = df[column].apply(_strip_html_tags)

        df['stock'] = stock

        # Keep only the known columns, in a fixed order.
        cols = ['date', 'stock', 'title', 'content', 'url', 'mediaName']
        available_cols = [col for col in cols if col in df.columns]

        result_df = df[available_cols]
        print(f"成功获取 {len(result_df)} 条新闻")
        return result_df

    except requests.exceptions.RequestException as e:
        print(f"请求失败: {e}")
        return None
    except json.JSONDecodeError:
        print("无法解析 JSON 响应")
        print(f"响应文本: {data_text[:200]}...")
        return None
    except KeyError as e:
        print(f"响应中缺少关键字段: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: report the failure with a traceback and
        # keep the "return None on error" contract.
        print(f"获取个股新闻时发生错误: {e}")
        import traceback
        traceback.print_exc()
        return None

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions