-
Notifications
You must be signed in to change notification settings - Fork 327
Open
Description
"""
修复股票新闻数据获取功能的补丁
此模块提供了修复版的stock_news函数,以适应东方财富网站API的变更
"""
import re
import pandas as pd
import json
import requests
import time
from datetime import datetime
def stock_news(stock):
    """Fetch news articles for one stock from the Eastmoney search API.

    Sends a single JSONP search request for ``stock`` (first page, up to
    100 items of the ``cmsArticleWebOld`` type), unwraps the JSONP
    envelope, strips HTML tags from the ``title`` and ``content`` fields
    and returns the result as a DataFrame.

    Args:
        stock: Stock code or stock name used as the search keyword.

    Returns:
        A ``pandas.DataFrame`` holding whichever of the columns ``date``,
        ``stock``, ``title``, ``content``, ``url`` and ``mediaName`` are
        present, in that order (``stock`` is always added and set to the
        keyword). Returns ``None`` when the request fails, the response
        cannot be parsed, the API reports a non-zero ``code``, or the
        expected news section is missing. Failures are reported with
        ``print`` rather than raised.
    """
    url = "https://search-api-web.eastmoney.com/search/jsonp"

    # Build the JSONP callback name dynamically from the current time.
    timestamp = int(time.time() * 1000)
    cb = f"jQuery{str(int(time.time() * 10000000000000))[:16]}_{timestamp}"

    # Search parameters in the updated structure expected by the API.
    new_param = {
        "uid": "",  # may be left empty
        "keyword": stock,
        "type": ["cmsArticleWebOld"],  # updated article type
        "client": "web",
        "clientType": "web",
        "clientVersion": "curr",
        "param": {
            "cmsArticleWebOld": {  # key must match the entry in "type"
                "searchScope": "default",
                "sort": "default",
                "pageIndex": 1,
                "pageSize": 100,
                "preTag": "<em>",
                "postTag": "</em>",
            }
        },
    }
    params = {
        "cb": cb,
        "param": json.dumps(new_param),
        "_": timestamp + 1,
    }

    # Matches "<callback>( ... )" with optional trailing ";" / whitespace and
    # captures the payload between the outermost parentheses.
    jsonp_envelope = re.compile(r"^\s*[\w$.]+\s*\((.*)\)\s*;?\s*$", re.DOTALL)
    html_tag = re.compile(r"<[^>]+>")

    def strip_tags(value):
        # Non-string cells (e.g. NaN for missing values) are passed through.
        return html_tag.sub("", value) if isinstance(value, str) else value

    try:
        print(f"获取 {stock} 的新闻数据...")
        # A timeout keeps a stalled connection from blocking the caller
        # forever; Timeout is a RequestException and is handled below.
        res = requests.get(url, params=params, timeout=10)
        res.raise_for_status()  # surface HTTP error statuses
        data_text = res.text

        # Unwrap the JSONP envelope. str.strip() would treat the callback
        # name as a set of characters, so the prefix is matched explicitly.
        # A body that is not JSONP-wrapped is parsed as plain JSON.
        envelope = jsonp_envelope.match(data_text)
        json_str = envelope.group(1) if envelope else data_text
        data_json = json.loads(json_str)

        # A non-zero code means the API itself reported an error.
        if data_json.get("code") != 0:
            print(f"API 返回错误: {data_json.get('msg', '未知错误')}")
            return None

        # Locate the news list; "result" may be absent or null.
        result = data_json.get("result") or {}
        if "cmsArticleWebOld" not in result:
            print("在响应中找不到新闻数据")
            return None
        news_items = result["cmsArticleWebOld"]

        df = pd.DataFrame(news_items)

        # Remove HTML markup (such as the <em> highlight tags requested
        # above) from the text columns.
        for column in ("title", "content"):
            if column in df.columns:
                df[column] = df[column].apply(strip_tags)
        df["stock"] = stock

        # Keep only the known columns that are present, in a fixed order.
        cols = ["date", "stock", "title", "content", "url", "mediaName"]
        available_cols = [col for col in cols if col in df.columns]
        result_df = df[available_cols]
        print(f"成功获取 {len(result_df)} 条新闻")
        return result_df
    except requests.exceptions.RequestException as e:
        print(f"请求失败: {e}")
        return None
    except json.JSONDecodeError:
        print("无法解析 JSON 响应")
        print(f"响应文本: {data_text[:200]}...")
        return None
    except KeyError as e:
        print(f"响应中缺少关键字段: {e}")
        return None
    except Exception as e:
        # Last-resort boundary: report the unexpected error with its
        # traceback and keep the "None on failure" contract.
        print(f"获取个股新闻时发生错误: {e}")
        import traceback
        traceback.print_exc()
        return None
Metadata
Metadata
Assignees
Labels
No labels