修复虎嗅接口

改为抓取虎嗅移动端页面的 window.__NUXT__ 内联数据,使用移动 UA/Referer 获取有效列表。
解析 NUXT 数据时处理 moment_id/origin_publish_time,生成正确的链接与时间戳。
重新拆分内容:以首行作标题(去掉末尾句号),其余行合并为描述,避免标题夹带全文、描述为空。
增加结构缺失和解析失败的错误提示,便于排查。
This commit is contained in:
xuan
2025-12-18 17:35:48 +08:00
parent 772f421157
commit 82f983793c

67
src/routes/huxiu.ts Normal file → Executable file
View File

@@ -16,36 +16,57 @@ export const handleRoute = async (_: undefined, noCache: boolean) => {
return routeData; return routeData;
}; };
/**
 * Title processing: splits a content string on the literal "<br><br>" separator.
 *
 * The first segment becomes the title, with a single trailing "。" removed if present;
 * the remaining segments are joined back together with "<br><br>" to form the intro.
 *
 * @param text - Raw content string.
 * @returns An object holding the derived `title` and `intro`.
 */
const titleProcessing = (text: string) => {
  const separator = "<br><br>";
  const [headline, ...remaining] = text.split(separator);
  // Only a full stop at the very end of the headline is stripped.
  const title = headline?.replace(/。$/, "");
  const intro = remaining.join(separator);
  return { title, intro };
};
/**
 * Fetches the Huxiu "moment" page and normalizes its entries into list items.
 *
 * The mobile page is requested as text with an iPhone User-Agent and a matching
 * Referer, and the entries are read from the `window.__NUXT__` expression that the
 * page inlines in a <script> tag.
 *
 * @param noCache - Forwarded to `get` as its `noCache` option.
 * @returns The `get` result with `data` replaced by the normalized items.
 * @throws Error when the inline payload cannot be found, cannot be evaluated, or
 *         carries a `momentList` that is not an array.
 */
const getList = async (noCache: boolean) => {
  // Use the mobile page: its data is inlined via window.__NUXT__.
  const url = `https://m.huxiu.com/moment/`;
  const result = await get({
    url,
    noCache,
    headers: {
      "User-Agent":
        "Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1",
      Referer: "https://m.huxiu.com/moment/",
    },
    responseType: "text",
  });
  // Locate the inlined Nuxt payload. The dot is escaped so only the literal
  // `window.__NUXT__` matches, and whitespace around `=` is tolerated.
  const pattern = /window\.__NUXT__\s*=\s*(.*?);<\/script>/s;
  const matchResult = result.data.match(pattern);
  if (!matchResult || !matchResult[1]) {
    throw new Error("虎嗅页面结构变更,未找到内联数据");
  }
  const expr = matchResult[1].trim();
  let nuxtData: { data?: { momentList?: unknown }[] } | null | undefined;
  try {
    // The payload is a JavaScript expression of the form
    // (function(...){return {...}})(...), not JSON, so it is evaluated directly.
    // SECURITY NOTE(review): this executes script text fetched from a remote page
    // with the privileges of this process. Consider evaluating it in an isolated
    // context (e.g. node:vm with a timeout) instead of a direct eval.
    // eslint-disable-next-line no-eval
    nuxtData = eval(expr);
  } catch (err) {
    const msg = err instanceof Error ? err.message : "未知错误";
    throw new Error(`虎嗅数据解析失败: ${msg}`);
  }
  const rawList = nuxtData?.data?.[0]?.momentList;
  if (rawList && !Array.isArray(rawList)) {
    // Fail with an explicit structure error rather than an opaque
    // "map is not a function" TypeError further down.
    throw new Error("虎嗅页面结构变更,momentList 不是数组");
  }
  // A missing list yields an empty result.
  const list: RouterType["huxiu"][] = Array.isArray(rawList) ? rawList : [];
  return {
    ...result,
    data: list.map((v: RouterType["huxiu"]) => {
      // Treat every <br> variant as a line break, then drop blank lines.
      const content = (v.content || "").replace(/<br\s*\/?>/gi, "\n");
      const [titleLine, ...rest] = content
        .split("\n")
        .map((s) => s.trim())
        .filter(Boolean);
      // First non-empty line is the title (without a trailing "。");
      // the remaining lines form the description.
      const title = titleLine?.replace(/。$/, "") || "";
      const intro = rest.join("\n");
      // Prefer moment_id and fall back to object_id when it is absent.
      const momentId = v.moment_id || v.object_id;
      return {
        id: momentId,
        title,
        desc: intro,
        author: v.user_info?.username || "",
        // Prefer origin_publish_time and fall back to publish_time.
        timestamp: getTime(v.origin_publish_time || v.publish_time),
        hot: undefined,
        url: v.url || `https://www.huxiu.com/moment/${momentId}.html`,
        mobileUrl: v.url || `https://m.huxiu.com/moment/${momentId}.html`,
      };
    }),
  };
};