mirror of
https://github.com/imsyy/DailyHotApi.git
synced 2026-01-12 13:14:55 +08:00
虎嗅接口:从移动端页面爬取改为原生feed API
移动端页面内容爬取不稳定,标题和描述拆分易出错;原生feed接口获取的内容更为稳定。
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import type { RouterData } from "../types.js";
|
import type { RouterData } from "../types.js";
|
||||||
import type { RouterType } from "../router.types.js";
|
import type { RouterType } from "../router.types.js";
|
||||||
import { get } from "../utils/getData.js";
|
|
||||||
import { getTime } from "../utils/getTime.js";
|
import { getTime } from "../utils/getTime.js";
|
||||||
|
import axios from "axios";
|
||||||
|
|
||||||
export const handleRoute = async (_: undefined, noCache: boolean) => {
|
export const handleRoute = async (_: undefined, noCache: boolean) => {
|
||||||
const listData = await getList(noCache);
|
const listData = await getList(noCache);
|
||||||
@@ -17,37 +17,21 @@ export const handleRoute = async (_: undefined, noCache: boolean) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const getList = async (noCache: boolean) => {
|
const getList = async (noCache: boolean) => {
|
||||||
// 使用移动端页面,数据通过 window.__NUXT__ 内联
|
// PC 端接口
|
||||||
const url = `https://m.huxiu.com/moment/`;
|
const url = `https://moment-api.huxiu.com/web-v3/moment/feed?platform=www`;
|
||||||
const result = await get({
|
const res = await axios.get(url, {
|
||||||
url,
|
|
||||||
noCache,
|
|
||||||
headers: {
|
headers: {
|
||||||
"User-Agent":
|
"User-Agent": "Mozilla/5.0",
|
||||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1",
|
Referer: "https://www.huxiu.com/moment/",
|
||||||
Referer: "https://m.huxiu.com/moment/",
|
|
||||||
},
|
},
|
||||||
responseType: "text",
|
timeout: 10000,
|
||||||
});
|
});
|
||||||
// 正则查找内联的 Nuxt 数据
|
const list: RouterType["huxiu"][] = res.data?.data?.moment_list?.datalist || [];
|
||||||
const pattern = /window.__NUXT__=(.*?);<\/script>/s;
|
|
||||||
const matchResult = result.data.match(pattern);
|
|
||||||
if (!matchResult || !matchResult[1]) {
|
|
||||||
throw new Error("虎嗅页面结构变更,未找到内联数据");
|
|
||||||
}
|
|
||||||
const expr = matchResult[1].trim();
|
|
||||||
let nuxtData: any;
|
|
||||||
try {
|
|
||||||
// 直接求值表达式 (function(...){return {...}})(...)
|
|
||||||
// eslint-disable-next-line no-eval
|
|
||||||
nuxtData = eval(expr);
|
|
||||||
} catch (err) {
|
|
||||||
const msg = err instanceof Error ? err.message : "未知错误";
|
|
||||||
throw new Error(`虎嗅数据解析失败: ${msg}`);
|
|
||||||
}
|
|
||||||
const list = nuxtData?.data?.[0]?.momentList || [];
|
|
||||||
return {
|
return {
|
||||||
...result,
|
fromCache: false,
|
||||||
|
updateTime: res.data?.data?.moment_list?.last_id
|
||||||
|
? getTime(res.data.data.moment_list.last_id)
|
||||||
|
: undefined,
|
||||||
data: list.map((v: RouterType["huxiu"]) => {
|
data: list.map((v: RouterType["huxiu"]) => {
|
||||||
const content = (v.content || "").replace(/<br\s*\/?>/gi, "\n");
|
const content = (v.content || "").replace(/<br\s*\/?>/gi, "\n");
|
||||||
const [titleLine, ...rest] = content
|
const [titleLine, ...rest] = content
|
||||||
@@ -56,16 +40,16 @@ const getList = async (noCache: boolean) => {
|
|||||||
.filter(Boolean);
|
.filter(Boolean);
|
||||||
const title = titleLine?.replace(/。$/, "") || "";
|
const title = titleLine?.replace(/。$/, "") || "";
|
||||||
const intro = rest.join("\n");
|
const intro = rest.join("\n");
|
||||||
const momentId = v.moment_id || v.object_id;
|
const momentId = v.object_id;
|
||||||
return {
|
return {
|
||||||
id: momentId,
|
id: momentId,
|
||||||
title,
|
title,
|
||||||
desc: intro,
|
desc: intro,
|
||||||
author: v.user_info?.username || "",
|
author: v.user_info?.username || "",
|
||||||
timestamp: getTime(v.origin_publish_time || v.publish_time),
|
timestamp: getTime(v.publish_time),
|
||||||
hot: undefined,
|
hot: v.count_info?.agree_num,
|
||||||
url: v.url || `https://www.huxiu.com/moment/${momentId}.html`,
|
url: `https://www.huxiu.com/moment/${momentId}.html`,
|
||||||
mobileUrl: v.url || `https://m.huxiu.com/moment/${momentId}.html`,
|
mobileUrl: `https://m.huxiu.com/moment/${momentId}.html`,
|
||||||
};
|
};
|
||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user