feat: GitHub请求优化

This commit is contained in:
helti
2025-02-07 09:57:44 +08:00
parent 6ce0f7d8b0
commit ec190229cc
2 changed files with 104 additions and 63 deletions

View File

@@ -80,7 +80,6 @@ export async function getTrendingRepos(
type: TrendingType | string = "daily", type: TrendingType | string = "daily",
ttl = 60 * 60 * 24, ttl = 60 * 60 * 24,
): Promise<TrendingRepoInfo> { ): Promise<TrendingRepoInfo> {
// 拼接 Trending 地址,可根据需要调整时间维度
const url = `https://github.com/trending?since=${type}`; const url = `https://github.com/trending?since=${type}`;
// 先从缓存中取 // 先从缓存中取
const cachedData = await getCache(url); const cachedData = await getCache(url);
@@ -93,76 +92,115 @@ export async function getTrendingRepos(
}; };
} }
logger.info(`🌐 [GET] ${url}`); logger.info(`🌐 [GET] ${url}`);
console.log("获取 github信息 url", url);
// 添加浏览器请求头 // 更新请求头
const headers = { const headers = {
"User-Agent": 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
"Mozilla/5.0 (iPhone; CPU iPhone OS 14_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/1.0 Mobile/12F69 Safari/605.1.15", 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Cache-Control': 'max-age=0',
}; };
try { // 添加重试逻辑
// 添加请求头到 fetch 请求中 const maxRetries = 3;
const response = await fetch(url, { headers }); let lastError;
const html = await response.text();
// 1. 加载 HTML
const $ = cheerio.load(html);
// 2. 存储结果的数组
const results: RepoInfo[] = [];
// 3. 遍历每个 article.Box-row
$("article.Box-row").each((_, el) => {
const $el = $(el);
// 仓库标题和链接 (在 <h2> > <a> 里)
const $repoAnchor = $el.find("h2 a");
// 可能出现 "owner / repo" 这种文本
// eg: "owner / repo"
const fullNameText = $repoAnchor
.text()
.trim()
// 可能有多余空格,可以再做一次 split
// "owner / repo" => ["owner", "repo"]
.replace(/\r?\n/g, "") // 去掉换行
.replace(/\s+/g, " ") // 多空格处理
.split("/")
.map((s) => s.trim());
const owner = fullNameText[0] || ""; for (let i = 0; i < maxRetries; i++) {
const repoName = fullNameText[1] || ""; try {
// 设置超时时间为 20 秒
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 20000);
// href 即仓库链接 const response = await fetch(url, {
const repoUrl = "https://github.com" + $repoAnchor.attr("href"); headers,
signal: controller.signal
// 仓库描述 (<p class="col-9 color-fg-muted ...">)
const description = $el.find("p.col-9.color-fg-muted").text().trim();
// 语言 (<span itemprop="programmingLanguage">)
const language = $el.find('[itemprop="programmingLanguage"]').text().trim();
const starsText = $el.find('a[href$="/stargazers"]').text().trim();
const forksText = $el.find(`a[href$="/forks"]`).text().trim();
// 整合
results.push({
owner,
repo: repoName,
url: repoUrl || "",
description,
language,
stars: starsText,
forks: forksText,
}); });
}); clearTimeout(timeout);
const updateTime = new Date().toISOString(); if (!response.ok) {
const data = results; throw new Error(`HTTP error! status: ${response.status}`);
}
await setCache(url, { data, updateTime }, ttl); const html = await response.text();
// 返回数据 // 1. 加载 HTML
logger.info(`✅ [${response?.status}] request was successful`); const $ = cheerio.load(html);
return { fromCache: false, updateTime, data }; // 2. 存储结果的数组
} catch (error) { const results: RepoInfo[] = [];
logger.error("❌ [ERROR] request failed"); // 3. 遍历每个 article.Box-row
throw error; $("article.Box-row").each((_, el) => {
const $el = $(el);
// 仓库标题和链接 (在 <h2> > <a> 里)
const $repoAnchor = $el.find("h2 a");
// 可能出现 "owner / repo" 这种文本
// eg: "owner / repo"
const fullNameText = $repoAnchor
.text()
.trim()
// 可能有多余空格,可以再做一次 split
// "owner / repo" => ["owner", "repo"]
.replace(/\r?\n/g, "") // 去掉换行
.replace(/\s+/g, " ") // 多空格处理
.split("/")
.map((s) => s.trim());
const owner = fullNameText[0] || "";
const repoName = fullNameText[1] || "";
// href 即仓库链接
const repoUrl = "https://github.com" + $repoAnchor.attr("href");
// 仓库描述 (<p class="col-9 color-fg-muted ...">)
const description = $el.find("p.col-9.color-fg-muted").text().trim();
// 语言 (<span itemprop="programmingLanguage">)
const language = $el.find('[itemprop="programmingLanguage"]').text().trim();
const starsText = $el.find('a[href$="/stargazers"]').text().trim();
const forksText = $el.find(`a[href$="/forks"]`).text().trim();
// 整合
results.push({
owner,
repo: repoName,
url: repoUrl || "",
description,
language,
stars: starsText,
forks: forksText,
});
});
const updateTime = new Date().toISOString();
const data = results;
await setCache(url, { data, updateTime }, ttl);
// 返回数据
logger.info(`✅ [${response?.status}] 请求成功!`);
return { fromCache: false, updateTime, data };
} catch (error: Error | unknown) {
lastError = error;
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
logger.error(`❌ [ERROR] 第 ${i + 1} 请求失败: ${errorMessage}`);
// 如果是最后一次重试,则抛出错误
if (i === maxRetries - 1) {
logger.error("❌ [ERROR] 所有尝试请求失败!");
throw lastError;
}
// 等待一段时间后重试 (1秒、2秒、4秒...)
await new Promise(resolve => setTimeout(resolve, Math.pow(2, i) * 1000));
continue;
}
} }
throw new Error("请求失败!");
} }

View File

@@ -2,6 +2,9 @@ import type { RouterData } from "../types.js";
import type { RouterType } from "../router.types.js"; import type { RouterType } from "../router.types.js";
import { get } from "../utils/getData.js"; import { get } from "../utils/getData.js";
// 类目接口
// https://api.juejin.cn/tag_api/v1/query_category_briefs
export const handleRoute = async (_: undefined, noCache: boolean) => { export const handleRoute = async (_: undefined, noCache: boolean) => {
const listData = await getList(noCache); const listData = await getList(noCache);
const routeData: RouterData = { const routeData: RouterData = {