747 lines
22 KiB
JavaScript
747 lines
22 KiB
JavaScript
/* eslint-disable no-console */
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
const dns = require('node:dns').promises;
|
||
const net = require('node:net');
|
||
const Parser = require('rss-parser');
|
||
|
||
const { loadConfig } = require('../src/generator.js');
|
||
const { createLogger, isVerbose, startTimer } = require('../src/generator/utils/logger');
|
||
|
||
const log = createLogger('sync:articles');
|
||
|
||
const DEFAULT_RSS_SETTINGS = {
|
||
enabled: true,
|
||
cacheDir: 'dev',
|
||
fetch: {
|
||
timeoutMs: 10_000,
|
||
maxRetries: 1,
|
||
concurrency: 5,
|
||
totalTimeoutMs: 60_000,
|
||
maxRedirects: 3,
|
||
userAgent: 'MeNavRSSSync/1.0',
|
||
htmlMaxBytes: 512 * 1024,
|
||
feedMaxBytes: 1024 * 1024,
|
||
},
|
||
articles: {
|
||
perSite: 8,
|
||
total: 50,
|
||
summaryMaxLength: 200,
|
||
},
|
||
};
|
||
|
||
function parseBooleanEnv(value, fallback) {
|
||
if (value === undefined || value === null || value === '') return fallback;
|
||
const v = String(value).trim().toLowerCase();
|
||
if (v === '1' || v === 'true' || v === 'yes' || v === 'y') return true;
|
||
if (v === '0' || v === 'false' || v === 'no' || v === 'n') return false;
|
||
return fallback;
|
||
}
|
||
|
||
function parseIntegerEnv(value, fallback) {
|
||
if (value === undefined || value === null || value === '') return fallback;
|
||
const n = Number.parseInt(String(value), 10);
|
||
return Number.isFinite(n) ? n : fallback;
|
||
}
|
||
|
||
function getRssSettings(config) {
|
||
const fromConfig =
|
||
config && config.site && config.site.rss && typeof config.site.rss === 'object'
|
||
? config.site.rss
|
||
: {};
|
||
|
||
const merged = {
|
||
...DEFAULT_RSS_SETTINGS,
|
||
...fromConfig,
|
||
fetch: {
|
||
...DEFAULT_RSS_SETTINGS.fetch,
|
||
...(fromConfig.fetch || {}),
|
||
},
|
||
articles: {
|
||
...DEFAULT_RSS_SETTINGS.articles,
|
||
...(fromConfig.articles || {}),
|
||
},
|
||
};
|
||
|
||
// 环境变量覆盖(主要给 CI 调试/降级用)
|
||
merged.enabled = parseBooleanEnv(process.env.RSS_ENABLED, merged.enabled);
|
||
merged.cacheDir = process.env.RSS_CACHE_DIR ? String(process.env.RSS_CACHE_DIR) : merged.cacheDir;
|
||
|
||
merged.fetch.timeoutMs = parseIntegerEnv(process.env.RSS_FETCH_TIMEOUT, merged.fetch.timeoutMs);
|
||
merged.fetch.maxRetries = parseIntegerEnv(
|
||
process.env.RSS_FETCH_MAX_RETRIES,
|
||
merged.fetch.maxRetries
|
||
);
|
||
merged.fetch.concurrency = parseIntegerEnv(
|
||
process.env.RSS_FETCH_CONCURRENCY,
|
||
merged.fetch.concurrency
|
||
);
|
||
merged.fetch.totalTimeoutMs = parseIntegerEnv(
|
||
process.env.RSS_TOTAL_TIMEOUT,
|
||
merged.fetch.totalTimeoutMs
|
||
);
|
||
merged.fetch.maxRedirects = parseIntegerEnv(
|
||
process.env.RSS_FETCH_MAX_REDIRECTS,
|
||
merged.fetch.maxRedirects
|
||
);
|
||
|
||
merged.articles.perSite = parseIntegerEnv(
|
||
process.env.RSS_ARTICLES_PER_SITE,
|
||
merged.articles.perSite
|
||
);
|
||
merged.articles.total = parseIntegerEnv(process.env.RSS_ARTICLES_TOTAL, merged.articles.total);
|
||
merged.articles.summaryMaxLength = parseIntegerEnv(
|
||
process.env.RSS_SUMMARY_MAX_LENGTH,
|
||
merged.articles.summaryMaxLength
|
||
);
|
||
|
||
// 兜底约束:避免奇怪配置导致卡死/爆内存
|
||
merged.fetch.timeoutMs = Math.max(1_000, merged.fetch.timeoutMs);
|
||
merged.fetch.totalTimeoutMs = Math.max(5_000, merged.fetch.totalTimeoutMs);
|
||
merged.fetch.concurrency = Math.max(1, Math.min(20, merged.fetch.concurrency));
|
||
merged.fetch.maxRetries = Math.max(0, Math.min(3, merged.fetch.maxRetries));
|
||
merged.fetch.maxRedirects = Math.max(0, Math.min(10, merged.fetch.maxRedirects));
|
||
|
||
merged.articles.perSite = Math.max(1, Math.min(50, merged.articles.perSite));
|
||
merged.articles.total = Math.max(1, Math.min(500, merged.articles.total));
|
||
merged.articles.summaryMaxLength = Math.max(0, Math.min(2_000, merged.articles.summaryMaxLength));
|
||
|
||
return merged;
|
||
}
|
||
|
||
function isHttpUrl(url) {
|
||
if (!url) return false;
|
||
try {
|
||
const u = new URL(String(url));
|
||
return u.protocol === 'http:' || u.protocol === 'https:';
|
||
} catch {
|
||
return false;
|
||
}
|
||
}
|
||
|
||
function isPrivateIp(ip) {
|
||
if (!ip) return true;
|
||
|
||
if (net.isIP(ip) === 4) {
|
||
const parts = ip.split('.').map((n) => Number.parseInt(n, 10));
|
||
if (parts.length !== 4 || parts.some((n) => !Number.isFinite(n) || n < 0 || n > 255))
|
||
return true;
|
||
|
||
const [a, b] = parts;
|
||
if (a === 10) return true;
|
||
if (a === 127) return true;
|
||
if (a === 0) return true;
|
||
if (a === 169 && b === 254) return true;
|
||
if (a === 172 && b >= 16 && b <= 31) return true;
|
||
if (a === 192 && b === 168) return true;
|
||
if (a >= 224) return true; // 组播/保留
|
||
return false;
|
||
}
|
||
|
||
if (net.isIP(ip) === 6) {
|
||
const normalized = String(ip).toLowerCase();
|
||
if (normalized === '::1') return true;
|
||
if (normalized.startsWith('fe80:')) return true; // link-local
|
||
if (normalized.startsWith('fc') || normalized.startsWith('fd')) return true; // ULA
|
||
return false;
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
async function withTimeout(promise, timeoutMs, label) {
|
||
let timer;
|
||
try {
|
||
const timeout = new Promise((_, reject) => {
|
||
timer = setTimeout(() => reject(new Error(`${label} 超时(${timeoutMs}ms)`)), timeoutMs);
|
||
});
|
||
return await Promise.race([promise, timeout]);
|
||
} finally {
|
||
if (timer) clearTimeout(timer);
|
||
}
|
||
}
|
||
|
||
async function assertSafeToFetch(url, timeoutMs) {
|
||
const u = new URL(String(url));
|
||
if (u.protocol !== 'http:' && u.protocol !== 'https:') {
|
||
throw new Error(`仅允许 http/https:${u.protocol}`);
|
||
}
|
||
|
||
if (u.username || u.password) {
|
||
throw new Error('禁止包含用户名/密码的 URL');
|
||
}
|
||
|
||
const hostname = u.hostname.toLowerCase();
|
||
if (
|
||
hostname === 'localhost' ||
|
||
hostname === '0.0.0.0' ||
|
||
hostname === '127.0.0.1' ||
|
||
hostname === '::1'
|
||
) {
|
||
throw new Error('禁止访问本机地址');
|
||
}
|
||
if (hostname.endsWith('.local')) {
|
||
throw new Error('禁止访问 .local 域名');
|
||
}
|
||
|
||
if (net.isIP(hostname)) {
|
||
if (isPrivateIp(hostname)) throw new Error('禁止访问内网/保留 IP');
|
||
return;
|
||
}
|
||
|
||
// 解析域名,阻断解析到内网的情况(best-effort)
|
||
const records = await withTimeout(
|
||
dns.lookup(hostname, { all: true, verbatim: true }),
|
||
Math.min(2_000, timeoutMs),
|
||
`DNS 解析 ${hostname}`
|
||
);
|
||
|
||
if (!Array.isArray(records) || records.length === 0) {
|
||
throw new Error('DNS 解析失败或无结果');
|
||
}
|
||
|
||
const hasPrivate = records.some((r) => isPrivateIp(r.address));
|
||
if (hasPrivate) throw new Error('DNS 解析到内网/保留地址,已阻断');
|
||
}
|
||
|
||
function buildHeaders(userAgent) {
|
||
return {
|
||
'user-agent': userAgent,
|
||
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||
};
|
||
}
|
||
|
||
async function fetchWithRedirects(url, { timeoutMs, maxRedirects, headers, maxBytes }) {
|
||
let current = String(url);
|
||
for (let i = 0; i <= maxRedirects; i += 1) {
|
||
await assertSafeToFetch(current, timeoutMs);
|
||
|
||
const controller = new AbortController();
|
||
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
||
|
||
let response;
|
||
try {
|
||
response = await fetch(current, {
|
||
method: 'GET',
|
||
redirect: 'manual',
|
||
headers,
|
||
signal: controller.signal,
|
||
});
|
||
} finally {
|
||
clearTimeout(timer);
|
||
}
|
||
|
||
const status = response.status;
|
||
if (status >= 300 && status < 400) {
|
||
const location = response.headers.get('location');
|
||
if (!location) throw new Error(`重定向缺少 Location(${status})`);
|
||
current = new URL(location, current).toString();
|
||
continue;
|
||
}
|
||
|
||
if (!response.ok) {
|
||
throw new Error(`HTTP ${status}`);
|
||
}
|
||
|
||
const text = await readResponseTextWithLimit(response, maxBytes);
|
||
return { url: current, response, text };
|
||
}
|
||
|
||
throw new Error(`重定向次数超过上限(${maxRedirects})`);
|
||
}
|
||
|
||
async function readResponseTextWithLimit(response, maxBytes) {
|
||
if (!response.body || typeof response.body.getReader !== 'function') {
|
||
const text = await response.text();
|
||
if (Buffer.byteLength(text, 'utf8') > maxBytes) {
|
||
throw new Error('响应体过大');
|
||
}
|
||
return text;
|
||
}
|
||
|
||
const reader = response.body.getReader();
|
||
const decoder = new TextDecoder('utf-8');
|
||
let received = 0;
|
||
let text = '';
|
||
|
||
while (true) {
|
||
// eslint-disable-next-line no-await-in-loop
|
||
const { done, value } = await reader.read();
|
||
if (done) break;
|
||
received += value.byteLength;
|
||
if (received > maxBytes) {
|
||
try {
|
||
reader.cancel();
|
||
} catch {
|
||
// ignore
|
||
}
|
||
throw new Error('响应体过大');
|
||
}
|
||
text += decoder.decode(value, { stream: true });
|
||
}
|
||
text += decoder.decode();
|
||
return text;
|
||
}
|
||
|
||
function extractFeedLinksFromHtml(html, baseUrl) {
|
||
const candidates = [];
|
||
if (!html) return candidates;
|
||
|
||
const linkTags = String(html).match(/<link\b[^>]*>/gi) || [];
|
||
for (const tag of linkTags) {
|
||
const rel = /rel\s*=\s*["']([^"']+)["']/i.exec(tag)?.[1] || '';
|
||
if (!/alternate/i.test(rel)) continue;
|
||
|
||
const type = /type\s*=\s*["']([^"']+)["']/i.exec(tag)?.[1] || '';
|
||
const isFeedType = /application\/(rss|atom)\+xml/i.test(type) || /(rss|atom)/i.test(type);
|
||
if (!isFeedType) continue;
|
||
|
||
const href = /href\s*=\s*["']([^"']+)["']/i.exec(tag)?.[1];
|
||
if (!href) continue;
|
||
|
||
try {
|
||
const resolved = new URL(href, baseUrl).toString();
|
||
if (isHttpUrl(resolved)) candidates.push(resolved);
|
||
} catch {
|
||
// ignore bad url
|
||
}
|
||
}
|
||
|
||
// 简单排序:优先 RSS,其次 Atom
|
||
const rank = (url) => (url.includes('atom') ? 2 : 1);
|
||
return [...new Set(candidates)].sort((a, b) => rank(a) - rank(b));
|
||
}
|
||
|
||
function buildCommonFeedUrls(siteUrl) {
|
||
const common = ['/feed', '/rss.xml', '/rss', '/atom.xml', '/atom', '/feed.xml'];
|
||
const out = [];
|
||
for (const p of common) {
|
||
try {
|
||
const u = new URL(p, siteUrl).toString();
|
||
out.push(u);
|
||
} catch {
|
||
// ignore
|
||
}
|
||
}
|
||
return out;
|
||
}
|
||
|
||
async function discoverFeedUrl(siteUrl, settings, deadlineTs) {
|
||
const timeRemaining = deadlineTs - Date.now();
|
||
if (timeRemaining <= 0) throw new Error('总超时:无法继续发现 RSS');
|
||
|
||
const homepage = await fetchWithRedirects(siteUrl, {
|
||
timeoutMs: Math.min(settings.fetch.timeoutMs, timeRemaining),
|
||
maxRedirects: settings.fetch.maxRedirects,
|
||
headers: buildHeaders(settings.fetch.userAgent),
|
||
maxBytes: settings.fetch.htmlMaxBytes,
|
||
});
|
||
|
||
const contentType = homepage.response.headers.get('content-type') || '';
|
||
if (
|
||
/text\/html/i.test(contentType) ||
|
||
/application\/xhtml\+xml/i.test(contentType) ||
|
||
!contentType
|
||
) {
|
||
const candidates = extractFeedLinksFromHtml(homepage.text, homepage.url);
|
||
if (candidates.length > 0) {
|
||
return candidates[0];
|
||
}
|
||
}
|
||
|
||
return null;
|
||
}
|
||
|
||
function stripHtmlToText(input) {
|
||
const raw = String(input || '');
|
||
const withoutTags = raw
|
||
.replace(/<script[\s\S]*?<\/script>/gi, '')
|
||
.replace(/<style[\s\S]*?<\/style>/gi, '')
|
||
.replace(/<[^>]+>/g, ' ');
|
||
|
||
const decoded = withoutTags
|
||
.replace(/ /gi, ' ')
|
||
.replace(/&/gi, '&')
|
||
.replace(/</gi, '<')
|
||
.replace(/>/gi, '>')
|
||
.replace(/"/gi, '"')
|
||
.replace(/�?39;/g, "'")
|
||
.replace(/&#x([0-9a-f]+);/gi, (_, hex) => String.fromCodePoint(Number.parseInt(hex, 16)))
|
||
.replace(/&#(\d+);/g, (_, num) => String.fromCodePoint(Number.parseInt(num, 10)));
|
||
|
||
return decoded.replace(/\s+/g, ' ').trim();
|
||
}
|
||
|
||
function truncateText(text, maxLen) {
|
||
if (!maxLen || maxLen <= 0) return '';
|
||
const s = String(text || '');
|
||
if (s.length <= maxLen) return s;
|
||
return s.slice(0, maxLen) + '...';
|
||
}
|
||
|
||
function normalizePublishedAt(item) {
|
||
const iso = item && typeof item.isoDate === 'string' ? item.isoDate : '';
|
||
if (iso) return iso;
|
||
|
||
const pub = item && typeof item.pubDate === 'string' ? item.pubDate : '';
|
||
if (pub) {
|
||
const d = new Date(pub);
|
||
if (!Number.isNaN(d.getTime())) return d.toISOString();
|
||
}
|
||
|
||
return '';
|
||
}
|
||
|
||
function normalizeArticle(item, sourceSite, settings) {
|
||
const title = item && item.title !== undefined ? String(item.title).trim() : '';
|
||
if (!title) return null;
|
||
|
||
const link = item && item.link ? String(item.link).trim() : '';
|
||
if (!isHttpUrl(link)) return null;
|
||
|
||
const summaryRaw =
|
||
(item && item.contentSnippet) || (item && item.summary) || (item && item.content) || '';
|
||
const summaryText = stripHtmlToText(summaryRaw);
|
||
const summary = settings.articles.summaryMaxLength
|
||
? truncateText(summaryText, settings.articles.summaryMaxLength)
|
||
: summaryText;
|
||
|
||
const publishedAt = normalizePublishedAt(item);
|
||
|
||
const source = sourceSite && sourceSite.name ? String(sourceSite.name) : '';
|
||
const sourceUrl = sourceSite && sourceSite.url ? String(sourceSite.url) : '';
|
||
const icon = sourceSite && sourceSite.icon ? String(sourceSite.icon) : 'fas fa-pen';
|
||
|
||
return {
|
||
title,
|
||
url: link,
|
||
summary,
|
||
publishedAt,
|
||
source,
|
||
// 站点首页 URL(用于生成端按分类聚合展示;文章 url 为具体文章链接)
|
||
sourceUrl,
|
||
icon,
|
||
};
|
||
}
|
||
|
||
async function fetchAndParseFeed(feedUrl, settings, parser, deadlineTs) {
|
||
const timeRemaining = deadlineTs - Date.now();
|
||
if (timeRemaining <= 0) throw new Error('总超时:无法继续抓取 Feed');
|
||
|
||
const feed = await fetchWithRedirects(feedUrl, {
|
||
timeoutMs: Math.min(settings.fetch.timeoutMs, timeRemaining),
|
||
maxRedirects: settings.fetch.maxRedirects,
|
||
headers: {
|
||
...buildHeaders(settings.fetch.userAgent),
|
||
accept: 'application/rss+xml,application/atom+xml,application/xml,text/xml;q=0.9,*/*;q=0.8',
|
||
},
|
||
maxBytes: settings.fetch.feedMaxBytes,
|
||
});
|
||
|
||
const parsed = await parser.parseString(feed.text);
|
||
return {
|
||
feedUrl: feed.url,
|
||
feedTitle: parsed.title || '',
|
||
items: Array.isArray(parsed.items) ? parsed.items : [],
|
||
};
|
||
}
|
||
|
||
async function processSourceSite(sourceSite, settings, parser, deadlineTs) {
|
||
const url = sourceSite && sourceSite.url ? String(sourceSite.url) : '';
|
||
if (!isHttpUrl(url)) {
|
||
return {
|
||
site: {
|
||
name: sourceSite && sourceSite.name ? String(sourceSite.name) : '',
|
||
url,
|
||
feedUrl: '',
|
||
status: 'skipped',
|
||
error: '无效 URL(需为 http/https)',
|
||
fetchedAt: new Date().toISOString(),
|
||
},
|
||
articles: [],
|
||
};
|
||
}
|
||
|
||
let lastError = null;
|
||
|
||
const tryOnce = async (feedUrl) => {
|
||
const parsed = await fetchAndParseFeed(feedUrl, settings, parser, deadlineTs);
|
||
const normalized = parsed.items
|
||
.map((item) => normalizeArticle(item, sourceSite, settings))
|
||
.filter(Boolean)
|
||
.slice(0, settings.articles.perSite);
|
||
return { feedUrl: parsed.feedUrl, articles: normalized };
|
||
};
|
||
|
||
const attempt = async () => {
|
||
const discovered = await discoverFeedUrl(url, settings, deadlineTs);
|
||
const candidates = discovered
|
||
? [discovered, ...buildCommonFeedUrls(url)]
|
||
: buildCommonFeedUrls(url);
|
||
|
||
for (const candidate of [...new Set(candidates)]) {
|
||
try {
|
||
// eslint-disable-next-line no-await-in-loop
|
||
const res = await tryOnce(candidate);
|
||
return res;
|
||
} catch (e) {
|
||
lastError = e;
|
||
}
|
||
}
|
||
throw lastError || new Error('未找到可用 Feed');
|
||
};
|
||
|
||
const elapsedMs = startTimer();
|
||
for (let i = 0; i <= settings.fetch.maxRetries; i += 1) {
|
||
try {
|
||
// eslint-disable-next-line no-await-in-loop
|
||
const res = await attempt();
|
||
return {
|
||
site: {
|
||
name: sourceSite && sourceSite.name ? String(sourceSite.name) : '',
|
||
url,
|
||
feedUrl: res.feedUrl,
|
||
status: 'success',
|
||
error: '',
|
||
fetchedAt: new Date().toISOString(),
|
||
durationMs: elapsedMs(),
|
||
},
|
||
articles: res.articles,
|
||
};
|
||
} catch (e) {
|
||
lastError = e;
|
||
}
|
||
}
|
||
|
||
return {
|
||
site: {
|
||
name: sourceSite && sourceSite.name ? String(sourceSite.name) : '',
|
||
url,
|
||
feedUrl: '',
|
||
status: 'failed',
|
||
error: lastError ? String(lastError.message || lastError) : '未知错误',
|
||
fetchedAt: new Date().toISOString(),
|
||
durationMs: elapsedMs(),
|
||
},
|
||
articles: [],
|
||
};
|
||
}
|
||
|
||
async function mapWithConcurrency(items, concurrency, worker) {
|
||
const results = new Array(items.length);
|
||
let nextIndex = 0;
|
||
|
||
async function runOne() {
|
||
while (nextIndex < items.length) {
|
||
const currentIndex = nextIndex;
|
||
nextIndex += 1;
|
||
|
||
try {
|
||
// eslint-disable-next-line no-await-in-loop
|
||
results[currentIndex] = await worker(items[currentIndex], currentIndex);
|
||
} catch (e) {
|
||
results[currentIndex] = { error: e };
|
||
}
|
||
}
|
||
}
|
||
|
||
const runners = [];
|
||
const count = Math.max(1, Math.min(concurrency, items.length));
|
||
for (let i = 0; i < count; i += 1) {
|
||
runners.push(runOne());
|
||
}
|
||
await Promise.all(runners);
|
||
return results;
|
||
}
|
||
|
||
function collectSitesRecursively(node, output) {
|
||
if (!node || typeof node !== 'object') return;
|
||
|
||
if (Array.isArray(node.subcategories))
|
||
node.subcategories.forEach((child) => collectSitesRecursively(child, output));
|
||
if (Array.isArray(node.groups))
|
||
node.groups.forEach((child) => collectSitesRecursively(child, output));
|
||
if (Array.isArray(node.subgroups))
|
||
node.subgroups.forEach((child) => collectSitesRecursively(child, output));
|
||
|
||
if (Array.isArray(node.sites)) {
|
||
node.sites.forEach((site) => {
|
||
if (site && typeof site === 'object') output.push(site);
|
||
});
|
||
}
|
||
}
|
||
|
||
function buildFlatSitesFromCategories(categories) {
|
||
const out = [];
|
||
if (!Array.isArray(categories)) return out;
|
||
categories.forEach((category) => collectSitesRecursively(category, out));
|
||
return out;
|
||
}
|
||
|
||
async function syncArticlesForPage(pageId, pageConfig, config, settings) {
|
||
const sourceSites = Array.isArray(pageConfig && pageConfig.sites)
|
||
? pageConfig.sites
|
||
: buildFlatSitesFromCategories(
|
||
pageConfig && Array.isArray(pageConfig.categories) ? pageConfig.categories : []
|
||
);
|
||
|
||
const elapsedMs = startTimer();
|
||
const startedAt = Date.now();
|
||
const deadlineTs = startedAt + settings.fetch.totalTimeoutMs;
|
||
|
||
const parser = new Parser({
|
||
timeout: settings.fetch.timeoutMs,
|
||
});
|
||
|
||
const results = await mapWithConcurrency(sourceSites, settings.fetch.concurrency, async (site) =>
|
||
processSourceSite(site, settings, parser, deadlineTs)
|
||
);
|
||
|
||
const sites = [];
|
||
const articles = [];
|
||
const seen = new Set();
|
||
|
||
for (const r of results) {
|
||
if (!r || r.error) continue;
|
||
if (r.site) sites.push(r.site);
|
||
if (Array.isArray(r.articles)) {
|
||
for (const a of r.articles) {
|
||
if (!a || !a.url) continue;
|
||
if (seen.has(a.url)) continue;
|
||
seen.add(a.url);
|
||
articles.push(a);
|
||
}
|
||
}
|
||
}
|
||
|
||
articles.sort((a, b) => {
|
||
const ta = a.publishedAt ? new Date(a.publishedAt).getTime() : 0;
|
||
const tb = b.publishedAt ? new Date(b.publishedAt).getTime() : 0;
|
||
return tb - ta;
|
||
});
|
||
|
||
const limitedArticles = articles.slice(0, settings.articles.total);
|
||
|
||
const successSites = sites.filter((s) => s.status === 'success').length;
|
||
const failedSites = sites.filter((s) => s.status === 'failed').length;
|
||
const skippedSites = sites.filter((s) => s.status === 'skipped').length;
|
||
|
||
const cache = {
|
||
version: '1.0',
|
||
pageId,
|
||
generatedAt: new Date().toISOString(),
|
||
title: pageConfig && pageConfig.title ? String(pageConfig.title) : '',
|
||
sites,
|
||
articles: limitedArticles,
|
||
stats: {
|
||
totalSites: sourceSites.length,
|
||
successSites,
|
||
failedSites,
|
||
skippedSites,
|
||
totalArticles: limitedArticles.length,
|
||
durationMs: elapsedMs(),
|
||
},
|
||
};
|
||
|
||
const cacheDir = path.resolve(process.cwd(), settings.cacheDir);
|
||
fs.mkdirSync(cacheDir, { recursive: true });
|
||
|
||
const cachePath = path.join(cacheDir, `${pageId}.feed-cache.json`);
|
||
fs.writeFileSync(cachePath, JSON.stringify(cache, null, 2));
|
||
|
||
return { cachePath, cache };
|
||
}
|
||
|
||
function pickArticlesPages(config, onlyPageId) {
|
||
const pages = [];
|
||
const nav = Array.isArray(config.navigation) ? config.navigation : [];
|
||
|
||
for (const item of nav) {
|
||
const pageId = item && item.id ? String(item.id) : '';
|
||
if (!pageId) continue;
|
||
if (onlyPageId && pageId !== onlyPageId) continue;
|
||
|
||
const pageConfig = config[pageId];
|
||
if (!pageConfig || typeof pageConfig !== 'object') continue;
|
||
|
||
const templateName = pageConfig.template ? String(pageConfig.template) : pageId;
|
||
if (templateName !== 'articles') continue;
|
||
|
||
pages.push({ pageId, pageConfig });
|
||
}
|
||
|
||
return pages;
|
||
}
|
||
|
||
async function main() {
|
||
const elapsedMs = startTimer();
|
||
const args = process.argv.slice(2);
|
||
const pageArgIndex = args.findIndex((a) => a === '--page');
|
||
const onlyPageId = pageArgIndex >= 0 ? args[pageArgIndex + 1] : null;
|
||
|
||
log.info('开始', { page: onlyPageId || '' });
|
||
|
||
const config = loadConfig();
|
||
const settings = getRssSettings(config);
|
||
|
||
if (!settings.enabled) {
|
||
log.ok('RSS 已禁用,跳过', { env: 'RSS_ENABLED=false' });
|
||
return;
|
||
}
|
||
|
||
const pages = pickArticlesPages(config, onlyPageId);
|
||
if (pages.length === 0) {
|
||
log.ok('未找到需要同步的 articles 页面,跳过');
|
||
return;
|
||
}
|
||
|
||
log.info('准备同步 articles 页面缓存', { pages: pages.length });
|
||
|
||
let success = 0;
|
||
let failed = 0;
|
||
|
||
for (const { pageId, pageConfig } of pages) {
|
||
try {
|
||
// eslint-disable-next-line no-await-in-loop
|
||
const { cachePath, cache } = await syncArticlesForPage(pageId, pageConfig, config, settings);
|
||
success += 1;
|
||
log.ok('已生成缓存', {
|
||
page: pageId,
|
||
cache: cachePath,
|
||
articles: cache && cache.stats ? cache.stats.totalArticles : '',
|
||
sites: cache && cache.stats ? cache.stats.totalSites : '',
|
||
});
|
||
} catch (e) {
|
||
failed += 1;
|
||
log.warn('页面同步失败,已跳过(best-effort)', {
|
||
page: pageId,
|
||
message: e && e.message ? e.message : String(e),
|
||
});
|
||
if (isVerbose() && e && e.stack) console.error(e.stack);
|
||
// best-effort:不阻断其他页面/后续 build
|
||
}
|
||
}
|
||
|
||
log.ok('完成', { ms: elapsedMs(), pages: pages.length, success, failed });
|
||
}
|
||
|
||
if (require.main === module) {
|
||
main().catch((err) => {
|
||
log.error('执行失败(best-effort,不阻断后续 build/deploy)', {
|
||
message: err && err.message ? err.message : String(err),
|
||
});
|
||
if (isVerbose() && err && err.stack) console.error(err.stack);
|
||
// best-effort:不阻断后续 build/deploy(错误已输出到日志,便于排查)
|
||
process.exitCode = 0;
|
||
});
|
||
}
|
||
|
||
module.exports = {
|
||
getRssSettings,
|
||
isPrivateIp,
|
||
extractFeedLinksFromHtml,
|
||
stripHtmlToText,
|
||
normalizeArticle,
|
||
buildFlatSitesFromCategories,
|
||
};
|