feat: 页面模板差异化改进 + 配置优化 + 兼容清理 (#29)
- 首页判定:navigation 第一项 - 模板:page/projects/articles/bookmarks/search-results - bookmarks:update: YYYY-MM-DD | from: git|mtime - articles:RSS 聚合只读条目 + 分类聚合 + 影子写回结构 - projects:repo 卡片 + 可选热力图 + 自动抓取元信息 - 工作流:构建前 sync + schedule 定时刷新 - 移除兼容:config.yml/config.yaml、navigation.yml、home 特例 - 迁移说明:config/update-instructions.md
This commit is contained in:
687
scripts/sync-articles.js
Normal file
687
scripts/sync-articles.js
Normal file
@@ -0,0 +1,687 @@
|
||||
/* eslint-disable no-console */
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
const dns = require('node:dns').promises;
|
||||
const net = require('node:net');
|
||||
const Parser = require('rss-parser');
|
||||
|
||||
const { loadConfig } = require('../src/generator.js');
|
||||
|
||||
// Baseline RSS sync settings. Values from `config.site.rss` and `RSS_*`
// environment variables are layered on top of these by getRssSettings().
const DEFAULT_RSS_SETTINGS = {
  enabled: true,
  cacheDir: 'dev',

  // Network limits applied to every homepage / feed request.
  fetch: {
    timeoutMs: 10 * 1000,
    maxRetries: 1,
    concurrency: 5,
    totalTimeoutMs: 60 * 1000,
    maxRedirects: 3,
    userAgent: 'MeNavRSSSync/1.0',
    htmlMaxBytes: 512 * 1024,
    feedMaxBytes: 1024 * 1024
  },

  // Limits applied to the aggregated article list.
  articles: {
    perSite: 8,
    total: 50,
    summaryMaxLength: 200
  }
};
|
||||
|
||||
/**
 * Interpret an environment-variable style value as a boolean.
 *
 * @param {*} value Raw value; matching is case-insensitive and ignores surrounding whitespace.
 * @param {*} fallback Returned when the value is unset, empty or unrecognised.
 * @returns {*} `true` for 1/true/yes/y, `false` for 0/false/no/n, otherwise `fallback`.
 */
function parseBooleanEnv(value, fallback) {
  if (value == null || value === '') return fallback;

  const normalized = String(value).trim().toLowerCase();
  if (['1', 'true', 'yes', 'y'].includes(normalized)) return true;
  if (['0', 'false', 'no', 'n'].includes(normalized)) return false;
  return fallback;
}
|
||||
|
||||
/**
 * Parse an environment-variable style value as a base-10 integer.
 *
 * @param {*} value Raw value (usually a string).
 * @param {*} fallback Returned when the value is unset, empty or does not start with an integer.
 * @returns {*} The parsed integer, or `fallback`.
 */
function parseIntegerEnv(value, fallback) {
  const isUnset = value === undefined || value === null || value === '';
  if (isUnset) return fallback;

  const parsed = Number.parseInt(String(value), 10);
  if (!Number.isFinite(parsed)) return fallback;
  return parsed;
}
|
||||
|
||||
/**
 * Build the effective RSS sync settings.
 *
 * Precedence, lowest to highest: DEFAULT_RSS_SETTINGS, `config.site.rss`,
 * then `RSS_*` environment variables. Numeric values are finally clamped to
 * safe ranges; a value that is not a number at all (for example a typo such
 * as `concurrency: "abc"` in the config) falls back to the built-in default
 * instead of turning into NaN, which would otherwise yield a zero-delay
 * timeout or zero workers further down the pipeline.
 *
 * @param {object} [config] Loaded site configuration.
 * @returns {object} Settings object with `enabled`, `cacheDir`, `fetch` and `articles`.
 */
function getRssSettings(config) {
  const rss = config && config.site && config.site.rss;
  const fromConfig = rss && typeof rss === 'object' ? rss : {};

  // Only spread nested sections that really are objects; spreading a string
  // would inject its characters as numeric keys.
  const section = value => (value && typeof value === 'object' ? value : {});

  const merged = {
    ...DEFAULT_RSS_SETTINGS,
    ...fromConfig,
    fetch: { ...DEFAULT_RSS_SETTINGS.fetch, ...section(fromConfig.fetch) },
    articles: { ...DEFAULT_RSS_SETTINGS.articles, ...section(fromConfig.articles) }
  };

  // Environment overrides (mainly for CI debugging / degradation).
  const env = process.env;
  merged.enabled = parseBooleanEnv(env.RSS_ENABLED, merged.enabled);
  merged.cacheDir = env.RSS_CACHE_DIR ? String(env.RSS_CACHE_DIR) : merged.cacheDir;

  merged.fetch.timeoutMs = parseIntegerEnv(env.RSS_FETCH_TIMEOUT, merged.fetch.timeoutMs);
  merged.fetch.maxRetries = parseIntegerEnv(env.RSS_FETCH_MAX_RETRIES, merged.fetch.maxRetries);
  merged.fetch.concurrency = parseIntegerEnv(env.RSS_FETCH_CONCURRENCY, merged.fetch.concurrency);
  merged.fetch.totalTimeoutMs = parseIntegerEnv(env.RSS_TOTAL_TIMEOUT, merged.fetch.totalTimeoutMs);
  merged.fetch.maxRedirects = parseIntegerEnv(env.RSS_FETCH_MAX_REDIRECTS, merged.fetch.maxRedirects);

  merged.articles.perSite = parseIntegerEnv(env.RSS_ARTICLES_PER_SITE, merged.articles.perSite);
  merged.articles.total = parseIntegerEnv(env.RSS_ARTICLES_TOTAL, merged.articles.total);
  merged.articles.summaryMaxLength = parseIntegerEnv(
    env.RSS_SUMMARY_MAX_LENGTH,
    merged.articles.summaryMaxLength
  );

  // Safety net: keep odd configuration from hanging the job or exhausting memory.
  const bounded = (value, fallback, min, max = Number.POSITIVE_INFINITY) => {
    const numeric = Number(value);
    return Math.max(min, Math.min(max, Number.isNaN(numeric) ? fallback : numeric));
  };
  const fetchDefaults = DEFAULT_RSS_SETTINGS.fetch;
  const articleDefaults = DEFAULT_RSS_SETTINGS.articles;

  merged.fetch.timeoutMs = bounded(merged.fetch.timeoutMs, fetchDefaults.timeoutMs, 1_000);
  merged.fetch.totalTimeoutMs = bounded(merged.fetch.totalTimeoutMs, fetchDefaults.totalTimeoutMs, 5_000);
  merged.fetch.concurrency = bounded(merged.fetch.concurrency, fetchDefaults.concurrency, 1, 20);
  merged.fetch.maxRetries = bounded(merged.fetch.maxRetries, fetchDefaults.maxRetries, 0, 3);
  merged.fetch.maxRedirects = bounded(merged.fetch.maxRedirects, fetchDefaults.maxRedirects, 0, 10);

  merged.articles.perSite = bounded(merged.articles.perSite, articleDefaults.perSite, 1, 50);
  merged.articles.total = bounded(merged.articles.total, articleDefaults.total, 1, 500);
  merged.articles.summaryMaxLength = bounded(
    merged.articles.summaryMaxLength,
    articleDefaults.summaryMaxLength,
    0,
    2_000
  );

  return merged;
}
|
||||
|
||||
/**
 * Tell whether a value parses as an absolute http(s) URL.
 *
 * @param {*} url Candidate URL.
 * @returns {boolean} `true` only for parseable URLs whose scheme is http or https.
 */
function isHttpUrl(url) {
  if (!url) return false;

  let parsed;
  try {
    parsed = new URL(String(url));
  } catch {
    return false;
  }
  return ['http:', 'https:'].includes(parsed.protocol);
}
|
||||
|
||||
/**
 * Decide whether an IP literal must be treated as non-public (loopback,
 * private, link-local, shared, multicast or reserved) for SSRF protection.
 *
 * Anything that is not a syntactically valid IPv4/IPv6 address is reported
 * as private, so callers fail closed.
 *
 * @param {string} ip IPv4 or IPv6 address (an IPv6 zone id is tolerated).
 * @returns {boolean} `true` when the address must not be fetched.
 */
function isPrivateIp(ip) {
  if (!ip) return true;

  const family = net.isIP(ip);

  if (family === 4) {
    const parts = ip.split('.').map(n => Number.parseInt(n, 10));
    if (parts.length !== 4 || parts.some(n => !Number.isFinite(n) || n < 0 || n > 255)) return true;

    const [a, b, c] = parts;
    if (a === 0 || a === 10 || a === 127) return true; // "this network", RFC 1918, loopback
    if (a === 100 && b >= 64 && b <= 127) return true; // 100.64.0.0/10 shared address space (CGNAT)
    if (a === 169 && b === 254) return true; // link-local, includes cloud metadata endpoints
    if (a === 172 && b >= 16 && b <= 31) return true; // RFC 1918
    if (a === 192 && b === 168) return true; // RFC 1918
    if (a === 192 && b === 0 && c === 0) return true; // 192.0.0.0/24 IETF protocol assignments
    if (a === 198 && (b === 18 || b === 19)) return true; // 198.18.0.0/15 benchmarking
    if (a >= 224) return true; // multicast / reserved
    return false;
  }

  if (family === 6) {
    // Drop an optional zone id ("fe80::1%eth0") before prefix matching.
    const address = String(ip).toLowerCase().split('%')[0];
    if (address === '::1' || address === '::') return true; // loopback / unspecified

    // IPv4-mapped (::ffff:a.b.c.d) and deprecated IPv4-compatible (::a.b.c.d)
    // addresses reach the embedded IPv4 host, so judge that address instead.
    const dotted = /^(?:::ffff:|::)(\d+\.\d+\.\d+\.\d+)$/.exec(address);
    if (dotted) return isPrivateIp(dotted[1]);

    // Same mapped address written in pure hex form, e.g. ::ffff:7f00:1.
    const hex = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(address);
    if (hex) {
      const hi = Number.parseInt(hex[1], 16);
      const lo = Number.parseInt(hex[2], 16);
      return isPrivateIp(`${hi >> 8}.${hi & 255}.${lo >> 8}.${lo & 255}`);
    }

    if (/^fe[89ab]/.test(address)) return true; // fe80::/10 link-local
    if (address.startsWith('fc') || address.startsWith('fd')) return true; // fc00::/7 unique local
    if (address.startsWith('ff')) return true; // ff00::/8 multicast
    return false;
  }

  return true;
}
|
||||
|
||||
/**
 * Race a promise against a timer.
 *
 * @param {Promise<*>} promise Work to wait for.
 * @param {number} timeoutMs Milliseconds before giving up.
 * @param {string} label Prefix used in the timeout error message.
 * @returns {Promise<*>} Settles like `promise`, or rejects with a timeout
 *   Error when the timer fires first. The timer is always cleared afterwards.
 */
async function withTimeout(promise, timeoutMs, label) {
  let timer;
  const expiry = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error(`${label} 超时(${timeoutMs}ms)`)), timeoutMs);
  });

  return Promise.race([promise, expiry]).finally(() => {
    if (timer) clearTimeout(timer);
  });
}
|
||||
|
||||
/**
 * Reject URLs that must not be fetched by the sync job (SSRF guard).
 *
 * Checks, in order: scheme is http/https, no embedded credentials, host is
 * not a loopback name or `.local`, and the host (an IP literal, or every
 * address the hostname resolves to) is public according to isPrivateIp().
 *
 * The DNS check is best-effort: the later `fetch` performs its own
 * resolution, so it does not protect against DNS rebinding.
 *
 * @param {string} url URL about to be requested.
 * @param {number} timeoutMs Request timeout; DNS resolution gets at most 2s of it.
 * @returns {Promise<void>} Resolves when the URL is allowed.
 * @throws {Error} When the URL is malformed or violates any rule, or when
 *   DNS resolution fails or times out.
 */
async function assertSafeToFetch(url, timeoutMs) {
  const u = new URL(String(url));
  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    throw new Error(`仅允许 http/https:${u.protocol}`);
  }

  if (u.username || u.password) {
    throw new Error('禁止包含用户名/密码的 URL');
  }

  // URL.hostname keeps the brackets around IPv6 literals ("[::1]") and a
  // trailing root dot ("localhost."); strip both so the comparisons and
  // net.isIP() below see the bare host.
  const hostname = u.hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, '')
    .replace(/\.$/, '');

  if (hostname === 'localhost' || hostname === '0.0.0.0' || hostname === '127.0.0.1' || hostname === '::1') {
    throw new Error('禁止访问本机地址');
  }
  if (hostname.endsWith('.local')) {
    throw new Error('禁止访问 .local 域名');
  }

  if (net.isIP(hostname)) {
    if (isPrivateIp(hostname)) throw new Error('禁止访问内网/保留 IP');
    return;
  }

  // Resolve the name and block hosts that point at internal addresses (best-effort).
  const records = await withTimeout(
    dns.lookup(hostname, { all: true, verbatim: true }),
    Math.min(2_000, timeoutMs),
    `DNS 解析 ${hostname}`
  );

  if (!Array.isArray(records) || records.length === 0) {
    throw new Error('DNS 解析失败或无结果');
  }

  // A single internal record is enough to refuse the host.
  if (records.some(record => isPrivateIp(record.address))) {
    throw new Error('DNS 解析到内网/保留地址,已阻断');
  }
}
|
||||
|
||||
/**
 * Default request headers for fetching HTML pages.
 *
 * @param {string} userAgent Value for the `user-agent` header.
 * @returns {{'user-agent': string, accept: string}} Header map.
 */
function buildHeaders(userAgent) {
  const accept = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
  return { 'user-agent': userAgent, accept };
}
|
||||
|
||||
/**
 * GET a URL, following redirects manually so that every hop is re-validated
 * by assertSafeToFetch().
 *
 * The per-hop timeout covers both the response headers and the body
 * download: the abort timer stays armed until the body has been read, so a
 * server that stalls mid-body cannot hang the job. Bodies of redirect and
 * error responses are cancelled so their connections are released.
 *
 * @param {string} url Initial URL.
 * @param {object} options
 * @param {number} options.timeoutMs Timeout per hop, in milliseconds.
 * @param {number} options.maxRedirects Maximum number of redirects to follow.
 * @param {object} options.headers Request headers.
 * @param {number} options.maxBytes Maximum accepted body size in bytes.
 * @returns {Promise<{url: string, response: Response, text: string}>} Final URL, response and decoded body.
 * @throws {Error} On an unsafe URL, a non-2xx status, a redirect without
 *   `Location`, too many redirects, an oversized body, or a timeout/abort.
 */
async function fetchWithRedirects(url, { timeoutMs, maxRedirects, headers, maxBytes }) {
  // Best-effort release of an unread body; failures here are irrelevant.
  const discardBody = async response => {
    if (response.body && typeof response.body.cancel === 'function') {
      await response.body.cancel().catch(() => {});
    }
  };

  let current = String(url);
  for (let hop = 0; hop <= maxRedirects; hop += 1) {
    await assertSafeToFetch(current, timeoutMs);

    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);

    try {
      const response = await fetch(current, {
        method: 'GET',
        redirect: 'manual',
        headers,
        signal: controller.signal
      });

      const { status } = response;
      if (status >= 300 && status < 400) {
        const location = response.headers.get('location');
        await discardBody(response);
        if (!location) throw new Error(`重定向缺少 Location(${status})`);
        // Location may be relative; resolve it against the URL just requested.
        current = new URL(location, current).toString();
        continue;
      }

      if (!response.ok) {
        await discardBody(response);
        throw new Error(`HTTP ${status}`);
      }

      const text = await readResponseTextWithLimit(response, maxBytes);
      return { url: current, response, text };
    } finally {
      clearTimeout(timer);
    }
  }

  throw new Error(`重定向次数超过上限(${maxRedirects})`);
}
|
||||
|
||||
/**
 * Read a response body as UTF-8 text while enforcing a size limit.
 *
 * When the body is a readable stream it is consumed chunk by chunk and the
 * download is cancelled as soon as the limit is exceeded. Otherwise the
 * whole text is read first and measured afterwards.
 *
 * @param {Response} response Fetch response (or a compatible object exposing `text()`).
 * @param {number} maxBytes Maximum accepted body size in bytes.
 * @returns {Promise<string>} Decoded body.
 * @throws {Error} When the body is larger than `maxBytes`.
 */
async function readResponseTextWithLimit(response, maxBytes) {
  if (!response.body || typeof response.body.getReader !== 'function') {
    const text = await response.text();
    if (Buffer.byteLength(text, 'utf8') > maxBytes) {
      throw new Error('响应体过大');
    }
    return text;
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder('utf-8');
  let received = 0;
  let text = '';

  while (true) {
    // eslint-disable-next-line no-await-in-loop
    const { done, value } = await reader.read();
    if (done) break;

    received += value.byteLength;
    if (received > maxBytes) {
      // cancel() returns a promise: await it and swallow its rejection so a
      // failing cancel can never surface as an unhandled rejection.
      // eslint-disable-next-line no-await-in-loop
      await Promise.resolve()
        .then(() => reader.cancel())
        .catch(() => {});
      throw new Error('响应体过大');
    }

    // `stream: true` keeps multi-byte sequences that straddle chunks intact.
    text += decoder.decode(value, { stream: true });
  }

  // Flush whatever the decoder is still buffering.
  text += decoder.decode();
  return text;
}
|
||||
|
||||
/**
 * Find RSS/Atom autodiscovery links (`<link rel="alternate" ...>`) in an HTML document.
 *
 * Attribute values may be double-quoted, single-quoted or unquoted. Attribute
 * names are matched as whole names, so `data-href` is never mistaken for
 * `href`. `&amp;` in the href is decoded before the URL is resolved.
 *
 * @param {string} html HTML source.
 * @param {string} baseUrl URL the document was loaded from; relative hrefs are resolved against it.
 * @returns {string[]} Unique absolute http(s) feed URLs. Links declared as RSS
 *   come before links declared as Atom; document order is kept within each group.
 */
function extractFeedLinksFromHtml(html, baseUrl) {
  if (!html) return [];

  const readAttribute = (tag, name) => {
    const pattern = new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`, 'i');
    const match = pattern.exec(tag);
    return match ? match[1] ?? match[2] ?? match[3] ?? '' : '';
  };

  // url -> best (lowest) rank seen; a Map keeps first-seen order.
  const ranked = new Map();
  const linkTags = String(html).match(/<link\b[^>]*>/gi) || [];

  for (const tag of linkTags) {
    if (!/alternate/i.test(readAttribute(tag, 'rel'))) continue;

    const type = readAttribute(tag, 'type');
    if (!/(rss|atom)/i.test(type)) continue;

    // The attribute value is HTML-escaped; undo the one escape that is
    // routinely present in URLs.
    const href = readAttribute(tag, 'href').replace(/&(?:amp|#38|#x26);/gi, '&');
    if (!href) continue;

    let resolved;
    try {
      resolved = new URL(href, baseUrl);
    } catch {
      continue; // ignore malformed URLs
    }
    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') continue;

    // Rank by the declared media type rather than by substrings of the URL,
    // which misclassifies hosts such as "atomic.example".
    const rank = /atom/i.test(type) ? 2 : 1;
    const key = resolved.toString();
    ranked.set(key, Math.min(rank, ranked.get(key) ?? rank));
  }

  // Simple ordering: RSS first, then Atom (Array#sort is stable).
  return [...ranked.entries()].sort((a, b) => a[1] - b[1]).map(([feedUrl]) => feedUrl);
}
|
||||
|
||||
/**
 * Build the list of well-known feed locations for a site.
 *
 * Each path is absolute, so it is resolved against the origin of `siteUrl`.
 *
 * @param {string} siteUrl Site URL used as the resolution base.
 * @returns {string[]} Candidate feed URLs; empty when `siteUrl` cannot be parsed.
 */
function buildCommonFeedUrls(siteUrl) {
  const wellKnownPaths = ['/feed', '/rss.xml', '/rss', '/atom.xml', '/atom', '/feed.xml'];

  return wellKnownPaths.flatMap(feedPath => {
    try {
      return [new URL(feedPath, siteUrl).toString()];
    } catch {
      return []; // unparseable base URL: contribute nothing
    }
  });
}
|
||||
|
||||
/**
 * Discover a site's feed URL by fetching the configured URL.
 *
 * If the response is HTML (or has no content type), the first
 * autodiscovery link found in it is returned. If the fetched document is
 * itself an RSS/Atom/RDF feed (the configured URL points straight at a
 * feed), the final URL of that document is returned.
 *
 * @param {string} siteUrl Site URL from the configuration.
 * @param {object} settings Effective RSS settings (uses `settings.fetch`).
 * @param {number} deadlineTs Epoch milliseconds after which no new request may start.
 * @returns {Promise<string|null>} Feed URL, or `null` when nothing was discovered.
 * @throws {Error} When the deadline has passed or the request fails.
 */
async function discoverFeedUrl(siteUrl, settings, deadlineTs) {
  const timeRemaining = deadlineTs - Date.now();
  if (timeRemaining <= 0) throw new Error('总超时:无法继续发现 RSS');

  const homepage = await fetchWithRedirects(siteUrl, {
    timeoutMs: Math.min(settings.fetch.timeoutMs, timeRemaining),
    maxRedirects: settings.fetch.maxRedirects,
    headers: buildHeaders(settings.fetch.userAgent),
    maxBytes: settings.fetch.htmlMaxBytes
  });

  const contentType = homepage.response.headers.get('content-type') || '';
  const looksLikeHtml =
    !contentType || /text\/html/i.test(contentType) || /application\/xhtml\+xml/i.test(contentType);

  if (looksLikeHtml) {
    const [firstCandidate] = extractFeedLinksFromHtml(homepage.text, homepage.url);
    if (firstCandidate) return firstCandidate;
  }

  // Feeds are served with many different content types, so sniff the root
  // element instead of trusting the header.
  if (/^\s*(?:<\?xml[^>]*>\s*)?<(?:rss|feed|rdf:rdf)\b/i.test(homepage.text)) {
    return homepage.url;
  }

  return null;
}
|
||||
|
||||
/**
 * Reduce an HTML fragment to plain text.
 *
 * `<script>` / `<style>` elements are dropped with their content, remaining
 * tags become spaces, character references are decoded and whitespace runs
 * are collapsed.
 *
 * References are decoded in a single pass, so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than `<`. Unknown named references and
 * numeric references outside the valid Unicode range are left untouched
 * instead of throwing.
 *
 * @param {*} input HTML (any falsy value is treated as an empty string).
 * @returns {string} Plain text, trimmed.
 */
function stripHtmlToText(input) {
  const raw = String(input || '');
  const withoutTags = raw
    .replace(/<script[\s\S]*?<\/script>/gi, '')
    .replace(/<style[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ');

  const namedEntities = new Map([
    ['nbsp', ' '],
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"]
  ]);

  const decoded = withoutTags.replace(/&(?:#x([0-9a-f]+)|#(\d+)|([a-z]+));/gi, (match, hex, dec, name) => {
    if (name !== undefined) {
      const replacement = namedEntities.get(name.toLowerCase());
      return replacement === undefined ? match : replacement;
    }

    const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    // String.fromCodePoint throws a RangeError above U+10FFFF.
    if (!Number.isFinite(codePoint) || codePoint > 0x10ffff || isSurrogate) return match;
    return String.fromCodePoint(codePoint);
  });

  return decoded.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
/**
 * Shorten text to at most `maxLen` UTF-16 code units, followed by "...".
 *
 * The cut never lands inside a surrogate pair: when the last kept unit
 * would be the first half of an astral character (an emoji, for example),
 * that half is dropped too, so the result is always well-formed.
 *
 * @param {*} text Text to shorten (falsy values are treated as empty).
 * @param {number} maxLen Maximum length before the ellipsis; `0` or less yields ''.
 * @returns {string} The original text when short enough, else the truncated text plus "...".
 */
function truncateText(text, maxLen) {
  if (!maxLen || maxLen <= 0) return '';

  const s = String(text || '');
  if (s.length <= maxLen) return s;

  let end = Math.floor(maxLen);
  const lastKept = s.charCodeAt(end - 1);
  // A high surrogate at the cut means its low half would be sliced off.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;

  return `${s.slice(0, end)}...`;
}
|
||||
|
||||
/**
 * Pick a publication timestamp for a feed item.
 *
 * @param {object} item Parsed feed item.
 * @returns {string} `item.isoDate` as-is when it is a non-empty string;
 *   otherwise `item.pubDate` converted to ISO 8601 when it parses as a date;
 *   otherwise ''.
 */
function normalizePublishedAt(item) {
  if (!item) return '';

  if (typeof item.isoDate === 'string' && item.isoDate) return item.isoDate;

  if (typeof item.pubDate === 'string' && item.pubDate) {
    const parsed = new Date(item.pubDate);
    if (!Number.isNaN(parsed.getTime())) return parsed.toISOString();
  }

  return '';
}
|
||||
|
||||
/**
 * Convert a parsed feed item into the article record stored in the cache.
 *
 * @param {object} item Parsed feed item (`title`, `link`, `contentSnippet` / `summary` / `content`, dates).
 * @param {object} sourceSite Configured site the feed belongs to (`name`, `url`, `icon`).
 * @param {object} settings Effective RSS settings (uses `settings.articles.summaryMaxLength`).
 * @returns {object|null} Article record, or `null` when the item has no
 *   usable title or no http(s) link.
 */
function normalizeArticle(item, sourceSite, settings) {
  // `!= null` also rejects null, which String() would turn into the literal title "null".
  const title = item && item.title != null ? String(item.title).trim() : '';
  if (!title) return null;

  const link = item && item.link ? String(item.link).trim() : '';
  if (!isHttpUrl(link)) return null;

  // Use the first non-empty string; structured (object) values are skipped
  // because stringifying them would produce "[object Object]".
  const summaryRaw =
    [item.contentSnippet, item.summary, item.content].find(
      candidate => typeof candidate === 'string' && candidate !== ''
    ) || '';
  const summaryText = stripHtmlToText(summaryRaw);

  // A limit of 0 leaves the summary untruncated.
  const limit = settings.articles.summaryMaxLength;
  const summary = limit ? truncateText(summaryText, limit) : summaryText;

  const publishedAt = normalizePublishedAt(item);

  const source = sourceSite && sourceSite.name ? String(sourceSite.name) : '';
  const sourceUrl = sourceSite && sourceSite.url ? String(sourceSite.url) : '';
  const icon = sourceSite && sourceSite.icon ? String(sourceSite.icon) : 'fas fa-pen';

  return {
    title,
    url: link,
    summary,
    publishedAt,
    source,
    // Site homepage URL (lets the generator group articles by category; `url` is the article link itself).
    sourceUrl,
    icon
  };
}
|
||||
|
||||
/**
 * Download a feed and parse it.
 *
 * @param {string} feedUrl Feed URL to request.
 * @param {object} settings Effective RSS settings (uses `settings.fetch`).
 * @param {object} parser Feed parser exposing `parseString(text)`.
 * @param {number} deadlineTs Epoch milliseconds after which no new request may start.
 * @returns {Promise<{feedUrl: string, feedTitle: string, items: Array}>} Final
 *   feed URL (after redirects), feed title and parsed items.
 * @throws {Error} When the deadline has passed, the request fails or parsing fails.
 */
async function fetchAndParseFeed(feedUrl, settings, parser, deadlineTs) {
  const timeRemaining = deadlineTs - Date.now();
  if (timeRemaining <= 0) throw new Error('总超时:无法继续抓取 Feed');

  const { timeoutMs, maxRedirects, userAgent, feedMaxBytes } = settings.fetch;

  // Same base headers as page requests, but prefer feed media types.
  const headers = {
    ...buildHeaders(userAgent),
    accept: 'application/rss+xml,application/atom+xml,application/xml,text/xml;q=0.9,*/*;q=0.8'
  };

  const feed = await fetchWithRedirects(feedUrl, {
    timeoutMs: Math.min(timeoutMs, timeRemaining),
    maxRedirects,
    headers,
    maxBytes: feedMaxBytes
  });

  const parsed = await parser.parseString(feed.text);
  const items = Array.isArray(parsed.items) ? parsed.items : [];
  return { feedUrl: feed.url, feedTitle: parsed.title || '', items };
}
|
||||
|
||||
/**
 * Fetch the articles of one configured site.
 *
 * Feed candidates are the autodiscovered URL (if any) followed by the
 * well-known feed paths; the first candidate that downloads and parses wins.
 * A failed homepage fetch no longer aborts the attempt: the well-known paths
 * are still tried, since sites that block or oversize their homepage often
 * serve a feed anyway. Retries stop once the global deadline has passed.
 *
 * This function does not throw for fetch/parse problems; they are reported
 * through `site.status` and `site.error`.
 *
 * @param {object} sourceSite Configured site (`name`, `url`, `icon`).
 * @param {object} settings Effective RSS settings.
 * @param {object} parser Feed parser exposing `parseString(text)`.
 * @param {number} deadlineTs Epoch milliseconds after which no new request may start.
 * @returns {Promise<{site: object, articles: object[]}>} Site status record
 *   (`status` is 'success', 'failed' or 'skipped') and its normalized articles.
 */
async function processSourceSite(sourceSite, settings, parser, deadlineTs) {
  const name = sourceSite && sourceSite.name ? String(sourceSite.name) : '';
  const url = sourceSite && sourceSite.url ? String(sourceSite.url) : '';

  if (!isHttpUrl(url)) {
    return {
      site: {
        name,
        url,
        feedUrl: '',
        status: 'skipped',
        error: '无效 URL(需为 http/https)',
        fetchedAt: new Date().toISOString()
      },
      articles: []
    };
  }

  let lastError = null;

  // Download one candidate and normalize its items.
  const tryOnce = async feedUrl => {
    const parsed = await fetchAndParseFeed(feedUrl, settings, parser, deadlineTs);
    const articles = parsed.items
      .map(item => normalizeArticle(item, sourceSite, settings))
      .filter(Boolean)
      .slice(0, settings.articles.perSite);
    return { feedUrl: parsed.feedUrl, articles };
  };

  // One full pass: discovery, then every candidate in order.
  const attempt = async () => {
    let discovered = null;
    try {
      discovered = await discoverFeedUrl(url, settings, deadlineTs);
    } catch (e) {
      // Keep going: the well-known feed paths may work without the homepage.
      lastError = e;
    }

    const common = buildCommonFeedUrls(url);
    const candidates = [...new Set(discovered ? [discovered, ...common] : common)];

    for (const candidate of candidates) {
      try {
        // eslint-disable-next-line no-await-in-loop
        return await tryOnce(candidate);
      } catch (e) {
        lastError = e;
      }
    }
    throw lastError || new Error('未找到可用 Feed');
  };

  const startedAt = Date.now();
  for (let i = 0; i <= settings.fetch.maxRetries; i += 1) {
    // A retry cannot succeed once the global deadline is gone.
    if (i > 0 && Date.now() >= deadlineTs) break;

    try {
      // eslint-disable-next-line no-await-in-loop
      const res = await attempt();
      return {
        site: {
          name,
          url,
          feedUrl: res.feedUrl,
          status: 'success',
          error: '',
          fetchedAt: new Date().toISOString(),
          durationMs: Date.now() - startedAt
        },
        articles: res.articles
      };
    } catch (e) {
      lastError = e;
    }
  }

  return {
    site: {
      name,
      url,
      feedUrl: '',
      status: 'failed',
      error: lastError ? String(lastError.message || lastError) : '未知错误',
      fetchedAt: new Date().toISOString(),
      durationMs: Date.now() - startedAt
    },
    articles: []
  };
}
|
||||
|
||||
/**
 * Map `items` through an async worker with a bounded number of workers in flight.
 *
 * Results keep the order of `items`. A worker that throws does not abort
 * the run; its slot holds `{ error }` instead.
 *
 * @param {Array} items Inputs.
 * @param {number} concurrency Maximum parallel workers. Values that are not
 *   numbers are treated as 1, so every item is still processed.
 * @param {(item: *, index: number) => Promise<*>} worker Async mapper.
 * @returns {Promise<Array>} One result (or `{ error }`) per input.
 */
async function mapWithConcurrency(items, concurrency, worker) {
  const results = new Array(items.length);
  let nextIndex = 0;

  // Each runner keeps claiming the next unprocessed index until none are left.
  async function runOne() {
    while (nextIndex < items.length) {
      const currentIndex = nextIndex;
      nextIndex += 1;

      try {
        // eslint-disable-next-line no-await-in-loop
        results[currentIndex] = await worker(items[currentIndex], currentIndex);
      } catch (e) {
        results[currentIndex] = { error: e };
      }
    }
  }

  // NaN would make the runner count NaN as well, start no runner at all and
  // return an array of unfilled slots without ever calling the worker.
  const requested = Number(concurrency);
  const limit = Number.isNaN(requested) ? 1 : requested;
  const count = Math.max(1, Math.min(limit, items.length));

  const runners = [];
  for (let i = 0; i < count; i += 1) {
    runners.push(runOne());
  }
  await Promise.all(runners);
  return results;
}
|
||||
|
||||
/**
 * Depth-first collection of site objects from a category tree.
 *
 * Children under `subcategories`, `groups` and `subgroups` are visited (in
 * that order) before the node's own `sites`; non-object site entries are skipped.
 *
 * @param {object} node Category / group node.
 * @param {object[]} output Array that receives the sites (mutated).
 * @returns {void}
 */
function collectSitesRecursively(node, output) {
  if (!node || typeof node !== 'object') return;

  for (const childKey of ['subcategories', 'groups', 'subgroups']) {
    const children = node[childKey];
    if (!Array.isArray(children)) continue;
    for (const child of children) collectSitesRecursively(child, output);
  }

  if (!Array.isArray(node.sites)) return;
  for (const site of node.sites) {
    if (site && typeof site === 'object') output.push(site);
  }
}
|
||||
|
||||
/**
 * Flatten a list of category trees into a single list of sites.
 *
 * @param {object[]} categories Top-level categories.
 * @returns {object[]} All sites found by collectSitesRecursively(); empty
 *   when `categories` is not an array.
 */
function buildFlatSitesFromCategories(categories) {
  const flattened = [];
  if (Array.isArray(categories)) {
    for (const category of categories) collectSitesRecursively(category, flattened);
  }
  return flattened;
}
|
||||
|
||||
/**
 * Fetch every source site of one articles page and write the page's feed cache.
 *
 * Sources are `pageConfig.sites` when that is an array, otherwise all sites
 * found in `pageConfig.categories`. Articles are de-duplicated by URL,
 * sorted newest first (missing or unparseable dates sort last) and capped
 * at `settings.articles.total`. The cache is written to
 * `<cacheDir>/<pageId>.feed-cache.json`.
 *
 * @param {string} pageId Page identifier; also used in the cache file name.
 * @param {object} pageConfig Configuration of the page.
 * @param {object} config Full site configuration (currently unused, kept for signature stability).
 * @param {object} settings Effective RSS settings.
 * @returns {Promise<{cachePath: string, cache: object}>} Written path and cache content.
 * @throws {Error} When the cache directory or file cannot be written.
 */
async function syncArticlesForPage(pageId, pageConfig, config, settings) {
  const sourceSites = Array.isArray(pageConfig && pageConfig.sites)
    ? pageConfig.sites
    : buildFlatSitesFromCategories(pageConfig && Array.isArray(pageConfig.categories) ? pageConfig.categories : []);

  const startedAt = Date.now();
  // One deadline shared by every site of this page.
  const deadlineTs = startedAt + settings.fetch.totalTimeoutMs;

  const parser = new Parser({
    timeout: settings.fetch.timeoutMs
  });

  const results = await mapWithConcurrency(
    sourceSites,
    settings.fetch.concurrency,
    async site => processSourceSite(site, settings, parser, deadlineTs)
  );

  const sites = [];
  const articles = [];
  const seen = new Set();

  results.forEach((r, index) => {
    if (!r) return;

    if (r.error) {
      // The worker itself threw: still record the site as failed so the
      // per-status counts add up to totalSites.
      const source = sourceSites[index];
      sites.push({
        name: source && source.name ? String(source.name) : '',
        url: source && source.url ? String(source.url) : '',
        feedUrl: '',
        status: 'failed',
        error: String(r.error.message || r.error),
        fetchedAt: new Date().toISOString()
      });
      return;
    }

    if (r.site) sites.push(r.site);
    if (!Array.isArray(r.articles)) return;

    for (const a of r.articles) {
      if (!a || !a.url || seen.has(a.url)) continue;
      seen.add(a.url);
      articles.push(a);
    }
  });

  // An unparseable date yields NaN, which would make the comparator
  // inconsistent; treat it like a missing date.
  const toTimestamp = article => {
    const t = article.publishedAt ? new Date(article.publishedAt).getTime() : 0;
    return Number.isNaN(t) ? 0 : t;
  };
  articles.sort((a, b) => toTimestamp(b) - toTimestamp(a));

  const limitedArticles = articles.slice(0, settings.articles.total);

  const countByStatus = status => sites.filter(s => s.status === status).length;

  const cache = {
    version: '1.0',
    pageId,
    generatedAt: new Date().toISOString(),
    title: pageConfig && pageConfig.title ? String(pageConfig.title) : '',
    sites,
    articles: limitedArticles,
    stats: {
      totalSites: sourceSites.length,
      successSites: countByStatus('success'),
      failedSites: countByStatus('failed'),
      skippedSites: countByStatus('skipped'),
      totalArticles: limitedArticles.length,
      durationMs: Date.now() - startedAt
    }
  };

  const cacheDir = path.resolve(process.cwd(), settings.cacheDir);
  fs.mkdirSync(cacheDir, { recursive: true });

  const cachePath = path.join(cacheDir, `${pageId}.feed-cache.json`);
  fs.writeFileSync(cachePath, JSON.stringify(cache, null, 2));

  return { cachePath, cache };
}
|
||||
|
||||
/**
 * List the navigation pages rendered with the `articles` template.
 *
 * A page's template is `config[pageId].template`, defaulting to the page id.
 *
 * @param {object} config Full site configuration (`navigation` plus one entry per page id).
 * @param {string|null} [onlyPageId] When truthy, restrict the result to this page id.
 * @returns {Array<{pageId: string, pageConfig: object}>} Matching pages in navigation order.
 */
function pickArticlesPages(config, onlyPageId) {
  const navigation = Array.isArray(config.navigation) ? config.navigation : [];

  return navigation.reduce((selected, entry) => {
    const pageId = entry && entry.id ? String(entry.id) : '';
    const isRequested = Boolean(pageId) && (!onlyPageId || pageId === onlyPageId);
    if (!isRequested) return selected;

    const pageConfig = config[pageId];
    if (!pageConfig || typeof pageConfig !== 'object') return selected;

    const templateName = pageConfig.template ? String(pageConfig.template) : pageId;
    if (templateName === 'articles') selected.push({ pageId, pageConfig });
    return selected;
  }, []);
}
|
||||
|
||||
/**
 * CLI entry point: refresh the feed cache of every articles page.
 *
 * Accepts `--page <id>` to sync a single page. A failure on one page is
 * logged as a warning and does not stop the remaining pages.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const flagPosition = args.indexOf('--page');
  const onlyPageId = flagPosition >= 0 ? args[flagPosition + 1] : null;

  const config = loadConfig();
  const settings = getRssSettings(config);

  if (!settings.enabled) {
    console.log('[INFO] RSS 已禁用(RSS_ENABLED=false),跳过。');
    return;
  }

  const pages = pickArticlesPages(config, onlyPageId);
  if (pages.length === 0) {
    console.log('[INFO] 未找到需要同步的 articles 页面。');
    return;
  }

  console.log(`[INFO] 准备同步 ${pages.length} 个 articles 页面缓存…`);

  // Pages are synced one after another.
  for (const { pageId, pageConfig } of pages) {
    try {
      // eslint-disable-next-line no-await-in-loop
      const { cachePath, cache } = await syncArticlesForPage(pageId, pageConfig, config, settings);
      const { totalArticles, totalSites } = cache.stats;
      console.log(`[INFO] 已生成缓存:${cachePath}(articles=${totalArticles}, sites=${totalSites})`);
    } catch (e) {
      // Best-effort: do not block other pages or the subsequent build.
      console.warn(`[WARN] 页面 ${pageId} 同步失败:${e.message || e}`);
    }
  }
}
|
||||
|
||||
if (require.main === module) {
|
||||
main().catch(err => {
|
||||
console.error('[ERROR] sync-articles 执行失败:', err);
|
||||
// best-effort:除非是非常规异常,否则不阻断 CI;此处仍保留非 0 退出码便于本地排查
|
||||
process.exitCode = 1;
|
||||
});
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
getRssSettings,
|
||||
isPrivateIp,
|
||||
extractFeedLinksFromHtml,
|
||||
stripHtmlToText,
|
||||
normalizeArticle,
|
||||
buildFlatSitesFromCategories
|
||||
};
|
||||
269
scripts/sync-projects.js
Normal file
269
scripts/sync-projects.js
Normal file
@@ -0,0 +1,269 @@
|
||||
/* eslint-disable no-console */
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const { loadConfig } = require('../src/generator.js');
|
||||
|
||||
// Baseline settings for the projects (GitHub repository) sync. Values from
// `config.site.github` and `PROJECTS_*` environment variables are layered on
// top of these by getSettings().
const ONE_WEEK_MS = 7 * 24 * 60 * 60 * 1000;

const DEFAULT_SETTINGS = {
  enabled: true,
  cacheDir: 'dev',

  // Limits for GitHub API and colour-table requests.
  fetch: {
    timeoutMs: 10 * 1000,
    concurrency: 4,
    userAgent: 'MeNavProjectsSync/1.0'
  },

  // Language colour table: where to download it and how long the local copy stays fresh.
  colors: {
    url: 'https://raw.githubusercontent.com/ozh/github-colors/master/colors.json',
    maxAgeMs: ONE_WEEK_MS
  }
};
|
||||
|
||||
/**
 * Interpret an environment-variable style value as a boolean.
 *
 * @param {*} value Raw value; matching is case-insensitive and ignores surrounding whitespace.
 * @param {*} fallback Returned when the value is unset, empty or unrecognised.
 * @returns {*} `true` for 1/true/yes/y, `false` for 0/false/no/n, otherwise `fallback`.
 */
function parseBooleanEnv(value, fallback) {
  if (value === undefined || value === null || value === '') return fallback;

  switch (String(value).trim().toLowerCase()) {
    case '1':
    case 'true':
    case 'yes':
    case 'y':
      return true;
    case '0':
    case 'false':
    case 'no':
    case 'n':
      return false;
    default:
      return fallback;
  }
}
|
||||
|
||||
/**
 * Parse an environment-variable style value as a base-10 integer.
 *
 * @param {*} value Raw value (usually a string).
 * @param {*} fallback Returned when the value is unset, empty or does not start with an integer.
 * @returns {*} The parsed integer, or `fallback`.
 */
function parseIntegerEnv(value, fallback) {
  if ([undefined, null, ''].includes(value)) return fallback;

  const asInteger = Number.parseInt(String(value), 10);
  return Number.isFinite(asInteger) ? asInteger : fallback;
}
|
||||
|
||||
/**
 * Build the effective settings for the projects sync.
 *
 * Precedence, lowest to highest: DEFAULT_SETTINGS, `config.site.github`,
 * then `PROJECTS_*` environment variables. `fetch.timeoutMs` and
 * `fetch.concurrency` are clamped to safe ranges; a value that is not a
 * number at all falls back to the built-in default instead of becoming NaN,
 * which would otherwise give a zero-delay timeout or an empty worker pool.
 *
 * @param {object} [config] Loaded site configuration.
 * @returns {object} Settings object with `enabled`, `cacheDir`, `fetch` and `colors`.
 */
function getSettings(config) {
  const github = config && config.site && config.site.github;
  const fromConfig = github && typeof github === 'object' ? github : {};

  // Only spread nested sections that really are objects.
  const section = value => (value && typeof value === 'object' ? value : {});

  const merged = {
    ...DEFAULT_SETTINGS,
    ...fromConfig,
    fetch: { ...DEFAULT_SETTINGS.fetch, ...section(fromConfig.fetch) },
    colors: { ...DEFAULT_SETTINGS.colors, ...section(fromConfig.colors) }
  };

  // Environment overrides.
  const env = process.env;
  merged.enabled = parseBooleanEnv(env.PROJECTS_ENABLED, merged.enabled);
  merged.cacheDir = env.PROJECTS_CACHE_DIR ? String(env.PROJECTS_CACHE_DIR) : merged.cacheDir;
  merged.fetch.timeoutMs = parseIntegerEnv(env.PROJECTS_FETCH_TIMEOUT, merged.fetch.timeoutMs);
  merged.fetch.concurrency = parseIntegerEnv(env.PROJECTS_FETCH_CONCURRENCY, merged.fetch.concurrency);

  // Safety clamps; non-numeric values are replaced by the default first.
  const numberOr = (value, fallback) => {
    const numeric = Number(value);
    return Number.isNaN(numeric) ? fallback : numeric;
  };
  merged.fetch.timeoutMs = Math.max(1_000, numberOr(merged.fetch.timeoutMs, DEFAULT_SETTINGS.fetch.timeoutMs));
  merged.fetch.concurrency = Math.max(
    1,
    Math.min(10, numberOr(merged.fetch.concurrency, DEFAULT_SETTINGS.fetch.concurrency))
  );

  return merged;
}
|
||||
|
||||
/**
 * Create a directory, including missing parents; an existing directory is fine.
 *
 * @param {string} dirPath Directory to create.
 * @returns {void}
 */
function ensureDir(dirPath) {
  const options = { recursive: true };
  fs.mkdirSync(dirPath, options);
}
|
||||
|
||||
/**
 * Parse a GitHub repository URL.
 *
 * Accepts http(s) URLs on `github.com` and on its `www.` alias with at least
 * `/<owner>/<repo>` in the path. Extra path segments and a trailing `.git`
 * are ignored.
 *
 * @param {*} url Candidate URL.
 * @returns {{owner: string, repo: string, canonicalUrl: string}|null} Parsed
 *   repository, or `null` when the URL is not a GitHub repository link.
 */
function isGithubRepoUrl(url) {
  if (!url) return null;

  let parsed;
  try {
    parsed = new URL(String(url));
  } catch {
    return null;
  }

  if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') return null;

  // Treat the www. alias as the same site so such links are not silently ignored.
  const host = parsed.hostname.toLowerCase().replace(/^www\./, '');
  if (host !== 'github.com') return null;

  const [owner, repoSegment] = parsed.pathname.split('/').filter(Boolean);
  if (!owner || !repoSegment) return null;

  const repo = repoSegment.replace(/\.git$/i, '');
  if (!repo) return null;

  return { owner, repo, canonicalUrl: `https://github.com/${owner}/${repo}` };
}
|
||||
|
||||
/**
 * Depth-first collection of site objects from a category tree.
 *
 * Children under `subcategories`, `groups` and `subgroups` are visited (in
 * that order) before the node's own `sites`. Entries in `sites` that are not
 * objects (null, strings, numbers) are skipped, so consumers only ever
 * receive site records.
 *
 * @param {object} node Category / group node.
 * @param {object[]} output Array that receives the sites (mutated).
 * @returns {void}
 */
function collectSitesRecursively(node, output) {
  if (!node || typeof node !== 'object') return;

  for (const childKey of ['subcategories', 'groups', 'subgroups']) {
    if (Array.isArray(node[childKey])) {
      node[childKey].forEach(child => collectSitesRecursively(child, output));
    }
  }

  if (Array.isArray(node.sites)) {
    node.sites.forEach(site => {
      if (site && typeof site === 'object') output.push(site);
    });
  }
}
|
||||
|
||||
/**
 * List the navigation pages rendered with the `projects` template.
 *
 * A page's template is `config[pageId].template`, defaulting to the page id.
 *
 * @param {object} config Full site configuration (`navigation` plus one entry per page id).
 * @returns {Array<{pageId: string, page: *}>} Matching pages in navigation order.
 */
function findProjectsPages(config) {
  const navigation = Array.isArray(config.navigation) ? config.navigation : [];
  const matches = [];

  for (const entry of navigation) {
    const pageId = entry && entry.id ? String(entry.id) : '';
    if (!pageId) continue;

    const page = config[pageId];
    if (!page) continue;

    const templateName = page.template ? String(page.template) : pageId;
    if (templateName === 'projects') matches.push({ pageId, page });
  }

  return matches;
}
|
||||
|
||||
/**
 * GET a URL and parse the response as JSON, aborting after a timeout.
 *
 * @param {string} url URL to request.
 * @param {object} options
 * @param {number} options.timeoutMs Milliseconds before the request is aborted.
 * @param {object} options.headers Request headers.
 * @returns {Promise<*>} Parsed JSON body.
 * @throws {Error} `HTTP <status>` for non-2xx responses; otherwise whatever
 *   `fetch` or JSON parsing throws (including the abort on timeout).
 */
async function fetchJsonWithTimeout(url, { timeoutMs, headers }) {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), timeoutMs);

  try {
    const request = { method: 'GET', headers, signal: abort.signal };
    const response = await fetch(url, request);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    // Awaited inside the try so the timer also covers the body download.
    const payload = await response.json();
    return payload;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Load the GitHub language colour table, using a local cache file.
 *
 * A cache younger than `settings.colors.maxAgeMs` is returned without any
 * network access. Otherwise the table is downloaded and the cache is
 * rewritten. When the download fails, an expired cache is still used
 * rather than dropping every language colour; only when nothing is
 * available is an empty table returned.
 *
 * @param {object} settings Effective settings (uses `settings.colors` and `settings.fetch`).
 * @param {string} cacheBaseDir Existing directory that holds `github-colors.json`.
 * @returns {Promise<object>} Colour table (possibly empty). Never rejects for
 *   cache or network problems.
 */
async function loadLanguageColors(settings, cacheBaseDir) {
  const cachePath = path.join(cacheBaseDir, 'github-colors.json');
  const describe = error => String(error && error.message ? error.message : error);

  // Read the cache regardless of age so it can serve as a fallback later.
  let cached = null;
  try {
    if (fs.existsSync(cachePath)) {
      const stat = fs.statSync(cachePath);
      const parsed = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
      if (parsed && typeof parsed === 'object') {
        cached = parsed;
        const isFresh = stat.mtimeMs && Date.now() - stat.mtimeMs < settings.colors.maxAgeMs;
        if (isFresh) return cached;
      }
    }
  } catch {
    // Unreadable or corrupt cache: fall through to the network.
    cached = null;
  }

  try {
    const headers = { 'user-agent': settings.fetch.userAgent, accept: 'application/json' };
    const colors = await fetchJsonWithTimeout(settings.colors.url, { timeoutMs: settings.fetch.timeoutMs, headers });
    if (colors && typeof colors === 'object') {
      try {
        fs.writeFileSync(cachePath, JSON.stringify(colors, null, 2), 'utf8');
      } catch (writeError) {
        // The fetched table is still usable even if it cannot be cached.
        console.warn(`[WARN] 写入语言颜色缓存失败:${describe(writeError)}`);
      }
      return colors;
    }
  } catch (error) {
    if (cached) {
      console.warn(`[WARN] 获取语言颜色表失败(改用已过期的本地缓存):${describe(error)}`);
      return cached;
    }
    console.warn(`[WARN] 获取语言颜色表失败(将不输出 languageColor):${describe(error)}`);
  }

  return cached || {};
}
|
||||
|
||||
/**
 * Fetch display metadata for one repository from the GitHub REST API.
 *
 * @param {{owner: string, repo: string, canonicalUrl: string}} repo Repository to look up.
 * @param {object} settings Effective settings (uses `settings.fetch`).
 * @param {object} colors Language colour table keyed by language name, each entry exposing `color`.
 * @returns {Promise<{url: string, fullName: string, language: string, languageColor: string, stars: (number|null), forks: (number|null)}>}
 *   Metadata record; `stars` / `forks` are `null` when the API did not return finite numbers.
 * @throws {Error} When the request fails, times out or returns a non-2xx status.
 */
async function fetchRepoMeta(repo, settings, colors) {
  const { userAgent, timeoutMs } = settings.fetch;
  const apiUrl = `https://api.github.com/repos/${repo.owner}/${repo.repo}`;
  const data = await fetchJsonWithTimeout(apiUrl, {
    timeoutMs,
    headers: { 'user-agent': userAgent, accept: 'application/vnd.github+json' }
  });

  // Counters are kept only when they are finite numbers.
  const finiteOrNull = value => (data && Number.isFinite(value) ? value : null);

  const language = data && data.language ? String(data.language) : '';

  const colorEntry = language && colors ? colors[language] : null;
  const languageColor = colorEntry && colorEntry.color ? String(colorEntry.color) : '';

  const fullName = data && data.full_name ? String(data.full_name) : `${repo.owner}/${repo.repo}`;

  return {
    url: repo.canonicalUrl,
    fullName,
    language,
    languageColor,
    stars: finiteOrNull(data && data.stargazers_count),
    forks: finiteOrNull(data && data.forks_count)
  };
}
|
||||
|
||||
/**
 * Run an async worker over `items` with bounded parallelism and collect the
 * truthy results.
 *
 * Results are returned in the order of `items`, not in completion order, so
 * output built from them (such as a cache file) is stable between runs.
 *
 * @param {Array} items Inputs.
 * @param {number} concurrency Maximum parallel workers; values below 1 or
 *   that are not numbers are treated as 1.
 * @param {(item: *) => Promise<*>} worker Async function; falsy results are dropped.
 * @returns {Promise<Array>} Truthy worker results in input order.
 * @throws Whatever `worker` throws (the first rejection fails the pool).
 */
async function runPool(items, concurrency, worker) {
  const slots = new Array(items.length);
  let cursor = 0;

  // Each runner keeps claiming the next unprocessed index until none are left.
  async function runOne() {
    while (cursor < items.length) {
      const position = cursor;
      cursor += 1;
      // eslint-disable-next-line no-await-in-loop
      slots[position] = await worker(items[position]);
    }
  }

  // NaN would give Array.from a zero length: no runner, no work, empty result.
  const requested = Number(concurrency);
  const width = Math.max(1, Math.min(Number.isNaN(requested) ? 1 : requested, items.length));

  const runners = Array.from({ length: width }, () => runOne());
  await Promise.all(runners);

  return slots.filter(Boolean);
}
|
||||
|
||||
/**
 * CLI entry point: refresh the repository metadata cache of every projects page.
 *
 * For each page, the GitHub repository links found in its categories are
 * de-duplicated, their metadata is fetched, and the result is written to
 * `<cacheDir>/<pageId>.repo-cache.json`. A repository that fails to load is
 * logged and counted, but does not stop the others.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const config = loadConfig();
  const settings = getSettings(config);

  if (!settings.enabled) {
    console.log('[INFO] projects 仓库同步已禁用(PROJECTS_ENABLED=false)');
    return;
  }

  const { cacheDir } = settings;
  const cacheBaseDir = path.isAbsolute(cacheDir) ? cacheDir : path.join(process.cwd(), cacheDir);
  ensureDir(cacheBaseDir);

  const colors = await loadLanguageColors(settings, cacheBaseDir);
  const pages = findProjectsPages(config);

  if (pages.length === 0) {
    console.log('[INFO] 未找到 template=projects 的页面,跳过同步');
    return;
  }

  for (const { pageId, page } of pages) {
    const sites = [];
    const categories = Array.isArray(page.categories) ? page.categories : [];
    for (const category of categories) collectSitesRecursively(category, sites);

    // De-duplicate by canonical URL.
    const byCanonicalUrl = new Map();
    for (const site of sites) {
      const repo = site && site.url ? isGithubRepoUrl(site.url) : null;
      if (repo) byCanonicalUrl.set(repo.canonicalUrl, repo);
    }
    const repoList = [...byCanonicalUrl.values()];

    if (repoList.length === 0) {
      console.log(`[INFO] 页面 ${pageId}:未发现 GitHub 仓库链接,跳过`);
      continue;
    }

    let success = 0;
    let failed = 0;

    // A failed repository yields null, which runPool drops from its result.
    const fetchOne = async repo => {
      try {
        const meta = await fetchRepoMeta(repo, settings, colors);
        success += 1;
        return meta;
      } catch (error) {
        failed += 1;
        const reason = String(error && error.message ? error.message : error);
        console.warn(`[WARN] 拉取失败:${repo.canonicalUrl}(${reason})`);
        return null;
      }
    };

    // eslint-disable-next-line no-await-in-loop
    const results = await runPool(repoList, settings.fetch.concurrency, fetchOne);

    const payload = {
      version: '1.0',
      pageId,
      generatedAt: new Date().toISOString(),
      repos: results,
      stats: { totalRepos: repoList.length, success, failed }
    };

    const cachePath = path.join(cacheBaseDir, `${pageId}.repo-cache.json`);
    fs.writeFileSync(cachePath, JSON.stringify(payload, null, 2), 'utf8');

    console.log(`[INFO] 页面 ${pageId}:同步完成(成功 ${success} / 失败 ${failed}),写入缓存 ${cachePath}`);
  }
}
|
||||
|
||||
// Start the sync as soon as the script is loaded. Unexpected errors are
// logged, but the exit code stays 0 (best-effort: do not block the
// subsequent build).
const reportSyncFailure = error => {
  console.error('[ERROR] projects 同步异常:', error);
  process.exitCode = 0;
};

main().catch(reportSyncFailure);
|
||||
|
||||
Reference in New Issue
Block a user