diff --git a/site.tar.gz b/site.tar.gz new file mode 100644 index 0000000..962abc8 Binary files /dev/null and b/site.tar.gz differ diff --git a/src/build.js b/src/build.js index f122a38..ac023f4 100644 --- a/src/build.js +++ b/src/build.js @@ -24,6 +24,7 @@ const CONTACT_EMAIL = process.env.CONTACT_EMAIL || 'signalledger@jopdorp.nl'; const PARENT_COMPANY = process.env.PARENT_COMPANY || 'Jopdorp'; const ADSENSE_CLIENT = process.env.ADSENSE_CLIENT || 'ca-pub-1269854634225826'; const ADSENSE_SLOT = process.env.ADSENSE_SLOT || '7019613848'; +const ADSENSE_IN_ARTICLE_SLOT = process.env.ADSENSE_IN_ARTICLE_SLOT || '9095112841'; const IMAGE_MODEL = process.env.OPENAI_IMAGE_MODEL || 'gpt-image-1'; const GENERATED_IMAGE_LIMIT = Number(process.env.GENERATED_IMAGE_LIMIT || 3); const parser = new Parser({ timeout: 15000 }); @@ -531,10 +532,43 @@ async function ensureArticleImage(item, imageMeta, client, { tryRemote = true, a item.hasAvif = false; } -function renderShell({ title, description, pathName = '/', bodyClass = '', content, generatedAt }) { +function renderArticleJsonLd(item) { + const json = { + '@context': 'https://schema.org', + '@type': 'NewsArticle', + headline: item.title, + description: item.summary || item.dek || SITE_TAGLINE, + image: item.imagePath ? `${SITE_URL}${item.imagePath}` : undefined, + url: `${SITE_URL}${item.articlePath}`, + datePublished: item.published, + dateModified: item.published, + author: { + '@type': 'Organization', + name: SITE_NAME, + url: SITE_URL, + }, + publisher: { + '@type': 'Organization', + name: SITE_NAME, + url: SITE_URL, + logo: { + '@type': 'ImageObject', + url: `${SITE_URL}/favicon.svg`, + }, + }, + mainEntityOfPage: { + '@type': 'WebPage', + '@id': `${SITE_URL}${item.articlePath}`, + }, + }; + if (!json.image) delete json.image; + return ``; +} + +function renderShell({ title, description, pathName = '/', bodyClass = '', content, generatedAt, jsonLd = '' }) { const canonical = `${SITE_URL}${pathName}`; const adsenseHead = ADSENSE_CLIENT ? `\n ` : ''; - return `${escapeHtml(title)}${adsenseHead}
${content}
`; + return `${escapeHtml(title)}${jsonLd}${adsenseHead}
${content}
`; } function renderMainAdUnit(label = 'main page ads') { @@ -542,6 +576,11 @@ function renderMainAdUnit(label = 'main page ads') { return ``; } +function renderInArticleAdUnit(label = 'in-article ads') { + if (!ADSENSE_CLIENT || !ADSENSE_IN_ARTICLE_SLOT) return ''; + return ``; +} + function renderImageCredit(item) { if (!item.imageCredit && !item.imageLicense) return ''; const parts = [item.imageCredit, item.imageLicense].filter(Boolean); @@ -589,8 +628,8 @@ function renderHome(items, groups, generatedAt) { function renderArticle(item, related, generatedAt) { const sourceHref = item.link && item.link !== '#' ? `

Read the original reporting

` : ''; const relatedInline = related.length ? `

Where this fits in Signal Ledger

This story sits alongside related Signal Ledger coverage that helps frame the broader pattern.

` : ''; - const content = `
${renderPicture(item, { className: 'article-image' })}${renderImageCredit(item)}
${escapeHtml(item.category)}

${escapeHtml(item.title)}

${item.dek ? `

${escapeHtml(item.dek)}

` : ''}

${escapeHtml(item.summaryLabel)}

${escapeHtml(item.summary)}

${escapeHtml(item.stakesLabel)}

${escapeHtml(item.whyItMatters)}

${escapeHtml(item.contextLabel)}

${escapeHtml(item.context)}

${relatedInline}

${escapeHtml(item.viewLabel)}

${escapeHtml(item.viewpoint)}

Source note

${escapeHtml(item.sourceNote)}

${sourceHref}
`; - return renderShell({ title: `${item.title} — ${SITE_NAME}`, description: item.summary || SITE_TAGLINE, pathName: item.articlePath, bodyClass: 'article-page', content, generatedAt }); + const content = `
${renderPicture(item, { className: 'article-image' })}${renderImageCredit(item)}
${escapeHtml(item.category)}

${escapeHtml(item.title)}

${item.dek ? `

${escapeHtml(item.dek)}

` : ''}

${escapeHtml(item.summaryLabel)}

${escapeHtml(item.summary)}

${renderInArticleAdUnit()}

${escapeHtml(item.stakesLabel)}

${escapeHtml(item.whyItMatters)}

${escapeHtml(item.contextLabel)}

${escapeHtml(item.context)}

${relatedInline}

${escapeHtml(item.viewLabel)}

${escapeHtml(item.viewpoint)}

Source note

${escapeHtml(item.sourceNote)}

${sourceHref}
`; + return renderShell({ title: `${item.title} — ${SITE_NAME}`, description: item.summary || SITE_TAGLINE, pathName: item.articlePath, bodyClass: 'article-page', content, generatedAt, jsonLd: renderArticleJsonLd(item) }); } function renderSectionPage(group, generatedAt) { @@ -622,6 +661,38 @@ async function writeCss() { await fs.writeFile(path.join(OUT_DIR, 'styles.css'), css); } +async function cleanOldArticles(currentSlugs) { + const slugSet = new Set(currentSlugs); + try { + const entries = await fs.readdir(ARTICLES_DIR, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && !slugSet.has(entry.name)) { + const dirPath = path.join(ARTICLES_DIR, entry.name); + await fs.rm(dirPath, { recursive: true, force: true }); + console.log('Cleaned old article directory:', entry.name); + } + } + } catch (error) { + console.error('Error cleaning old articles:', error?.message || error); + } +} + +async function cleanOldSections(currentSectionSlugs) { + const slugSet = new Set(currentSectionSlugs); + try { + const entries = await fs.readdir(SECTIONS_DIR, { withFileTypes: true }); + for (const entry of entries) { + if (entry.isDirectory() && !slugSet.has(entry.name)) { + const dirPath = path.join(SECTIONS_DIR, entry.name); + await fs.rm(dirPath, { recursive: true, force: true }); + console.log('Cleaned old section directory:', entry.name); + } + } + } catch (error) { + console.error('Error cleaning old sections:', error?.message || error); + } +} + async function main() { await fs.mkdir(OUT_DIR, { recursive: true }); await fs.mkdir(ARTICLES_DIR, { recursive: true }); @@ -703,12 +774,16 @@ async function main() { await fs.writeFile(path.join(dir, 'index.html'), renderArticle(item, related, generatedAt)); } + // Clean up old article and section directories that are no longer in the corpus + await cleanOldArticles(corpus.map((item) => item.slug)); + await cleanOldSections(allGroups.map((g) => g.slug)); + await fs.writeFile(STATE_FILE, JSON.stringify({ generatedAt: generatedAt.toISOString(), articles: corpus }, null, 2)); await fs.writeFile(path.join(OUT_DIR, 'feed.json'), JSON.stringify({ siteName: SITE_NAME, siteTagline: SITE_TAGLINE, generatedAt: generatedAt.toISOString(), items: homepageItems }, null, 2)); await fs.writeFile(path.join(OUT_DIR, 'feed.xml'), renderRss(homepageItems, generatedAt)); await fs.writeFile(path.join(OUT_DIR, 'sitemap.xml'), renderSitemap(corpus, allGroups)); await fs.writeFile(path.join(OUT_DIR, 'robots.txt'), `User-agent: *\nAllow: /\nSitemap: ${SITE_URL}/sitemap.xml\n`); - await fs.writeFile(path.join(OUT_DIR, 'ads.txt'), `google.com, pub-1269854634225826, DIRECT, f08c47fec0942fa0\n`); + await fs.writeFile(path.join(OUT_DIR, 'ads.txt'), `google.com, ${ADSENSE_CLIENT.replace('ca-pub-', 'pub-')}, DIRECT, f08c47fec0942fa0\n`); console.log(`Built ${corpus.length} stories into ${OUT_DIR}`); }