Awesome De Novo Peptide Sequencing

A comprehensive, interactive map of the field: algorithms, post-processors, downstream applications, and adjacent tools, deep-learning and classical alike.

import { aq, op } from "@uwdata/arquero"

// Shared Plot-tip styling used across every Plot chart on the page. Matches
// the .map-tooltip CSS palette (12-13 px, roomier padding, white background
// with soft border) so the built-in Plot tip looks the same as our custom
// world-map / co-auth / bipartite HTML tooltips.
// Usage: pass the object as a mark's `tip` option (e.g. `tip: plot_tip_style`
// in the publications-per-year bar chart) instead of `tip: true`.
plot_tip_style = ({
  fontSize: 13,
  lineHeight: 1.35,
  textPadding: 10,
  fill: "white",
  stroke: "#d0d7de"
})

// Row-oriented copies of the column-oriented inputs. Numeric and date fields
// are coerced once here so downstream cells can compare and sort them directly.
pubs_t = transpose(pubs).map(row => ({
  ...row,
  year: +row.year,
  date: row.date ? new Date(row.date) : null   // missing date stays null
}))
top_authors_t = transpose(top_authors)
geo_t = transpose(geo)
institutions_t = transpose(institutions)
pub_authorship_t = transpose(pub_authorship)
coauth_edges_t = transpose(coauth_edges).map(edge => ({
  ...edge,
  n_authors: +edge.n_authors,
  papers: +edge.papers
}))

// Newman (2001) fractional collaboration strength. Each pair on an n-author
// paper earns 1/(n-1), so a 2-author paper contributes a full 1.0 to its single
// pair while a 53-author consortium contributes ~0.019 to each of its 1378
// pairs. `max_authors` lets the reader drop mega-author papers entirely.
//
// Parameters:
//   max_authors  largest author count an edge row may have and still be
//                counted (default Infinity = keep everything).
// Returns: array of { source, target, weight }, one entry per distinct
//   (source, target) pair, in first-seen order; `weight` is the sum of
//   papers / (n_authors - 1) over that pair's rows in coauth_edges_t.
coauth_strength = (max_authors = Infinity) => {
  // Author names contain spaces, so the composite key needs a delimiter that
  // cannot occur inside a name (same '<<|>>' convention as the bipartite chart).
  const SEP = "<<|>>"
  // "source<<|>>target" -> { source, target, weight }. The endpoints are kept
  // on the value so the key never has to be split back apart.
  const by_pair = new Map()
  for (const r of coauth_edges_t) {
    // A pair needs at least two authors. n_authors <= 1 would divide by zero
    // (or a negative number), and a NaN count (from a missing field) would slip
    // past the max_authors comparison and poison the pair's sum.
    if (!(r.n_authors >= 2)) continue
    if (r.n_authors > max_authors) continue
    const key = `${r.source}${SEP}${r.target}`
    let pair = by_pair.get(key)
    if (!pair) {
      pair = { source: r.source, target: r.target, weight: 0 }
      by_pair.set(key, pair)
    }
    pair.weight += r.papers / (r.n_authors - 1)
  }
  return Array.from(by_pair.values())
}

// All prolific authors, regardless of the network's reactive filters — used by
// the bipartite author↔model chart so its node set stays stable.
coauth_all_authors = new Set(coauth_edges_t.flatMap(e => [e.source, e.target]))
author_affs_t = transpose(author_affs)
algorithms_t = transpose(algorithms).map(r => ({
  ...r,
  first_pub: r.first_pub ? new Date(r.first_pub) : null,
  is_dl: r.is_dl == null ? null : !!r.is_dl   // SQLite INTEGER 0/1 → JS boolean
}))

// Version-aware view of algorithms: any algorithm whose joined publications
// carry a `version` tag (currently Casanovo v1 / v2 / v5) is expanded into one
// row per version, each with its own first_pub date and a label like
// "Casanovo v2". Drives the architectures timeline so successive releases of
// the same method appear as distinct dots instead of collapsing onto the
// earliest one. All other downstream cells (counters, bipartite network,
// browse table) keep using `algorithms_t` and remain one-row-per-algorithm.
algorithms_versioned_t = {
  const pubs_for = new Map()
  for (const p of pubs_t) {
    for (const m of (p.models ?? "").split(",").map(s => s.trim()).filter(Boolean)) {
      if (!pubs_for.has(m)) pubs_for.set(m, [])
      pubs_for.get(m).push(p)
    }
  }
  const out = []
  for (const r of algorithms_t) {
    const its_pubs = pubs_for.get(r.model) ?? []
    const versions = Array.from(new Set(its_pubs.map(p => p.version).filter(Boolean))).sort()
    if (versions.length === 0) { out.push({ ...r, base_model: r.model }); continue }
    for (const v of versions) {
      const dates = its_pubs.filter(p => p.version === v).map(p => p.date).filter(Boolean)
      const earliest = dates.length ? new Date(Math.min(...dates.map(d => +d))) : r.first_pub
      out.push({ ...r, model: `${r.model} ${v}`, version: v, base_model: r.model, first_pub: earliest })
    }
    const unversioned = its_pubs.filter(p => !p.version)
    if (unversioned.length) {
      const dates = unversioned.map(p => p.date).filter(Boolean)
      const earliest = dates.length ? new Date(Math.min(...dates.map(d => +d))) : r.first_pub
      out.push({ ...r, base_model: r.model, first_pub: earliest })
    }
  }
  return out
}
venues_t = transpose(venues)
author_details_t = transpose(author_details)
citations_t      = transpose(citations)
journal_impact_t = transpose(journal_impact)
// Lookup table: journal name → 2yr citedness, for fast joins in OJS cells.
journal_impact_by_name = new Map(journal_impact_t.map(j => [j.journal, j]))
publication_impact_t = transpose(publication_impact)
publication_impact_by_pub = new Map(publication_impact_t.map(r => [r.publication_id, r]))
repo_metrics_t = transpose(repo_metrics).map(r => ({
  ...r,
  last_pushed: r.last_pushed ? new Date(r.last_pushed) : null,
  fetched_at:  r.fetched_at  ? new Date(r.fetched_at)  : null,
}))

// Some repositories back multiple catalogued algorithms (e.g. InstaNovo +
// InstaNovo-P share instadeepai/instanovo). For the two Code-activity plots
// we want ONE dot per repo — otherwise the shared bar/point appears twice
// on top of itself and the label collides. Fold rows by URL, joining the
// per-repo model names with ' / ' so the tooltip still shows which
// algorithms live in that repo. The Browse-all table below keeps the
// un-deduplicated repo_metrics_t so per-algorithm columns (family, kind)
// stay intact.
repo_metrics_by_url = {
  const grouped = new Map()
  for (const r of repo_metrics_t) {
    if (!r.url) continue
    if (!grouped.has(r.url)) grouped.set(r.url, {...r, models: [r.model]})
    else grouped.get(r.url).models.push(r.model)
  }
  return Array.from(grouped.values()).map(r => ({
    ...r,
    // 'model' overrides the single-value field with a joined label used by
    // Plot.text and the tooltip title. 'family' picks the first algorithm's
    // family for the dot color (families are usually the same when they
    // share a repo; where they differ, the first one seen wins).
    model: r.models.join(" / "),
  }))
}

// Derived counters. Everything flows from the data, no hardcoded numbers.
n_papers = pubs_t.length
n_models = algorithms_t.length
n_authors = new Set(pubs_t.flatMap(p => (p.authors ?? "").split(", ").filter(Boolean))).size
n_countries = geo_t.length
years_with_pubs = pubs_t.map(p => p.year).filter(y => Number.isFinite(y))
first_year = Math.min(...years_with_pubs)
last_year  = Math.max(...years_with_pubs)
// First year a paper using deep learning appears in the catalog. Anchors the
// "wave" prose so the DL inflection point reads from data, not a constant.
dl_algo_names = new Set(algorithms_t.filter(a => a.is_dl === true).map(a => a.model))
first_dl_year = Math.min(...pubs_t
  .filter(p => Number.isFinite(p.year) && (p.models ?? "").split(",").map(s => s.trim()).some(m => dl_algo_names.has(m)))
  .map(p => p.year)
)
n_preprints = pubs_t.filter(p => p.type === "preprint").length
n_peer_reviewed = pubs_t.filter(p => p.type === "peer-reviewed").length
families = Array.from(new Set(algorithms_t.map(a => a.family).filter(Boolean)))

// Classification breakdowns (kind / DL / acquisition).
n_dl     = algorithms_t.filter(a => a.is_dl === true).length
n_non_dl = algorithms_t.filter(a => a.is_dl === false).length
kinds_present     = Array.from(new Set(algorithms_t.map(a => a.kind).filter(Boolean))).sort()
acq_modes_present = Array.from(new Set(algorithms_t.map(a => a.acquisition).filter(Boolean))).sort()
kind_counts = kinds_present.map(k => ({ kind: k, n: algorithms_t.filter(a => a.kind === k).length }))
acq_counts  = acq_modes_present.map(m => ({ acquisition: m, n: algorithms_t.filter(a => a.acquisition === m).length }))

// Headline counters: papers, methods, authors, countries. Each number is one
// of the derived counter cells (n_papers, n_models, n_authors, n_countries).
html`<div class="hero-grid">
  <div class="hero-stat"><div class="hero-num">${n_papers}</div><div class="hero-lbl">papers</div></div>
  <div class="hero-stat"><div class="hero-num">${n_models}</div><div class="hero-lbl">methods</div></div>
  <div class="hero-stat"><div class="hero-num">${n_authors}</div><div class="hero-lbl">authors</div></div>
  <div class="hero-stat"><div class="hero-num">${n_countries}</div><div class="hero-lbl">countries</div></div>
</div>`

{
  // Three-column breakdown of the catalogued methods: by kind, by approach
  // (deep learning vs classical) and by acquisition mode. Every bar's width is
  // its count as a percentage of n_models, so the bars in one column need not
  // sum to 100 % (e.g. algorithms with a null is_dl appear in neither
  // approach row).
  //
  // kind_label maps raw `kind` values to display names; a kind missing from
  // the table falls back to its raw value via `??`.
  const kind_label = {
    "algorithm": "Algorithms",
    "post-processor": "Post-processors",
    "downstream-application": "Downstream apps",
    "adjacent": "Adjacent",
    "review": "Reviews / surveys",
    "benchmark": "Benchmarks",
    "meta": "Meta"
  }
  return html`<div class="breakdown-grid">
    <div class="breakdown-cell">
      <div class="breakdown-title">By kind</div>
      ${kind_counts.map(k => html`<div class="breakdown-row">
        <span class="bk-num">${k.n}</span>
        <span class="bk-bar"><span class="bk-fill" style="width:${100 * k.n / n_models}%"></span></span>
        <span class="bk-lbl">${kind_label[k.kind] ?? k.kind}</span>
      </div>`)}
    </div>
    <div class="breakdown-cell">
      <div class="breakdown-title">By approach</div>
      <div class="breakdown-row">
        <span class="bk-num">${n_dl}</span>
        <span class="bk-bar"><span class="bk-fill" style="width:${100 * n_dl / n_models}%; background:#1f6feb"></span></span>
        <span class="bk-lbl">Deep learning</span>
      </div>
      <div class="breakdown-row">
        <span class="bk-num">${n_non_dl}</span>
        <span class="bk-bar"><span class="bk-fill" style="width:${100 * n_non_dl / n_models}%; background:#6f42c1"></span></span>
        <span class="bk-lbl">Classical</span>
      </div>
    </div>
    <div class="breakdown-cell">
      <div class="breakdown-title">By acquisition</div>
      ${acq_counts.map(m => html`<div class="breakdown-row">
        <span class="bk-num">${m.n}</span>
        <span class="bk-bar"><span class="bk-fill" style="width:${100 * m.n / n_models}%; background:#1a7f37"></span></span>
        <span class="bk-lbl">${m.acquisition}</span>
      </div>`)}
    </div>
  </div>`
}

md`Since **${first_year}**, **${n_papers}** papers have introduced **${n_models}** methods for *de novo* peptide sequencing, written by **${n_authors}** authors across **${n_countries}** countries. Of those papers, **${n_preprints}** are preprints and **${n_peer_reviewed}** are peer-reviewed. A snapshot of a field where the conversation moves faster than the journals.`

🎚️ Filters · Kind · Approach · Acquisition. These apply across the whole page and stay pinned to the top while you scroll.

viewof kind_filter = Inputs.checkbox(kinds_present, { label: "Kind", value: kinds_present })

viewof dl_filter = Inputs.radio(["all", "DL only", "Classical only"], { label: "Approach", value: "all" })

viewof acq_filter = Inputs.checkbox(acq_modes_present, { label: "Acquisition", value: acq_modes_present })

pubs_matches_filter = p => {
  if (p.kind && !kind_filter.includes(p.kind)) return false
  if (dl_filter === "DL only"        && !(p.is_dl === 1 || p.is_dl === true))  return false
  if (dl_filter === "Classical only" && !(p.is_dl === 0 || p.is_dl === false)) return false
  if (p.acquisition && !acq_filter.includes(p.acquisition)) return false
  return true
}
pubs_filtered = pubs_t.filter(pubs_matches_filter)
n_papers_filtered = pubs_filtered.length

The wave

md`The earliest paper tracked here appeared in **${first_year}**; the first deep-learning method shows up in **${first_dl_year}**. Activity has accelerated sharply since. **${n_peer_reviewed}** papers have made it through peer review, alongside **${n_preprints}** preprints still in the publication pipeline.`

Plot.plot({
  marginLeft: 50,
  width: 1100,
  height: 360,
  x: { label: "Year", tickFormat: "d", interval: 1 },
  y: { label: `Papers (${n_papers_filtered} shown)`, grid: true },
  color: { legend: true, scheme: "blues", domain: ["preprint", "peer-reviewed", "ML conference", "thesis", "commentary"] },
  marks: [
    Plot.barY(
      // Drop kind='meta' rows: the catalog's self-entry (no
      // publication_type, no real venue) plus any other meta artefacts
      // like commentaries-without-method. They'd otherwise render as
      // transparent stack segments and skew the publication-volume story.
      pubs_filtered.filter(p => Number.isFinite(p.year) && p.kind !== "meta"),
      Plot.groupX(
        { y: "count" },
        { x: "year", fill: "type", tip: plot_tip_style }
      )
    ),
    Plot.ruleY([0])
  ]
})

The architectures

De novo sequencing has cycled through several methodological families: first hand-engineered dynamic programming and learning-to-rank, then a long stretch of CNN+RNN models, then transformers, GNNs, NAR variants, and most recently diffusion. Use the filters to focus on one slice of the field; hover a dot to read the method’s signature contribution.

// Family checkbox for the architectures timeline; starts with every family
// present in algorithms_t ticked.
viewof family_filter = Inputs.checkbox(families, { label: "Families", value: families })

innovations_timeline = {
  // Swim-lane timeline of methods: one horizontal band per family, one dot per
  // row of algorithms_versioned_t at its first-publication date, a name label
  // above or below each dot, and a custom HTML tooltip on hover. Rows are
  // narrowed by the family / kind / approach / acquisition filters. Returns a
  // .chart-wrap <div> holding a fullscreen button, the chart and the tooltip.
  //
  // Notebook-style band order: oldest paradigm at the bottom, newest at the top.
  // Classical (non-DL) bands sit at the bottom: heuristic → graph/DP → HMM →
  // decision tree → random forest → learning-to-rank → CNN+RNN → ... →
  // diffusion → flow.
  const band_order = [
    "Heuristic", "Graph / DP", "HMM", "Decision tree",
    "Random Forest", "Learning-to-rank",
    "CNN + RNN", "Transformer (AR)", "GNN", "CNN", "Transformer (NAR)", "Diffusion", "Flow"
  ]
  const band_color = {
    "Heuristic":         "#8a96a0",
    "Graph / DP":        "#6c757d",
    "HMM":               "#4d6a8c",
    "Decision tree":     "#7b6f43",
    "Random Forest":     "#a5673f",
    "Learning-to-rank":  "#5f6b7a",
    "CNN + RNN":         "#4C72B0",
    "Transformer (AR)":  "#DD8452",
    "GNN":               "#937860",
    "CNN":               "#8172B3",
    "Transformer (NAR)": "#55A868",
    "Diffusion":         "#C44E52",
    "Flow":              "#937DC2"
  }
  // Tiers chosen to spread labels both above and below the band center. Each
  // value is a fraction of the band's height (0 = band bottom, 1 = band top).
  // 25 interleaved tier positions (the previous 15 ran out for the 2024-2026
  // Transformer (AR) burst, dropping ~6 models onto the center-line fallback).
  const Y_TIERS = [
    0.5, 0.8, 0.2, 0.9, 0.1, 0.7, 0.3, 0.6, 0.4,
    0.85, 0.15, 0.95, 0.05, 0.75, 0.25, 0.65, 0.35, 0.55, 0.45,
    0.88, 0.12, 0.78, 0.22, 0.58, 0.42
  ]
  // MIN_GAP_DAYS is computed dynamically below from the actual x-range so it self-tunes
  // when more years of data come in (e.g. older classical methods extending back to 1997).

  // Per-family band heights: busy families get more vertical room (matches plots.ipynb).
  // Graph / DP now holds seven classical algorithms (Sherenga, PEAKS, PepNovo, MSNovo,
  // pNovo, pNovo+, CycloNovo) so it needs a taller swimlane.
  // Families missing from this table fall back to 1.2 where the bands are built.
  const band_heights = {
    "Heuristic":         2.5,   // 2001-2006 cluster: MALDI-QIT / MALDI-TOF-TOF / Adaptive GA all needed vertical room
    "Graph / DP":        4.0,   // 2003-2005 burst: Sherenga / DP-based / Suboptimal / DeNovo / PepNovo overlap
    "HMM":               1.0,
    "Decision tree":     1.0,
    "Random Forest":     1.2,
    "Learning-to-rank":  1.2,
    "CNN + RNN":         5.0,   // 2018-2024 cluster of DeepNovo variants + neighbours (was 3.5, still tight)
    "Transformer (AR)":  14.0,  // Cambrian explosion 2024-2026: ~27 Casanovo/Novo variants, needed 75 % more room
    "GNN":               1.2,
    "CNN":               1.2,
    "Transformer (NAR)": 3.5,   // 2024-2025 CrossNovo/OmniNovo/π-PrimeNovo/DiffNovo/Refinovo cluster
    "Diffusion":         1.8,
    "Flow":              1.2
  }

  // Apply every filter (family, kind, approach, acquisition) up front.
  // Rows without a first_pub or a family can never be drawn and are rejected.
  // NOTE(review): unlike pubs_matches_filter, a row with no `kind` is rejected
  // here (kind_filter.includes(undefined) is false), while a row with no
  // `acquisition` is let through — confirm the asymmetry is intended.
  const matches_filters = a => {
    if (!a.first_pub || !a.family) return false
    if (!family_filter.includes(a.family)) return false
    if (!kind_filter.includes(a.kind)) return false
    if (dl_filter === "DL only"        && a.is_dl !== true)  return false
    if (dl_filter === "Classical only" && a.is_dl !== false) return false
    if (a.acquisition && !acq_filter.includes(a.acquisition)) return false
    return true
  }

  // Only include bands that have at least one surviving model.
  const present = new Set(algorithms_versioned_t.filter(matches_filters).map(a => a.family))
  const visible_bands = band_order.filter(f => present.has(f))

  // Stack bands bottom-to-top, each at its own height.
  let y_cursor = 0
  const bands = visible_bands.map(family => {
    const h = band_heights[family] ?? 1.2
    const band = { family, y0: y_cursor, y1: y_cursor + h, center: y_cursor + h / 2, height: h }
    y_cursor += h
    return band
  })
  const total_height = y_cursor
  const band_index = new Map(bands.map(b => [b.family, b]))

  // X-range computed first so MIN_GAP_DAYS can self-tune to the actual scale.
  // It is taken from every dated row, not only the filtered ones, so the x
  // domain does not depend on the current filter selection.
  const all_dates = algorithms_versioned_t.filter(a => a.first_pub).map(a => a.first_pub)
  const x_min = d3.min(all_dates)
  const x_max = d3.max(all_dates)
  // Pad each side so labels at the temporal extremes (DeepNovo, DiffuNovo) stay inside the band.
  const x_pad_min = d3.timeMonth.offset(x_min, -8)
  const x_pad_max = d3.timeMonth.offset(x_max, 8)

  // MIN_GAP_DAYS: how many days of horizontal space we want between two labels on
  // the same tier. We compute it from the actual x-scale (so the spacing self-tunes
  // when the timeline extends) but cap it so dense bursts (e.g. 2024-2026 Transformer
  // (AR) papers) don't all fall through to the center-line fallback.
  const PLOT_WIDTH_PX  = 1200          // chart 1400 minus left+right margins
  const LABEL_RESERVE_PX = 150         // median label width AFTER wrap_label (2-line labels ~120-150 px wide)
  const MAX_GAP_DAYS = 1400            // cap ~3.8 yr so busy years (2003/2005) get proper tier spread
  const total_days = (x_pad_max - x_pad_min) / 86400000   // ms per day
  const px_per_day = PLOT_WIDTH_PX / total_days
  const MIN_GAP_DAYS = Math.min(MAX_GAP_DAYS, LABEL_RESERVE_PX / px_per_day)

  // Wrap the display label onto two lines at a word boundary when it exceeds
  // the max width (Plot renders \n as separate tspans). Same helper as the
  // application-areas chart below — labels here can be long too
  // ('MALDI-QIT de novo sequencing', 'Robust FL-Sequencing', etc).
  // Single-word labels are never split; a first word longer than max_chars
  // takes line 1 on its own, and everything left over goes on line 2.
  const wrap_label = (s, max_chars = 22) => {
    if (!s || s.length <= max_chars) return s
    const words = s.split(' ')
    if (words.length === 1) return s
    let line1 = ''
    let i = 0
    while (i < words.length && (line1 ? line1.length + 1 + words[i].length : words[i].length) <= max_chars) {
      line1 = line1 ? `${line1} ${words[i]}` : words[i]
      i++
    }
    if (!line1) { line1 = words[0]; i = 1 }
    const line2 = words.slice(i).join(' ')
    return line2 ? `${line1}\n${line2}` : line1
  }

  // Assign each model a tier so labels at similar dates don't overlap; the tier fraction
  // is interpreted relative to the *band's* height so taller bands spread further apart.
  // The band_index check is a safety net for any future family name that's not yet listed
  // in band_order (without it the chart would crash on .y0 of undefined).
  // Rows are processed oldest-first; each takes the first tier (in Y_TIERS
  // order) whose most recent label in the same family is at least
  // MIN_GAP_DAYS older, and falls back to the band's center line when every
  // tier is still occupied.
  const rows = algorithms_versioned_t
    .filter(a => matches_filters(a) && band_index.has(a.family))
    .slice()
    .sort((a, b) => a.first_pub - b.first_pub)
  const last_date_at_tier = new Map()   // "family|tierIndex" -> date of the last label placed there
  const items = []
  for (const row of rows) {
    let placed = false
    for (let ti = 0; ti < Y_TIERS.length; ti++) {
      const key = `${row.family}|${ti}`
      const last = last_date_at_tier.get(key)
      if (last === undefined || (row.first_pub - last) / 86400000 >= MIN_GAP_DAYS) {
        last_date_at_tier.set(key, row.first_pub)
        const band = band_index.get(row.family)
        items.push({ ...row, y: band.y0 + band.height * Y_TIERS[ti], y_frac: Y_TIERS[ti] })
        placed = true
        break
      }
    }
    if (!placed) {
      const band = band_index.get(row.family)
      items.push({ ...row, y: band.center, y_frac: 0.5 })
    }
  }

  const chart = Plot.plot({
    width: 1400,
    height: Math.max(420, total_height * 70),
    marginLeft: 140,
    marginRight: 60,
    marginBottom: 40,
    x: { type: "time", label: "First publication →", domain: [x_pad_min, x_pad_max], grid: true },
    y: { domain: [0, total_height], axis: null },
    color: { domain: band_order, range: band_order.map(f => band_color[f]), legend: false },
    marks: [
      // Background band per family, extended to the padded x domain so edge labels stay inside.
      Plot.rect(bands, {
        x1: () => x_pad_min, x2: () => x_pad_max,
        y1: "y0", y2: "y1",
        fill: "family",
        fillOpacity: 0.09
      }),
      // Thin separators between bands
      Plot.ruleY(bands.flatMap(b => [b.y0, b.y1]), { stroke: "#ddd", strokeWidth: 0.5 }),
      // Left-edge family label
      Plot.text(bands, {
        x: () => x_pad_min,
        y: "center",
        text: "family",
        textAnchor: "end",
        dx: -8,
        fontSize: 12,
        fontWeight: "bold",
        fill: "family"
      }),
      // Rich HTML tooltips are wired below via a scoped
      // chart.querySelectorAll('g[aria-label="dot"] circle') selector on the
      // rendered SVG; no Plot tip is used here.
      Plot.dot(items, {
        x: "first_pub",
        y: "y",
        fill: "family",
        r: 7,
        stroke: "white",
        strokeWidth: 1.5
      }),
      // Labels anchored above the dot (top half of band). Wrap onto two
      // lines when the model name exceeds ~22 chars so long names don't
      // collide horizontally with neighbours. Full name stays in the tip.
      Plot.text(items.filter(d => d.y_frac >= 0.5), {
        x: "first_pub",
        y: "y",
        text: d => wrap_label(d.model),
        textAnchor: "middle",
        lineAnchor: "bottom",
        dy: -12,
        fontSize: 10,
        fontWeight: "bold",
        fill: "family"
      }),
      // Labels anchored below the dot (bottom half of band)
      Plot.text(items.filter(d => d.y_frac < 0.5), {
        x: "first_pub",
        y: "y",
        text: d => wrap_label(d.model),
        textAnchor: "middle",
        lineAnchor: "top",
        dy: 12,
        fontSize: 10,
        fontWeight: "bold",
        fill: "family"
      })
    ]
  })

  // Rich .map-tooltip on the Plot.dot circles — matches the HTML tooltip style
  // used across the world-map / co-auth / bipartite / Sankey / citation-arc
  // charts. Plot renders dots in the order of the input `items` array, so we
  // bind by DOM order.
  const arch_tooltip = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  // Position the tooltip 12 px down-right of the pointer, clamped so it stays
  // at least 12 px inside every edge of the viewport.
  const move_arch_tip = ev => {
    const pad = 12
    const box = arch_tooltip.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    arch_tooltip.style.left = `${Math.max(pad, left)}px`
    arch_tooltip.style.top  = `${Math.max(pad, top)}px`
  }
  // No className is set on the dot mark above, so the circles are found through
  // the aria-label='dot' group Plot puts around dot marks. querySelectorAll is
  // scoped to *this* chart's DOM so we won't pick up other Plot.dot charts.
  const arch_dot_circles = chart.querySelectorAll('g[aria-label="dot"] circle')
  arch_dot_circles.forEach((circle, i) => {
    const d = items[i]
    if (!d) return   // more circles than items: nothing to bind
    circle.style.cursor = "pointer"
    circle.setAttribute("aria-label", `${d.model}. ${d.family}. First publication ${d.first_pub.toISOString().slice(0,10)}.`)
    circle.addEventListener("mouseenter", ev => {
      // Rebuild the tooltip body for this dot: name, date, family swatch and
      // (when present) the method description.
      arch_tooltip.replaceChildren(html`<div>
        <div class="map-tooltip-title">${d.model}</div>
        <div class="map-tooltip-meta">${d.first_pub.toISOString().slice(0,10)}</div>
        <div class="map-tooltip-row">
          <span class="map-tooltip-swatch" style=${`background:${band_color[d.family] ?? "#999"}`}></span>
          <span>${d.family}</span>
        </div>
        ${d.description ? html`<div class="map-tooltip-meta" style="margin-top:6px; max-width:280px; white-space:normal;">${d.description}</div>` : ""}
      </div>`)
      arch_tooltip.classList.add("visible")
      arch_tooltip.setAttribute("aria-hidden", "false")
      move_arch_tip(ev)
    })
    circle.addEventListener("mousemove", move_arch_tip)
    circle.addEventListener("mouseleave", () => {
      arch_tooltip.classList.remove("visible")
      arch_tooltip.setAttribute("aria-hidden", "true")
    })
  })

  // Wrap in a horizontally-scrollable container with a fullscreen button.
  // The button toggles fullscreen on its parent (.chart-wrap), which also
  // contains the tooltip element.
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${chart}</div>
    ${arch_tooltip}
  </div>`
}

Application areas

De novo peptide sequencing gets picked up by a handful of distinct scientific communities, each with its own workflow conventions. Each dot below is one application-focused method or workflow, placed at its first publication date and stacked into a lane by sub-domain.

// Application sub-domains in lane order. The application timeline builds one
// lane per entry, in this order; the keys match those of subdomain_label,
// subdomain_color and subdomain_lane_height.
subdomain_order = [
  "general-proteomics", "plant-pathogen", "metaproteomics", "wastewater-metaproteomics",
  "immunopeptidomics", "antibodyomics", "venomics", "neuropeptidomics",
  "bioactive-peptides", "wildlife-proteomics",
  "food-authentication", "pathogen-identification", "forensics", "palaeoproteomics",
  "toxin-identification"
]
// Sub-domain key → human-readable display name (one entry per key in
// subdomain_order).
subdomain_label = ({
  "general-proteomics":       "General proteomics",
  "plant-pathogen":           "Plant-pathogen effectors",
  "metaproteomics":           "Metaproteomics",
  "wastewater-metaproteomics":"Wastewater metaproteomics",
  "immunopeptidomics":        "Immunopeptidomics / neoantigen",
  "antibodyomics":            "Antibodyomics",
  "venomics":                 "Venomics",
  "neuropeptidomics":         "Neuropeptidomics",
  "bioactive-peptides":       "Bioactive peptides",
  "wildlife-proteomics":      "Wildlife proteomics",
  "food-authentication":      "Food authentication",
  "pathogen-identification":  "Clinical pathogen ID",
  "forensics":                "Forensics",
  "palaeoproteomics":         "Palaeoproteomics",
  "toxin-identification":     "Toxin identification (biodefense)"
})
// Sub-domain key → hex color (one entry per key in subdomain_order).
subdomain_color = ({
  "general-proteomics":       "#8a96a0",
  "plant-pathogen":           "#4c8b3a",
  "metaproteomics":           "#1a7f37",
  "wastewater-metaproteomics":"#218380",
  "immunopeptidomics":        "#bf8700",
  "antibodyomics":            "#cf222e",
  "venomics":                 "#a40e26",
  "neuropeptidomics":         "#116329",
  "bioactive-peptides":       "#4e7a1a",
  "wildlife-proteomics":      "#0969da",
  "food-authentication":      "#d4a72c",
  "pathogen-identification":  "#0a3069",
  "forensics":                "#953800",
  "palaeoproteomics":         "#8250df",
  "toxin-identification":     "#4338ca"
})
// Per-lane vertical room, only used by the timeline (the Sankey sizes nodes
// by traffic instead), kept here anyway so it lives next to the rest of the
// per-subdomain metadata. Values are relative lane heights; the timeline
// falls back to 1.2 for a sub-domain that has no entry here.
subdomain_lane_height = ({
  "general-proteomics":       1.0,
  "plant-pathogen":           1.0,
  "metaproteomics":           1.2,
  "wastewater-metaproteomics":1.0,
  "immunopeptidomics":        1.5,
  "antibodyomics":            1.2,
  "venomics":                 3.0,  // 10 papers spanning 2014-2024 with clustering in 2018/2020/2022
  "neuropeptidomics":         1.0,
  "bioactive-peptides":       1.0,
  "wildlife-proteomics":      1.2,
  "food-authentication":      1.0,
  "pathogen-identification":  1.0,
  "forensics":                1.0,
  "palaeoproteomics":         1.8,
  "toxin-identification":     1.0
})

application_timeline = {
  // Same swim-lane pattern as the architectures chart, keyed on subdomain
  // instead of family. Lanes ordered oldest-application-adoption at the top
  // (immunopeptidomics + general proteomics were first) down to the newest
  // adopters. Currently one entry only sits in each single-entry lane
  // (plant-pathogen, general-proteomics), so lane heights adjust accordingly.
  // Metadata (order/label/color/height) lives in the shared subdomain_* cells
  // above so this chart and the Sankey diagram below stay in sync.
  const lane_order  = subdomain_order
  const lane_label  = subdomain_label
  const lane_color  = subdomain_color
  const lane_height = subdomain_lane_height

  const rows = algorithms_t
    .filter(a => a.kind === "downstream-application" && a.subdomain && a.first_pub)
    .slice()
    .sort((a, b) => a.first_pub - b.first_pub)

  let y_cursor = 0
  const lanes = lane_order.map(sd => {
    const h = lane_height[sd] ?? 1.2
    const lane = { subdomain: sd, y0: y_cursor, y1: y_cursor + h, center: y_cursor + h/2, height: h }
    y_cursor += h
    return lane
  })
  const total_height = y_cursor
  const lane_index = new Map(lanes.map(l => [l.subdomain, l]))

  // Within-lane tier assignment so dots at similar dates don't overlap labels.
  // Ported from the architectures chart above: 25 interleaved tiers + dynamic
  // MIN_GAP_DAYS. The venomics lane alone has 10 papers with clustering in
  // 2018/2020/2022, so the old 9-tier / static-400-day setup was over-crowded.
  const Y_TIERS = [
    0.5, 0.8, 0.2, 0.9, 0.1, 0.7, 0.3, 0.6, 0.4,
    0.85, 0.15, 0.95, 0.05, 0.75, 0.25, 0.65, 0.35, 0.55, 0.45,
    0.88, 0.12, 0.78, 0.22, 0.58, 0.42
  ]
  const x_min = d3.min(rows, d => d.first_pub)
  const x_max = d3.max(rows, d => d.first_pub)
  const x_pad_min = d3.timeMonth.offset(x_min, -12)
  const x_pad_max = d3.timeMonth.offset(x_max, 12)
  // MIN_GAP_DAYS auto-tunes to the actual x-scale. Labels here are longer than
  // in the architectures chart (workflow names average 25 chars ≈ 180 px), and
  // the venomics lane has 10 papers clustered in 2018/2020/2022 — so we need
  // both a bigger label reserve AND a much larger max-gap cap so tier packing
  // spreads dots across 5-6 tiers instead of collapsing to 2 rows.
  const PLOT_WIDTH_PX = 950            // chart 1200 minus 210 left + 40 right margins
  const LABEL_RESERVE_PX = 200         // conservative for taxon-focused labels ≤22 chars
  const MAX_GAP_DAYS = 1200            // cap ~3.3 yr so dense years spread vertically
  const total_days = (x_pad_max - x_pad_min) / 86400000
  const px_per_day = PLOT_WIDTH_PX / total_days
  const MIN_GAP_DAYS = Math.min(MAX_GAP_DAYS, LABEL_RESERVE_PX / px_per_day)

  // Wrap the display label onto two lines at a word boundary when it exceeds
  // the max width (Plot renders \n as separate tspans). Falls back to the raw
  // string if the label already fits or has no whitespace to break on.
  const wrap_label = (s, max_chars = 22) => {
    if (!s || s.length <= max_chars) return s
    const words = s.split(' ')
    if (words.length === 1) return s
    let line1 = ''
    let i = 0
    while (i < words.length && (line1 ? line1.length + 1 + words[i].length : words[i].length) <= max_chars) {
      line1 = line1 ? `${line1} ${words[i]}` : words[i]
      i++
    }
    if (!line1) { line1 = words[0]; i = 1 }
    const line2 = words.slice(i).join(' ')
    return line2 ? `${line1}\n${line2}` : line1
  }

  const last_at_tier = new Map()
  const items = []
  for (const row of rows) {
    let placed = false
    for (let ti = 0; ti < Y_TIERS.length; ti++) {
      const key = `${row.subdomain}|${ti}`
      const last = last_at_tier.get(key)
      if (last === undefined || (row.first_pub - last)/86400000 >= MIN_GAP_DAYS) {
        last_at_tier.set(key, row.first_pub)
        const lane = lane_index.get(row.subdomain)
        items.push({ ...row, y: lane.y0 + lane.height * Y_TIERS[ti], y_frac: Y_TIERS[ti] })
        placed = true
        break
      }
    }
    if (!placed) {
      const lane = lane_index.get(row.subdomain)
      items.push({ ...row, y: lane.center, y_frac: 0.5 })
    }
  }

  const chart = Plot.plot({
    width: 1200,
    height: Math.max(340, total_height * 90),
    marginLeft: 210,
    marginRight: 40,
    marginBottom: 40,
    x: { type: "time", label: "First publication →", domain: [x_pad_min, x_pad_max], grid: true },
    y: { domain: [0, total_height], axis: null },
    color: { domain: lane_order, range: lane_order.map(s => lane_color[s]), legend: false },
    marks: [
      Plot.rect(lanes, {
        x1: () => x_pad_min, x2: () => x_pad_max,
        y1: "y0", y2: "y1",
        fill: "subdomain",
        fillOpacity: 0.09
      }),
      Plot.ruleY(lanes.flatMap(l => [l.y0, l.y1]), { stroke: "#ddd", strokeWidth: 0.5 }),
      Plot.text(lanes, {
        x: () => x_pad_min, y: "center",
        text: d => lane_label[d.subdomain],
        textAnchor: "end", dx: -8,
        fontSize: 12, fontWeight: "bold",
        fill: "subdomain"
      }),
      Plot.dot(items, {
        x: "first_pub", y: "y",
        fill: "subdomain",
        r: 7, stroke: "white", strokeWidth: 1.5
      }),
      // Labels sit just above the dot (upper half of lane) or just below (lower half).
      // Display label wraps onto two lines at a word boundary when it exceeds
      // ~22 chars; the full untruncated name is still available via the tooltip.
      Plot.text(items.filter(d => d.y_frac >= 0.5), {
        x: "first_pub", y: "y", text: d => wrap_label(d.model),
        textAnchor: "middle", lineAnchor: "bottom", dy: -12,
        fontSize: 10, fontWeight: "bold", fill: "subdomain"
      }),
      Plot.text(items.filter(d => d.y_frac < 0.5), {
        x: "first_pub", y: "y", text: d => wrap_label(d.model),
        textAnchor: "middle", lineAnchor: "top", dy: 12,
        fontSize: 10, fontWeight: "bold", fill: "subdomain"
      })
    ]
  })

  // Rich .map-tooltip on the Plot.dot circles (matches the shared style).
  // Plot renders dots in the order of `items`, so bind by DOM order.
  const app_tooltip = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const move_app_tip = ev => {
    const pad = 12
    const box = app_tooltip.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    app_tooltip.style.left = `${Math.max(pad, left)}px`
    app_tooltip.style.top  = `${Math.max(pad, top)}px`
  }
  // Same selector fix as the architectures chart — Plot uses aria-label='dot'.
  const app_dot_circles = chart.querySelectorAll('g[aria-label="dot"] circle')
  app_dot_circles.forEach((circle, i) => {
    const d = items[i]
    if (!d) return
    circle.style.cursor = "pointer"
    circle.setAttribute("aria-label", `${d.model}. ${lane_label[d.subdomain] ?? d.subdomain}. First publication ${d.first_pub.toISOString().slice(0,10)}.`)
    circle.addEventListener("mouseenter", ev => {
      app_tooltip.replaceChildren(html`<div>
        <div class="map-tooltip-title">${d.model}</div>
        <div class="map-tooltip-meta">${d.first_pub.toISOString().slice(0,10)}</div>
        <div class="map-tooltip-row">
          <span class="map-tooltip-swatch" style=${`background:${lane_color[d.subdomain] ?? "#999"}`}></span>
          <span>${lane_label[d.subdomain] ?? d.subdomain}</span>
        </div>
        ${d.description ? html`<div class="map-tooltip-meta" style="margin-top:6px; max-width:280px; white-space:normal;">${d.description}</div>` : ""}
      </div>`)
      app_tooltip.classList.add("visible")
      app_tooltip.setAttribute("aria-hidden", "false")
      move_app_tip(ev)
    })
    circle.addEventListener("mousemove", move_app_tip)
    circle.addEventListener("mouseleave", () => {
      app_tooltip.classList.remove("visible")
      app_tooltip.setAttribute("aria-hidden", "true")
    })
  })

  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${chart}</div>
    ${app_tooltip}
  </div>`
}

Application → sequencer flow

Which sequencing tools does each application community actually reach for? Two signals can drive the Sankey, and they differ in strength:

Uses: the paper has a curated publication_algorithm link asserting “this method was used” — usually because the methods section names the tool explicitly.
Cites: the paper’s Crossref reference list contains a publication describing the tool — a weaker signal (the tool might just be mentioned as prior art). Rebuilt monthly by build_citations.py.

Use the toggle to switch between them, or view the union of both. An edge from metaproteomics to PEAKS means at least one metaproteomics-tagged paper satisfies the selected signal. Edge thickness is the paper count.

// Radio choosing which evidence feeds application_sankey: "Uses" runs only the
// curated-link pass, "Cites" runs only the citation pass, "Either" (the
// default) runs both and de-duplicates per paper/tool.
viewof sankey_signal = Inputs.radio(["Uses", "Cites", "Either"], { label: "Signal", value: "Either" })

application_sankey = {
  // Which sequencer(s) does each downstream-application paper cite?
  const alg_by_name = new Map(algorithms_t.map(a => [a.model, a]))
  const pub_by_id   = new Map(pubs_t.map(p => [p.id, p]))

  // For each pub, gather its algorithm rows (parsed from the comma-joined
  // 'models' string).
  const pub_algs = new Map()
  for (const p of pubs_t) {
    const names = (p.models ?? "").split(",").map(s => s.trim()).filter(Boolean)
    pub_algs.set(p.id, names.map(n => alg_by_name.get(n)).filter(Boolean))
  }

  // For each downstream-application publication, resolve its subdomain via
  // any linked kind='downstream-application' algorithm that has a subdomain.
  const pub_subdomain = new Map()
  for (const p of pubs_t) {
    const app_alg = (pub_algs.get(p.id) || []).find(a => a.kind === "downstream-application" && a.subdomain)
    if (app_alg) pub_subdomain.set(p.id, app_alg.subdomain)
  }

  // Aggregate: (subdomain, sequencer) → number of downstream-app papers.
  // TWO signals are unioned so a paper counts once per sequencer regardless
  // of which surfaced the edge:
  //   1. Direct publication_algorithm links (curator-asserted "uses tool X").
  //   2. Citation edges to papers describing tool X (via publication_citation).
  // The seen_edges dedup key is (paper, subdomain, tool), so a paper that both
  // uses and cites the same tool contributes just once to the flow count.
  const flow = new Map()
  const sd_totals   = new Map()
  const seq_totals  = new Map()
  const seen_edges  = new Set()
  const record = (paper_id, sd, a) => {
    if (!(a.kind === "algorithm" || a.kind === "adjacent") || !a.family) return
    const edge_key = `${paper_id}|${sd}|${a.model}`
    if (seen_edges.has(edge_key)) return
    seen_edges.add(edge_key)
    const key = `${sd}|${a.model}`
    flow.set(key, (flow.get(key) ?? 0) + 1)
    sd_totals.set(sd,     (sd_totals.get(sd)     ?? 0) + 1)
    seq_totals.set(a.model,(seq_totals.get(a.model)?? 0) + 1)
  }
  // Signal 1: curated tool-usage. Included when the radio is 'Uses' or 'Either'.
  if (sankey_signal !== "Cites") {
    for (const p of pubs_t) {
      const sd = pub_subdomain.get(p.id)
      if (!sd) continue
      for (const a of pub_algs.get(p.id) || []) record(p.id, sd, a)
    }
  }
  // Signal 2: intra-catalog citation graph. Included when the radio is 'Cites' or 'Either'.
  if (sankey_signal !== "Uses") {
    for (const e of citations_t) {
      const sd = pub_subdomain.get(e.citing_id)
      if (!sd) continue
      for (const a of pub_algs.get(e.cited_id) || []) record(e.citing_id, sd, a)
    }
  }

  if (flow.size === 0) {
    return html`<p style="color:#57606a; font-style:italic; padding:1rem;">
      No subdomain→sequencer flows for signal <b>${sankey_signal}</b>. Try
      switching the radio above to <i>Either</i> to combine curated tool-usage
      with the citation graph.
    </p>`
  }

  // Layout parameters.
  const width = 1100, height = 480
  const margin = { top: 30, right: 260, bottom: 20, left: 260 }
  const inner_w = width  - margin.left - margin.right
  const inner_h = height - margin.top  - margin.bottom

  // Node lists sorted by traffic.
  const left_nodes = Array.from(sd_totals.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([sd, total]) => ({ id: sd, kind: "subdomain", total }))
  const right_nodes = Array.from(seq_totals.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([seq, total]) => ({ id: seq, kind: "sequencer", total }))

  // Palette + labels come from the shared subdomain_color / subdomain_label
  // cells defined above (next to the timeline chart) so the two charts can't
  // drift out of sync.

  // Assign Y positions (stacked, proportional to traffic + a small padding).
  const pad_y = 8
  const total_left  = d3.sum(left_nodes,  d => d.total)
  const total_right = d3.sum(right_nodes, d => d.total)
  const avail_left  = inner_h - pad_y * (left_nodes.length  - 1)
  const avail_right = inner_h - pad_y * (right_nodes.length - 1)

  let y_cursor = 0
  for (const n of left_nodes) {
    n.h = (n.total / total_left) * avail_left
    n.y0 = y_cursor
    n.y1 = y_cursor + n.h
    y_cursor += n.h + pad_y
  }
  y_cursor = 0
  for (const n of right_nodes) {
    n.h = (n.total / total_right) * avail_right
    n.y0 = y_cursor
    n.y1 = y_cursor + n.h
    y_cursor += n.h + pad_y
  }

  const left_by_id  = new Map(left_nodes.map(n => [n.id, n]))
  const right_by_id = new Map(right_nodes.map(n => [n.id, n]))

  // Build edge list with y-positions inside each node (proportional stacks).
  const edges = []
  const left_offset  = new Map(left_nodes.map(n => [n.id, 0]))
  const right_offset = new Map(right_nodes.map(n => [n.id, 0]))
  for (const [key, count] of Array.from(flow.entries())
       .sort((a, b) => b[1] - a[1])) {
    const [sd, seq] = key.split("|")
    const L = left_by_id.get(sd), R = right_by_id.get(seq)
    const l_h = (count / L.total) * L.h
    const r_h = (count / R.total) * R.h
    const l_off = left_offset.get(sd)
    const r_off = right_offset.get(seq)
    edges.push({
      subdomain: sd, sequencer: seq, count,
      l_y0: L.y0 + l_off, l_y1: L.y0 + l_off + l_h,
      r_y0: R.y0 + r_off, r_y1: R.y0 + r_off + r_h,
      color: subdomain_color[sd] ?? "#888"
    })
    left_offset.set(sd,  l_off + l_h)
    right_offset.set(seq, r_off + r_h)
  }

  const svg = d3.create("svg")
    .attr("viewBox", [0, 0, width, height])
    .attr("style", "max-width:100%; height:auto; background:#fbfbfd; font:11px sans-serif;")

  const g = svg.append("g").attr("transform", `translate(${margin.left}, ${margin.top})`)

  // Ribbon paths (S-curves between left and right node segments).
  const ribbon = d => {
    const x0 = 0, x1 = inner_w
    const mid = (x0 + x1) / 2
    return `M ${x0} ${d.l_y0}
            C ${mid} ${d.l_y0}, ${mid} ${d.r_y0}, ${x1} ${d.r_y0}
            L ${x1} ${d.r_y1}
            C ${mid} ${d.r_y1}, ${mid} ${d.l_y1}, ${x0} ${d.l_y1} Z`
  }
  // Styled tooltip for the ribbons (matches world-map / co-auth / bipartite).
  // aria-label carries the same info for keyboard / screen-reader users.
  const sankey_tooltip = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const move_sankey_tip = ev => {
    const pad = 12
    const box = sankey_tooltip.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    sankey_tooltip.style.left = `${Math.max(pad, left)}px`
    sankey_tooltip.style.top  = `${Math.max(pad, top)}px`
  }
  const ribbon_paths = g.append("g")
    .selectAll("path")
    .data(edges)
    .join("path")
    .attr("d", ribbon)
    .attr("fill", d => d.color)
    .attr("fill-opacity", 0.55)
    .attr("stroke", "none")
    .attr("cursor", "pointer")
    .attr("aria-label", d => `${subdomain_label[d.subdomain] ?? d.subdomain} to ${d.sequencer}. ${d.count} paper${d.count === 1 ? "" : "s"}.`)
    .on("mouseenter", (ev, d) => {
      sankey_tooltip.replaceChildren(html`<div>
        <div class="map-tooltip-title">${subdomain_label[d.subdomain] ?? d.subdomain} → ${d.sequencer}</div>
        <div class="map-tooltip-meta">${d.count} paper${d.count === 1 ? "" : "s"}</div>
        <div class="map-tooltip-row">
          <span class="map-tooltip-swatch" style=${`background:${d.color}`}></span>
          <span>${subdomain_label[d.subdomain] ?? d.subdomain}</span>
        </div>
      </div>`)
      sankey_tooltip.classList.add("visible")
      sankey_tooltip.setAttribute("aria-hidden", "false")
      move_sankey_tip(ev)
    })
    .on("mousemove", move_sankey_tip)
    .on("mouseleave", () => {
      sankey_tooltip.classList.remove("visible")
      sankey_tooltip.setAttribute("aria-hidden", "true")
    })

  // Left node bars + labels.
  g.append("g")
    .selectAll("rect")
    .data(left_nodes)
    .join("rect")
    .attr("x", -8).attr("y", d => d.y0)
    .attr("width", 8).attr("height", d => d.y1 - d.y0)
    .attr("fill", d => subdomain_color[d.id] ?? "#888")
  g.append("g")
    .selectAll("text")
    .data(left_nodes)
    .join("text")
    .attr("x", -14).attr("y", d => (d.y0 + d.y1) / 2 + 4)
    .attr("text-anchor", "end")
    .attr("font-weight", "bold")
    .attr("fill", d => subdomain_color[d.id] ?? "#333")
    .text(d => `${subdomain_label[d.id] ?? d.id}  (${d.total})`)

  // Right node bars + labels.
  g.append("g")
    .selectAll("rect")
    .data(right_nodes)
    .join("rect")
    .attr("x", inner_w).attr("y", d => d.y0)
    .attr("width", 8).attr("height", d => d.y1 - d.y0)
    .attr("fill", "#1f6feb")
  g.append("g")
    .selectAll("text")
    .data(right_nodes)
    .join("text")
    .attr("x", inner_w + 14).attr("y", d => (d.y0 + d.y1) / 2 + 4)
    .attr("text-anchor", "start")
    .attr("fill", "#1d2330")
    .text(d => `${d.id}  (${d.total})`)

  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${svg.node()}</div>
    ${sankey_tooltip}
  </div>`
}

Where the work happens

Authors and the organizations behind them span countries. Pan and zoom the map. Zoomed out, nearby work collapses to country-level pies; zoom in past about 2× and those aggregates split into city-level pies. Circle area scales with the selected impact metric; pie colours group organizations by type. Organization type is inferred from institution names, so treat it as a practical display category rather than an authoritative classification.

// Radio choosing which per-bucket metric sizes the map circles and weights the
// pie slices. The selected string is used directly as a property name on the
// aggregated city/country rows, so the options must match those field names.
viewof org_metric = Inputs.radio(
  ["citations", "authors", "papers"],
  { label: "Circle area", value: "citations" }
)

// Organization categories offered in the type filter. The map also iterates
// this list (in this order) to build pie slices, per-type counts and the legend.
org_types_present = ["Academic", "Industry", "Research Institute", "Government", "Nonprofit", "Lab"]

// Checkbox filter over organization types; every type is ticked by default.
// The map keeps only rows whose inferred type is included in the selection.
viewof org_type_filter = Inputs.checkbox(
  org_types_present,
  { label: "Organization type", value: org_types_present }
)

// topojson-client (v3), loaded via require; the map calls topojson.feature()
// to turn the world_atlas topology into country features.
topojson = require("topojson-client@3")
world_atlas = (await fetch("https://cdn.jsdelivr.net/npm/world-atlas@2/countries-110m.json")).json()

where_the_work_map = {
  // ===== Aggregate from the currently-filtered pubs at organization level =====
  const pub_ids = new Set(pubs_filtered.map(p => p.id))

  const org_type_color = {
    "Academic": "#2563eb",
    "Industry": "#ea580c",
    "Research Institute": "#7c3aed",
    "Government": "#059669",
    "Nonprofit": "#be123c",
    "Lab": "#0f766e"
  }

  const classify_org = name => {
    const n = (name ?? "").toLowerCase()
    if (/\b(inc|ltd|llc|gmbh|corp|corporation|company|co\.|biosystems|biotech|pharmaceuticals|pfizer|bruker|procter|protein metrics|instadeep|novor|sciex|bioinformatics solutions|dp technology|deepmind|microsoft|nvidia|google)\b/.test(n)) return "Industry"
    if (/\b(national laboratory|government|ministry|academy of sciences|nih|national institute|cnrs|csiro|bam|federal institute|nist|institute of metrology|los alamos)\b/.test(n)) return "Government"
    if (/\b(nonprofit|foundation|initiative|biohub|vib|wellcome|sanger)\b/.test(n)) return "Nonprofit"
    if (/\b(laboratory|lab)\b/.test(n) && !/\b(university|institute|hospital|center|centre)\b/.test(n)) return "Lab"
    if (/\b(institute|institut|instituto|center|centre|laboratory|hospital|clinic|medical center|research)\b/.test(n) && !/\b(university|college|school|faculty)\b/.test(n)) return "Research Institute"
    return "Academic"
  }

  const org_rows = new Map()

  for (const r of pub_authorship_t) {
    if (!pub_ids.has(r.pub_id) || !r.affiliation || !r.country || r.lat == null || r.lng == null) continue
    const k = r.affiliation
    if (!org_rows.has(k)) {
      org_rows.set(k, {
        organization: r.affiliation,
        type: classify_org(r.affiliation),
        city: r.city,
        country: r.country,
        lat_sum: 0,
        lng_sum: 0,
        loc_w: 0,
        authors: new Set(),
        papers: new Set(),
        affiliations: new Set()
      })
    }
    const org = org_rows.get(k)
    org.authors.add(r.author_id)
    org.papers.add(r.pub_id)
    org.affiliations.add(r.affiliation_id)
    org.lat_sum += r.lat
    org.lng_sum += r.lng
    org.loc_w += 1
  }

  const org_data = Array.from(org_rows.values()).map(r => ({
    organization: r.organization,
    type: r.type,
    city: r.city,
    country: r.country,
    lat: r.lat_sum / r.loc_w,
    lng: r.lng_sum / r.loc_w,
    authors: r.authors.size,
    papers: r.papers.size,
    affiliations: r.affiliations.size,
    citations: Array.from(r.papers).reduce((sum, pid) => sum + (publication_impact_by_pub.get(pid)?.cited_by_count ?? 0), 0)
  })).filter(r => org_type_filter.includes(r.type))

  const type_counts = org_types_present.map(type => ({
    type,
    n: org_data.filter(d => d.type === type).length
  }))

  const make_bucket = fields => ({
    ...fields,
    lat_sum: 0,
    lng_sum: 0,
    loc_w: 0,
    authors: new Set(),
    papers: new Set(),
    affiliations: new Set(),
    organizations: new Set(),
    types: new Map()
  })

  const add_to_bucket = (bucket, r, type) => {
    bucket.authors.add(r.author_id)
    bucket.papers.add(r.pub_id)
    bucket.affiliations.add(r.affiliation_id)
    bucket.organizations.add(r.affiliation)
    bucket.lat_sum += r.lat
    bucket.lng_sum += r.lng
    bucket.loc_w += 1

    if (!bucket.types.has(type)) {
      bucket.types.set(type, {
        type,
        authors: new Set(),
        papers: new Set(),
        affiliations: new Set(),
        organizations: new Set()
      })
    }
    const by_type = bucket.types.get(type)
    by_type.authors.add(r.author_id)
    by_type.papers.add(r.pub_id)
    by_type.affiliations.add(r.affiliation_id)
    by_type.organizations.add(r.affiliation)
  }

  const city_rows = new Map()
  const country_rows = new Map()
  for (const r of pub_authorship_t) {
    if (!pub_ids.has(r.pub_id) || !r.affiliation || !r.country || r.lat == null || r.lng == null) continue
    const type = classify_org(r.affiliation)
    if (!org_type_filter.includes(type)) continue

    const country_key = r.country
    if (!country_rows.has(country_key)) {
      country_rows.set(country_key, make_bucket({
        level: "country",
        label: r.country,
        country: r.country
      }))
    }
    add_to_bucket(country_rows.get(country_key), r, type)

    if (!r.city_id) continue
    const city_key = String(r.city_id)
    if (!city_rows.has(city_key)) {
      city_rows.set(city_key, make_bucket({
        level: "city",
        label: `${r.city}, ${r.country}`,
        city: r.city,
        country: r.country,
        lat: r.lat,
        lng: r.lng
      }))
    }
    add_to_bucket(city_rows.get(city_key), r, type)
  }

  const metric_value = (sets, metric) => {
    if (!sets) return 0
    if (metric === "authors") return sets.authors.size
    if (metric === "papers") return sets.papers.size
    return Array.from(sets.papers).reduce((sum, pid) => sum + (publication_impact_by_pub.get(pid)?.cited_by_count ?? 0), 0)
  }

  const finalize_bucket = bucket => {
    const lat = bucket.lat ?? (bucket.lat_sum / bucket.loc_w)
    const lng = bucket.lng ?? (bucket.lng_sum / bucket.loc_w)
    const type_values = org_types_present.map(type => ({
      type,
      value: metric_value(bucket.types.get(type), org_metric)
    })).filter(d => d.value > 0)

    return {
      level: bucket.level,
      label: bucket.label,
      city: bucket.city,
      country: bucket.country,
      lat,
      lng,
      authors: bucket.authors.size,
      papers: bucket.papers.size,
      affiliations: bucket.affiliations.size,
      organizations: bucket.organizations.size,
      citations: Array.from(bucket.papers).reduce((sum, pid) => sum + (publication_impact_by_pub.get(pid)?.cited_by_count ?? 0), 0),
      type_values
    }
  }

  const city_data = Array.from(city_rows.values())
    .filter(d => d.loc_w > 0)
    .map(finalize_bucket)
    .filter(d => Number.isFinite(d.lat) && Number.isFinite(d.lng))

  const country_data = Array.from(country_rows.values())
    .filter(d => d.loc_w > 0)
    .map(finalize_bucket)
    .filter(d => Number.isFinite(d.lat) && Number.isFinite(d.lng))

  // ===== Set up scales =====
  const width = 1100, height = 620
  const projection = d3.geoNaturalEarth1()
    .fitExtent([[10, 10], [width - 10, height - 10]],
               topojson.feature(world_atlas, world_atlas.objects.countries))
  const path = d3.geoPath(projection)

  // Size: sqrt(metric) so circle area is linear in the selected metric.
  const metric_label = { citations: "global citations", authors: "authors", papers: "papers" }[org_metric]
  const max_city = d3.max(city_data, d => d[org_metric]) || 1
  const max_country = d3.max(country_data, d => d[org_metric]) || 1
  const cityR = v => 2.5 + 13 * Math.sqrt((v || 0) / max_city)
  const countryR = v => 7 + 25 * Math.sqrt((v || 0) / max_country)
  const sorted_city_data = city_data.slice().sort((a, b) => d3.descending(a[org_metric], b[org_metric]))
  const sorted_country_data = country_data.slice().sort((a, b) => d3.descending(a[org_metric], b[org_metric]))

  // ===== Build SVG =====
  const svg = d3.create("svg")
    .attr("viewBox", [0, 0, width, height])
    .attr("style", "max-width:100%; height:auto; background:#f6f8fa; font:11px sans-serif; cursor: grab;")

  svg.append("rect")
    .attr("width", width)
    .attr("height", height)
    .attr("fill", "transparent")
    .attr("pointer-events", "all")

  const map_layer = svg.append("g")

  // Country fills (greyed base map).
  const countries_feature = topojson.feature(world_atlas, world_atlas.objects.countries)
  const base_layer = map_layer.append("g").attr("class", "base-map")
  base_layer
    .selectAll("path")
    .data(countries_feature.features)
    .join("path")
    .attr("d", path)
    .attr("fill", "#e9ecef")
    .attr("stroke", "#cfd4d9")
    .attr("stroke-width", 0.5)

  const type_breakdown = d => d.type_values
    .slice()
    .sort((a, b) => d3.descending(a.value, b.value))
    // Drop the metric label from per-row breakdowns: the chart's metric
    // selector already says which metric is being displayed, and the value
    // stands alone.
    .map(v => `${v.type}: ${v.value}`)
    .join("\n")

  const tooltip = d => `${d.label}
${d.authors} authors · ${d.papers} papers · ${d.citations} global citations
${d.organizations} organizations · ${d.affiliations} affiliation rows
${type_breakdown(d)}`

  const tooltip_el = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const fmt = v => Number(v ?? 0).toLocaleString()
  const move_tooltip = ev => {
    const pad = 16
    const { innerWidth, innerHeight } = window
    const box = tooltip_el.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, innerWidth - box.width - pad)
    const top = Math.min(ev.clientY + pad, innerHeight - box.height - pad)
    tooltip_el.style.left = `${Math.max(pad, left)}px`
    tooltip_el.style.top = `${Math.max(pad, top)}px`
  }
  const show_tooltip = (ev, d) => {
    const rows = d.type_values
      .slice()
      .sort((a, b) => d3.descending(a.value, b.value))
    // 'global citations' used to repeat in (a) the headline metric line,
    // (b) the canonical triple, and (c) every per-type breakdown row. Fold
    // (a) and (c) so the phrase only appears once in (b).
    tooltip_el.replaceChildren(html`<div>
      <div class="map-tooltip-title">${d.label}</div>
      <div class="map-tooltip-meta">
        ${fmt(d.authors)} authors · ${fmt(d.papers)} papers · ${fmt(d.citations)} global citations<br>
        ${fmt(d.organizations)} organizations · ${fmt(d.affiliations)} affiliation rows
      </div>
      ${rows.map(row => html`<div class="map-tooltip-row">
        <span class="map-tooltip-swatch" style=${`background:${org_type_color[row.type]}`}></span>
        <span>${row.type}: ${fmt(row.value)}</span>
      </div>`)}
    </div>`)
    tooltip_el.classList.add("visible")
    tooltip_el.setAttribute("aria-hidden", "false")
    move_tooltip(ev)
  }
  const hide_tooltip = () => {
    tooltip_el.classList.remove("visible")
    tooltip_el.setAttribute("aria-hidden", "true")
  }

  const marker_radius = (radius_fn, d, k) => radius_fn(d[org_metric]) / Math.sqrt(k)

  const draw_markers = (layer, data, radius_fn, k = 1) => {
    const markers = layer.selectAll("g.marker")
      .data(data, d => d.label)
      .join("g")
      .attr("class", "marker")
      .attr("transform", d => {
        const [x, y] = projection([d.lng, d.lat])
        return `translate(${x},${y})`
      })
      .style("cursor", "help")
      .on("mouseenter", show_tooltip)
      .on("mousemove", move_tooltip)
      .on("mouseleave", hide_tooltip)
      .on("focus", function(ev, d) {
        const rect = this.getBoundingClientRect()
        show_tooltip({ clientX: rect.left + rect.width / 2, clientY: rect.top + rect.height / 2 }, d)
      })
      .on("blur", hide_tooltip)

    markers.each(function(d) {
      const g = d3.select(this)
      const r = marker_radius(radius_fn, d, k)
      const slices = d.type_values.length ? d.type_values : [{ type: "Academic", value: 1 }]
      const pie = d3.pie().sort(null).value(s => s.value)(slices)
      const arc = d3.arc().innerRadius(0).outerRadius(r)

      g.selectAll("path.slice")
        .data(pie, p => p.data.type)
        .join("path")
        .attr("class", "slice")
        .attr("d", arc)
        .attr("fill", p => org_type_color[p.data.type])
        .attr("fill-opacity", 0.82)

      g.selectAll("circle.outline")
        .data([d])
        .join("circle")
        .attr("class", "outline")
        .attr("r", r)
        .attr("fill", "none")
        .attr("stroke", "#1f2328")
        .attr("stroke-width", 0.65 / Math.sqrt(k))

      g.selectAll("title").remove()
      g.attr("aria-label", tooltip(d))
    })
  }

  const country_layer = map_layer.append("g").attr("class", "country-layer")
  const city_layer = map_layer.append("g").attr("class", "city-layer").attr("opacity", 0).attr("pointer-events", "none")
  draw_markers(country_layer, sorted_country_data, countryR)
  draw_markers(city_layer, sorted_city_data, cityR)

  const SPLIT_K = 2.0
  const zoom = d3.zoom()
    .scaleExtent([1, 8])
    .translateExtent([[0, 0], [width, height]])
    .extent([[0, 0], [width, height]])
    .on("start", () => svg.style("cursor", "grabbing"))
    .on("zoom", ev => {
      const k = ev.transform.k
      const show_cities = k >= SPLIT_K
      map_layer.attr("transform", ev.transform)
      base_layer.selectAll("path").attr("stroke-width", 0.5 / Math.sqrt(k))
      country_layer.attr("opacity", show_cities ? 0 : 1).attr("pointer-events", show_cities ? "none" : "auto")
      city_layer.attr("opacity", show_cities ? 1 : 0).attr("pointer-events", show_cities ? "auto" : "none")
      draw_markers(country_layer, sorted_country_data, countryR, k)
      draw_markers(city_layer, sorted_city_data, cityR, k)
    })
    .on("end", () => svg.style("cursor", "grab"))
  svg.call(zoom)

  // ===== Legends =====
  const legend = svg.append("g").attr("transform", `translate(20, ${height - 74})`)
  legend.append("text").attr("x", 0).attr("y", -10)
    .attr("font-weight", "bold").attr("fill", "#2a3140")
    .text(`${org_data.length} organizations · ${city_data.length} cities · circle area ∝ ${metric_label}`)
  const visible_types = org_types_present.filter(t => type_counts.find(d => d.type === t)?.n)
  const leg = legend.selectAll("g.type")
    .data(visible_types)
    .join("g")
    .attr("transform", (_, i) => `translate(${(i % 3) * 210}, ${Math.floor(i / 3) * 24})`)
  leg.append("circle")
    .attr("r", 7)
    .attr("cx", 7)
    .attr("cy", 7)
    .attr("fill", d => org_type_color[d])
    .attr("fill-opacity", 0.78)
    .attr("stroke", "#1f2328")
    .attr("stroke-width", 0.5)
  leg.append("text")
    .attr("x", 20)
    .attr("y", 11)
    .attr("fill", "#2a3140")
    .text(d => `${d} (${type_counts.find(x => x.type === d)?.n ?? 0})`)

  const max_value = max_country
  const max_r = countryR(max_value)
  const size_legend = svg.append("g").attr("transform", `translate(${width - 170}, ${height - 86})`)
  size_legend.append("circle").attr("cx", 28).attr("cy", 36).attr("r", max_r).attr("fill", "none").attr("stroke", "#57606a")
  size_legend.append("circle").attr("cx", 28).attr("cy", 36 + max_r - countryR(max_value / 4)).attr("r", countryR(max_value / 4)).attr("fill", "none").attr("stroke", "#57606a")
  size_legend.append("text").attr("x", 70).attr("y", 24).attr("fill", "#57606a").text(`${max_value} ${metric_label}`)
  size_legend.append("text").attr("x", 70).attr("y", 48).attr("fill", "#57606a").text(`${Math.round(max_value / 4)} ${metric_label}`)

  const zoom_in = html`<button class="map-zoom-btn" title="Zoom in" aria-label="Zoom in">+</button>`
  zoom_in.onclick = () => svg.transition().duration(180).call(zoom.scaleBy, 1.45)
  const zoom_out = html`<button class="map-zoom-btn" title="Zoom out" aria-label="Zoom out">-</button>`
  zoom_out.onclick = () => svg.transition().duration(180).call(zoom.scaleBy, 1 / 1.45)
  const zoom_reset = html`<button class="map-zoom-btn" title="Reset map zoom" aria-label="Reset map zoom">↺</button>`
  zoom_reset.onclick = () => svg.transition().duration(220).call(zoom.transform, d3.zoomIdentity)

  return html`<div class="chart-wrap">
    <div class="map-zoom-controls">${zoom_in}${zoom_out}${zoom_reset}</div>
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${svg.node()}</div>
    ${tooltip_el}
  </div>`
}

Top institutions

{
  // Top-15 institutions by distinct authors, recomputed from filtered pubs so
  // the chart tracks the global Kind / Approach / Acquisition filters.
  const pub_ids = new Set(pubs_filtered.map(p => p.id))
  const authors_by_inst = new Map()  // affiliation → Set(author_id)
  const country_of      = new Map()  // affiliation → country (for color)
  for (const r of pub_authorship_t) {
    if (!pub_ids.has(r.pub_id) || !r.affiliation) continue
    if (!authors_by_inst.has(r.affiliation)) authors_by_inst.set(r.affiliation, new Set())
    authors_by_inst.get(r.affiliation).add(r.author_id)
    if (r.country) country_of.set(r.affiliation, r.country)
  }
  const rows = Array.from(authors_by_inst, ([institution, authors]) => ({
    institution,
    country: country_of.get(institution) ?? "",
    authors: authors.size
  }))
    .sort((a, b) => b.authors - a.authors)
    .slice(0, 15)

  return Plot.plot({
    marginLeft: 280,
    width: 1100,
    height: Math.max(260, rows.length * 26),
    x: { label: `Distinct authors (${pubs_filtered.length} papers)`, grid: true },
    y: { label: null },
    marks: [
      Plot.barX(rows, {
        x: "authors",
        y: d => `${d.institution} · ${d.country}`,
        fill: "country",
        sort: { y: "x", reverse: true },
        tip: plot_tip_style
      }),
      Plot.ruleX([0])
    ]
  })
}

Who’s driving it

The chart below shows the twenty most-published authors.

{
  // Re-aggregate top authors from the currently-filtered pubs (so the chart
  // reacts to the global Kind / Approach / Acquisition filters).
  const counts = new Map()
  for (const p of pubs_filtered) {
    for (const a of (p.authors ?? "").split(", ").filter(Boolean)) {
      counts.set(a, (counts.get(a) ?? 0) + 1)
    }
  }
  const top = Array.from(counts, ([name, publications]) => ({ name, publications }))
    .sort((a, b) => b.publications - a.publications)
    .slice(0, 20)

  return Plot.plot({
    marginLeft: 180,
    height: Math.max(260, top.length * 22),
    x: { label: `Papers in current filter (${pubs_filtered.length} total)`, grid: true },
    y: { label: null },
    marks: [
      Plot.barX(top, {
        x: "publications",
        y: "name",
        fill: "#1f6feb",
        sort: { y: "x", reverse: true },
        tip: plot_tip_style
      }),
      Plot.ruleX([0])
    ]
  })
}

The collaboration network

The network below shows how authors with ≥ 3 papers are connected through co-authorship; drag a node to reshape the layout, scroll or drag the background to zoom and pan, or hover a node to see its affiliations and collaboration strength.

Edge thickness is Newman fractional collaboration strength: a pair sharing an n-author paper earns 1/(n−1), summed over every paper they share. So an intimate two-author collaboration scores a full 1.0 while each pair in a 53-author consortium scores ~0.02. Without this correction one community benchmark paper alone contributes a 33-node clique that swamps the whole graph. Use min strength to peel away weak ties and expose the dense research groups, and max authors/paper to drop mega-author papers outright.

// Default min strength 0.1: drops the pure-consortium pairs (a 53-author paper
// gives 1/52 = 0.019) while keeping 125 of 129 authors on the canvas.
viewof coauth_min_strength = Inputs.range([0, 2], {
  value: 0.1, step: 0.02, label: "Min strength"
})

// Default 60 = no cutoff (largest paper in the catalog has 53 authors), so the
// fractional weighting does the work by default and this is an explore control.
viewof coauth_max_authors = Inputs.range([2, 60], {
  value: 60, step: 1, label: "Max authors/paper"
})

network_chart = {
  const width = 1400
  // Legend height grows so all N affiliations render — was clipping at LEGEND_H=110
  // (25-item limit) while all_affs runs to 70+. PLOT_H stays at 650 for the network.
  const PLOT_H = 650
  const cols_per_row = 6            // was 5; slightly denser
  const col_w = 220                 // 6 × 220 = 1320 px, fits in 1400 chart
  const row_h = 18
  const LEGEND_TITLE_PX = 20        // room for the "Affiliation (N)" header

  // All affiliations per author (an author can have multiple).
  const affs_by_author = d3.rollup(
    author_affs_t,
    v => Array.from(new Set(v.map(d => d.affiliation))).sort(),
    d => d.author
  )

  // Newman fractional strengths under the reactive max-authors cutoff, then
  // filtered to edges at or above the min-strength threshold. Nodes are derived
  // from the SURVIVING edges, so authors whose only ties were weak consortium
  // co-signatures drop off the canvas instead of floating as isolates.
  const links = coauth_strength(coauth_max_authors)
    .filter(e => e.weight >= coauth_min_strength)

  if (!links.length) {
    return html`<p style="color:#57606a; font-style:italic; padding:1rem;">
      No collaborations at min strength ≥ ${coauth_min_strength.toFixed(2)}
      with ≤ ${coauth_max_authors} authors/paper. Lower the threshold to see edges.
    </p>`
  }

  const node_set = new Set()
  for (const e of links) { node_set.add(e.source); node_set.add(e.target) }
  const nodes = Array.from(node_set, name => ({
    id: name,
    affiliations: affs_by_author.get(name) ?? ["Unknown"],
    degree: 0
  }))
  const node_by_name = new Map(nodes.map(n => [n.id, n]))
  for (const l of links) {
    node_by_name.get(l.source).degree += l.weight
    node_by_name.get(l.target).degree += l.weight
  }
  const max_deg = d3.max(nodes, n => n.degree) || 1
  // Edge widths scale against the strongest SURVIVING tie, so the thin/thick
  // contrast stays readable at any min-strength threshold.
  const max_link_w = d3.max(links, l => l.weight) || 1

  // Unique affiliations sorted for legend + a 22-slot palette so we don't run out of colors.
  const all_affs = Array.from(new Set(nodes.flatMap(n => n.affiliations))).sort()
  const palette = d3.schemeTableau10.concat(d3.schemeSet3)
  const color = d3.scaleOrdinal().domain(all_affs).range(all_affs.map((_, i) => palette[i % palette.length]))

  // Legend height grown to accommodate every affiliation (was hard-coded 110).
  const legend_rows = Math.ceil(all_affs.length / cols_per_row)
  const LEGEND_H = LEGEND_TITLE_PX + legend_rows * row_h + 12   // + padding
  const height = PLOT_H + LEGEND_H

  // Primary affiliation = the one shared with most neighbors (for edge coloring).
  const adj = new Map(nodes.map(n => [n.id, []]))
  for (const l of links) { adj.get(l.source).push(l.target); adj.get(l.target).push(l.source) }
  const primary_aff = new Map()
  for (const n of nodes) {
    if (n.affiliations.length === 1) { primary_aff.set(n.id, n.affiliations[0]); continue }
    let best = n.affiliations[0], best_count = -1
    for (const aff of n.affiliations) {
      const c = adj.get(n.id).reduce((acc, nb) => acc + (node_by_name.get(nb).affiliations.includes(aff) ? 1 : 0), 0)
      if (c > best_count) { best = aff; best_count = c }
    }
    primary_aff.set(n.id, best)
  }

  // Within-component clustering force: pull nodes of the same primary-aff together.
  const aff_centers = new Map()
  for (const aff of all_affs) {
    const i = all_affs.indexOf(aff)
    aff_centers.set(aff, [
      width / 2 + Math.cos(i * 2 * Math.PI / all_affs.length) * width * 0.3,
      PLOT_H / 2 + Math.sin(i * 2 * Math.PI / all_affs.length) * PLOT_H * 0.35
    ])
  }

  const NODE_R = d => 8 + 10 * Math.sqrt(d.degree / max_deg)

  const sim = d3.forceSimulation(nodes)
    .force("link", d3.forceLink(links).id(d => d.id).distance(70).strength(d => 0.15 + 0.25 * Math.min(1, d.weight)))
    .force("charge", d3.forceManyBody().strength(-220))
    .force("collide", d3.forceCollide().radius(d => NODE_R(d) + 4))
    .force("center", d3.forceCenter(width / 2, PLOT_H / 2))
    .force("aff_x", d3.forceX(d => aff_centers.get(primary_aff.get(d.id))[0]).strength(0.05))
    .force("aff_y", d3.forceY(d => aff_centers.get(primary_aff.get(d.id))[1]).strength(0.05))

  const svg = d3.create("svg")
    .attr("viewBox", [0, 0, width, height])
    .attr("style", "max-width: 100%; height: auto; font: 11px sans-serif; cursor: grab;")

  // Title
  svg.append("text")
    .attr("x", width / 2)
    .attr("y", 22)
    .attr("text-anchor", "middle")
    .attr("font-size", 16)
    .attr("font-weight", "bold")
    .attr("fill", "#24292f")
    .text(`Co-authorship network · ${nodes.length} authors, ${links.length} ties (Newman strength ≥ ${coauth_min_strength.toFixed(2)}${coauth_max_authors < 60 ? `, ≤ ${coauth_max_authors} authors/paper` : ""})`)

  const g = svg.append("g")
  svg.call(d3.zoom().on("zoom", ev => g.attr("transform", ev.transform)))

  // Edges: tint same-primary-affiliation edges with the affiliation color; others gray.
  const link = g.append("g")
    .attr("stroke-opacity", 0.45)
    .selectAll("line")
    .data(links)
    .join("line")
    .attr("stroke", d => {
      const a = primary_aff.get(d.source.id ?? d.source)
      const b = primary_aff.get(d.target.id ?? d.target)
      return a && a === b ? color(a) : "#bbb"
    })
    .attr("stroke-width", d => 0.8 + 3.2 * Math.sqrt(d.weight / max_link_w))

  // Node groups (one <g> per author; contains either a circle or pie slices).
  const node_g = g.append("g")
    .selectAll("g.node")
    .data(nodes)
    .join("g")
    .attr("class", "node")
    .call(drag(sim))

  // Rich HTML tooltip that follows the cursor, matching the world map's .map-tooltip.
  // Screen-reader accessibility uses aria-label (not <title>) to avoid stacking a
  // native black browser tooltip on top of the styled HTML one.
  node_g.attr("aria-label", d =>
    `${d.id}. ${d.affiliations.join(", ")}. Collaboration strength ${d.degree.toFixed(2)}.`
  )
  const tooltip_el = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const move_tip = ev => {
    const pad = 12
    const box = tooltip_el.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    tooltip_el.style.left = `${Math.max(pad, left)}px`
    tooltip_el.style.top  = `${Math.max(pad, top)}px`
  }
  const show_tip = (ev, d) => {
    tooltip_el.replaceChildren(html`<div>
      <div class="map-tooltip-title">${d.id}</div>
      <div class="map-tooltip-meta">collaboration strength ${d.degree.toFixed(2)} · ${adj.get(d.id).length} co-author${adj.get(d.id).length === 1 ? "" : "s"}</div>
      ${d.affiliations.map(aff => html`<div class="map-tooltip-row">
        <span class="map-tooltip-swatch" style=${`background:${color(aff)}`}></span>
        <span>${aff}</span>
      </div>`)}
    </div>`)
    tooltip_el.classList.add("visible")
    tooltip_el.setAttribute("aria-hidden", "false")
    move_tip(ev)
  }
  const hide_tip = () => {
    tooltip_el.classList.remove("visible")
    tooltip_el.setAttribute("aria-hidden", "true")
  }
  node_g
    .style("cursor", "pointer")
    .on("mouseenter", show_tip)
    .on("mousemove",  move_tip)
    .on("mouseleave", hide_tip)

  // Render each node: single circle if 1 affiliation, pie wedges if multiple.
  node_g.each(function (d) {
    const r = NODE_R(d)
    const sel = d3.select(this)
    if (d.affiliations.length === 1) {
      sel.append("circle")
        .attr("r", r)
        .attr("fill", color(d.affiliations[0]))
        .attr("stroke", "#fff")
        .attr("stroke-width", 1.5)
    } else {
      const arc = d3.arc().innerRadius(0).outerRadius(r)
      const pie = d3.pie().value(1).sort(null)(d.affiliations.map(a => ({ aff: a })))
      sel.selectAll("path")
        .data(pie)
        .join("path")
        .attr("d", arc)
        .attr("fill", p => color(p.data.aff))
        .attr("stroke", "#fff")
        .attr("stroke-width", 1)
    }
  })

  const label = g.append("g")
    .attr("pointer-events", "none")
    .selectAll("text")
    .data(nodes)
    .join("text")
    .text(d => d.id)
    .attr("font-size", 10)
    .attr("font-weight", 500)
    .attr("fill", "#1d2330")
    .attr("dx", d => NODE_R(d) + 3)
    .attr("dy", 3)

  sim.on("tick", () => {
    link
      .attr("x1", d => d.source.x).attr("y1", d => d.source.y)
      .attr("x2", d => d.target.x).attr("y2", d => d.target.y)
    node_g.attr("transform", d => `translate(${d.x}, ${d.y})`)
    label.attr("x", d => d.x).attr("y", d => d.y)
  })

  // Affiliation legend at the bottom, now sized to show every affiliation.
  const legend_g = svg.append("g")
    .attr("transform", `translate(40, ${PLOT_H + 10})`)
  legend_g.append("text")
    .attr("x", 0).attr("y", 0)
    .attr("font-weight", "bold")
    .attr("font-size", 12)
    .attr("fill", "#57606a")
    .text(`Affiliation (${all_affs.length})`)
  legend_g.selectAll("g.leg-item")
    .data(all_affs)
    .join("g")
    .attr("class", "leg-item")
    .attr("transform", (_, i) => `translate(${(i % cols_per_row) * col_w}, ${LEGEND_TITLE_PX + Math.floor(i / cols_per_row) * row_h})`)
    .call(s => {
      s.append("circle").attr("r", 6).attr("cx", 6).attr("cy", -4).attr("fill", color)
      s.append("text")
        .attr("x", 18).attr("y", 0)
        .attr("font-size", 11)
        .attr("fill", "#24292f")
        .append("title").text(d => d)   // native tooltip shows the untruncated name
      // Truncate long names to keep the row width bounded (column is 220 px wide).
      s.select("text").text(d => d.length > 26 ? d.slice(0, 24) + "…" : d)
    })

  invalidation.then(() => sim.stop())

  function drag(simulation) {
    return d3.drag()
      .on("start", (ev, d) => { if (!ev.active) simulation.alphaTarget(0.3).restart(); d.fx = d.x; d.fy = d.y })
      .on("drag", (ev, d) => { d.fx = ev.x; d.fy = ev.y })
      .on("end", (ev, d) => { if (!ev.active) simulation.alphaTarget(0); d.fx = null; d.fy = null })
  }

  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${svg.node()}</div>
    ${tooltip_el}
  </div>`
}

Models and the authors behind them

This second view rewires the same network as a bipartite graph: every prolific author (≥ 3 papers) is linked to the models they helped publish. Algorithm nodes are diamonds colored by their architecture family, so you can see which research groups own which slice of the architectural landscape.

author_algo_network = {
  const width = 1400
  // Legend height computed dynamically after families_seen is known (below);
  // hardcoded 90 clipped anything past the first 3 rows × 5 cols = 15 items.
  const PLOT_H = 650
  const legend_cols_per_row = 5
  const legend_col_w = 260
  const legend_row_h = 22
  const LEGEND_TITLE_PX = 22

  // Same prolific-author filter as the co-auth network (authors with ≥ 3 papers).
  // Filter-independent: the co-auth network above has reactive strength/size
  // filters, but this chart should always show every prolific author.
  const prolific = coauth_all_authors

  // Author → algorithm edges, weighted by number of papers connecting them.
  // When a publication carries a version tag (Casanovo v1/v2/v5), the edge
  // points at the version-suffixed label so each release becomes its own node.
  const edge_w = new Map()  // key: `${author}|${algo}`
  const algo_to_base = new Map()  // versioned label → base algorithm name (for family lookup)
  for (const p of pubs_t) {
    if (!p.authors || !p.models) continue
    const authors = String(p.authors).split(",").map(s => s.trim()).filter(Boolean)
    const models  = String(p.models).split(",").map(s => s.trim()).filter(Boolean)
    for (const a of authors) {
      if (!prolific.has(a)) continue
      for (const m of models) {
        const label = p.version ? `${m} ${p.version}` : m
        algo_to_base.set(label, m)
        const key = a + "<<|>>" + label
        edge_w.set(key, (edge_w.get(key) ?? 0) + 1)
      }
    }
  }

  const algo_meta = new Map(algorithms_t.map(a => [a.model, a]))
  const links = []
  const author_set = new Set()
  const algo_set = new Set()
  for (const [k, w] of edge_w) {
    const [a, m] = k.split("<<|>>")
    author_set.add(a); algo_set.add(m)
    links.push({ source: a, target: m, weight: w })
  }

  // Resolve a display "family" for each algorithm. Row-level algorithm_family
  // is preferred, but when it's null (reviews / benchmarks / downstream-app
  // workflow rows / metas / uncategorised adjacent tools) fall back to a
  // kind-based pseudo-family so every diamond gets a distinct color instead
  // of lumping into a single "Unknown" gray bucket.
  const resolveFamily = meta => {
    if (meta?.family) return meta.family
    switch (meta?.kind) {
      case "review":                 return "Reviews"
      case "benchmark":              return "Benchmarks"
      case "meta":                   return "Meta / catalogs"
      case "post-processor":         return "Post-processors (misc)"
      case "adjacent":               return "Adjacent tools (misc)"
      case "downstream-application": return `Application: ${meta.subdomain ?? "misc"}`
      default:                       return "Unknown"
    }
  }
  const nodes = [
    ...Array.from(author_set, name => ({ id: name, kind: "author", degree: 0 })),
    ...Array.from(algo_set, name => {
      const base = algo_to_base.get(name) ?? name
      const meta = algo_meta.get(base)
      return { id: name, kind: "algo", family: resolveFamily(meta), degree: 0 }
    })
  ]
  const node_by_name = new Map(nodes.map(n => [n.id, n]))
  for (const l of links) {
    node_by_name.get(l.source).degree += l.weight
    node_by_name.get(l.target).degree += l.weight
  }

  // Color palette: gray for authors, family color for algorithms.
  // Canonical colors for the 13 arch-chart families (kept consistent with the
  // architectures swim-lane above). Everything else — adjacent-tool families
  // like 'Sequence tag' or 'Hybrid search', post-processor families like
  // 'Sequence assembly' — gets a deterministic slot from an extended palette
  // so no algorithm falls through to gray anymore.
  const family_color_canonical = {
    "Heuristic":         "#8a96a0",
    "Graph / DP":        "#6c757d",
    "HMM":               "#4d6a8c",
    "Decision tree":     "#7b6f43",
    "Random Forest":     "#a5673f",
    "Learning-to-rank":  "#5f6b7a",
    "CNN + RNN":         "#4C72B0",
    "Transformer (AR)":  "#DD8452",
    "GNN":               "#937860",
    "CNN":               "#8172B3",
    "Transformer (NAR)": "#55A868",
    "Diffusion":         "#C44E52",
    "Flow":              "#937DC2",
    "Unknown":           "#999999"
  }
  const extra_palette = [
    "#17becf", "#ff7f0e", "#98df8a", "#c5b0d5", "#e377c2",
    "#bcbd22", "#7cb342", "#ba55d3", "#4682b4", "#20b2aa",
    "#daa520", "#8250df", "#0a3069", "#ffa726", "#cd853f",
    "#a0522d", "#6b8e23", "#f7b6d2", "#ff9896", "#dbdb8d",
    "#aec7e8", "#9edae5", "#d5b895", "#b8860b", "#708090",
    "#116329", "#953800", "#0969da", "#a40e26", "#d4a72c",
    "#4e7a1a", "#e07c00", "#c71585", "#556b2f", "#5e35b1"
  ]
  // Build the full family→color map from canonical + deterministic fallback
  // (families sorted alphabetically so the assignment is stable across renders).
  const family_color = { ...family_color_canonical }
  const extra_families = Array.from(new Set(nodes.filter(n => n.kind === "algo").map(n => n.family)))
    .filter(f => !(f in family_color))
    .sort()
  extra_families.forEach((f, i) => { family_color[f] = extra_palette[i % extra_palette.length] })

  const nodeFill = d => d.kind === "author" ? "#cdd6e0" : (family_color[d.family] ?? "#999")
  const nodeStroke = d => d.kind === "author" ? "#8a94a3" : "#222"
  const max_algo_deg = d3.max(nodes.filter(n => n.kind === "algo"), n => n.degree) || 1
  const max_auth_deg = d3.max(nodes.filter(n => n.kind === "author"), n => n.degree) || 1
  const nodeR = d => d.kind === "author"
    ? 5 + 8 * Math.sqrt(d.degree / max_auth_deg)
    : 9 + 14 * Math.sqrt(d.degree / max_algo_deg)

  // Pull algorithm nodes toward a central ring so they cluster by family.
  // families_seen is only the families that ACTUALLY have a diamond on the
  // chart — a family present in the DB but with no algorithm linked to a
  // prolific author (≥3 papers) won't be here.
  const families_seen = Array.from(new Set(nodes.filter(n => n.kind === "algo").map(n => n.family))).sort()

  // Legend height + SVG total height derived from actual family count so the
  // legend never clips (was hard-coded to LEGEND_H=90 which only fit ~15
  // items; we now have ~25 pseudo-families thanks to resolveFamily fallback).
  const legend_row_count = Math.ceil((families_seen.length + 1) / legend_cols_per_row)   // +1 for the author swatch
  const LEGEND_H = LEGEND_TITLE_PX + legend_row_count * legend_row_h + 20
  const height = PLOT_H + LEGEND_H
  const family_target = new Map()
  for (let i = 0; i < families_seen.length; i++) {
    const angle = i * 2 * Math.PI / families_seen.length
    family_target.set(families_seen[i], [
      width / 2 + Math.cos(angle) * width * 0.25,
      PLOT_H / 2 + Math.sin(angle) * PLOT_H * 0.30
    ])
  }

  const sim = d3.forceSimulation(nodes)
    .force("link", d3.forceLink(links).id(d => d.id).distance(d => 60 + 8 / d.weight).strength(0.35))
    .force("charge", d3.forceManyBody().strength(d => d.kind === "algo" ? -350 : -90))
    .force("collide", d3.forceCollide().radius(d => nodeR(d) + 4))
    .force("center", d3.forceCenter(width / 2, PLOT_H / 2))
    .force("fam_x", d3.forceX(d => d.kind === "algo" ? family_target.get(d.family)[0] : width / 2).strength(d => d.kind === "algo" ? 0.08 : 0.01))
    .force("fam_y", d3.forceY(d => d.kind === "algo" ? family_target.get(d.family)[1] : PLOT_H / 2).strength(d => d.kind === "algo" ? 0.08 : 0.01))

  const svg = d3.create("svg")
    .attr("viewBox", [0, 0, width, height])
    .attr("style", "max-width: 100%; height: auto; font: 11px sans-serif; cursor: grab;")

  svg.append("text")
    .attr("x", width / 2).attr("y", 22)
    .attr("text-anchor", "middle")
    .attr("font-size", 16)
    .attr("font-weight", "bold")
    .attr("fill", "#24292f")
    .text("Authors ↔ models · diamonds are algorithms colored by family, circles are authors")

  const g = svg.append("g")
  svg.call(d3.zoom().on("zoom", ev => g.attr("transform", ev.transform)))

  const link = g.append("g")
    .attr("stroke", "#aaa")
    .attr("stroke-opacity", 0.45)
    .selectAll("line")
    .data(links)
    .join("line")
    .attr("stroke-width", d => Math.max(0.8, Math.sqrt(d.weight) * 0.7))

  const node_g = g.append("g")
    .selectAll("g.node")
    .data(nodes)
    .join("g")
    .attr("class", "node")
    .call(drag(sim))

  // Rich HTML tooltip that follows the cursor (matches world-map + co-auth style).
  // aria-label handles screen-reader accessibility so we don't also stack a
  // native black SVG <title> tooltip on top.
  node_g.attr("aria-label", d => d.kind === "algo"
    ? `${d.id} algorithm. Family: ${d.family}. ${d.degree} author links.`
    : `${d.id} author. ${d.degree} model links.`
  )
  const tooltip_el = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const move_tip = ev => {
    const pad = 12
    const box = tooltip_el.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    tooltip_el.style.left = `${Math.max(pad, left)}px`
    tooltip_el.style.top  = `${Math.max(pad, top)}px`
  }
  const show_tip = (ev, d) => {
    if (d.kind === "algo") {
      tooltip_el.replaceChildren(html`<div>
        <div class="map-tooltip-title">${d.id}</div>
        <div class="map-tooltip-meta">algorithm · ${d.degree} author link${d.degree === 1 ? "" : "s"}</div>
        <div class="map-tooltip-row">
          <span class="map-tooltip-swatch" style=${`background:${family_color[d.family] ?? "#999"}`}></span>
          <span>${d.family}</span>
        </div>
      </div>`)
    } else {
      tooltip_el.replaceChildren(html`<div>
        <div class="map-tooltip-title">${d.id}</div>
        <div class="map-tooltip-meta">author · ${d.degree} model link${d.degree === 1 ? "" : "s"}</div>
      </div>`)
    }
    tooltip_el.classList.add("visible")
    tooltip_el.setAttribute("aria-hidden", "false")
    move_tip(ev)
  }
  const hide_tip = () => {
    tooltip_el.classList.remove("visible")
    tooltip_el.setAttribute("aria-hidden", "true")
  }
  node_g
    .style("cursor", "pointer")
    .on("mouseenter", show_tip)
    .on("mousemove",  move_tip)
    .on("mouseleave", hide_tip)

  // Authors as circles; algorithms as diamonds (rotated squares).
  node_g.each(function (d) {
    const sel = d3.select(this)
    const r = nodeR(d)
    if (d.kind === "author") {
      sel.append("circle")
        .attr("r", r)
        .attr("fill", nodeFill(d))
        .attr("stroke", nodeStroke(d))
        .attr("stroke-width", 1.2)
    } else {
      sel.append("rect")
        .attr("x", -r).attr("y", -r)
        .attr("width", r * 2).attr("height", r * 2)
        .attr("transform", "rotate(45)")
        .attr("fill", nodeFill(d))
        .attr("stroke", nodeStroke(d))
        .attr("stroke-width", 1.5)
    }
  })

  const label = g.append("g")
    .attr("pointer-events", "none")
    .selectAll("text")
    .data(nodes)
    .join("text")
    .text(d => d.id)
    .attr("font-size", d => d.kind === "algo" ? 12 : 9)
    .attr("font-weight", d => d.kind === "algo" ? "bold" : 500)
    .attr("fill", d => d.kind === "algo" ? "#0b0d12" : "#3b4150")
    .attr("dx", d => nodeR(d) + 3)
    .attr("dy", 3)

  sim.on("tick", () => {
    link
      .attr("x1", d => d.source.x).attr("y1", d => d.source.y)
      .attr("x2", d => d.target.x).attr("y2", d => d.target.y)
    node_g.attr("transform", d => `translate(${d.x}, ${d.y})`)
    label.attr("x", d => d.x).attr("y", d => d.y)
  })

  // Legend: algorithm-family colors + the author swatch.
  const legend_g = svg.append("g").attr("transform", `translate(40, ${PLOT_H + 15})`)
  legend_g.append("text")
    .attr("x", 0).attr("y", 0)
    .attr("font-weight", "bold")
    .attr("font-size", 12)
    .attr("fill", "#57606a")
    .text("Algorithm family")
  const legend_items = families_seen.map(f => ({ label: f, fill: family_color[f] ?? "#999", shape: "diamond" }))
    .concat([{ label: "Author (size = # model links)", fill: "#cdd6e0", shape: "circle" }])
  legend_g.selectAll("g.leg-item")
    .data(legend_items)
    .join("g")
    .attr("class", "leg-item")
    .attr("transform", (_, i) => `translate(${(i % legend_cols_per_row) * legend_col_w}, ${LEGEND_TITLE_PX + Math.floor(i / legend_cols_per_row) * legend_row_h})`)
    .call(s => {
      s.append(d => d.shape === "diamond"
        ? document.createElementNS("http://www.w3.org/2000/svg", "rect")
        : document.createElementNS("http://www.w3.org/2000/svg", "circle"))
        .attr("transform", d => d.shape === "diamond" ? "rotate(45) translate(0,0)" : null)
        .each(function (d) {
          const el = d3.select(this)
          if (d.shape === "diamond") el.attr("x", -7).attr("y", -7).attr("width", 14).attr("height", 14)
          else el.attr("r", 7).attr("cx", 0).attr("cy", 0)
          el.attr("fill", d.fill).attr("stroke", d.fill === "#cdd6e0" ? "#8a94a3" : "#222").attr("stroke-width", 1.2)
        })
      s.append("text")
        .attr("x", 14).attr("y", 4)
        .attr("font-size", 11)
        .attr("fill", "#24292f")
        .text(d => d.label)
    })

  invalidation.then(() => sim.stop())

  function drag(simulation) {
    return d3.drag()
      .on("start", (ev, d) => { if (!ev.active) simulation.alphaTarget(0.3).restart(); d.fx = d.x; d.fy = d.y })
      .on("drag", (ev, d) => { d.fx = ev.x; d.fy = ev.y })
      .on("end", (ev, d) => { if (!ev.active) simulation.alphaTarget(0); d.fx = null; d.fy = null })
  }

  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${svg.node()}</div>
    ${tooltip_el}
  </div>`
}

How the field cites itself

A chronological citation arc diagram. Papers are placed left-to-right by publication date and stratified vertically by kind; within each row, the most-cited papers float to the top. Each arc connects a citing paper (right end) to a paper it cites (left end), curving upward above the row. Hover any paper to highlight the citations into it (red) and out of it (blue), and dim everything else.

Edges are resolved from Crossref (by DOI) and Semantic Scholar (by DOI, with a title-search fallback), then matched back to catalog publications by exact DOI (or, for references without a DOI, by fuzzy title match with token-set ratio ≥ 92). Only intra-catalog citations are drawn; references to papers outside the catalog are filtered out. Every arrow runs citing → cited, so the arrowhead always lands on the older paper.

citation_arcs = {
  // Citation arc diagram cell. Builds an SVG in which every catalog paper that
  // takes part in at least one intra-catalog citation is a circle (x = date,
  // row = kind, height within the row = in-degree) and every citation is an
  // upward-curving arc from the citing paper to the cited paper. Returns a
  // .chart-wrap element holding a fullscreen button, the SVG and a hover tooltip.
  // With no citation edges at all, returns a placeholder paragraph instead.
  if (!citations_t.length) {
    return html`<p style="color:#57606a; font-style:italic; padding:1rem;">
      No citation edges yet. Run <code>uv run python build_citations.py</code> to populate the graph.
    </p>`
  }

  // ===== Layout parameters =====
  const width  = 1600
  const height = 820
  const marginTop    = 120          // extra headroom for arc apexes near the top rows
  const marginBottom = 60
  const marginLeft   = 130
  const marginRight  = 30
  const innerWidth   = width  - marginLeft - marginRight
  const innerHeight  = height - marginTop  - marginBottom

  // ===== Per-publication node data =====
  const pub_by_id = new Map(pubs_t.map(p => [p.id, p]))
  const cite_count = new Map()  // pub_id → in-degree (times cited by other catalog papers)
  const cite_out   = new Map()  // pub_id → out-degree (papers in catalog it cites)
  for (const e of citations_t) {
    cite_count.set(e.cited_id,  (cite_count.get(e.cited_id) ?? 0) + 1)
    cite_out.set(e.citing_id, (cite_out.get(e.citing_id) ?? 0) + 1)
  }

  // Y-strata by kind. Meta types at the top, core algorithms at the bottom, so
  // arcs (which always curve upward) have the most headroom for citations heading
  // *into* heavily-cited algorithm-row papers.
  const KIND_ORDER = [
    { kind: "meta",                   label: "Meta" },
    { kind: "benchmark",              label: "Benchmarks" },
    { kind: "review",                 label: "Reviews / surveys" },
    { kind: "adjacent",               label: "Adjacent" },
    { kind: "downstream-application", label: "Downstream apps" },
    { kind: "post-processor",         label: "Post-processors" },
    { kind: "algorithm",              label: "Algorithms" }
  ]
  // Fill colour per kind; "unknown" is the kind assigned below to nodes whose
  // publication record carries no kind.
  const kind_color = {
    "algorithm":              "#1f6feb",
    "post-processor":         "#bf8700",
    "downstream-application": "#1a7f37",
    "adjacent":               "#a371f7",
    "review":                 "#cf222e",
    "benchmark":              "#0969da",
    "meta":                   "#8c959f",
    "unknown":                "#8c959f"
  }
  const row_h = innerHeight / KIND_ORDER.length
  const row_index = new Map(KIND_ORDER.map((d, i) => [d.kind, i]))

  // Only include papers that participate in ≥ 1 edge.
  // An id with no entry in pub_by_id falls back to `{}` and so gets a "#id"
  // title and kind "unknown". A missing (null/undefined) date falls back to
  // the current time, which places that node at the right-hand end of the time
  // axis; the trailing filter therefore only removes nodes whose date is an
  // Invalid Date.
  const involved = new Set()
  for (const e of citations_t) { involved.add(e.citing_id); involved.add(e.cited_id) }
  const nodes = Array.from(involved, id => {
    const p = pub_by_id.get(id) ?? {}
    const d = p.date instanceof Date ? p.date : new Date(p.date ?? Date.now())
    return {
      id,
      title:  p.title ?? `#${id}`,
      year:   p.year,
      date:   d,
      kind:   p.kind ?? "unknown",
      models: p.models ?? "",
      in_deg: cite_count.get(id) ?? 0,
      out_deg: cite_out.get(id) ?? 0
    }
  }).filter(n => n.date instanceof Date && !isNaN(n.date))

  // X scale: publication date. The domain is padded by 2 % of the date span on
  // each side; when the span is zero the pad falls back to 1e9 ms.
  const x_extent = d3.extent(nodes, n => n.date)
  const x_pad    = (x_extent[1] - x_extent[0]) * 0.02 || 1e9
  const xScale   = d3.scaleTime()
    .domain([new Date(+x_extent[0] - x_pad), new Date(+x_extent[1] + x_pad)])
    .range([0, innerWidth])

  // Within-row vertical placement. y depends on in-degree only (not on date):
  // the most-cited paper overall sits at the top of its row's band and papers
  // with no citations sit at the bottom, with a square-root falloff in between.
  // Kinds absent from KIND_ORDER (e.g. "unknown") are drawn in the last row.
  const max_in = d3.max(nodes, n => n.in_deg) || 1
  for (const n of nodes) {
    const ri  = row_index.get(n.kind) ?? KIND_ORDER.length - 1
    const top = ri * row_h + 16
    const bot = (ri + 1) * row_h - 16
    // Top of row = most-cited; spread less-cited papers downward.
    const r = 1 - Math.sqrt(n.in_deg / max_in)
    n.y = top + r * (bot - top)
    n.x = xScale(n.date)
  }
  // Resolve each citation edge to its endpoint node objects; edges touching a
  // node that was filtered out above are dropped.
  const node_by_id = new Map(nodes.map(n => [n.id, n]))
  const links = citations_t
    .map(e => ({ source: node_by_id.get(e.citing_id), target: node_by_id.get(e.cited_id), source_kind: e.source }))
    .filter(l => l.source && l.target)

  // ===== Draw =====
  const svg = d3.create("svg")
    .attr("viewBox", [0, 0, width, height])
    .attr("style", "max-width: 100%; height: auto; font: 11px sans-serif;")

  // Everything inside the margins is drawn in `root`'s translated coordinates.
  const root = svg.append("g").attr("transform", `translate(${marginLeft}, ${marginTop})`)

  // Row backgrounds + labels
  root.selectAll("g.row")
    .data(KIND_ORDER)
    .join("g")
    .attr("class", "row")
    .call(g => {
      g.append("rect")
        .attr("x", 0)
        .attr("y", (_, i) => i * row_h)
        .attr("width", innerWidth)
        .attr("height", row_h)
        .attr("fill", d => kind_color[d.kind] ?? "#999")
        .attr("fill-opacity", 0.05)
      g.append("text")
        .attr("x", -8)
        .attr("y", (_, i) => i * row_h + row_h / 2 + 4)
        .attr("text-anchor", "end")
        .attr("font-size", 12)
        .attr("font-weight", "bold")
        .attr("fill", d => kind_color[d.kind] ?? "#999")
        .text(d => d.label)
    })

  // Time axis
  root.append("g")
    .attr("transform", `translate(0, ${innerHeight})`)
    .call(d3.axisBottom(xScale).ticks(d3.timeYear.every(2)).tickFormat(d3.timeFormat("%Y")))
    .selectAll("text")
    .attr("font-size", 11)

  // Title
  svg.append("text")
    .attr("x", width / 2).attr("y", 20)
    .attr("text-anchor", "middle")
    .attr("font-size", 15)
    .attr("font-weight", "bold")
    .attr("fill", "#24292f")
    .text(`Citation flow · ${nodes.length} papers, ${links.length} intra-catalog citations (citing → cited)`)

  // Citation arcs. Cubic-Bezier curving upward (above the baseline).
  // Both control points share the same position: horizontally at the midpoint
  // of the two endpoints, vertically `lift` px above the higher endpoint.
  function arcPath(d) {
    const x1 = d.source.x, y1 = d.source.y
    const x2 = d.target.x, y2 = d.target.y
    const mx = (x1 + x2) / 2
    // Lift the control points by a fraction of the horizontal span (capped at 180 px).
    const lift = Math.min(180, Math.abs(x1 - x2) * 0.6)
    const cy1 = Math.min(y1, y2) - lift
    const cy2 = cy1
    return `M ${x1} ${y1} C ${mx} ${cy1}, ${mx} ${cy2}, ${x2} ${y2}`
  }

  // Arrowhead markers. We need one per arc color (default/red/blue) because
  // SVG markers inherit their fill from the marker itself, not the path's
  // stroke. Refs get swapped on hover so the marker color matches the arc.
  const defs = svg.append("defs")
  const mk_marker = (id, color) => defs.append("marker")
    .attr("id", id).attr("viewBox", "0 0 10 10").attr("refX", 9).attr("refY", 5)
    .attr("markerWidth", 6).attr("markerHeight", 6).attr("orient", "auto-start-reverse")
    .append("path").attr("d", "M0,0 L10,5 L0,10 z").attr("fill", color)
  mk_marker("arr-default", "#94a3b8")
  mk_marker("arr-red",     "#cf222e")
  mk_marker("arr-blue",    "#1f6feb")

  // Arc layer is appended before the node layer, so node circles paint on top
  // of the arc ends. Edges whose source_kind is "both" get a slightly heavier
  // stroke than the rest.
  const arc_layer = root.append("g").attr("class", "arcs").attr("fill", "none")
  const arc = arc_layer.selectAll("path")
    .data(links)
    .join("path")
    .attr("d", arcPath)
    .attr("stroke", "#94a3b8")
    .attr("stroke-opacity", 0.18)
    .attr("stroke-width", d => d.source_kind === "both" ? 0.9 : 0.6)
    .attr("marker-end", "url(#arr-default)")

  // Nodes. Radius grows from 3 px (uncited) to 8 px (most-cited paper).
  const nodeR = d => 3 + 5 * Math.sqrt(d.in_deg / max_in)
  const node_layer = root.append("g").attr("class", "nodes")
  const node = node_layer.selectAll("circle")
    .data(nodes)
    .join("circle")
    .attr("cx", d => d.x)
    .attr("cy", d => d.y)
    .attr("r", nodeR)
    .attr("fill", d => kind_color[d.kind] ?? "#999")
    .attr("stroke", "#fff")
    .attr("stroke-width", 0.8)
    .attr("cursor", "pointer")

  // Styled tooltip that follows the cursor (matches world-map / co-auth /
  // bipartite / Sankey). aria-label handles screen-reader accessibility.
  node.attr("aria-label", d =>
    `${d.models || d.title}. ${d.year ?? ""}. ${d.kind}. Cited by ${d.in_deg}, cites ${d.out_deg}.`
  )
  const arc_tooltip = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  // Position the tooltip 12 px below/right of the pointer, clamped so that it
  // stays at least 12 px inside the viewport on every side.
  const move_arc_tip = ev => {
    const pad = 12
    const box = arc_tooltip.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    arc_tooltip.style.left = `${Math.max(pad, left)}px`
    arc_tooltip.style.top  = `${Math.max(pad, top)}px`
  }
  // Note: the actual mouseenter/mouseleave handlers are attached below,
  // AFTER out_edges/in_edges are computed, so the tooltip + highlight logic
  // can live in one handler. Attach mousemove here since it only needs the
  // tooltip element and never fights with the highlight handler.
  node.on("mousemove", move_arc_tip)

  // Labels: for each kind, up to 5 of its most-cited papers, and only papers
  // with in-degree ≥ 4, so labels stay legible.
  const top_per_row = new Map()
  for (const n of nodes.slice().sort((a, b) => b.in_deg - a.in_deg)) {
    const r = n.kind
    if (!top_per_row.has(r)) top_per_row.set(r, [])
    if (top_per_row.get(r).length < 5 && n.in_deg >= 4) top_per_row.get(r).push(n)
  }
  const labelled = Array.from(top_per_row.values()).flat()
  // Label text: first comma-separated model name, else the first 22 characters of the title.
  const labelText = d => (d.models?.split(",")[0]?.trim()) || d.title.slice(0, 22)
  // Greedy label placement, independently per kind and in descending in-degree
  // order. Each label starts just above its node and tries vertical offsets of
  // 0, ∓14, ∓28, ∓42 px (clamped to the row band), taking the first position
  // whose estimated box does not overlap an already-placed label of the same
  // kind. A label that fits at none of the offsets is not drawn.
  const label_boxes = []
  for (const group of d3.groups(labelled, d => d.kind).map(([, values]) => values)) {
    const placed = []
    const ordered = group.slice().sort((a, b) => d3.descending(a.in_deg, b.in_deg))
    for (const d of ordered) {
      const text = labelText(d)
      const ri = row_index.get(d.kind) ?? KIND_ORDER.length - 1
      const rowTop = ri * row_h + 6
      const rowBottom = (ri + 1) * row_h - 6
      const baseY = d.y - nodeR(d) - 5
      // Estimated label box: ~5.7 px per character, clamped to 32–150 px wide, 12 px tall.
      const w = Math.min(150, Math.max(32, text.length * 5.7))
      const h = 12
      const offsets = [0, -14, 14, -28, 28, -42, 42]
      let chosen = null
      for (const offset of offsets) {
        const y = Math.max(rowTop + h / 2, Math.min(rowBottom - h / 2, baseY + offset))
        const box = { x0: d.x - w / 2, x1: d.x + w / 2, y0: y - h / 2, y1: y + h / 2 }
        // Overlap test with a 6 px horizontal and 3 px vertical gutter.
        const overlaps = placed.some(p =>
          box.x0 < p.x1 + 6 && box.x1 + 6 > p.x0 && box.y0 < p.y1 + 3 && box.y1 + 3 > p.y0
        )
        if (!overlaps) {
          chosen = { ...box, d, text, y }
          break
        }
      }
      if (chosen) {
        placed.push(chosen)
        label_boxes.push(chosen)
      }
    }
  }
  // Labels ignore pointer events so they never block hovering the nodes below.
  const label_layer = root.append("g").attr("class", "labels").attr("pointer-events", "none")
  // Leader lines, drawn only for labels that ended up more than 8 px away from
  // their default position above the node.
  label_layer.selectAll("line")
    .data(label_boxes.filter(l => Math.abs(l.y - (l.d.y - nodeR(l.d) - 5)) > 8))
    .join("line")
    .attr("x1", d => d.d.x)
    .attr("y1", d => d.d.y - nodeR(d.d) - 1)
    .attr("x2", d => d.d.x)
    .attr("y2", d => d.y + 4)
    .attr("stroke", "#8c959f")
    .attr("stroke-opacity", 0.55)
    .attr("stroke-width", 0.6)
  // Label text with a white halo (stroke painted beneath the fill).
  label_layer.selectAll("text")
    .data(label_boxes)
    .join("text")
    .text(d => d.text)
    .attr("x", d => d.d.x)
    .attr("y", d => d.y)
    .attr("text-anchor", "middle")
    .attr("font-size", 10)
    .attr("font-weight", 600)
    .attr("paint-order", "stroke")
    .attr("stroke", "white")
    .attr("stroke-width", 3)
    .attr("fill", "#1d2330")

  // ===== Hover behaviour =====
  // Precompute neighborhood maps: node id → links leaving it (out_edges, the
  // papers it cites) and node id → links arriving at it (in_edges, its citers).
  const out_edges = new Map()
  const in_edges  = new Map()
  for (const l of links) {
    if (!out_edges.has(l.source.id)) out_edges.set(l.source.id, [])
    if (!in_edges.has (l.target.id)) in_edges.set (l.target.id, [])
    out_edges.get(l.source.id).push(l)
    in_edges.get (l.target.id).push(l)
  }

  // Combined mouseenter: show the styled tooltip AND highlight the citation
  // neighbourhood in one pass — attaching two separate .on('mouseenter')
  // handlers here would silently replace the first (D3 semantics).
  node.on("mouseenter", function (ev, focus) {
    // 1. Show styled tooltip
    arc_tooltip.replaceChildren(html`<div>
      <div class="map-tooltip-title">${focus.models || "(no model)"}</div>
      <div class="map-tooltip-meta">${focus.title}</div>
      <div class="map-tooltip-row">
        <span class="map-tooltip-swatch" style=${`background:${kind_color[focus.kind] ?? "#999"}`}></span>
        <span>${focus.kind}${focus.year ? ` · ${focus.year}` : ""}</span>
      </div>
      <div class="map-tooltip-row"><span>cited by <b>${focus.in_deg}</b> · cites <b>${focus.out_deg}</b></span></div>
    </div>`)
    arc_tooltip.classList.add("visible")
    arc_tooltip.setAttribute("aria-hidden", "false")
    move_arc_tip(ev)

    // 2. Highlight citation neighbourhood
    // ins = ids of papers citing the focus; outs = ids of papers the focus cites.
    const ins  = new Set((in_edges.get(focus.id)  ?? []).map(l => l.source.id))
    const outs = new Set((out_edges.get(focus.id) ?? []).map(l => l.target.id))
    arc
      .attr("stroke", l => {
        if (l.target.id === focus.id) return "#cf222e"   // who cites this paper
        if (l.source.id === focus.id) return "#1f6feb"   // who this paper cites
        return "#94a3b8"
      })
      .attr("marker-end", l => {
        if (l.target.id === focus.id) return "url(#arr-red)"
        if (l.source.id === focus.id) return "url(#arr-blue)"
        return "url(#arr-default)"
      })
      .attr("stroke-opacity", l => (l.source.id === focus.id || l.target.id === focus.id) ? 0.85 : 0.04)
      .attr("stroke-width",   l => (l.source.id === focus.id || l.target.id === focus.id) ? 1.4 : 0.4)
    // Keep the focus and its direct neighbours opaque; dim every other node.
    node.attr("opacity", d => {
      if (d.id === focus.id) return 1
      if (ins.has(d.id) || outs.has(d.id)) return 1
      return 0.22
    })
  })
  node.on("mouseleave", function () {
    // 1. Hide tooltip
    arc_tooltip.classList.remove("visible")
    arc_tooltip.setAttribute("aria-hidden", "true")
    // 2. Reset arc + node styling to the values used at initial draw
    arc.attr("stroke", "#94a3b8").attr("stroke-opacity", 0.18)
        .attr("stroke-width", d => d.source_kind === "both" ? 0.9 : 0.6)
        .attr("marker-end", "url(#arr-default)")
    node.attr("opacity", 1)
  })

  // Wrap the SVG with the fullscreen toggle button and the tooltip element.
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${svg.node()}</div>
    ${arc_tooltip}
  </div>`
}

Academic impact by citation count

Publication-level global citation counts from OpenAlex cited_by_count. Counts are matched to catalog publications by DOI first, then by high-confidence title search when DOI lookup is unavailable.

impact_rows = {
  const rows = pubs_filtered.map(p => ({
    ...p,
    global_citations: publication_impact_by_pub.get(p.id)?.cited_by_count ?? null,
    citation_year: publication_impact_by_pub.get(p.id)?.year_collected ?? null,
    citation_match: publication_impact_by_pub.get(p.id)?.match_method ?? null
  }))
  return rows.sort((a, b) =>
    d3.descending(a.global_citations ?? -1, b.global_citations ?? -1) ||
    d3.ascending(a.year ?? Infinity, b.year ?? Infinity) ||
    d3.ascending(a.title, b.title)
  )
}

viewof impact_search = Inputs.search(impact_rows, {
  placeholder: `Search ${impact_rows.length} papers with citation counts (filtered)…`
})

{
  const table = Inputs.table(impact_search, {
    columns: ["global_citations", "year", "models", "kind", "title", "authors", "journal", "type", "citation_year"],
    header: {
      global_citations: "Global citations",
      year: "Year",
      models: "Method(s)",
      kind: "Kind",
      title: "Title",
      authors: "Authors",
      journal: "Venue",
      type: "Type",
      citation_year: "Collected"
    },
    format: {
      global_citations: c => c == null ? "" : String(c),
      year: y => y == null ? "" : String(y),
      citation_year: y => y == null ? "" : String(y),
      title: (t, i) => {
        const row = impact_search[i]
        const url = row?.url || (row?.doi ? `https://doi.org/${row.doi}` : null)
        return url ? htl.html`<a href="${url}" target="_blank" rel="noopener">${t}</a>` : t
      },
      authors: a => a && a.length > 90 ? a.slice(0, 90) + "…" : a
    },
    sort: "global_citations",
    reverse: true,
    rows: 30,
    width: { global_citations: 120, year: 60, models: 170, kind: 130, type: 110, citation_year: 90 }
  })
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${table}</div>
  </div>`
}

Code activity

Open-source uptake snapshot from the GitHub API: stars, fork count, open / closed issues + PRs, the most recent push, and the latest released tag (falling back to the latest plain tag if the project doesn’t formally release). Pulled offline by build_repo_metrics.py against algorithm_repository.url. Non-GitHub repos (PyPI, project home pages, anonymised review repos) aren’t counted here.

viewof repo_metric = Inputs.radio(
  ["stars", "forks", "open_issues", "open_prs", "closed_issues", "closed_prs"],
  { label: "Rank by", value: "stars" }
)

{
  // Bar chart of the top 20 repos by the selected metric.
  // repo_metrics_by_url is deduped so repos backing multiple algorithms
  // (InstaNovo + InstaNovo-P, ...) surface as one bar with a combined label.
  const top = repo_metrics_by_url
    .filter(r => r[repo_metric] != null)
    .sort((a, b) => (b[repo_metric] ?? 0) - (a[repo_metric] ?? 0))
    .slice(0, 20)
    .map(r => ({
      ...r,
      label: r.model + (r.url.includes("/tree/") ? "*" : ""),
    }))

  const labelfmt = {
    stars: "Stars (★)", forks: "Forks", open_issues: "Open issues",
    open_prs: "Open PRs", closed_issues: "Closed issues", closed_prs: "Closed PRs"
  }
  const chart = Plot.plot({
    marginLeft: 180, width: 1100,
    height: Math.max(280, top.length * 24),
    x: { label: labelfmt[repo_metric], grid: true },
    y: { label: null },
    marks: [
      Plot.barX(top, {
        x: repo_metric,
        y: "label",
        fill: "#1f6feb",
        sort: { y: "x", reverse: true }
      }),
      Plot.ruleX([0])
    ]
  })

  // Rich .map-tooltip on the bars. Plot renders barX rects in the sort order
  // (already computed above) — we replicate that here to bind data by DOM
  // index. Same helper pattern as the swim-lane charts.
  const repo_bar_tooltip = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const move_repo_bar_tip = ev => {
    const pad = 12
    const box = repo_bar_tooltip.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    repo_bar_tooltip.style.left = `${Math.max(pad, left)}px`
    repo_bar_tooltip.style.top  = `${Math.max(pad, top)}px`
  }
  const sorted = top.slice().sort((a, b) => (b[repo_metric] ?? 0) - (a[repo_metric] ?? 0))
  // Plot labels barX marks with aria-label='bar' (className was silently
  // ignored). Selector scoped to this chart's DOM.
  chart.querySelectorAll('g[aria-label="bar"] rect').forEach((rect, i) => {
    const d = sorted[i]
    if (!d) return
    rect.style.cursor = "pointer"
    rect.setAttribute("aria-label",
      `${d.model}. ${d.stars} stars, ${d.forks} forks. ${d.open_issues} open issues, ${d.open_prs} open PRs.`)
    rect.addEventListener("mouseenter", ev => {
      repo_bar_tooltip.replaceChildren(html`<div>
        <div class="map-tooltip-title">${d.model}</div>
        <div class="map-tooltip-meta">
          <a href=${d.url} target="_blank" rel="noopener" style="color:inherit;">${d.url.replace(/^https?:\/\/(?:www\.)?github\.com\//, "")}</a>
        </div>
        <div class="map-tooltip-row"><span>★ <b>${d.stars ?? 0}</b> · forks <b>${d.forks ?? 0}</b></span></div>
        <div class="map-tooltip-row"><span>issues open/closed: <b>${d.open_issues ?? 0}</b>/${d.closed_issues ?? 0}</span></div>
        <div class="map-tooltip-row"><span>PRs open/closed: <b>${d.open_prs ?? 0}</b>/${d.closed_prs ?? 0}</span></div>
        ${d.latest_release ? html`<div class="map-tooltip-row"><span>release: ${d.latest_release}</span></div>` : ""}
        ${d.last_pushed ? html`<div class="map-tooltip-row"><span>last pushed: ${d.last_pushed.toISOString().slice(0,10)}</span></div>` : ""}
      </div>`)
      repo_bar_tooltip.classList.add("visible")
      repo_bar_tooltip.setAttribute("aria-hidden", "false")
      move_repo_bar_tip(ev)
    })
    rect.addEventListener("mousemove", move_repo_bar_tip)
    rect.addEventListener("mouseleave", () => {
      repo_bar_tooltip.classList.remove("visible")
      repo_bar_tooltip.setAttribute("aria-hidden", "true")
    })
  })

  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${chart}</div>
    ${repo_bar_tooltip}
  </div>`
}

Plot popularity (★, log scale) against staleness (months since last push). Vibrant repos cluster on the right at higher star counts; abandoned-but-historically-popular projects drift toward the left. Dot size scales with open issues + open PRs. Hover any dot for the repo name, url, and counts.

{
  // Scatter: stars vs months since last push. Stale-but-popular repos sit in the
  // upper right; actively maintained but small projects sit lower left.
  // repo_metrics_by_url dedupes shared-repo algorithm pairs (e.g. InstaNovo +
  // InstaNovo-P) so we don't stack two labels on the same coordinate.
  const now = Date.now()
  const points = repo_metrics_by_url
    .filter(r => r.stars != null && r.last_pushed != null)
    .map(r => ({
      ...r,
      months_since_push: (now - r.last_pushed.getTime()) / (1000*60*60*24*30.44)
    }))
  const chart = Plot.plot({
    width: 1100, height: 460,
    marginLeft: 60, marginBottom: 50,
    x: { label: "Months since last push →", grid: true, reverse: true },
    y: { label: "Stars (★)", type: "log", grid: true },
    color: { legend: true, label: "Family" },
    marks: [
      // No always-on labels: even with dodgeY the ≥20-star subset still
      // overlaps in dense clusters, and choosing WHICH stars threshold to
      // label reads as arbitrary. Rely on the styled .map-tooltip on hover
      // for identification — every dot gets a rich popup with the same info.
      Plot.dot(points, {
        x: "months_since_push", y: "stars",
        fill: "family",
        r: d => 4 + Math.sqrt((d.open_issues ?? 0) + (d.open_prs ?? 0))
      })
    ]
  })

  // Same tooltip pattern as the repo bars.
  const repo_dot_tooltip = html`<div class="map-tooltip" role="tooltip" aria-hidden="true"></div>`
  const move_repo_dot_tip = ev => {
    const pad = 12
    const box = repo_dot_tooltip.getBoundingClientRect()
    const left = Math.min(ev.clientX + pad, window.innerWidth  - box.width  - pad)
    const top  = Math.min(ev.clientY + pad, window.innerHeight - box.height - pad)
    repo_dot_tooltip.style.left = `${Math.max(pad, left)}px`
    repo_dot_tooltip.style.top  = `${Math.max(pad, top)}px`
  }
  // Selector scoped to this Plot chart; aria-label='dot' is the marker Plot
  // uses for dot marks (className mark option was silently ignored).
  chart.querySelectorAll('g[aria-label="dot"] circle').forEach((circle, i) => {
    const d = points[i]
    if (!d) return
    // Pull the actual fill Plot picked for this dot — matches the legend swatch.
    const family_swatch = circle.getAttribute("fill") ?? "#4C72B0"
    circle.style.cursor = "pointer"
    circle.setAttribute("aria-label",
      `${d.model}. ${d.stars} stars. ${d.months_since_push.toFixed(1)} months since last push.`)
    circle.addEventListener("mouseenter", ev => {
      repo_dot_tooltip.replaceChildren(html`<div>
        <div class="map-tooltip-title">${d.model}</div>
        <div class="map-tooltip-meta">
          <a href=${d.url} target="_blank" rel="noopener" style="color:inherit;">${d.url.replace(/^https?:\/\/(?:www\.)?github\.com\//, "")}</a>
        </div>
        ${d.family ? html`<div class="map-tooltip-row">
          <span class="map-tooltip-swatch" style=${`background:${family_swatch}`}></span>
          <span>${d.family}</span>
        </div>` : ""}
        <div class="map-tooltip-row"><span>★ <b>${d.stars}</b> · ${d.months_since_push.toFixed(1)} mo since push</span></div>
        <div class="map-tooltip-row"><span>open issues <b>${d.open_issues ?? 0}</b> · open PRs <b>${d.open_prs ?? 0}</b></span></div>
      </div>`)
      repo_dot_tooltip.classList.add("visible")
      repo_dot_tooltip.setAttribute("aria-hidden", "false")
      move_repo_dot_tip(ev)
    })
    circle.addEventListener("mousemove", move_repo_dot_tip)
    circle.addEventListener("mouseleave", () => {
      repo_dot_tooltip.classList.remove("visible")
      repo_dot_tooltip.setAttribute("aria-hidden", "true")
    })
  })

  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${chart}</div>
    ${repo_dot_tooltip}
  </div>`
}

{
  // Full sortable table of every repository with metrics.
  const rows = repo_metrics_t.slice()
  const table = Inputs.table(rows, {
    columns: ["model", "family", "stars", "forks", "open_issues",
              "closed_issues", "open_prs", "closed_prs",
              "latest_release", "last_pushed", "url"],
    header: {
      model: "Method", family: "Family", stars: "★", forks: "Forks",
      open_issues: "Issues (open)", closed_issues: "Issues (closed)",
      open_prs: "PRs (open)", closed_prs: "PRs (closed)",
      latest_release: "Release", last_pushed: "Last updated", url: "Repo"
    },
    format: {
      stars: v => v == null ? "" : String(v),
      last_pushed: d => d ? d.toISOString().slice(0, 10) : "",
      latest_release: v => v ?? "",
      url: u => u ? htl.html`<a href="${u}" target="_blank" rel="noopener">${u.replace(/^https?:\/\/(?:www\.)?github\.com\//, "")}</a>` : ""
    },
    sort: "stars", reverse: true,
    rows: 25,
    width: { model: 130, family: 130, stars: 70, forks: 70,
             open_issues: 90, closed_issues: 100, open_prs: 80, closed_prs: 100,
             latest_release: 100, last_pushed: 110, url: 240 }
  })
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${table}</div>
  </div>`
}

Where it appears

Most papers in this space appear first on bioRxiv or arXiv. Toggle preprints vs. peer-reviewed to see how the venue distribution shifts.

viewof venue_type = Inputs.radio(
  ["all", "preprint", "peer-reviewed"],
  { label: "Publication type", value: "all" }
)

{
  // Re-aggregate venues from the globally-filtered pubs so the chart respects
  // Kind / Approach / Acquisition, then apply the section's preprint/peer-reviewed radio.
  const subset = pubs_filtered.filter(p => venue_type === "all" || p.type === venue_type)
  const grouped = Array.from(
    d3.rollup(subset.filter(p => p.journal), v => v.length, p => p.journal),
    ([venue, papers]) => ({ venue, papers })
  ).sort((a, b) => d3.descending(a.papers, b.papers)).slice(0, 15)

  return Plot.plot({
    marginLeft: 180,
    height: Math.max(260, grouped.length * 24),
    x: { label: "Papers", grid: true },
    y: { label: null },
    marks: [
      Plot.barX(grouped, {
        x: "papers",
        y: "venue",
        fill: "#6f42c1",
        sort: { y: "x", reverse: true },
        tip: plot_tip_style
      }),
      Plot.ruleX([0])
    ]
  })
}

Venue citedness (open-data analog of the Impact Factor)

Two-year mean citedness from OpenAlex (summary_stats.2yr_mean_citedness). Methodologically equivalent to the Clarivate Impact Factor formula (mean citations in year t to articles published in years t-1 and t-2), but computed over OpenAlex’s open Crossref-aggregated citation graph rather than the paywalled Web of Science one. Conferences and preprint servers are omitted (their non-rolling publication schedule makes the metric misleading). Built offline via build_journal_metrics.py; refresh annually.

viewof venue_if_search = Inputs.search(journal_impact_t, {
  placeholder: `Search ${journal_impact_t.length} venues…`
})

{
  // Decorate each venue row with the catalog's paper count for that venue,
  // so users can spot where heavy curation overlaps with high-citedness venues.
  const paper_counts = new Map()
  for (const p of pubs_t) {
    if (!p.journal) continue
    paper_counts.set(p.journal, (paper_counts.get(p.journal) ?? 0) + 1)
  }
  const rows = venue_if_search.map(v => ({
    ...v,
    papers_in_catalog: paper_counts.get(v.journal) ?? 0
  }))
  const table = Inputs.table(rows, {
    columns: ["journal", "two_yr_citedness", "h_index", "papers_in_catalog", "year_collected"],
    header: {
      journal:           "Venue",
      two_yr_citedness:  "IF₂ᵧᵣ (OpenAlex)",
      h_index:           "h-index",
      papers_in_catalog: "Papers in catalog",
      year_collected:    "Year collected"
    },
    format: {
      two_yr_citedness: c => c == null ? "" : c.toFixed(2),
      h_index:          v => v == null ? "" : String(v),
      year_collected:   y => y == null ? "" : String(y)
    },
    sort: "two_yr_citedness",
    reverse: true,
    rows: 30,
    width: { two_yr_citedness: 130, h_index: 90, papers_in_catalog: 140, year_collected: 130 }
  })
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${table}</div>
  </div>`
}

Publication lifecycle

How a method goes from arXiv / bioRxiv preprint to a peer-reviewed publication. For each algorithm, every preprint is paired greedily with the earliest following peer-reviewed publication (peer-reviewed / ML-conference / thesis all count as “post-preprint”). The Status column then tells you whether each row is paired (lifecycle complete), preprint-only (still in flight), or peer-reviewed-only (published without a preprint we have on file).

lifecycle_rows = {
  // Group publications by (method name, version). A pub without a model link
  // is bucketed under a per-pub synthetic key so it can't accidentally pair
  // with another stand-alone pub. Versioned publications (currently Casanovo
  // v1 / v2 / v5) are bucketed by `${method}|${version}` so each version
  // release pairs independently rather than collapsing into a single Casanovo
  // row.
  const by_group = new Map()
  for (const p of pubs_filtered) {
    const method = (p.models ?? "").split(",")[0]?.trim() || `_pub_${p.id}`
    const key    = p.version ? `${method}|${p.version}` : method
    if (!by_group.has(key)) by_group.set(key, { method, version: p.version || null, pubs: [] })
    by_group.get(key).pubs.push(p)
  }

  const POST_PREPRINT = new Set(["peer-reviewed", "ML conference", "thesis"])

  const rows = []
  for (const { method, version, pubs } of by_group.values()) {
    // Sort each method's pubs by date.
    const sorted = pubs.slice().sort((a, b) => (a.date ?? 0) - (b.date ?? 0))
    const preprints = sorted.filter(p => p.type === "preprint")
    const peers     = sorted.filter(p => POST_PREPRINT.has(p.type))

    // Greedy: each preprint claims the earliest unclaimed later peer-reviewed pub.
    const claimed = new Set()
    for (const pp of preprints) {
      const target = peers.find(p => !claimed.has(p.id) && (p.date ?? 0) >= (pp.date ?? 0))
      if (target) {
        claimed.add(target.id)
        const gap_days = ((target.date ?? 0) - (pp.date ?? 0)) / 86400000
        const gap_months = gap_days / 30.44
        rows.push({
          method,
          version,
          status: "paired",
          preprint_date: pp.date,
          preprint_title: pp.title,
          preprint_url:   pp.url || (pp.doi ? `https://doi.org/${pp.doi}` : null),
          peer_date:  target.date,
          peer_title: target.title,
          peer_url:   target.url || (target.doi ? `https://doi.org/${target.doi}` : null),
          gap_months: Math.round(gap_months * 10) / 10
        })
      } else {
        rows.push({
          method,
          version,
          status: "preprint-only",
          preprint_date: pp.date,
          preprint_title: pp.title,
          preprint_url:   pp.url || (pp.doi ? `https://doi.org/${pp.doi}` : null),
          peer_date:  null,
          peer_title: null,
          peer_url:   null,
          gap_months: null
        })
      }
    }
    for (const p of peers) {
      if (claimed.has(p.id)) continue
      rows.push({
        method,
        version,
        status: "peer-reviewed-only",
        preprint_date: null,
        preprint_title: null,
        preprint_url: null,
        peer_date:  p.date,
        peer_title: p.title,
        peer_url:   p.url || (p.doi ? `https://doi.org/${p.doi}` : null),
        gap_months: null
      })
    }
  }
  // Order: paired first (by gap, shortest → longest), then preprint-only by
  // preprint date (oldest first = candidates for follow-up), then
  // peer-reviewed-only by year.
  const status_rank = { "paired": 0, "preprint-only": 1, "peer-reviewed-only": 2 }
  return rows.sort((a, b) => {
    const s = status_rank[a.status] - status_rank[b.status]
    if (s !== 0) return s
    if (a.status === "paired") return a.gap_months - b.gap_months
    return (a.preprint_date ?? a.peer_date ?? 0) - (b.preprint_date ?? b.peer_date ?? 0)
  })
}

viewof lifecycle_status_filter = Inputs.checkbox(
  ["paired", "preprint-only", "peer-reviewed-only"],
  { label: "Show status", value: ["paired"] }
)

lifecycle_visible_rows = lifecycle_rows.filter(r => lifecycle_status_filter.includes(r.status))

viewof lifecycle_search = Inputs.search(lifecycle_visible_rows, {
  placeholder: `Search ${lifecycle_visible_rows.length} lifecycle rows (filtered)…`
})

{
  const table = Inputs.table(lifecycle_search, {
    columns: ["method", "version", "status", "preprint_date", "peer_date", "gap_months"],
    header: {
      method:        "Method",
      version:       "Version",
      status:        "Status",
      preprint_date: "Preprint",
      peer_date:     "Peer-reviewed",
      gap_months:    "Gap (months)"
    },
    format: {
      preprint_date: (d, i) => {
        const row = lifecycle_search[i]
        if (!d) return ""
        const label = d.toISOString().slice(0, 7)
        return row.preprint_url
          ? htl.html`<a href="${row.preprint_url}" target="_blank" rel="noopener" title="${row.preprint_title}">${label}</a>`
          : label
      },
      peer_date: (d, i) => {
        const row = lifecycle_search[i]
        if (!d) return ""
        const label = d.toISOString().slice(0, 7)
        return row.peer_url
          ? htl.html`<a href="${row.peer_url}" target="_blank" rel="noopener" title="${row.peer_title}">${label}</a>`
          : label
      },
      gap_months: g => g == null ? "" : `${g.toFixed(1)} mo`
    },
    rows: 25,
    width: { method: 200, version: 70, status: 160, preprint_date: 110, peer_date: 110, gap_months: 110 }
  })
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${table}</div>
  </div>`
}

{
  // Label = method + version (when versioned). Casanovo's three versions then
  // get their own y-axis rows; everything else stays as just the method name.
  const paired = lifecycle_rows
    .filter(r => r.status === "paired")
    .map(r => ({
      ...r,
      label: r.version ? `${r.method} ${r.version}` : r.method
    }))
  if (!paired.length) {
    return html`<p style="color:#57606a; font-style:italic;">No paired methods in the current filter.</p>`
  }
  return Plot.plot({
    marginLeft: 200,
    height: Math.max(220, paired.length * 20),
    x: { label: "Preprint → peer-reviewed gap (months)", grid: true },
    y: { label: null },
    marks: [
      Plot.barX(paired, {
        x1: 0,
        x2: "gap_months",   // explicit x1/x2 bypasses Plot's auto stackX transform
        y: "label",
        fill: "#1f6feb",
        sort: { y: "x2", reverse: true },
        tip: plot_tip_style
      }),
      Plot.ruleX([0])
    ]
  })
}

Browse all papers

viewof search = Inputs.search(pubs_filtered, { placeholder: `Search ${pubs_filtered.length} papers (filtered)…` })

viewof selected_pubs = {
  // Decorate each row with the venue's OpenAlex 2-year citedness (or null).
  // Also rewrite the comma-separated `models` string to append known aliases
  // (e.g. π-HelixNovo → "π-HelixNovo (aka PandaNovo)") so renamed methods stay
  // searchable by their historical name in the table's filter box.
  const aliases_by_model = new Map(
    algorithms_t.filter(a => a.aliases).map(a => [a.model, a.aliases])
  )
  const annotate_models = s => (s ?? "")
    .split(",").map(x => x.trim()).filter(Boolean)
    .map(m => aliases_by_model.has(m) ? `${m} (aka ${aliases_by_model.get(m)})` : m)
    .join(", ")
  const search_with_if = search.map(r => ({
    ...r,
    models: annotate_models(r.models),
    citedness: journal_impact_by_name.get(r.journal)?.two_yr_citedness ?? null
  }))
  const table = Inputs.table(search_with_if, {
    columns: ["year", "models", "version", "kind", "is_dl", "acquisition", "title", "authors", "journal", "citedness", "type", "repo"],
    header: {
      year: "Year", models: "Method(s)", version: "Ver.", kind: "Kind", is_dl: "DL?", acquisition: "Acq.",
      title: "Title", authors: "Authors", journal: "Venue", citedness: "IF₂ᵧᵣ", type: "Type", repo: "Code"
    },
    format: {
      year: y => y == null ? "" : String(y),
      is_dl: v => v === 1 || v === true ? "DL" : (v === 0 || v === false ? "classical" : ""),
      citedness: c => c == null ? "" : c.toFixed(1),
      title: (t, i) => {
        const row = search_with_if[i]
        const url = row?.url || (row?.doi ? `https://doi.org/${row.doi}` : null)
        return url ? htl.html`<a href="${url}" target="_blank" rel="noopener">${t}</a>` : t
      },
      authors: a => a && a.length > 80 ? a.slice(0, 80) + "…" : a,
      repo: r => {
        if (!r) return ""
        // The repository column holds either a single URL or two whitespace-separated URLs
        // (a few RNovA-style entries). Render up to three short link chips.
        const urls = String(r).split(/\s+/).filter(s => /^https?:\/\//.test(s)).slice(0, 3)
        if (!urls.length) return ""
        return htl.html`${urls.map(u => htl.html`<a href="${u}" target="_blank" rel="noopener" title="${u}" style="margin-right:4px">↗</a>`)}`
      }
    },
    sort: "year",
    reverse: true,
    rows: 25,
    width: { year: 60, type: 100, models: 130, version: 60, kind: 130, is_dl: 70, acquisition: 70, citedness: 70, repo: 60 }
  })
  // Reflect the inner Inputs.table's value/input on the fullscreen wrapper so
  // `viewof selected_pubs` exposes the rows the user has ticked, used by the
  // BibTeX download button below.
  const wrapper = html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${table}</div>
  </div>`
  Object.defineProperty(wrapper, "value", { get: () => table.value })
  table.addEventListener("input", e => wrapper.dispatchEvent(new CustomEvent("input", { bubbles: false })))
  return wrapper
}

{
  // Format one publication as a BibTeX entry. Entry type picked from
  // publication_type; key is a stable first-author-lastname + year + title-word slug.
  const slug = s => (s ?? "").toString().toLowerCase().replace(/[^a-z0-9]+/g, "")
  const escape = s => (s ?? "").toString().replace(/[{}\\]/g, "\\$&")
  const entry_type_of = t => ({
    "ML conference": "inproceedings",
    "thesis":        "phdthesis",
    "preprint":      "misc",
    "commentary":    "misc"
  })[t] ?? "article"
  const to_bibtex = p => {
    const author_list = (p.authors ?? "").split(",").map(s => s.trim()).filter(Boolean)
    const first_last  = author_list[0]?.split(/\s+/).pop() ?? "anon"
    const title_word  = (p.title ?? "ref").split(/\s+/).find(w => w.length > 3) ?? "ref"
    const key = `${slug(first_last)}${p.year ?? ""}${slug(title_word)}`
    const fields = []
    if (p.title)   fields.push(`  title   = {${escape(p.title)}}`)
    if (author_list.length) fields.push(`  author  = {${author_list.map(escape).join(" and ")}}`)
    if (p.year)    fields.push(`  year    = {${p.year}}`)
    if (p.journal) fields.push(`  journal = {${escape(p.journal)}}`)
    if (p.doi)     fields.push(`  doi     = {${escape(p.doi)}}`)
    if (p.url)     fields.push(`  url     = {${escape(p.url)}}`)
    if (p.type === "preprint") fields.push(`  note    = {preprint}`)
    return `@${entry_type_of(p.type)}{${key},\n${fields.join(",\n")}\n}`
  }
  const handle_download = () => {
    if (!selected_pubs.length) return
    const text = selected_pubs.map(to_bibtex).join("\n\n") + "\n"
    const blob = new Blob([text], { type: "application/x-bibtex;charset=utf-8" })
    const a = document.createElement("a")
    a.href = URL.createObjectURL(blob)
    a.download = `de-novo-papers-${new Date().toISOString().slice(0,10)}.bib`
    document.body.appendChild(a); a.click(); a.remove()
    URL.revokeObjectURL(a.href)
  }
  const disabled = selected_pubs.length === 0
  const btn = html`<button class="download-btn" ?disabled=${disabled}>
    ⬇ Download ${selected_pubs.length || "0"} selected as BibTeX
  </button>`
  btn.disabled = disabled
  btn.onclick = handle_download
  return html`<div style="margin: 0.5rem 0 1rem; display: flex; gap: 0.75rem; align-items: center;">
    ${btn}
    <span style="color:#57606a; font-size: 0.9em;">Tick rows in the table above to enable.</span>
  </div>`
}

Browse all authors

Aggregated from the currently-filtered set of papers. Searching is case-insensitive across every column (name, affiliation, country, methods).

authors_table_rows = {
  // Roll up authors across the filtered pubs, tagging each with the
  // model names and kinds they touched in that subset.
  const detail = new Map(author_details_t.map(d => [d.name, d]))
  const counts = new Map()   // name → { papers, models:Set, kinds:Set }
  for (const p of pubs_filtered) {
    const ms = (p.models ?? "").split(", ").filter(Boolean)
    for (const a of (p.authors ?? "").split(", ").filter(Boolean)) {
      const slot = counts.get(a) ?? { papers: 0, models: new Set(), kinds: new Set() }
      slot.papers += 1
      for (const m of ms) slot.models.add(m)
      if (p.kind) slot.kinds.add(p.kind)
      counts.set(a, slot)
    }
  }
  return Array.from(counts, ([name, v]) => ({
    name,
    papers:       v.papers,
    methods:      Array.from(v.models).sort().join(", "),
    kinds:        Array.from(v.kinds).sort().join(", "),
    affiliations: detail.get(name)?.affiliations ?? "",
    countries:    detail.get(name)?.countries ?? ""
  })).sort((a, b) => b.papers - a.papers || a.name.localeCompare(b.name))
}

viewof author_search = Inputs.search(authors_table_rows, {
  placeholder: `Search ${authors_table_rows.length} authors (filtered)…`
})

{
  const table = Inputs.table(author_search, {
    columns: ["name", "papers", "methods", "kinds", "affiliations", "countries"],
    header: {
      name: "Author", papers: "Papers", methods: "Method(s)",
      kinds: "Kind(s)", affiliations: "Affiliation(s)", countries: "Country / countries"
    },
    format: {
      papers:       n => n == null ? "" : String(n),
      affiliations: s => !s ? "" : (s.length > 80 ? s.slice(0, 80) + "…" : s),
      methods:      s => !s ? "" : (s.length > 60 ? s.slice(0, 60) + "…" : s)
    },
    sort: "papers",
    reverse: true,
    rows: 25,
    width: { name: 180, papers: 70, methods: 220, kinds: 130, affiliations: 320, countries: 120 }
  })
  return html`<div class="chart-wrap">
    <button class="fs-btn" onclick="
      const el = this.parentElement;
      if (document.fullscreenElement) document.exitFullscreen();
      else el.requestFullscreen();
    ">⛶ Fullscreen</button>
    <div class="chart-scroll">${table}</div>
  </div>`
}

Contributing

Easiest path: open a GitHub issue with a link to the paper (DOI / arXiv / bioRxiv / OpenReview / …) and I’ll wire it into the database. Corrections are equally welcome: wrong author lists, missing affiliations, mis-classified kind / DL / acquisition, broken hyperlinks, anything that looks off.

Advanced: edit the database directly

The site is generated from denovo.db (SQLite, the source of truth). If you’re comfortable with SQL:

Edit denovo.db with any SQLite tool (sqlite3 CLI, DB Browser for SQLite, DataGrip, …). A new paper typically needs rows in publication, publication_author, and publication_algorithm; a new model also needs a row in algorithm (set kind, is_deep_learning, acquisition_mode). Affiliations cascade through country → city → affiliation and link to authors via author_affiliation.
Regenerate the human-readable SQL dump so the diff is reviewable:
```
sqlite3 denovo.db .dump > denovo.sql
```
Open a PR with both denovo.db and denovo.sql. The GitHub Action rebuilds the site and publishes to gh-pages on merge; typically live within ~3 minutes.

Cite this catalog

If you use this catalog, please cite it as:

Van Goey, J. Awesome De Novo Peptide Sequencing. Zenodo. https://doi.org/10.5281/zenodo.20825737

Machine-readable metadata is in CITATION.cff; GitHub’s “Cite this repository” button exports BibTeX / APA.

This page is a comprehensive map of de novo peptide sequencing covering algorithms, post-processors, downstream applications and adjacent tools, deep-learning and classical alike. Source data and code: GitHub, rebuilt automatically on every push to main.

// Footer line showing when the page was last built; `build_date` is supplied
// by another cell.
md`<div class="last-updated">Last updated on ${build_date}.</div>`