SourceScore

Integration guide

LlamaIndex + SourceScore VERITAS

Custom retriever for signed claims + a node post-processor that attaches verification badges. Drop-in for any QueryEngine or ChatEngine.

Install

pip install llama-index requests

Custom VERITAS retriever

Subclass BaseRetriever and translate VERITAS search hits into LlamaIndex nodes. Each node carries the claim id, confidence, and a back-link to the canonical page in metadata so downstream prompts can render badges.

import requests
from typing import List
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, TextNode

VERITAS = "https://sourcescore.org/api/v1"

class VeritasRetriever(BaseRetriever):
    """Retriever that queries the SourceScore VERITAS claim-search API.

    Each search hit becomes a ``TextNode`` carrying the claim id,
    confidence, source count, and a canonical back-link in ``metadata``
    so downstream prompts can render verification badges.
    """

    def __init__(self, top_k: int = 5, base_url: str = VERITAS, timeout: float = 8.0):
        """
        Args:
            top_k: Maximum number of claims requested per query.
            base_url: VERITAS API root (no trailing slash). Defaults to the
                public endpoint, so existing callers are unaffected.
            timeout: Per-request timeout in seconds.
        """
        super().__init__()
        self.top_k = top_k
        self.base_url = base_url
        self.timeout = timeout

    def _retrieve(self, query_bundle) -> List[NodeWithScore]:
        """Translate VERITAS search hits into scored LlamaIndex nodes.

        Raises:
            requests.HTTPError: If the search endpoint returns an error status.
        """
        r = requests.get(
            f"{self.base_url}/search",
            params={"q": query_bundle.query_str, "limit": self.top_k},
            timeout=self.timeout,
        )
        r.raise_for_status()
        out: List[NodeWithScore] = []
        for c in r.json().get("matches", []):
            node = TextNode(
                text=c["statement"],
                metadata={
                    "claim_id": c["id"],
                    "confidence": c["confidence"],
                    "source_count": c["sourceCount"],
                    # Canonical claim page — lets prompts render a badge link.
                    "url": f"https://sourcescore.org/claims/{c['id']}/",
                    "vertical": c.get("vertical", "ai-ml"),
                    "tags": c.get("tags", []),
                },
            )
            # Prefer the API's lexical match score; fall back to confidence.
            out.append(NodeWithScore(node=node, score=c.get("matchScore", c["confidence"])))
        return out

Wire into a QueryEngine

from llama_index.core import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import get_response_synthesizer
from llama_index.llms.openai import OpenAI

# Grounded-answer prompt: the LLM may use only the retrieved VERITAS
# claims and must tag every fact with its claim id.
qa_template = PromptTemplate(
    """You are a precise assistant. Answer using ONLY the verified
claims below. Cite every fact with [claim_id]. If the claims do not cover
the question, say so — do not improvise.

Verified claims:
{context_str}

Question: {query_str}
Answer (every fact ends with [claim_id]):"""
)

# Deterministic synthesis (temperature 0) over the custom VERITAS retriever.
llm = OpenAI(model="gpt-4o-mini", temperature=0)
retriever = VeritasRetriever(top_k=5)
engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=get_response_synthesizer(llm=llm, text_qa_template=qa_template),
)

resp = engine.query("Who introduced the Transformer architecture?")
print(resp)
# Show provenance for each supporting node: claim id, confidence, URL.
for n in resp.source_nodes:
    print(f"  → [{n.metadata['claim_id']}] confidence {n.metadata['confidence']:.2f} {n.metadata['url']}")

Post-process verification (node postprocessor)

For chains that already have a different primary retriever, you can layer VERITAS as a post-processor that verifies each retrieved node and drops anything that doesn't match a high-confidence claim.

import requests
from typing import List, Optional
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.schema import NodeWithScore, QueryBundle

class VeritasVerifyPostprocessor(BaseNodePostprocessor):
    """Node postprocessor that keeps only nodes backed by a VERITAS claim.

    Each node's text is POSTed to the /verify endpoint; nodes with no
    match at or above ``min_confidence`` are dropped, and matching nodes
    are annotated with the claim id and confidence in their metadata.
    """

    # Minimum claim confidence required to keep a node.
    min_confidence: float = 0.85

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Return the subset of ``nodes`` verified by the VERITAS API.

        Raises:
            requests.HTTPError: If the verify endpoint returns an error status.
        """
        verified: List[NodeWithScore] = []
        for n in nodes:
            r = requests.post(
                "https://sourcescore.org/api/v1/verify",
                json={"claim": n.node.text, "minConfidence": self.min_confidence},
                timeout=8,
            )
            # Fail loudly on HTTP errors instead of parsing an error payload
            # as if it were a verification result (which would silently drop
            # the node or raise a confusing KeyError downstream).
            r.raise_for_status()
            best = r.json().get("bestMatch")
            if best:
                n.node.metadata["veritas_claim_id"] = best["id"]
                n.node.metadata["veritas_confidence"] = best["confidence"]
                verified.append(n)
        return verified

Next steps