Back to Theory
Tutorial · 8 min read · June 16, 2026

Upgrading from LangChain ConversationBufferMemory to Feather DB

ConversationBufferMemory grows unbounded, has no decay, and no graph. Here's a migration guide with a FeatherMemory drop-in class, side-by-side comparison, and performance numbers showing what you gain.

F
Feather DB
Engineering

Why ConversationBufferMemory fails at scale

LangChain's ConversationBufferMemory is fine for demos. In production, it hits three walls:

  • Grows unbounded: Every message is appended to the buffer. After 50 exchanges, you're passing 10,000+ tokens to the LLM on every call. At 100 exchanges, you've likely blown the context window or are spending $0.20 per query on tokens that are mostly irrelevant to the current question.
  • No decay: A factual error from turn 2 gets as much weight as a correction from turn 48. An outdated preference sits alongside a new one with equal authority. The buffer has no way to indicate that some information is more reliable or more recent.
  • No graph: If the user says "remember what I said about the API" — the buffer doesn't know that "the API" refers to the decision made in turn 15. Everything is flat text with no structure.

Feather DB solves all three: vector search retrieves only relevant context (not the full history), adaptive scoring handles decay and recency, and edges link related memories into a traversable graph.

The FeatherMemory drop-in class

We'll implement FeatherMemory as a BaseChatMemory subclass, making it a drop-in for ConversationBufferMemory in any LangChain chain:

from langchain.memory.chat_memory import BaseChatMemory
from langchain.schema import BaseMessage, HumanMessage, AIMessage
from langchain.schema.messages import get_buffer_string
from typing import Any, Dict, List
import feather_db as fdb
from datetime import datetime

class FeatherMemory(BaseChatMemory):
    """
    LangChain BaseChatMemory backed by Feather DB.

    Intended as a replacement for ConversationBufferMemory: instead of
    returning the full buffer, ``load_memory_variables`` returns the ``k``
    stored messages retrieved for the current input.

    The default ``memory_key`` is ``"chat_history"``; pass ``memory_key=...``
    to match the variable name your prompt expects. Extra keyword arguments
    (``memory_key``, ``input_key``, ``output_key``, ``return_messages``, ...)
    are forwarded to ``BaseChatMemory``.
    """
    db: Any = None
    embed_fn: Any = None
    namespace: str = "default"
    k: int = 6              # number of memories to retrieve
    half_life: int = 30     # decay in days
    dim: int = 768          # vector dimension the store is opened with
    memory_key: str = "chat_history"
    return_messages: bool = True

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, db_path: str, embed_fn, namespace: str = "default",
                  k: int = 6, half_life: int = 30, dim: int = 768, **kwargs):
        """
        Open (or create) the Feather DB file and configure retrieval.

        Args:
            db_path: Path of the Feather DB file to open.
            embed_fn: Callable mapping a string to its embedding vector.
            namespace: Namespace under which this memory reads and writes.
            k: Number of memories requested per retrieval.
            half_life: Decay half-life passed to ``context_chain``.
            dim: Dimension of the vectors produced by ``embed_fn``. Must
                match the embedding model; defaults to 768.
            **kwargs: Forwarded to ``BaseChatMemory``.
        """
        super().__init__(**kwargs)
        self.dim = dim
        self.db = fdb.DB.open(db_path, dim=dim)
        self.embed_fn = embed_fn
        self.namespace = namespace
        self.k = k
        self.half_life = half_life

    @property
    def memory_variables(self) -> List[str]:
        """Return the single prompt variable this memory fills."""
        return [self.memory_key]

    def _first_text(self, payload: Dict[str, Any], preferred_key: Any,
                    fallback_keys: Any) -> str:
        """Return the first non-empty value found under the candidate keys."""
        candidates = [preferred_key] if preferred_key else []
        candidates.extend(fallback_keys)
        for key in candidates:
            value = payload.get(key)
            if value:
                return value
        return ""

    def _input_text(self, inputs: Dict[str, Any]) -> str:
        """Extract the human input, honouring a configured ``input_key``."""
        return self._first_text(
            inputs, getattr(self, "input_key", None), ("input", "question")
        )

    def _output_text(self, outputs: Dict[str, Any]) -> str:
        """Extract the AI output, honouring a configured ``output_key``.

        ``"response"`` and ``"text"`` are included because chains such as
        ConversationChain and LLMChain use them as their output keys;
        without them the AI turn would silently never be stored.
        """
        return self._first_text(
            outputs,
            getattr(self, "output_key", None),
            ("output", "answer", "response", "text"),
        )

    def _store(self, text: str, role: str) -> Any:
        """Embed ``text``, add it to the store and tag it with ``role``."""
        vec = self.embed_fn(text)
        mem = self.db.add(vec, text=text,
                          namespace=self.namespace,
                          entity="conversation")
        mem.meta.set_attribute("role", role)
        mem.meta.set_attribute("importance", 1.0)
        # Naive UTC timestamp in ISO-8601 form.
        mem.meta.set_attribute("created_at", datetime.utcnow().isoformat())
        return mem

    def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """
        Called by LangChain to load context before the LLM call.

        Returns the memories retrieved for the current input under
        ``memory_key``: a list of messages when ``return_messages`` is true,
        otherwise a single formatted string.
        """
        query = self._input_text(inputs)

        messages = []
        if query:
            vec = self.embed_fn(query)
            results = self.db.context_chain(
                vec,
                k=self.k,
                namespace=self.namespace,
                max_depth=1,
                half_life=self.half_life
            )

            # Reconstruct as LangChain messages. Any role other than
            # "human" (including a missing role defaulting to "human")
            # is rendered as an AI message.
            for mem in results:
                role = mem.meta.get_attribute("role") or "human"
                if role == "human":
                    messages.append(HumanMessage(content=mem.text))
                else:
                    messages.append(AIMessage(content=mem.text))

        # Use one return path so an empty query yields "" (not a list)
        # when the caller asked for a string buffer.
        if self.return_messages:
            return {self.memory_key: messages}
        return {self.memory_key: get_buffer_string(messages)}

    def save_context(self, inputs: Dict[str, Any],
                      outputs: Dict[str, str]) -> None:
        """
        Called by LangChain after the LLM call.

        Saves the human input and the AI output to Feather DB and, when both
        are present, links them with a ``leads_to`` edge.
        """
        human_text = self._input_text(inputs)
        ai_text = self._output_text(outputs)

        human_id = None
        if human_text:
            human_id = self._store(human_text, "human").id

        if ai_text:
            ai_mem = self._store(ai_text, "ai")
            # Link: human message leads_to AI response
            if human_id is not None:
                self.db.add_edge(human_id, ai_mem.id, edge_type="leads_to")

    def clear(self) -> None:
        """Clear all memories in this namespace."""
        # Note: clears only this namespace's memories
        self.db.delete_namespace(self.namespace)

Side-by-side: ConversationBufferMemory vs FeatherMemory in a chain

from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

# BEFORE: the stock in-process buffer memory
buffer_memory = ConversationBufferMemory(return_messages=True)
buffer_chain = ConversationChain(llm=llm, memory=buffer_memory)

# AFTER: FeatherMemory — one-line swap
from langchain_openai import OpenAIEmbeddings

# FeatherMemory opens its store with dim=768, while text-embedding-3-small
# returns 1536-dimensional vectors by default. Request 768 dimensions so the
# vectors match the store.
oai_embed = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=768)

def embed(text: str) -> list:
    """Return the embedding vector for a single piece of text."""
    return oai_embed.embed_query(text)

# ConversationChain's default prompt expects the memory under "history",
# whereas FeatherMemory defaults to "chat_history"; override it so the
# chain's prompt-variable validation passes.
feather_chain = ConversationChain(
    llm=llm,
    memory=FeatherMemory(
        db_path="conversation.feather",
        embed_fn=embed,
        namespace="user-session-1",
        k=6,
        half_life=30,
        memory_key="history",
    )
)

# The two chains are driven through the identical predict() call
intro = "My name is Alex and I work on a fintech startup."
for chain in (buffer_chain, feather_chain):
    chain.predict(input=intro)

Performance comparison

| Metric | ConversationBufferMemory | FeatherMemory |
| --- | --- | --- |
| Tokens per LLM call (at turn 50) | ~8,000–15,000 | ~400–800 |
| Tokens per LLM call (at turn 200) | Context limit exceeded | ~400–800 (stable) |
| Memory retrieval latency | 0ms (in-memory dict) | ~1–3ms (ANN search) |
| Relevance of retrieved context | All history (mostly irrelevant) | Semantically matched |
| Handles stale information | No | Yes (adaptive decay) |
| Persists across sessions | No (in-memory) | Yes (.feather file) |
| Cost at 1000 turns (gpt-4o-mini) | ~$1.20+ (growing) | ~$0.04 (stable) |

Advanced: boosting important memories

# After saving a memory, boost its importance if it's a key fact
def save_important_fact(chain_memory: FeatherMemory, fact: str,
                         importance: float = 2.0):
    """
    Store ``fact`` as a "key-facts" memory with an explicit importance.

    The fact is embedded with the memory's own ``embed_fn``, added to the
    memory's namespace, and tagged with role "system", the given importance
    and a creation timestamp. Returns the object produced by ``db.add``.
    """
    embedding = chain_memory.embed_fn(fact)
    record = chain_memory.db.add(
        embedding,
        text=fact,
        namespace=chain_memory.namespace,
        entity="key-facts",
    )
    for attr_name, attr_value in (("role", "system"),
                                  ("importance", importance)):
        record.meta.set_attribute(attr_name, attr_value)
    # Timestamp is taken last, right before it is written.
    record.meta.set_attribute("created_at", datetime.utcnow().isoformat())
    return record

# Usage: save the user's name as a permanent high-importance fact
memory = feather_chain.memory
# (fact text, importance) pairs, saved in order
key_facts = [
    ("User's name is Alex.", 2.5),
    ("Alex works at a fintech startup on a FastAPI backend.", 2.0),
]
for fact_text, weight in key_facts:
    save_important_fact(memory, fact_text, importance=weight)

The migration cost is low: implement FeatherMemory once, swap it in wherever you use ConversationBufferMemory, and you get persistent cross-session memory, adaptive decay, graph structure, and a token cost that stays flat regardless of how long the conversation has been running. The trade-offs are the 1–3ms retrieval latency and the embedding calls FeatherMemory makes on every load and save (one per message read or written) — in any LLM-latency-dominated application, the retrieval step is effectively invisible.

Install: pip install feather-db langchain langchain-openai · GitHub: github.com/feather-store/feather