Step-by-step integration patterns for the most popular Python agent frameworks

Installation

pip install zep-cloud
# Or for self-hosted:
pip install zep-python
 
export ZEP_API_KEY=your_api_key_here
 

LangChain: ZepCloudChatMessageHistory

The ZepCloudChatMessageHistory class is a drop-in chat message history backend for LangChain memory components. It stores messages in Zep, which automatically extracts facts from them.

# NOTE: ZepCloudChatMessageHistory is exported from
# langchain_community.chat_message_histories — the module path
# langchain_community.memory.zep_cloud_memory does not exist.
from langchain_community.chat_message_histories import ZepCloudChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
 
SESSION_ID = 'session-abc'
USER_ID = 'user-123'
 
# Zep-backed message store; every message added through LangChain is
# persisted to this Zep session.
history = ZepCloudChatMessageHistory(
    session_id=SESSION_ID,
    api_key='YOUR_ZEP_API_KEY',
)
 
# Wrap the Zep store in a standard LangChain memory object so any chain
# that accepts `memory=` can use it.
memory = ConversationBufferMemory(
    chat_memory=history,
    return_messages=True,
)
 
llm = ChatOpenAI(model='gpt-4o-mini')
chain = ConversationChain(llm=llm, memory=memory)
 
# Messages are automatically stored in Zep
response = chain.predict(input='My name is Alice and I prefer brief answers.')
print(response)
 

LangChain: injecting Zep memory into the system prompt

For more control, fetch Zep memory explicitly and inject it into your prompt template.

import asyncio
from zep_cloud.client import AsyncZep
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
 
zep = AsyncZep(api_key='YOUR_ZEP_API_KEY')
llm = ChatOpenAI(model='gpt-4o')
 
async def chat_with_memory(user_id: str, session_id: str, message: str) -> str:
    """Answer `message` with Zep memory injected, then persist the exchange.

    Fetches the session's memory context, runs the prompt|llm chain with it,
    and writes both sides of the new exchange back to Zep.
    """
    # Pull whatever Zep already knows about this session.
    session_memory = await zep.memory.get(session_id=session_id)
    known_context = session_memory.context or 'No previous context.'
 
    # Render the context into the system slot of the prompt and run the chain.
    template = ChatPromptTemplate.from_messages([
        ('system', 'You are a helpful assistant.\n\nWhat you know about this user:\n{memory}'),
        ('human', '{message}'),
    ])
    reply = await (template | llm).ainvoke(
        {'memory': known_context, 'message': message}
    )
 
    # Record both halves of the exchange so future calls see them.
    from zep_cloud.types import Message
    await zep.memory.add(
        session_id=session_id,
        messages=[
            Message(role='user', role_type='user', content=message),
            Message(role='assistant', role_type='assistant', content=reply.content),
        ]
    )
 
    return reply.content
 

LangGraph: Zep as a memory node

Add a dedicated load_memory node to your LangGraph graph that fetches Zep context before the main LLM call.

from typing import TypedDict, Annotated
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, SystemMessage
from langchain_openai import ChatOpenAI
from zep_cloud.client import AsyncZep
 
zep = AsyncZep(api_key='YOUR_ZEP_API_KEY')
llm = ChatOpenAI(model='gpt-4o')
 
class State(TypedDict):
    """Graph state threaded through every node of the LangGraph graph."""
    # Conversation history; add_messages appends rather than replaces.
    messages: Annotated[list[BaseMessage], add_messages]
    # Zep user identifier (carried in state; not read by the nodes shown here).
    user_id: str
    # Zep session identifier; keys the memory.get/memory.add calls.
    session_id: str
    # Memory context string fetched by load_memory and consumed by llm_node.
    zep_context: str
 
async def load_memory(state: State) -> dict:
    """Pull the Zep memory context for this session into graph state."""
    session_memory = await zep.memory.get(session_id=state['session_id'])
    context = session_memory.context or ''
    return {'zep_context': context}
 
async def llm_node(state: State) -> dict:
    """Invoke the LLM with the Zep context prepended as a system message."""
    context_message = SystemMessage(
        content=f'You are helpful.\n\nUser context:\n{state["zep_context"]}'
    )
    reply = await llm.ainvoke([context_message, *state['messages']])
    return {'messages': [reply]}
 
async def save_memory(state: State) -> dict:
    """Persist the most recent user/assistant exchange to Zep.

    Uses next() with a None default: the original bare next() raised
    StopIteration when no human or ai message was present, which a coroutine
    surfaces as RuntimeError. An incomplete exchange is now skipped instead.
    """
    from zep_cloud.types import Message
    last_human = next((m for m in reversed(state['messages']) if m.type == 'human'), None)
    last_ai = next((m for m in reversed(state['messages']) if m.type == 'ai'), None)
    if last_human is None or last_ai is None:
        # Nothing complete to store (e.g. graph interrupted before the LLM ran).
        return {}
    await zep.memory.add(
        session_id=state['session_id'],
        messages=[
            Message(role='user', role_type='user', content=last_human.content),
            Message(role='assistant', role_type='assistant', content=last_ai.content),
        ]
    )
    return {}
 
# Assemble the graph: load memory -> call LLM -> save memory.
builder = StateGraph(State)
for node_name, node_fn in (
    ('load_memory', load_memory),
    ('llm', llm_node),
    ('save_memory', save_memory),
):
    builder.add_node(node_name, node_fn)
for src, dst in (
    (START, 'load_memory'),
    ('load_memory', 'llm'),
    ('llm', 'save_memory'),
    ('save_memory', END),
):
    builder.add_edge(src, dst)
graph = builder.compile()
 

PydanticAI: Zep via dependency injection

from dataclasses import dataclass
from pydantic_ai import Agent, RunContext
from zep_cloud.client import AsyncZep
 
@dataclass
class Deps:
    """Dependencies injected into each PydanticAI agent run."""
    # Shared async Zep client.
    zep: AsyncZep
    # Zep user identifier (carried for callers; not read by the handlers shown here).
    user_id: str
    # Zep session identifier used by the system prompt and tools.
    session_id: str
 
# PydanticAI agent; a Deps instance is injected per run via `deps=`.
agent = Agent('openai:gpt-4o', deps_type=Deps)
 
@agent.system_prompt
async def system_prompt_with_memory(ctx: RunContext[Deps]) -> str:
    """Build the system prompt from whatever Zep remembers about this session."""
    session_memory = await ctx.deps.zep.memory.get(session_id=ctx.deps.session_id)
    known = session_memory.context or 'No previous context.'
    return f'You are a helpful assistant.\n\nUser context:\n{known}'
 
@agent.tool
async def remember_fact(ctx: RunContext[Deps], fact: str) -> str:
    """Store an important fact about the user for future reference."""
    # Docstring above doubles as the LLM-facing tool description.
    from zep_cloud.types import Message
    note = Message(role='system', role_type='system', content=f'Important: {fact}')
    await ctx.deps.zep.memory.add(
        session_id=ctx.deps.session_id,
        messages=[note],
    )
    return f'Remembered: {fact}'
 
async def main():
    """Wire up a Zep client and run the agent once with injected deps."""
    client = AsyncZep(api_key='YOUR_ZEP_API_KEY')
    run_deps = Deps(zep=client, user_id='user-123', session_id='session-abc')
    result = await agent.run('I always prefer morning meetings.', deps=run_deps)
    print(result.data)
 

Debugging memory retrieval

When Zep memory does not seem to be working, check these things in order:

  1. Confirm the session_id matches across your add() and get() calls
  2. Check that you have added at least 2-3 message exchanges — Zep needs a minimum of context to extract facts
  3. Use zep.memory.get_session(session_id=...) (with the same client instance the snippets create) to verify the session exists
  4. In Zep Cloud, open the dashboard and inspect the session's extracted facts
  5. For self-hosted, check the NLP extractor service logs — fact extraction runs asynchronously
# Debug: check what Zep has extracted
async def debug_zep_memory(session_id: str):
    """Print the facts, summary and context Zep holds for a session.

    Assumes a module-level `zep` AsyncZep client, as created in the
    snippets above. Guards against fields that are None before fact
    extraction has run (extraction is asynchronous).
    """
    memory = await zep.memory.get(session_id=session_id)
    facts = memory.facts or []  # facts can be None before extraction completes
    print('--- Zep Memory Debug ---')
    print(f'Facts ({len(facts)})')
    for f in facts:
        # rating may be None for freshly extracted facts; avoid a format crash
        rating = f'{f.rating:.2f}' if f.rating is not None else 'n/a'
        print(f'  [{rating}] {f.fact}')
    print(f'Summary: {memory.summary.content if memory.summary else "none"}')
    print(f'Context length: {len(memory.context or "")} chars')