Step-by-step integration patterns for the most popular Python agent frameworks
Installation
pip install zep-cloud
# Or for self-hosted:
pip install zep-python
export ZEP_API_KEY=your_api_key_here
LangChain: ZepCloudChatMessageHistory
The ZepCloudChatMessageHistory class is a drop-in replacement for LangChain's built-in chat message history classes, so it plugs into any LangChain memory component that accepts a chat_memory backend. It stores messages in Zep, which automatically extracts facts from them.
# NOTE: the class is exported from langchain_community.chat_message_histories;
# the previously shown `langchain_community.memory.zep_cloud_memory` path does
# not resolve in current langchain-community releases.
from langchain_community.chat_message_histories import ZepCloudChatMessageHistory
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain

# Identifiers that tie this conversation to a Zep session and user.
SESSION_ID = 'session-abc'
USER_ID = 'user-123'

# Chat history backed by Zep Cloud: every message written through it is
# persisted server-side, and Zep extracts facts asynchronously.
history = ZepCloudChatMessageHistory(
    session_id=SESSION_ID,
    api_key='YOUR_ZEP_API_KEY',
)

# Wrap the Zep-backed history in a standard LangChain memory object so it
# can be dropped into any chain that accepts `memory=`.
memory = ConversationBufferMemory(
    chat_memory=history,
    return_messages=True,
)

llm = ChatOpenAI(model='gpt-4o-mini')
chain = ConversationChain(llm=llm, memory=memory)

# Messages are automatically stored in Zep as the chain runs.
response = chain.predict(input='My name is Alice and I prefer brief answers.')
print(response)
LangChain: injecting Zep memory into the system prompt
For more control, fetch Zep memory explicitly and inject it into your prompt template.
import asyncio
from zep_cloud.client import AsyncZep
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
# Module-level clients shared by every call to chat_with_memory() below.
zep = AsyncZep(api_key='YOUR_ZEP_API_KEY')
llm = ChatOpenAI(model='gpt-4o')
async def chat_with_memory(user_id: str, session_id: str, message: str) -> str:
    """Answer *message* with Zep memory injected into the system prompt,
    then persist the new exchange back to Zep.

    Returns the assistant's reply text.
    """
    from zep_cloud.types import Message

    # Fetch the synthesized memory context Zep maintains for this session.
    session_memory = await zep.memory.get(session_id=session_id)
    context_block = session_memory.context or 'No previous context.'

    # Build the prompt with the memory context injected, then run the model.
    prompt = ChatPromptTemplate.from_messages([
        ('system', 'You are a helpful assistant.\n\nWhat you know about this user:\n{memory}'),
        ('human', '{message}'),
    ])
    reply = await (prompt | llm).ainvoke(
        {'memory': context_block, 'message': message}
    )

    # Persist both sides of the exchange so Zep can extract new facts.
    await zep.memory.add(
        session_id=session_id,
        messages=[
            Message(role='user', role_type='user', content=message),
            Message(role='assistant', role_type='assistant', content=reply.content),
        ],
    )
    return reply.content
LangGraph: Zep as a memory node
Add a dedicated load_memory node to your LangGraph graph that fetches Zep context before the main LLM call.
from typing import TypedDict, Annotated
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, SystemMessage
from langchain_openai import ChatOpenAI
from zep_cloud.client import AsyncZep
zep = AsyncZep(api_key='YOUR_ZEP_API_KEY')
llm = ChatOpenAI(model='gpt-4o')
class State(TypedDict):
    """Graph state threaded through every LangGraph node below."""
    messages: Annotated[list[BaseMessage], add_messages]  # conversation so far; add_messages merges updates
    user_id: str
    session_id: str  # Zep session the memory nodes read/write
    zep_context: str  # memory context populated by load_memory
async def load_memory(state: State) -> dict:
    """Pull Zep's memory context for this session into graph state."""
    session_memory = await zep.memory.get(session_id=state['session_id'])
    context = session_memory.context
    return {'zep_context': context if context else ''}
async def llm_node(state: State) -> dict:
    """Run the model with the Zep context prepended as a system message."""
    context = state["zep_context"]
    preamble = SystemMessage(
        content=f'You are helpful.\n\nUser context:\n{context}'
    )
    reply = await llm.ainvoke([preamble, *state['messages']])
    return {'messages': [reply]}
async def save_memory(state: State) -> dict:
    """Persist the latest user/assistant exchange to Zep.

    Robustness fix: the original used bare ``next(...)``, which raises
    StopIteration (surfacing as RuntimeError inside a coroutine, per
    PEP 479) whenever the history lacks a human or an AI message — e.g.
    on the very first turn or if an upstream node failed before the LLM
    replied. Use a default of None and skip the write instead.
    """
    from zep_cloud.types import Message
    last_human = next((m for m in reversed(state['messages']) if m.type == 'human'), None)
    last_ai = next((m for m in reversed(state['messages']) if m.type == 'ai'), None)
    if last_human is None or last_ai is None:
        # No complete exchange to persist yet.
        return {}
    await zep.memory.add(
        session_id=state['session_id'],
        messages=[
            Message(role='user', role_type='user', content=last_human.content),
            Message(role='assistant', role_type='assistant', content=last_ai.content),
        ]
    )
    return {}
# Wire the graph: load memory -> call LLM -> save memory.
builder = StateGraph(State)
for node_name, node_fn in (
    ('load_memory', load_memory),
    ('llm', llm_node),
    ('save_memory', save_memory),
):
    builder.add_node(node_name, node_fn)
for src, dst in (
    (START, 'load_memory'),
    ('load_memory', 'llm'),
    ('llm', 'save_memory'),
    ('save_memory', END),
):
    builder.add_edge(src, dst)
graph = builder.compile()
PydanticAI: Zep via dependency injection
from dataclasses import dataclass
from pydantic_ai import Agent, RunContext
from zep_cloud.client import AsyncZep
@dataclass
class Deps:
    # Dependencies injected into every agent run via RunContext.
    zep: AsyncZep  # shared Zep client
    user_id: str  # Zep user the session belongs to
    session_id: str  # Zep session to read/write memory for
# Agent whose system prompt is rebuilt from Zep memory on every run.
agent = Agent('openai:gpt-4o', deps_type=Deps)

@agent.system_prompt
async def system_prompt_with_memory(ctx: RunContext[Deps]) -> str:
    """Compose the system prompt from whatever Zep knows about the user."""
    session_memory = await ctx.deps.zep.memory.get(session_id=ctx.deps.session_id)
    known = session_memory.context
    if not known:
        known = 'No previous context.'
    return f'You are a helpful assistant.\n\nUser context:\n{known}'
@agent.tool
async def remember_fact(ctx: RunContext[Deps], fact: str) -> str:
    """Store an important fact about the user for future reference."""
    # NOTE: the docstring above doubles as the tool description the model
    # sees, so it is kept verbatim.
    from zep_cloud.types import Message
    note = Message(role='system', role_type='system', content=f'Important: {fact}')
    await ctx.deps.zep.memory.add(
        session_id=ctx.deps.session_id,
        messages=[note],
    )
    return f'Remembered: {fact}'
async def main():
    """Demo entry point: run the agent once with Zep-backed dependencies."""
    client = AsyncZep(api_key='YOUR_ZEP_API_KEY')
    run_deps = Deps(zep=client, user_id='user-123', session_id='session-abc')
    result = await agent.run('I always prefer morning meetings.', deps=run_deps)
    print(result.data)
Debugging memory retrieval
When Zep memory does not seem to be working, check these things in order:
- Confirm the session_id matches across your add() and get() calls
- Check that you have added at least 2-3 message exchanges — Zep needs a minimum of context to extract facts
- Use client.memory.get_session(session_id=...) to verify the session exists
- In Zep Cloud, open the dashboard and inspect the session's extracted facts
- For self-hosted, check the NLP extractor service logs — fact extraction runs asynchronously
# Debug: check what Zep has extracted
async def debug_zep_memory(session_id: str):
    """Print what Zep currently holds for a session: facts, summary, context.

    Robustness fix: the original called ``len(memory.facts)`` and formatted
    ``f.rating`` with ``:.2f`` unguarded. Both can be None (facts before
    extraction has completed, rating when Zep has not scored a fact), which
    crashed the debug helper exactly when you most need it. Guard both.
    """
    memory = await zep.memory.get(session_id=session_id)
    print('--- Zep Memory Debug ---')
    facts = memory.facts or []
    print(f'Facts ({len(facts)})')
    for f in facts:
        rating = f'{f.rating:.2f}' if f.rating is not None else 'n/a'
        print(f' [{rating}] {f.fact}')
    print(f'Summary: {memory.summary.content if memory.summary else "none"}')
    print(f'Context length: {len(memory.context or "")} chars')