Learn 🧠 All Concepts (20) 🤖 What is an LLM? 📚 RAG Explained ⚡ AI Agents 💻 Run AI Locally 🇮🇳 AI in India 📖 Learn Tracks 🔧 DevOps Track ⚙️ AI Ops Track 🗺️ AI Engineer Roadmap
Tools 🔧 AI Tools Directory 🔓 Open Source AI ⭐ Top GitHub Repos ✦ Claude Skill Repos 🚀 Ready-to-Deploy Projects
Build 🏗️ Build Hub 🎯 Master Prompts 🧩 RAG Agents 🚀 App Megaprompts
Workflows ⚡ All Workflows (22) 🎥 Text to Video 🎞️ Image to Video 🔊 Text to Speech ♻️ Automation
Resources 🧪 Colab Notebooks ⚙️ n8n Workflows 📈 Algo Trading 💰 Passive Income
🗂️ Browse All Topics About AItheGuru
← RAG agents
⚖️ RAG Agent · Legal

Legal Document Analysis Agent

Upload contracts and legal documents. Ask "What are the termination clauses?" or "Compare these two agreements on IP ownership." Saves hours of manual review.

Legal · Intermediate · Local · Streamlit Cloud

Quick info

Category: Legal
Difficulty: Intermediate
Deploy on: Local

Get the code

Includes install commands in comments

What it does

Multi-document comparison
Clause extraction
Risk flagging
Plain-English summaries

Stack

Claude API · LlamaIndex · ChromaDB · Streamlit

Deploy on

✓ Local · ✓ Streamlit Cloud · ✓ Hugging Face Spaces

Full source code

Install commands are in the top comments. Copy and run.

# Legal Document RAG Agent
# Uses Claude for its 200k context window — ideal for long legal documents
# Stack: Anthropic Claude + LlamaIndex + ChromaDB + Streamlit
# pip install anthropic llama-index llama-index-llms-anthropic
#   llama-index-embeddings-huggingface llama-index-vector-stores-chroma
#   chromadb streamlit pypdf
import os
import tempfile
from pathlib import Path

import chromadb
import streamlit as st
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.anthropic import Anthropic
from llama_index.vector_stores.chroma import ChromaVectorStore

# Page config goes first: Streamlit expects it before any other st.* call
# (including the spinner a cached resource may display on first load).
st.set_page_config(page_title="Legal Document Agent", layout="wide", page_icon="⚖️")

# ── SETUP ─────────────────────────────────────────────────────────
ANTHROPIC_KEY = os.environ.get("ANTHROPIC_API_KEY")
if not ANTHROPIC_KEY:
    # Fail early with a readable message instead of an auth error at query time.
    st.error("ANTHROPIC_API_KEY is not set. Export it and restart the app.")
    st.stop()

# ── LEGAL ANALYSIS PROMPTS ─────────────────────────────────────────
LEGAL_SYSTEM_PROMPT = """You are an expert legal analyst. When analysing documents:
1. Quote exact clauses when answering — use quotation marks
2. Flag potential risks or unusual terms with ⚠️
3. Clearly state if something is NOT found in the documents
4. Compare documents objectively when asked
5. Use plain English — explain legal jargon when you use it
6. Always cite which document and which section your answer is from

IMPORTANT: This is for informational purposes only. Always recommend consulting a qualified lawyer for legal decisions."""

ANALYSIS_TEMPLATES = {
    "Summary": "Provide a comprehensive summary of this document including: parties involved, key obligations, important dates, payment terms, and any unusual clauses.",
    "Risk Analysis": "Identify all potential risks and unfavourable terms in this contract. Flag anything that could be problematic. Rate each risk as High/Medium/Low.",
    "Termination Clauses": "Extract and explain all termination clauses, including notice periods, grounds for termination, and consequences.",
    "IP & Confidentiality": "Extract all clauses related to intellectual property, ownership, and confidentiality obligations.",
    "Payment Terms": "Extract all payment-related clauses including amounts, schedules, penalties, and dispute processes.",
}


# Use local HuggingFace embeddings (free) + Claude for LLM
@st.cache_resource
def init_components():
    """Configure the global LLM/embedding settings and open the Chroma store.

    Cached with ``st.cache_resource`` so the body runs once per process
    rather than on every Streamlit rerun.

    Returns:
        A ``(vector_store, storage_context, collection)`` tuple backed by the
        persistent ``./legal_chroma`` database.
    """
    Settings.llm = Anthropic(
        model="claude-sonnet-4-5",  # Best for long docs
        api_key=ANTHROPIC_KEY,
        max_tokens=4096,
        # The system prompt is attached to the LLM itself: passing
        # ``system_prompt`` to ``as_query_engine`` is not a supported option
        # and the instructions would not reach the model.
        system_prompt=LEGAL_SYSTEM_PROMPT,
    )
    # Free local embeddings - no API cost
    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

    chroma_client = chromadb.PersistentClient(path="./legal_chroma")
    collection = chroma_client.get_or_create_collection("legal_docs")
    vector_store = ChromaVectorStore(chroma_collection=collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return vector_store, storage_context, collection


vector_store, storage_context, collection = init_components()


# ── INDEX DOCUMENTS ────────────────────────────────────────────────
def index_documents(uploaded_files):
    """Write the uploads to a temp directory, parse them, and index them.

    Args:
        uploaded_files: Streamlit uploaded-file objects (PDF or text).

    Returns:
        A ``(index, document_count)`` tuple, where ``document_count`` is the
        number of documents produced by the reader.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir)
        for f in uploaded_files:
            # Keep only the basename so a crafted upload name cannot write
            # outside the temporary directory.
            dest = tmp_path / Path(f.name).name
            # getvalue() returns the full payload regardless of the current
            # stream position, so re-indexing after a rerun still works.
            dest.write_bytes(f.getvalue())

        documents = SimpleDirectoryReader(
            tmp_dir, required_exts=[".pdf", ".txt"]
        ).load_data()

        # NOTE(review): "session_id" is never set in the code visible here,
        # so this is always "" — confirm whether a timestamp was intended.
        session_tag = str(st.session_state.get("session_id", ""))
        for doc in documents:
            doc.metadata["uploaded_at"] = session_tag

        index = VectorStoreIndex.from_documents(
            documents,
            storage_context=storage_context,
            show_progress=False,
        )
    return index, len(documents)


# ── QUERY ─────────────────────────────────────────────────────────
def query_legal(question: str, index):
    """Run ``question`` against ``index`` and collect the cited sources.

    Args:
        question: Natural-language question or analysis prompt.
        index: Index returned by ``index_documents``.

    Returns:
        A ``(answer_text, sources)`` tuple; each source is a dict with
        ``file``, ``page`` and ``score`` keys.
    """
    query_engine = index.as_query_engine(similarity_top_k=8)
    response = query_engine.query(question)
    sources = [
        {
            "file": node.metadata.get("file_name", "Document"),
            "page": node.metadata.get("page_label", "?"),
            # score may be None for some retrievers; treat that as 0
            "score": round(node.score or 0, 3),
        }
        for node in response.source_nodes
    ]
    return str(response), sources


# ── STREAMLIT UI ───────────────────────────────────────────────────
def render_sources(sources):
    """Show the retrieved sources in an expander; no-op when there are none.

    Used for both the live answer and the replayed history so the two views
    list the same entries in the same format.
    """
    if not sources:
        return
    with st.expander(f"📚 {len(sources)} source sections"):
        for s in sources:
            st.caption(f"📄 {s['file']} (p.{s['page']}) — score: {s['score']}")


st.title("⚖️ Legal Document Analysis Agent")
st.caption("Upload contracts and legal documents — ask questions in plain English")

if "messages" not in st.session_state:
    st.session_state.messages = []
if "index" not in st.session_state:
    st.session_state.index = None

# Sidebar
with st.sidebar:
    st.subheader("📄 Upload Documents")
    uploads = st.file_uploader(
        "Upload PDF contracts or text files",
        type=["pdf", "txt"],
        accept_multiple_files=True,
    )
    if uploads and st.button("Index Documents", type="primary"):
        with st.spinner("Analysing documents..."):
            idx, count = index_documents(uploads)
            st.session_state.index = idx
        st.success(f"✅ Indexed {len(uploads)} documents ({count} sections)")

    st.divider()
    st.subheader("⚡ Quick Analysis")
    for template_name, template_prompt in ANALYSIS_TEMPLATES.items():
        if st.button(f"📋 {template_name}"):
            # Picked up below as if the user had typed the prompt.
            st.session_state.pending_query = template_prompt

# Main chat
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
        render_sources(msg.get("sources"))

pending = st.session_state.pop("pending_query", None)
prompt = st.chat_input("Ask about your legal documents...") or pending

if prompt:
    if st.session_state.index is None:
        st.warning("⚠️ Please upload and index documents first using the sidebar.")
    else:
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Analysing..."):
                answer, sources = query_legal(prompt, st.session_state.index)
            st.markdown(answer)
            render_sources(sources)

        st.session_state.messages.append({
            "role": "assistant",
            "content": answer,
            "sources": sources,
        })