Learn 🧠 All Concepts (20) 🤖 What is an LLM? 📚 RAG Explained ⚡ AI Agents 💻 Run AI Locally 🇮🇳 AI in India 📖 Learn Tracks 🔧 DevOps Track ⚙️ AI Ops Track 🗺️ AI Engineer Roadmap
Tools 🔧 AI Tools Directory 🔓 Open Source AI ⭐ Top GitHub Repos ✦ Claude Skill Repos 🚀 Ready-to-Deploy Projects
Build 🏗️ Build Hub 🎯 Master Prompts 🧩 RAG Agents 🚀 App Megaprompts
Workflows ⚡ All Workflows (22) 🎥 Text to Video 🎞️ Image to Video 🔊 Text to Speech ♻️ Automation
Resources 🧪 Colab Notebooks ⚙️ n8n Workflows 📈 Algo Trading 💰 Passive Income
🗂️ Browse All Topics About AItheGuru
← RAG agents
📈 RAG Agent · Finance

NSE Stock Research RAG Agent

Indexes annual reports, earnings transcripts, and news. Ask "What did the Reliance MD say about Jio in the last 3 earnings calls?" with cited answers.

Finance · Advanced · Streamlit Cloud (free) · Hugging Face Spaces

Quick info

Category: Finance
Difficulty: Advanced
Deploy on: Streamlit Cloud (free)

Get the code

Includes install commands in comments

What it does

Indexes PDF annual reports
Live NSE price data
Earnings call transcripts
Competitor comparison

Stack

LangChain · OpenAI · Pinecone · yfinance · Streamlit

Deploy on

✓ Streamlit Cloud (free) · ✓ Hugging Face Spaces · ✓ Railway

Full source code

Install commands are in the top comments. Copy and run.

# NSE Stock Research RAG Agent
# Stack: LangChain + OpenAI + Pinecone + yfinance + Streamlit
# pip install langchain langchain-openai langchain-community langchain-pinecone pinecone-client
#             yfinance streamlit pypdf requests beautifulsoup4
#
# Required environment variables: OPENAI_API_KEY, PINECONE_API_KEY
# (langchain-pinecone reads PINECONE_API_KEY from the environment itself.)

import os
import tempfile
from datetime import datetime

import requests
import streamlit as st
import yfinance as yf
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

# ── INIT ──────────────────────────────────────────────────────────
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_KEY = os.environ.get("PINECONE_API_KEY")
INDEX_NAME = "stock-research"
# Must match the vector size produced by the embedding model in use.
EMBEDDING_DIMENSION = 1536
# Upper bound on the number of source documents listed under each answer.
MAX_SOURCES_SHOWN = 4


@st.cache_resource
def init_pinecone():
    """Create the Pinecone index if it is missing and return a handle to it.

    Cached with ``st.cache_resource`` so the existence check runs once per
    server process rather than on every Streamlit rerun.
    """
    pc = Pinecone(api_key=PINECONE_KEY)
    if INDEX_NAME not in pc.list_indexes().names():
        pc.create_index(
            name=INDEX_NAME,
            dimension=EMBEDDING_DIMENSION,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
    return pc.Index(INDEX_NAME)


@st.cache_resource
def _init_shared_components():
    """Build the stateless, shareable pieces: the vector store and the LLM.

    These hold no per-user data, so sharing them across sessions is safe.
    """
    init_pinecone()  # make sure the index exists before the store connects
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)
    vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
    llm = ChatOpenAI(model="gpt-4o", temperature=0, openai_api_key=OPENAI_KEY)
    return vectorstore, llm


def init_chain():
    """Return a new conversational retrieval chain with its own chat memory.

    Deliberately NOT wrapped in ``st.cache_resource``: cached resources are
    shared by every user session, which would leak one user's conversation
    history into another's. Callers should keep the returned chain in
    ``st.session_state`` so each session gets a private memory.
    """
    vectorstore, llm = _init_shared_components()
    memory = ConversationBufferWindowMemory(
        memory_key="chat_history",
        k=5,
        return_messages=True,
        output_key="answer",
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
        memory=memory,
        return_source_documents=True,
        verbose=False,
        combine_docs_chain_kwargs={"prompt": get_stock_prompt()},
    )


def get_stock_prompt():
    """Return the answer-synthesis prompt (expects ``context`` and ``question``)."""
    from langchain.prompts import PromptTemplate

    template = """You are a senior equity research analyst specialising in Indian markets (NSE/BSE).
Answer using ONLY the provided context from annual reports and filings.
If data isn't in the context, say "Not found in indexed documents" — never guess.
Always cite: which document, which year, and direct quotes where possible.

Context:
{context}

Question: {question}

Analysis:"""
    return PromptTemplate(template=template, input_variables=["context", "question"])


# ── LIVE STOCK DATA ────────────────────────────────────────────────
def get_stock_summary(symbol: str) -> dict:
    """Get live NSE stock data for ``symbol`` (the ``.NS`` suffix is added here).

    Returns a dict of headline metrics. Any field the data source does not
    provide is ``None``, except ``market_cap_cr`` which falls back to ``0``.
    """
    ticker = yf.Ticker(f"{symbol}.NS")
    info = ticker.info
    hist = ticker.history(period="1y")

    # The key can be present with a None value, so `.get(key, 0)` is not enough.
    market_cap = info.get("marketCap") or 0

    one_year_return = None
    if not hist.empty:
        closes = hist["Close"]
        one_year_return = round((closes.iloc[-1] / closes.iloc[0] - 1) * 100, 1)

    return {
        "name": info.get("longName", symbol),
        "price": info.get("currentPrice"),
        "pe_ratio": info.get("trailingPE"),
        "pb_ratio": info.get("priceToBook"),
        "market_cap_cr": round(market_cap / 1e7, 0),  # rupees -> crore
        "52w_high": info.get("fiftyTwoWeekHigh"),
        "52w_low": info.get("fiftyTwoWeekLow"),
        "revenue_growth": info.get("revenueGrowth"),
        "roe": info.get("returnOnEquity"),
        "analyst_rating": info.get("recommendationKey"),
        "1y_return": one_year_return,
    }


# ── INDEX DOCUMENTS ────────────────────────────────────────────────
def index_pdf(uploaded_file):
    """Index an uploaded PDF (annual report, transcript) into Pinecone.

    Args:
        uploaded_file: seekable file-like object with a ``name`` attribute,
            such as the value returned by ``st.file_uploader``.

    Returns:
        Number of chunks written to the index (``0`` if no text was extracted).
    """
    init_pinecone()  # make sure the target index exists before upserting
    embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)

    # The same upload object can be handed back on a later rerun with its
    # cursor at EOF; rewind so a second "Index PDF" click still sees the bytes.
    uploaded_file.seek(0)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name

    try:
        documents = PyPDFLoader(tmp_path).load()
    finally:
        # Always remove the temp copy, even when parsing fails.
        os.unlink(tmp_path)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", ". ", " "],
    )
    chunks = splitter.split_documents(documents)
    if not chunks:
        return 0

    # Add filename and timestamp to metadata so answers can cite their source.
    indexed_at = datetime.now().isoformat()
    for chunk in chunks:
        chunk.metadata["source_file"] = uploaded_file.name
        chunk.metadata["indexed_at"] = indexed_at

    PineconeVectorStore.from_documents(chunks, embeddings, index_name=INDEX_NAME)
    return len(chunks)


# ── UI HELPERS ─────────────────────────────────────────────────────
def _format_metric(value, template):
    """Format ``value`` with ``template``, or return "N/A" when it is None.

    Uses an explicit None check so a legitimate 0 is still displayed.
    """
    return "N/A" if value is None else template.format(value)


def _render_sources(sources, label="sources"):
    """Render a collapsible list of cited source documents (no-op if empty)."""
    if not sources:
        return
    with st.expander(f"📚 {len(sources)} {label}"):
        for src in sources:
            page = src.metadata.get("page")
            # NOTE(review): PyPDFLoader page indices appear to be zero-based and
            # may come back from Pinecone as floats — confirm; shown one-based.
            page_label = int(page) + 1 if isinstance(page, (int, float)) else "?"
            st.caption(f"📄 {src.metadata.get('source_file', 'Unknown')} — p.{page_label}")


# ── STREAMLIT UI ───────────────────────────────────────────────────
st.set_page_config(page_title="Stock Research Agent", layout="wide", page_icon="📈")
st.title("📈 NSE Stock Research Agent")
st.caption("Ask questions across annual reports, earnings transcripts, and filings")

if not (OPENAI_KEY and PINECONE_KEY):
    # Live price data still works without keys, so warn instead of stopping.
    st.warning("Set OPENAI_API_KEY and PINECONE_API_KEY to enable indexing and chat.")

# Sidebar: Stock live data + PDF upload
with st.sidebar:
    st.subheader("📊 Live Data")
    symbol = st.text_input("NSE Symbol", "RELIANCE").strip().upper()
    if st.button("Fetch Data"):
        data = None
        with st.spinner("Fetching..."):
            try:
                data = get_stock_summary(symbol)
            except Exception as exc:  # UI boundary: report instead of crashing
                st.error(f"Could not fetch data for {symbol}: {exc}")
        if data is not None:
            roe = data["roe"]
            st.metric("Price", _format_metric(data["price"], "₹{:,}"))
            st.metric("P/E Ratio", _format_metric(data["pe_ratio"], "{:.2f}"))
            # A market cap of 0 means "unknown" (see get_stock_summary).
            st.metric("Market Cap", _format_metric(data["market_cap_cr"] or None, "₹{:,.0f} Cr"))
            st.metric("1Y Return", _format_metric(data["1y_return"], "{}%"))
            st.metric("ROE", _format_metric(None if roe is None else round(roe * 100, 1), "{}%"))

    st.divider()
    st.subheader("📄 Index Documents")
    uploaded = st.file_uploader("Upload Annual Report / Transcript PDF", type=["pdf"])
    if uploaded and st.button("Index PDF"):
        with st.spinner("Indexing..."):
            try:
                chunks = index_pdf(uploaded)
            except Exception as exc:  # UI boundary: report instead of crashing
                chunks = None
                st.error(f"Indexing failed for {uploaded.name}: {exc}")
        if chunks:
            st.success(f"✅ Indexed {chunks} chunks from {uploaded.name}")
        elif chunks == 0:
            st.warning(f"No extractable text found in {uploaded.name}")

# Main chat interface
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])
        _render_sources(msg.get("sources"))

# Chat input
if prompt := st.chat_input("Ask about any indexed stock documents..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Researching..."):
            try:
                # One chain (and therefore one chat memory) per browser session.
                if "chain" not in st.session_state:
                    st.session_state.chain = init_chain()
                result = st.session_state.chain.invoke({"question": prompt})
            except Exception as exc:  # UI boundary: report instead of crashing
                st.error(f"Research failed: {exc}")
                st.stop()
        answer = result["answer"]
        # Store exactly what is shown so history matches the live view.
        sources = result.get("source_documents", [])[:MAX_SOURCES_SHOWN]
        st.markdown(answer)
        _render_sources(sources, label="sources used")

    st.session_state.messages.append({
        "role": "assistant",
        "content": answer,
        "sources": sources,
    })