What it does
Aggregates 20+ RSS feeds
Hourly background updates
Semantic search with citations
Source URL in every answer
FAISS vector index
Stack
Python · feedparser · OpenAI · FAISS
Deploy on
✓ Google Colab ✓ Render ✓ Cron job
Full source code
Install commands are in the top comments. Copy and run.
import feedparser, hashlib
from openai import OpenAI
import numpy as np, faiss
# Shared OpenAI client used by fetch() and query() for embeddings and by
# query() for chat completions.
# NOTE(review): constructed with no arguments, so credentials must come from
# the SDK's defaults (presumably the OPENAI_API_KEY environment variable) —
# confirm before deploying.
client = OpenAI()
# Feed URLs polled by fetch(); append more URLs here to widen coverage.
FEEDS = [
    "https://economictimes.indiatimes.com/markets/rssfeeds/1977021501.cms",
    "https://www.moneycontrol.com/rss/business.xml",
    "https://venturebeat.com/category/ai/feed/",
]

# In-memory state shared by fetch() and query(). Nothing is persisted, so the
# index is rebuilt from scratch every time the process starts.
store = {}      # article uid (md5 of its link) -> {'title', 'url', 'text'}
ids = []        # article uids in insertion order; position i is FAISS row i
findex = None   # FAISS index; created by fetch() the first time it adds vectors
def fetch():
    """Pull unseen articles from every feed in FEEDS and add them to the index.

    For each feed, the first 15 entries are considered. An entry is keyed by
    the MD5 hex digest of its link; entries whose key is already in ``store``
    (or was already seen earlier in this run) are skipped, as are entries
    without a link. The text that gets embedded is the title followed by the
    first 400 characters of the summary.

    New articles are staged locally and only committed to ``findex``, ``ids``
    and ``store`` after all their embeddings have been obtained. If an
    embedding request fails, the exception propagates and no shared state is
    touched, so the same articles are retried on the next call instead of
    being left in ``store`` without a vector.

    Side effects:
        Creates ``findex`` on first use and appends to ``findex``, ``ids``
        and ``store``. Performs network requests to the feeds and to the
        OpenAI embeddings endpoint.
    """
    global findex

    # uid -> article record, staged until the embeddings are available.
    pending = {}
    for url in FEEDS:
        for e in feedparser.parse(url).entries[:15]:
            link = e.get('link')
            if not link:
                # Without a link there is nothing to cite or de-duplicate on.
                continue
            uid = hashlib.md5(link.encode()).hexdigest()
            if uid in store or uid in pending:
                continue
            title = e.get('title', '')
            text = f"{title}. {e.get('summary','')[:400]}"
            pending[uid] = {'title': title, 'url': link, 'text': text}

    if not pending:
        return

    # Embed in batches: one request per chunk instead of one per article.
    batch_size = 100
    uids = list(pending)
    vectors = []
    for start in range(0, len(uids), batch_size):
        chunk = uids[start:start + batch_size]
        resp = client.embeddings.create(
            input=[pending[u]['text'] for u in chunk],
            model='text-embedding-3-small',
        )
        # Order by the returned index so vectors line up with `chunk`.
        ordered = sorted(resp.data, key=lambda d: d.index)
        vectors.extend(d.embedding for d in ordered)

    m = np.array(vectors, dtype='float32')
    # Unit-length vectors make the inner-product index rank by cosine similarity.
    faiss.normalize_L2(m)
    if findex is None:
        # Take the dimension from the data rather than hard-coding it.
        findex = faiss.IndexFlatIP(m.shape[1])

    # Commit last, index first: if add() raises, ids/store stay consistent.
    findex.add(m)
    ids.extend(uids)
    store.update(pending)
def query(question, k=6):
    """Answer a question from the indexed articles and list the sources used.

    The question is embedded with the same model used for the articles,
    normalised, and searched against ``findex`` for up to ``k`` nearest
    articles. Their titles and texts are passed to the chat model as context.

    Args:
        question: Natural-language question to answer.
        k: Maximum number of articles to retrieve as context.

    Returns:
        A dict with ``'answer'`` (the model's reply text) and ``'sources'``
        (the URLs of the retrieved articles, in retrieval order). When nothing
        has been indexed yet, ``'sources'`` is empty and no API call is made.
    """
    # Nothing indexed yet: avoid calling .search on None or an empty index.
    if findex is None or findex.ntotal == 0:
        return {'answer': 'No articles have been indexed yet.', 'sources': []}

    resp = client.embeddings.create(input=question, model='text-embedding-3-small')
    q = np.array(resp.data[0].embedding, dtype='float32').reshape(1, -1)
    faiss.normalize_L2(q)

    # Never ask for more neighbours than the index holds.
    _, found = findex.search(q, min(k, findex.ntotal))
    # FAISS pads missing hits with -1; a bare `i < len(ids)` would let -1
    # through and silently pick the last article via negative indexing.
    arts = [store[ids[i]] for i in found[0] if 0 <= i < len(ids)]

    ctx = '\n\n'.join(f"[{a['title']}]: {a['text']}" for a in arts)
    ans = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[
            {'role': 'system', 'content': 'Answer from news. Cite sources.'},
            {'role': 'user', 'content': f'Articles:\n{ctx}\n\nQ: {question}'},
        ],
    )
    return {
        'answer': ans.choices[0].message.content,
        'sources': [a['url'] for a in arts],
    }
# Populate the index once when the script is run/imported so query() has
# articles to search. Nothing in the code shown here schedules further
# refreshes; a periodic update has to call fetch() again from outside.
fetch()