What it does
Whisper transcription pipeline
Cross-call pattern analysis
Objection extraction
Rep performance analysis
pgvector semantic search
Stack
Python, Whisper, LangChain, PostgreSQL + pgvector
Deploy on
✓ PostgreSQL on Render ✓ Railway
Full source code
Install commands are in the top comments. Copy and run.
import whisper, os
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import PGVector
from langchain.schema import Document
from langchain.chains import RetrievalQA
# Connection string for the Postgres/pgvector store, read from the PG_CONN
# environment variable; None when the variable is not set.
PG = os.environ.get('PG_CONN')

# Whisper speech-to-text model ('base' checkpoint), loaded once at import time
# and shared by every transcription call below.
wm = whisper.load_model('base')
def _chunk_words(words, size, overlap):
    """Split *words* into space-joined windows of at most *size* words.

    Consecutive windows share *overlap* words. Iteration stops as soon as a
    window reaches the end of the list, so no trailing chunk is produced that
    is already fully contained in the previous one.
    """
    stride = size - overlap
    chunks = []
    for start in range(0, len(words), stride):
        chunks.append(' '.join(words[start:start + size]))
        if start + size >= len(words):
            break
    return chunks


def index_call(audio_file, meta, *, chunk_size=300, overlap=20):
    """Transcribe one call recording and add its chunks to the vector store.

    The audio is transcribed with the module-level Whisper model, the
    transcript is split on whitespace into overlapping word windows, and each
    window is stored as a Document in the 'sales_calls' PGVector collection
    (added with pre_delete_collection=False).

    Args:
        audio_file: Path of the recording handed to Whisper's ``transcribe``.
        meta: Mapping copied into every chunk's metadata; a 'chunk' key
            holding the chunk's position is added to each copy.
        chunk_size: Maximum number of words per chunk (default 300).
        overlap: Number of words shared by consecutive chunks (default 20,
            i.e. the original stride of 280 words).

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            in ``[0, chunk_size)``.
    """
    import logging  # local import: the file's import header is left untouched

    # Validate before the (slow) transcription so bad arguments fail fast.
    if chunk_size <= 0 or not 0 <= overlap < chunk_size:
        raise ValueError('chunk_size must be positive and 0 <= overlap < chunk_size')

    transcript = wm.transcribe(audio_file)['text']
    chunks = _chunk_words(transcript.split(), chunk_size, overlap)
    if not chunks:
        # Nothing was transcribed; skip the embedding/insert call rather than
        # sending an empty batch to the embedding API and the database.
        logging.getLogger(__name__).warning(
            'No speech transcribed from %s; nothing indexed', audio_file
        )
        return

    docs = [
        Document(page_content=text, metadata={**meta, 'chunk': idx})
        for idx, text in enumerate(chunks)
    ]
    PGVector.from_documents(
        docs,
        OpenAIEmbeddings(),
        collection_name='sales_calls',
        connection_string=PG,
        pre_delete_collection=False,
    )
def query_calls(question):
    """Answer *question* from the indexed call transcripts.

    Opens the 'sales_calls' PGVector collection, retrieves chunks through its
    retriever with k=6, and has gpt-4o (temperature 0) answer through a
    RetrievalQA chain.

    Args:
        question: Natural-language question, passed to the chain as 'query'.

    Returns:
        A dict with two keys:
          'answer': the chain's 'result' value.
          'reps': the distinct 'rep_name' metadata values of the source
              chunks, in first-seen order; chunks without a 'rep_name'
              contribute nothing.
    """
    store = PGVector(
        collection_name='sales_calls',
        connection_string=PG,
        embedding_function=OpenAIEmbeddings(),
    )
    chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model='gpt-4o', temperature=0),
        retriever=store.as_retriever(search_kwargs={'k': 6}),
        return_source_documents=True,
    )
    result = chain.invoke({'query': question})

    rep_names = (doc.metadata.get('rep_name') for doc in result['source_documents'])
    # dict.fromkeys de-duplicates while keeping first-seen order, so the list
    # is stable between runs (set iteration order is not) and never holds None.
    reps = list(dict.fromkeys(name for name in rep_names if name is not None))
    return {'answer': result['result'], 'reps': reps}
if __name__ == '__main__':
    # Demo run: index one sample recording, then ask a question across the
    # indexed calls. Guarded so that importing this module does not run the
    # demo transcription, embedding, and LLM calls.
    index_call('call.mp3', {'rep_name': 'Rahul', 'outcome': 'closed', 'deal_size': 150000})
    print(query_calls('What objections come up about pricing?'))