Install Ollama
ollama run phi3
ollama pull nomic-embed-text
pip install langchain_experimental
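The scripts below also import from langchain, langchain_community, and langchain_text_splitters, and use Chroma as the vector store, so a few extra packages are typically needed (exact package names depend on your LangChain version; treat this as a suggested addition rather than part of the original setup):

pip install langchain langchain_community langchain_text_splitters chromadb unstructured

Running ollama list afterwards is an easy way to confirm that phi3 and nomic-embed-text are available locally.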
indexer.py
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
# Load documents from a directory
loader = DirectoryLoader("./places_transcripts", glob="**/*.txt")
print("dir loaded loader")
documents = loader.load()
print(len(documents))
# Create embeddings
embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)

# Optional: semantic chunking instead of the recursive splitter
# text_splitter = SemanticChunker(embeddings, breakpoint_threshold_type="interquartile")

# Create text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=300,
    add_start_index=True,
)

# Split documents into chunks
texts = text_splitter.split_documents(documents)

# Create vector store
vectorstore = Chroma.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory="./db-place",
)
print("vectorstore created")
ollama_phi3_rag.py
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
# Create embeddings
embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=False)

db = Chroma(persist_directory="./db-place",
            embedding_function=embeddings)

# Create retriever
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)
# Create Ollama language model - phi3
local_llm = 'phi3'
llm = ChatOllama(model=local_llm,
                 keep_alive="3h",
                 max_tokens=512,
                 temperature=0)
# Create prompt template
template = """<bos><start_of_turn>user\nAnswer the question based only on the following context and extract out a meaningful answer. \
Please write in full sentences with correct spelling and punctuation. if it makes sense use lists. \
If the context doen't contain the answer, just respond that you are unable to find an answer. \
CONTEXT: {context}
QUESTION: {question}
<end_of_turn>
<start_of_turn>model\n
ANSWER:"""
prompt = ChatPromptTemplate.from_template(template)
# Create the RAG chain using LCEL with streaming output
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)
# Function to ask questions
def ask_question(question):
    print("Answer:\n\n", end=" ", flush=True)
    for chunk in rag_chain.stream(question):
        print(chunk.content, end="", flush=True)
    print("\n")
# Example usage
if __name__ == "__main__":
    while True:
        user_question = input("Ask a question (or type 'quit' to exit): ")
        if user_question.lower() == 'quit':
            break
        ask_question(user_question)