Category: langchain

  • phi3_local_rag

    Install Ollama

    ollama run phi3

    ollama pull nomic-embed-text

    pip install langchain langchain-community langchain-text-splitters langchain-experimental chromadb unstructured
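
    Before indexing, it can help to confirm Ollama is reachable and the embedding model responds.
    A minimal sanity-check sketch (assumes Ollama is running locally on its default port; the
    768-dimension expectation is specific to nomic-embed-text):

    from langchain_community.embeddings import OllamaEmbeddings

    emb = OllamaEmbeddings(model="nomic-embed-text")
    vec = emb.embed_query("hello world")
    print(len(vec))  # nomic-embed-text should return a 768-dimensional vector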

    indexer.py
    
    from langchain_experimental.text_splitter import SemanticChunker
    from langchain_text_splitters import RecursiveCharacterTextSplitter
    
    
    from langchain_community.document_loaders import DirectoryLoader
    from langchain_community.embeddings import OllamaEmbeddings
    from langchain_community.vectorstores import Chroma
    
    # Load documents from a directory
    loader = DirectoryLoader("./places_transcripts", glob="**/*.txt")
    
    print("dir loaded loader")
    
    documents = loader.load()
    
    print(len(documents))
    
    # Create embeddings
    embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
    
    # Alternative: semantic chunking (requires langchain_experimental)
    # text_splitter = SemanticChunker(embeddings, breakpoint_threshold_type="interquartile")
    
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300,
        add_start_index=True,
    )
    
    # Split documents into chunks
    texts = text_splitter.split_documents(documents)
    
    # Create vector store and persist it to disk
    vectorstore = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory="./db-place",
    )
    
    print("vectorstore created")
    ollama_phi3_rag.py
    
    
    from langchain_community.embeddings import OllamaEmbeddings
    from langchain_community.vectorstores import Chroma
    from langchain_community.chat_models import ChatOllama
    
    from langchain.prompts import ChatPromptTemplate
    from langchain.schema.runnable import RunnablePassthrough
    from langchain.schema.output_parser import StrOutputParser
    
    
    # Create embeddings
    embeddings = OllamaEmbeddings(model="nomic-embed-text", show_progress=False)
    
    db = Chroma(persist_directory="./db-place",
                embedding_function=embeddings)
    
    # Create retriever
    retriever = db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5}
    )
    
    # Create Ollama language model - phi3
    local_llm = 'phi3'
    
    llm = ChatOllama(model=local_llm,
                     keep_alive="3h",
                     num_predict=512,  # Ollama's equivalent of max_tokens
                     temperature=0)
    
    # Create prompt template
    template = """Answer the question based only on the following context and extract a meaningful answer. \
    Please write in full sentences with correct spelling and punctuation. If it makes sense, use lists. \
    If the context doesn't contain the answer, just respond that you are unable to find an answer.
    
    CONTEXT: {context}
    
    QUESTION: {question}
    
    ANSWER:"""
    prompt = ChatPromptTemplate.from_template(template)
    
    # Create the RAG chain using LCEL with streaming output
    rag_chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    
    # Function to ask questions (streams the answer as it is generated)
    def ask_question(question):
        print("Answer:\n\n", end=" ", flush=True)
        for chunk in rag_chain.stream(question):
            print(chunk, end="", flush=True)
        print("\n")
    
    # Example usage
    if __name__ == "__main__":
        while True:
            user_question = input("Ask a question (or type 'quit' to exit): ")
            if user_question.lower() == 'quit':
                break
            ask_question(user_question)