annas4421 committed on
Commit
346b8db
•
1 Parent(s): 6d31911

Upload 3 files

Browse files
Files changed (3) hide show
  1. Ingest.py +18 -0
  2. app.py +128 -0
  3. config.py +1 -0
Ingest.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Build the FAISS vector index from the PDF corpus.

Loads every PDF found directly under ``DATA_DIR``, splits the pages into
overlapping text chunks, embeds the chunks with a pinned revision of the
nomic embedding model, and writes the resulting FAISS index to ``INDEX_DIR``.
"""
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

DATA_DIR = "/content/data"
INDEX_DIR = "/content/ipc_vector_db"

loader = DirectoryLoader(DATA_DIR, glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# Fail early with a readable message: building a FAISS index from zero chunks
# otherwise dies deep inside the vector-store code with an opaque error.
if not texts:
    raise FileNotFoundError(
        f"No text chunks could be produced from the PDFs in {DATA_DIR!r}; "
        "nothing to index."
    )

# The revision is pinned because trust_remote_code executes code shipped with
# the model repository; pinning keeps that code from changing underneath us.
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1",
    model_kwargs={
        "trust_remote_code": True,
        "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7",
    },
)

# Create the vector embeddings and store them in a FAISS index.
faiss_db = FAISS.from_documents(texts, embeddings)

# Save the index to disk so it can be loaded later without re-embedding.
faiss_db.save_local(INDEX_DIR)
app.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit chat front-end for the LawGPT penal-code assistant.

On start-up the app indexes the PDFs under ``DATA_DIR`` into a Chroma vector
store, then answers chat questions with a conversational retrieval chain
backed by a Mistral-7B-Instruct model served through the Together API.

Streamlit re-executes this whole script on every user interaction, so the
expensive steps (loading the embedding model, building the vector store) are
wrapped in ``st.cache_resource`` and run only once per server process.
"""
import os
import time

import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_together import Together

from config import together_api

DATA_DIR = "/content/data"
PERSIST_DIR = "./data"
EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1"
EMBEDDING_REVISION = "289f532e14dbbbd5a04753fa58739e9ba766f3c7"

# set_page_config must run before any other Streamlit command.
st.set_page_config(page_title="LawGPT")

col1, col2, col3 = st.columns([1, 4, 1])
with col2:
    st.image("https://s3.ap-south-1.amazonaws.com/makerobosfastcdn/cms-assets/Legal_AI_Chatbot.png")

# Custom button colours; hides Streamlit's default menu, footer and decoration.
st.markdown(
    """
<style>
div.stButton > button:first-child {
    background-color: #ffd0d0;
}
div.stButton > button:active {
    background-color: #ff6262;
}
div[data-testid="stStatusWidget"] div button {
    display: none;
}

.reportview-container {
    margin-top: -2em;
}
#MainMenu {visibility: hidden;}
.stDeployButton {display:none;}
footer {visibility: hidden;}
#stDecoration {display:none;}
button[title="View fullscreen"]{
    visibility: hidden;}
</style>
""",
    unsafe_allow_html=True,
)


def reset_conversation():
    """Clear the displayed chat transcript and the chain's conversation memory."""
    st.session_state.messages = []
    st.session_state.memory.clear()


@st.cache_resource(show_spinner="Loading embedding model...")
def load_embeddings():
    """Return the embedding model, loaded once per server process.

    The revision is pinned because ``trust_remote_code`` executes code shipped
    with the model repository.
    """
    return HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={"trust_remote_code": True, "revision": EMBEDDING_REVISION},
    )


@st.cache_resource(show_spinner="Indexing documents...")
def build_vectordb():
    """Load, split and embed the PDFs under ``DATA_DIR`` into a Chroma store.

    Cached so that reruns triggered by chat input do not re-read the PDFs or
    re-add the same chunks to the persisted collection.
    NOTE(review): a server restart still rebuilds into ``PERSIST_DIR``; confirm
    whether the persisted collection should be reused or cleared instead.
    """
    loader = DirectoryLoader(DATA_DIR, glob="./*.pdf", loader_cls=PyPDFLoader)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)
    return Chroma.from_documents(
        texts, embedding=load_embeddings(), persist_directory=PERSIST_DIR
    )


if "messages" not in st.session_state:
    st.session_state.messages = []

if "memory" not in st.session_state:
    # Keep only the last two exchanges in the prompt's chat history.
    st.session_state.memory = ConversationBufferWindowMemory(
        k=2, memory_key="chat_history", return_messages=True
    )

embeddings = load_embeddings()
vectordb = build_vectordb()
db_retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": 4})

# NOTE(review): the prompt names the "pakistan Penal Code" at the start and the
# "Indian Penal Code" at the end; confirm the intended jurisdiction and make
# the wording consistent.
# The instruction block is closed with [/INST], matching the Mistral instruct
# format; it previously ended with "</s>[INST]".
prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in pakistan Penal Code queries and , your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
CONTEXT: {context}
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
[/INST]
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question", "chat_history"],
)

# Other LLM backends are listed at https://python.langchain.com/docs/integrations/llms
# (for example HuggingFaceHub); this app uses the Together API.
llm = Together(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.5,
    max_tokens=1024,
    together_api_key=together_api,
)
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state.memory,
    retriever=db_retriever,
    combine_docs_chain_kwargs={"prompt": prompt},
)

# Replay the transcript so earlier messages survive Streamlit reruns.
for message in st.session_state.messages:
    with st.chat_message(message.get("role")):
        st.write(message.get("content"))

input_prompt = st.chat_input("Say something")

if input_prompt:
    with st.chat_message("user"):
        st.write(input_prompt)

    st.session_state.messages.append({"role": "user", "content": input_prompt})

    with st.chat_message("assistant"):
        with st.status("Thinking 💡...", expanded=True):
            result = qa.invoke(input=input_prompt)

        message_placeholder = st.empty()

        full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
        # Reveal the answer one character at a time for a typing effect.
        for chunk in result["answer"]:
            full_response += chunk
            time.sleep(0.02)
            message_placeholder.markdown(full_response + " ▌")
        # Final render without the typing cursor.
        message_placeholder.markdown(full_response)
        st.button('Reset All Chat 🗑️', on_click=reset_conversation)

    st.session_state.messages.append({"role": "assistant", "content": result["answer"]})
config.py ADDED
@@ -0,0 +1 @@
 
 
"""Runtime configuration for the LawGPT app."""
import os

# SECURITY: a Together API key was previously hard-coded on this line and
# committed to the repository. That key must be treated as leaked and revoked.
# The key is now read from the TOGETHER_API_KEY environment variable (set it
# as a deployment secret); it falls back to an empty string when unset.
together_api = os.environ.get("TOGETHER_API_KEY", "")