|
from langchain.llms import CTransformers |
|
from langchain.chains import LLMChain |
|
from langchain.prompts import PromptTemplate |
|
import os |
|
import io |
|
import gradio as gr |
|
import time |
|
|
|
# Prompt text given to the model; the ``{query}`` placeholder is the single
# variable that gets filled with the user's message.
custom_prompt_template = """
You are an AI coding assistant and your task is to solve coding problems and
return code snippets based on the user's query. Below is the user's query.
Query: {query}
You just return the helpful code and related details
Helpful code and related details:
"""
|
|
|
def set_custom_prompt():
    """Build the prompt template used for every request.

    Returns:
        A ``PromptTemplate`` wrapping ``custom_prompt_template`` whose only
        input variable is ``query``.
    """
    return PromptTemplate(
        input_variables=["query"],
        template=custom_prompt_template,
    )
|
|
|
def load_model():
    """Create the CTransformers-backed Code Llama model.

    The generation settings are supplied through ``config``: the LangChain
    ``CTransformers`` wrapper forwards that dict to the underlying
    ctransformers model, whereas settings passed as direct keyword arguments
    are not applied.

    Returns:
        A configured ``CTransformers`` instance.
    """
    generation_config = {
        "max_new_tokens": 1096,
        "temperature": 0.2,
        "repetition_penalty": 1.13,
        # Number of layers to offload to the GPU.
        # NOTE(review): presumably needs a GPU-enabled ctransformers build;
        # confirm for the target machine or set to 0 for CPU-only use.
        "gpu_layers": 2,
    }
    return CTransformers(
        # The repository (or local directory) and the GGUF file inside it are
        # given separately, as the wrapper's ``model`` / ``model_file``
        # parameters expect.
        model="TheBloke/CodeLlama-7B-GGUF",
        model_file="codellama-7b.Q4_K_M.gguf",
        model_type="llama",
        config=generation_config,
    )
|
|
|
def chain_pipeline():
    """Assemble the chain that answers coding queries.

    Loads the model first, then builds the prompt, and joins the two.

    Returns:
        An ``LLMChain`` built from ``load_model()`` and ``set_custom_prompt()``.
    """
    model = load_model()
    prompt_template = set_custom_prompt()
    return LLMChain(llm=model, prompt=prompt_template)
|
|
|
# Built once when the module is loaded so that every call to ``bot`` reuses
# the same chain instead of constructing a new one.
llmcahin = chain_pipeline()


def bot(query):
    """Run the chain on a user query.

    Args:
        query: The user's message, passed to the chain under the ``query`` key.

    Returns:
        Whatever ``llmcahin.run`` returns for that query.
    """
    return llmcahin.run({"query": query})
|
|
|
# Gradio chat UI: a heading, the chat history pane, a message box and a
# button that clears both the box and the history.
with gr.Blocks(title="code llama 7b") as demo:
    gr.Markdown("# Code llama")
    chatbot = gr.Chatbot([], elem_id="chatbot", height=700)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Answer a submitted message and record the exchange.

        Args:
            message: Text submitted from the message box.
            chat_history: List of ``(user message, bot message)`` pairs
                currently held by the chatbot component.

        Returns:
            A tuple of an empty string (written back to the message box) and
            the chat history, extended with the new exchange when one was made.
        """
        # Blank submissions are ignored rather than sent to the model.
        if not message or not message.strip():
            return "", chat_history
        bot_message = bot(message)
        chat_history.append((message, bot_message))
        return "", chat_history

    # Submitting the textbox feeds (msg, chatbot) to respond and writes its
    # two return values back to the same components.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

# NOTE(review): share=True asks Gradio for a publicly reachable link in
# addition to the local server; confirm that exposure is intended.
demo.launch(share=True)