"""Gradio chat UI for a local CodeLlama-7B coding assistant driven by LangChain."""

from langchain.llms import CTransformers
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
import io
import gradio as gr
import time

# Prompt wrapper: the model is given the raw user query and asked to reply
# with code plus a short explanation.
custom_prompt_template = """You are an AI coding assistant and your task is to solve coding problems and return code snippets based on the user's query. Below is the user's query.
Query: {query}
You just return the helpful code and related details
Helpful code and related details:
"""


def set_custom_prompt():
    """Build the PromptTemplate exposing the single `query` input variable."""
    prompt = PromptTemplate(
        template=custom_prompt_template,
        input_variables=["query"],
    )
    return prompt


def load_model():
    """Load the quantized CodeLlama-7B GGUF model via CTransformers.

    Returns:
        A LangChain `CTransformers` LLM ready for use in a chain.

    NOTE(review): CTransformers expects the Hugging Face repo id in `model`
    and the GGUF file name in `model_file`; the original code concatenated
    both into one path, which the loader treats as a (nonexistent) repo id.
    Split accordingly — confirm against the installed ctransformers version.
    """
    llm = CTransformers(
        model="TheBloke/CodeLlama-7B-GGUF",
        model_file="codellama-7b.Q4_K_M.gguf",
        model_type="llama",
        max_new_tokens=1096,
        temperature=0.2,          # low temperature: favor deterministic code
        repetition_penalty=1.13,
        gpu_layers=2,             # offload a couple of layers to GPU if present
    )
    return llm


def chain_pipeline():
    """Compose the loaded LLM with the custom prompt into an LLMChain."""
    llm = load_model()
    qa_prompt = set_custom_prompt()
    qa_chain = LLMChain(prompt=qa_prompt, llm=llm)
    return qa_chain


# Build the chain once at startup (model load is expensive).
# Fixed typo: `llmcahin` -> `llm_chain`.
llm_chain = chain_pipeline()


def bot(query):
    """Run the user's query through the chain and return the model's text."""
    llm_response = llm_chain.run({"query": query})
    return llm_response


with gr.Blocks(title="code llama 7b") as demo:
    gr.Markdown("# Code llama")
    chatbot = gr.Chatbot([], elem_id="chatbot", height=700)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Gradio submit callback: append the (user, bot) turn, clear the box.

        The artificial `time.sleep(2)` from the Gradio tutorial template was
        removed — it only delayed every response by two seconds.
        """
        bot_message = bot(message)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

demo.launch(share=True)