import asyncio
import random
import json
import time
import os
import gradio as gr
from http import HTTPStatus
import openai
from typing import Generator, List, Optional, Tuple, Dict
from urllib.error import HTTPError
import requests

API_URL = os.getenv('API_URL')
API_KEY = os.getenv('API_KEY')
CUSTOM_JS = os.getenv('CUSTOM_JS', None)
HEADERS = json.loads(os.getenv('API_HEADERS', '{}'))

oai_client = openai.OpenAI(api_key=API_KEY, base_url=API_URL, default_headers=HEADERS)

History = List[Tuple[str, str]]


def clear_session() -> Tuple[str, History]:
    # Reset the textbox and the conversation history.
    return '', []


def history_to_message(history: History) -> str:
    # The demo continues text, so the "context" is simply the last completion shown.
    return history[-1][1].strip() if history else ''


def model_chat(query: Optional[str], history: Optional[History]) -> Generator[str, None, None]:
    if query is None:
        query = ''
    if history is None:
        history = []
    if not query.strip():
        return

    # Build the prompt: the previous completion (if any) followed by the new input.
    message = (history_to_message(history) + ' ' + query.strip()).strip()

    gen = oai_client.completions.create(
        model='dicta-il/dictalm2.0',
        prompt=message,
        temperature=0.7,
        max_tokens=512,
        top_p=0.9,
        stream=True,
    )

    full_response = message
    for completion in gen:
        text = completion.choices[0].text
        if text:
            # Escape angle brackets so streamed text is rendered literally by the chatbot.
            full_response += text.replace('<', '&lt;').replace('>', '&gt;')
        yield full_response


def create_stream_fn(orig_stream_fn):
    # Wrap an async streaming generator that yields paired updates, forwarding each
    # partial update with its trailing character trimmed. (Not used by the interface below.)
    async def stream_fn(*args, **kwargs):
        generator = orig_stream_fn(*args, **kwargs)
        async for _, update in generator:
            yield update[:-1], update[:-1]
    return stream_fn


with gr.Blocks(css='''
    .gr-group {direction: rtl;}
    .chatbot {text-align: right;}
    .dicta-header {
        background-color: var(--input-background-fill); /* Replace with desired background color */
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        display: flex;
        flex-direction: row;
        align-items: center;
        box-shadow: var(--block-shadow);
        border-color: var(--block-border-color);
        border-width: 1px;
    }

    @media (max-width: 768px) {
        .dicta-header {
            flex-direction: column; /* Change to vertical for mobile devices */
        }
    }

    .chatbot.prose {
        font-size: 1.2em;
    }

    .dicta-logo {
        width: 150px; /* Replace with actual logo width as desired */
        height: auto;
        margin-bottom: 20px;
    }

    .dicta-intro-text {
        margin-bottom: 20px;
        text-align: center;
        display: flex;
        flex-direction: column;
        align-items: center;
        width: 100%;
        font-size: 1.1em;
    }

    textarea {
        font-size: 1.2em;
    }
''', js=CUSTOM_JS) as demo:
    gr.Markdown("""

<div class="dicta-header">
    <img src="file/dicta-logo.jpg" alt="Dicta logo" class="dicta-logo">
    <div class="dicta-intro-text">
        <h1>DictaLM 2.0 - Completions Demo (NOT Chat)</h1>
        <p>Welcome to the interactive demo of the base model version of DictaLM-2.0. Explore the capabilities of our model and see how it can assist with your tasks.</p>
        <p dir="rtl">ברוכים הבאים לדמו האינטראקטיבי של DictaLM-2.0. חקרו את יכולות המודל שלנו וראו כיצד הוא יכול לסייע לכם במשימותיכם.</p>
        <p dir="rtl">המודל משוחרר לנחלת הכלל ואפשר להורידו בקישור: <a href="https://huggingface.co/dicta-il/dictalm2.0">כאן</a></p>
        <p dir="rtl">שימו לב: זהו מודל הבסיס ונועד אך ורק להשלמות ולא לצ'אט</p>
        <p>Please note: This is the base model and is meant for completions only, not for chat.</p>
    </div>
</div>

""") interface = gr.ChatInterface(model_chat, fill_height=False) interface.chatbot.rtl = True interface.textbox.placeholder = "הכנס שאלה בעברית (או באנגלית!)" interface.textbox.rtl = True interface.textbox.text_align = 'right' interface.theme_css += '.gr-group {direction: rtl !important;}' demo.queue(api_open=False, default_concurrency_limit=5).launch(max_threads=20, share=False, allowed_paths=['dicta-logo.jpg'])