alex buz committed
Commit: a7368c8
1 parent: 84ce934
Files changed (6):
  1. .gitignore +1 -0
  2. 1t.py +17 -0
  3. app copy.py +55 -0
  4. app.py +66 -62
  5. push.bat +3 -0
  6. requirements.txt +84 -1
.gitignore ADDED
@@ -0,0 +1 @@
+ cache
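(The ignored "cache" directory matches the cache_dir="./cache" passed to from_pretrained in the scripts below, so downloaded model weights stay out of version control.)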
1t.py ADDED
@@ -0,0 +1,17 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ # Load a GPT-2 model for general question answering
+ tokenizer = AutoTokenizer.from_pretrained("gpt2-medium", cache_dir="./cache")
+ model = AutoModelForCausalLM.from_pretrained("gpt2-medium", cache_dir="./cache")
+ # question = "What is the capital of France?"  # overridden by the next assignment
+ question = "List all US presidents in order of their presidency"
+ input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")
+
+ # Generate a response; do_sample=True lets temperature/top_k/top_p take effect
+ with torch.no_grad():
+     output = model.generate(input_ids, max_length=150, num_return_sequences=1,
+                             do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+
+ response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+ print(response)
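Note: the same sampled generation can also be written with the text-generation pipeline. A minimal sketch, assuming the same ./cache directory (model_kwargs is forwarded to from_pretrained):

    from transformers import pipeline

    generator = pipeline("text-generation", model="gpt2-medium",
                         model_kwargs={"cache_dir": "./cache"})
    result = generator("Q: What is the capital of France?\nA:",
                       max_length=150, do_sample=True,
                       temperature=0.7, top_k=50, top_p=0.95)
    print(result[0]["generated_text"])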
app copy.py ADDED
@@ -0,0 +1,55 @@
+ import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= max(np.max(np.abs(y)), 1e-9)  # avoid division by zero on silent input
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     context = "You are chatbot answering general questions"
+     print(transcription)
+     result = qa_model(question=transcription, context=context)
+     print(result)
+     return result['answer']
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio recorded.", ""
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, answer_result
+
+ def clear_all():
+     return None, "", ""
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Audio Transcription and Question Answering")
+
+     audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+     transcription_output = gr.Textbox(label="Transcription")
+     answer_output = gr.Textbox(label="Answer Result")
+
+     clear_button = gr.Button("Clear")
+
+     audio_input.stop_recording(
+         fn=process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, answer_output]
+     )
+
+     clear_button.click(
+         fn=clear_all,
+         inputs=[],
+         outputs=[audio_input, transcription_output, answer_output]
+     )
+
+ demo.launch()
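Note: distilbert-base-cased-distilled-squad is an extractive QA model, so its answer is always a span copied out of the supplied context; the placeholder context above therefore cannot answer open-ended questions, which is what the app.py change below swaps in GPT-2 for. A minimal sketch of the extractive behavior:

    from transformers import pipeline

    qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
    result = qa(question="What is the capital of France?",
                context="Paris is the capital of France.")
    print(result["answer"])  # 'Paris' -- a literal substring of the context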
app.py CHANGED
@@ -1,63 +1,67 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
- """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     response = ""
-
-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content
-
-         response += token
-         yield response
-
- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
-
-
- if __name__ == "__main__":
-     demo.launch()
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+ import numpy as np
+ import torch
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+
+ # Load a GPT-2 model for general question answering
+ tokenizer = AutoTokenizer.from_pretrained("gpt2-medium", cache_dir="./cache")
+ model = AutoModelForCausalLM.from_pretrained("gpt2-medium", cache_dir="./cache")
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= max(np.max(np.abs(y)), 1e-9)  # avoid division by zero on silent input
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(question):
+     input_ids = tokenizer.encode(f"Q: {question}\nA:", return_tensors="pt")
+
+     # Generate a response; do_sample=True lets temperature/top_k/top_p take effect
+     with torch.no_grad():
+         output = model.generate(input_ids, max_length=150, num_return_sequences=1,
+                                 do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+
+     response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     # Extract and return only the answer part, not the echoed prompt
+     answer_text = response.split("A:")[-1].strip()
+     print(answer_text)
+     return answer_text
+
+ def process_audio(audio):
+     if audio is None:
+         return "No audio recorded.", ""
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, answer_result
+
+ def clear_all():
+     return None, "", ""
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Audio Transcription and Question Answering")
+
+     audio_input = gr.Audio(label="Audio Input", sources=["microphone"], type="numpy")
+     transcription_output = gr.Textbox(label="Transcription")
+     answer_output = gr.Textbox(label="Answer Result", lines=10)
+
+     clear_button = gr.Button("Clear")
+
+     audio_input.stop_recording(
+         fn=process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, answer_output]
+     )
+
+     clear_button.click(
+         fn=clear_all,
+         inputs=[],
+         outputs=[audio_input, transcription_output, answer_output]
+     )
+
+ demo.launch()
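Note: with type="numpy", Gradio passes process_audio a (sample_rate, data) tuple, so the whole path can be smoke-tested without a microphone. A minimal sketch, assuming the functions above are importable (a pure tone transcribes to little or nothing, which is fine for a wiring check):

    import numpy as np

    sr = 16000
    t = np.linspace(0, 1, sr, endpoint=False)
    tone = (0.5 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)  # 1 s, 440 Hz
    transcription, answer_text = process_audio((sr, tone))
    print(transcription, answer_text)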
push.bat ADDED
@@ -0,0 +1,3 @@
+ git add .
+ git commit -m "%1"
+ git push
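Note: %1 is the batch file's first command-line argument, so the script is invoked with the commit message quoted, e.g. push.bat "fix audio normalization".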
requirements.txt CHANGED
@@ -1 +1,84 @@
- huggingface_hub==0.22.2
+ aiofiles==23.2.1
+ altair==5.3.0
+ annotated-types==0.7.0
+ anyio==4.4.0
+ attrs==23.2.0
+ certifi==2024.7.4
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.2.1
+ cycler==0.12.1
+ dnspython==2.6.1
+ email_validator==2.2.0
+ fastapi==0.111.1
+ fastapi-cli==0.0.4
+ ffmpy==0.3.2
+ filelock==3.15.4
+ fonttools==4.53.1
+ fsspec==2024.6.1
+ gradio==4.29.0
+ gradio_client==0.16.1
+ h11==0.14.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.5
+ idna==3.7
+ importlib_resources==6.4.0
+ intel-openmp==2021.4.0
+ Jinja2==3.1.4
+ jsonschema==4.23.0
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.9.1
+ mdurl==0.1.2
+ mkl==2021.4.0
+ mpmath==1.3.0
+ networkx==3.3
+ numpy==1.26.4
+ orjson==3.10.6
+ packaging==24.1
+ pandas==2.2.2
+ pillow==10.4.0
+ pydantic==2.8.2
+ pydantic_core==2.20.1
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.15
+ requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.19.0
+ ruff==0.5.2
+ safetensors==0.4.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ SpeechRecognition==3.10.4
+ starlette==0.37.2
+ sympy==1.13.0
+ tbb==2021.13.0
+ tokenizers==0.19.1
+ tomlkit==0.12.0
+ toolz==0.12.1
+ torch==2.3.1
+ torchaudio==2.3.1
+ tqdm==4.66.4
+ transformers==4.42.4
+ typer==0.12.3
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ urllib3==2.2.2
+ uvicorn==0.30.1
+ watchfiles==0.22.0
+ websockets==11.0.3
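Note: the pinned versions reproduce the Space's environment locally via pip install -r requirements.txt.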