KoonJamesZ committed on
Commit
7d09b5b
1 Parent(s): a2e264b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from FlagEmbedding import BGEM3FlagModel
4
+
5
# Load the pre-trained BGE-M3 embedding model (fp16 enabled).
model = BGEM3FlagModel('BAAI/bge-m3', use_fp16=True)

# Load the JSON data into a DataFrame.
df = pd.read_json('White-Stride-Red-68.json')

# Fill missing values *before* casting to str. Casting first turns NaN/None
# into the literal strings 'nan'/'None', so fillna('') has nothing left to
# fill and the empty-string filter below never removes those rows.
df['embeding_context'] = df['embeding_context'].fillna('').astype(str)

# Drop rows that have no text to embed.
df = df[df['embeding_context'] != '']

# Encode the 'embeding_context' column once at startup. The list is built
# from the already-filtered frame, so search_query can map embedding rows
# back to `df` by position (df.iloc).
embedding_contexts = df['embeding_context'].tolist()
embeddings_csv = model.encode(embedding_contexts, batch_size=12, max_length=2048)['dense_vecs']
# Function to perform search and return all columns
def search_query(query_text, num_records=50):
    """Return the rows of ``df`` most similar to ``query_text``.

    The query is embedded with the module-level ``model`` and scored against
    the precomputed ``embeddings_csv`` matrix with a matrix product.

    Args:
        query_text: Free-text search query.
        num_records: Maximum number of top-scoring rows to keep before
            de-duplication. Defaults to 50, the previously hard-coded value.

    Returns:
        A DataFrame with all columns of the best-matching rows, ordered from
        most to least similar. Rows that repeat an earlier row in every
        column other than 'embeding_context' are dropped.
    """
    # Encode the input query text (a single-item batch).
    embeddings_query = model.encode([query_text], batch_size=12, max_length=2048)['dense_vecs']

    # Compute similarity between the query and every stored embedding.
    similarity_matrix = embeddings_query @ embeddings_csv.T

    # Collapse the query axis to get one score per record.
    similarity_scores = similarity_matrix.max(axis=0)

    # Sort descending, then take the first `num_records`. Slicing after the
    # reversal keeps num_records=0 meaning "no rows"; `[-0:]` would have
    # selected every row instead.
    top_indices = similarity_scores.argsort()[::-1][:num_records]

    # De-duplicate on everything except the embedded text. The column is
    # spelled 'embeding_context' in the data; the old 'embedding_context'
    # spelling matched nothing, so the exclusion never took effect.
    dedupe_columns = list(df.columns.difference(['embeding_context']))

    # If the frame has no other columns, fall back to comparing whole rows
    # (subset=None) rather than passing an empty subset.
    return df.iloc[top_indices].drop_duplicates(subset=dedupe_columns or None, keep='first')
# Callback wired to the Gradio search button
def gradio_interface(query_text):
    """Run a search for ``query_text`` and return the matching rows.

    Delegates to ``search_query`` and hands its result straight back to the
    caller.
    """
    return search_query(query_text)
# Gradio interface setup
with gr.Blocks() as app:
    # Heading names the model actually loaded (BAAI/bge-m3); the previous
    # text read "BEG-M3".
    gr.Markdown("<h1>White Stride Red Search (BGE-M3)</h1>")

    # Input text box for the search query
    search_input = gr.Textbox(label="Search Query", placeholder="Enter search text", interactive=True)

    # Output table for displaying results
    search_output = gr.DataFrame(label="Search Results")

    # Search button
    search_button = gr.Button("Search")

    # Link button click to action: the textbox value is passed to
    # gradio_interface and its return value is shown in the results table.
    search_button.click(fn=gradio_interface, inputs=search_input, outputs=search_output)

# Launch the Gradio app
app.launch()