KoonJamesZ committed on
Commit
99bb109
1 Parent(s): 4d1b45b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -7
app.py CHANGED
@@ -14,8 +14,35 @@ df['embeding_context'] = df['embeding_context'].astype(str).fillna('')
14
 
15
  # Filter out any rows where 'embeding_context' might be empty or invalid
16
  df = df[df['embeding_context'] != '']
17
-
18
- index = faiss.read_index('vector_store.index')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
 
21
  # Function to perform search and return all columns
@@ -23,7 +50,7 @@ def search_query(query_text):
23
  num_records = 50
24
 
25
  # Encode the input query text
26
- embeddings_query = model.encode([query_text], batch_size=12, max_length=2048)['dense_vecs']
27
  embeddings_query_np = np.array(embeddings_query).astype('float32')
28
 
29
  # Search in FAISS index for nearest neighbors
@@ -41,16 +68,16 @@ def gradio_interface(query_text):
41
 
42
  with gr.Blocks() as app:
43
  gr.Markdown("<h1>White Stride Red Search (BEG-M3)</h1>")
44
-
45
  # Input text box for the search query
46
  search_input = gr.Textbox(label="Search Query", placeholder="Enter search text", interactive=True)
47
-
48
  # Search button below the text box
49
  search_button = gr.Button("Search")
50
-
51
  # Output table for displaying results
52
  search_output = gr.DataFrame(label="Search Results")
53
-
54
  # Link button click to action
55
  search_button.click(fn=gradio_interface, inputs=search_input, outputs=search_output)
56
 
 
14
 
15
  # Filter out any rows where 'embeding_context' might be empty or invalid
16
  df = df[df['embeding_context'] != '']
17
+
18
# Load the FAISS index from disk when a cached copy exists; otherwise encode
# the 'embeding_context' column, build an L2 index from it, and cache it.
#
# Reads (defined earlier in the file): df, model, faiss, np, os.
# Binds at module level: embedding_contexts, index_file_path, index.
# `embeddings_csv` / `embeddings_np` are now bound only when the index has to
# be built, because they are not needed once a cached index is available.

# Texts to index, in DataFrame row order
embedding_contexts = df['embeding_context'].tolist()

# FAISS index file path
index_file_path = 'vector_store_bge_m3.index'

if os.path.exists(index_file_path):
    # Reuse the cached index; this avoids re-encoding the whole corpus on
    # every start-up.
    # NOTE(review): the cached index is not checked against the current
    # contents of `df` -- delete the file after changing the data.
    index = faiss.read_index(index_file_path)
    print("FAISS index loaded from file.")
else:
    # Encode the corpus; the dense vectors are taken from the 'dense_vecs'
    # entry of the encoder output.
    embeddings_csv = model.encode(
        embedding_contexts, batch_size=12, max_length=1024
    )['dense_vecs']

    # FAISS is fed float32 arrays here, matching how the query is converted.
    embeddings_np = np.array(embeddings_csv).astype('float32')

    # Initialize a flat L2 index sized to the embedding dimension
    dim = embeddings_np.shape[1]
    index = faiss.IndexFlatL2(dim)

    # Add embeddings to the FAISS index
    index.add(embeddings_np)

    # Save the FAISS index to a file for future use
    faiss.write_index(index, index_file_path)
    print("FAISS index created and saved to file.")

# `index` is already bound by whichever branch ran above, so the file is not
# read a second time.
46
 
47
 
48
  # Function to perform search and return all columns
 
50
  num_records = 50
51
 
52
  # Encode the input query text
53
+ embeddings_query = model.encode([query_text], batch_size=12, max_length=1024)['dense_vecs']
54
  embeddings_query_np = np.array(embeddings_query).astype('float32')
55
 
56
  # Search in FAISS index for nearest neighbors
 
68
 
69
  with gr.Blocks() as app:
70
  gr.Markdown("<h1>White Stride Red Search (BEG-M3)</h1>")
71
+
72
  # Input text box for the search query
73
  search_input = gr.Textbox(label="Search Query", placeholder="Enter search text", interactive=True)
74
+
75
  # Search button below the text box
76
  search_button = gr.Button("Search")
77
+
78
  # Output table for displaying results
79
  search_output = gr.DataFrame(label="Search Results")
80
+
81
  # Link button click to action
82
  search_button.click(fn=gradio_interface, inputs=search_input, outputs=search_output)
83