elibrowne committed on
Commit
8861533
1 Parent(s): df99dda

Persistence and data collection?

Files changed (1)
  1. app.py +32 -35
app.py CHANGED
@@ -4,25 +4,9 @@ import os
 # PERSISTENT DATA STORAGE: this code is used to make commits
 
 import json
-from datetime import datetime
-from pathlib import Path
-from uuid import uuid4
-from huggingface_hub import CommitScheduler, hf_hub_download, file_exists, HfApi
+from huggingface_hub import hf_hub_download, file_exists, HfApi
 from random import shuffle
 
-JSON_DATASET_DIR = Path("json_dataset")
-JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
-
-JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"
-
-scheduler = CommitScheduler(
-    repo_id="ebrowne/test-data",
-    repo_type="dataset",
-    folder_path=JSON_DATASET_DIR,
-    path_in_repo="data",
-    token = os.getenv("HF_TOKEN")
-)
-
 # Global variables which interact with loading and unloading
 user_data = {}
 current_response = {}
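
This hunk drops the CommitScheduler/JSONL persistence and keeps only hf_hub_download, file_exists, and HfApi. The loading side of the new approach is outside the diff context, so the following is only a hedged sketch of how those retained imports are commonly combined to restore a per-user record; the <user_id>.json filename convention and the default fields are assumptions, not shown in this commit.

```python
import json
import os

from huggingface_hub import file_exists, hf_hub_download

def load_user_data(user_id):
    # Hypothetical layout: one JSON file per user inside the ebrowne/test-data dataset repo.
    filename = f"{user_id}.json"  # assumption: the filename convention is not shown in the diff
    if file_exists(repo_id="ebrowne/test-data", filename=filename,
                   repo_type="dataset", token=os.getenv("HF_TOKEN")):
        # Download (or reuse the cached copy of) the user's progress file and parse it.
        path = hf_hub_download(repo_id="ebrowne/test-data", filename=filename,
                               repo_type="dataset", token=os.getenv("HF_TOKEN"))
        with open(path) as f:
            return json.load(f)
    # First visit: start with the fields this commit reads and writes elsewhere.
    return {"order": [], "current": 0, "modes": [], "responses": []}
```
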
@@ -81,11 +65,11 @@ def update_huggingface(id):
         token = os.getenv("HF_TOKEN")
     )
 
-def reset_current_response():
+def reset_current_response(qid):
     global current_response
     current_response = {
         "user_id": user_id,
-        "question_id": "QID",
+        "question_id": qid,
         "user_answer": 0,
         "e5_scores": [], # list of ten [score, score, score, score]
         "e5_set": [], # two values
@@ -108,17 +92,11 @@ def load_current_question():
         print("Done")
         gr.Info("You've finished — thank you so much! There are no more questions. :)")
         current_question = {"question": "You're done! Thanks so much for your help.", "answers": ["I want to log out now.", "I want to keep answering questions.","I want to keep answering questions.", "I want to keep answering questions."], "correct_answer_index": 0, "top10_e5": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_e5": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_colbert": ["You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!", "You're done; thank you!"], "generation_colbert": "I don't know how to exit this code right now, so you're in an endless loop of this question until you quit.", "top10_contains_gold_passage": False, "gold_passage": "GOLD PASSAGE: LOG OFF!", "gold_passage_generation": "what do you gain"}
+        reset_current_response("USER FINISHED")
     else:
         qid = user_data["order"][q_index]
         current_question = all_questions[qid]
-
-# This method is being used to save each set of individual scores (in case the main files have issues, the data should be saved)
-def commit_current_and_reset():
-    with scheduler.lock:
-        with JSON_DATASET_PATH.open("a") as f:
-            json.dump(current_response, f)
-            f.write("\n")
-    reset_current_response()
+        reset_current_response(user_data["order"][q_index])
 
 # THEMING: colors and styles (Gradio native)
 
@@ -189,9 +167,11 @@ with gr.Blocks(theme = theme) as user_eval:
     def next_p(e0, e1, e2, e3):
         global step
         global mode
+        global current_response
         step += 1
-        print(e0)
-        print(e1 + e2 + e3)
+        # Add user data to the current response
+        current_response[user_data["modes"][user_data["current"]][mode] + "_scores"].append([e0, e1, e2, e3])
+        # Next item
         if step == len(current_question["top10_" + user_data["modes"][user_data["current"]][mode]]): # should always be 10
             # Step 10: all sources
             collapsible_string = ""
@@ -224,10 +204,13 @@ with gr.Blocks(theme = theme) as user_eval:
         global step
         global mode
         global user_data
+        global current_response
         step += 1
-        print(e_h + e_s)
+
         if step == 11:
             # Step 11: guaranteed to be generation
+            # Add user data to the current response as SET evaluation, which comes before the generation
+            current_response[user_data["modes"][user_data["current"]][mode] + "_set"] = [e_h, e_s]
             return {
                 selection: gr.HTML("""
                 <h2> Autogenerated Response </h2>
@@ -236,9 +219,11 @@ with gr.Blocks(theme = theme) as user_eval:
                 eval_satisfied: gr.Slider(value = 1)
             }
         # Steps 12 and 13 are gold passage + gold passage generation IF it is applicable
-        if step > 11 and not current_question["top10_contains_gold_passage"]:
+        if step > 11: # and not current_question["top10_contains_gold_passage"]
             # When mode is 0 -> reset with mode = 1
             if mode == 0:
+                # The user just evaluated a generation for mode 0
+                current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
                 return {
                     selection: gr.HTML("""
                     <h2> Retrieved Passage </h2>
@@ -249,6 +234,8 @@ with gr.Blocks(theme = theme) as user_eval:
                 }
             # When mode is 1 -> display GP and GP generation, then switch
             if step == 12:
+                # The user just evaluated a generation for mode 1
+                current_response[user_data["modes"][user_data["current"]][mode] + "_generation"] = [e_h, e_s]
                 return {
                     selection: gr.HTML("""
                     <h2> Retrieved Passage </h2>
@@ -258,6 +245,8 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_satisfied: gr.Slider(value = 1)
                 }
             elif step == 13:
+                # The user just evaluated the gold passage
+                current_response["gold_set"] = [e_h, e_s]
                 return {
                     selection: gr.HTML("""
                     <h2> Autogenerated Response </h2>
@@ -266,9 +255,11 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_helps: gr.Slider(value = 1),
                     eval_satisfied: gr.Slider(value = 1)
                 }
-            else:
+            else: # step = 14
+                # The user just evaluated the gold passage generation
+                current_response["gold_generation"] = [e_h, e_s]
                 user_data["current"] += 1
-                # TODO save answers x2
+                user_data["responses"].append(current_response) # adds new answers to current list of responses
                 update_huggingface(user_id) # persistence — update progress online, save answers
                 load_current_question()
                 return {
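
After this hunk, next_g closes out a question at step 14: it stores the gold-passage-generation scores, appends the completed current_response to user_data["responses"], and then calls update_huggingface to push everything. Piecing together the keys used across this commit, one collected record would look roughly like the literal below; the e5/colbert mode names are inferred from top10_e5/top10_colbert, and the concrete values are illustrative only.

```python
# Illustrative shape of one completed current_response record, reconstructed
# from reset_current_response plus the assignments made in next_p and next_g.
example_response = {
    "user_id": "user-123",                 # set when the response is reset
    "question_id": "qid-042",              # qid passed to reset_current_response
    "user_answer": 0,
    "e5_scores": [[1, 2, 3, 4]] * 10,      # one [e0, e1, e2, e3] list per passage, from next_p
    "e5_set": [3, 4],                      # [e_h, e_s] for the whole set, step 11
    "e5_generation": [2, 5],               # [e_h, e_s] for the generated answer
    "colbert_scores": [[2, 3, 4, 5]] * 10, # second mode, same structure (name inferred)
    "colbert_set": [4, 4],
    "colbert_generation": [3, 3],
    "gold_set": [5, 5],                    # step 13: gold passage
    "gold_generation": [4, 4],             # step 14: gold passage generation
}
```
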
@@ -277,19 +268,24 @@ with gr.Blocks(theme = theme) as user_eval:
                     eval_helps: gr.Slider(value = 1),
                     eval_satisfied: gr.Slider(value = 1)
                 }
+
+        # VERY UNCLEAN CODE: for practical purposes, this else block is unreachable: not current_question["top10_contains_gold_passage"] will always be True
+        """
         else:
             # When mode is 0 -> reset with mode = 1
             if mode == 0:
                 return {
-                    selection: gr.HTML("""
+                    selection: gr.HTML(\"""
                     <h2> Retrieved Passage </h2>
-                    <p> """ + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
+                    <p> \""" + current_question["top10_" + user_data["modes"][user_data["current"]][1]][0] + "</p>"), # hard coded: first passage (0) of mode 2 (1)
                     forward_btn: gr.Textbox("load new data"),
                     eval_helps: gr.Slider(value = 1),
                     eval_satisfied: gr.Slider(value = 1)
                 }
             # When mode is 1 -> change question
             user_data["current"] += 1
+            user_data["responses"].append(current_response) # adds new answers to current list of responses
+            # Update stored data with new current, additional data
             update_huggingface(user_id)
             load_current_question()
             return {
@@ -298,6 +294,7 @@ with gr.Blocks(theme = theme) as user_eval:
                 eval_helps: gr.Slider(value = 1),
                 eval_satisfied: gr.Slider(value = 1)
             }
+        """
     btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3])
     btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied], outputs = [selection, forward_btn, eval_helps, eval_satisfied])
 