|
import gradio as gr |
|
import os |
|
|
|
|
|
|
|
import json |
|
from datetime import datetime |
|
from pathlib import Path |
|
from uuid import uuid4 |
|
from huggingface_hub import CommitScheduler, hf_hub_download, file_exists, HfApi |
|
from random import shuffle |
|
|
|
# Local staging area for response rows; the CommitScheduler below pushes its
# contents to the Hub in the background.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)

# One uniquely-named shard per process so concurrent app instances never
# append to the same file.
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"

# Periodically commits everything under folder_path to the "data" folder of
# the ebrowne/test-data dataset repo. Requires HF_TOKEN in the environment.
scheduler = CommitScheduler(
    repo_id="ebrowne/test-data",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data",
    token = os.getenv("HF_TOKEN")
)
|
|
|
|
|
# ---- Module-level mutable state (assumes one interactive session) ----
user_data = {}          # per-user record: question order, mode orders, progress, responses
current_response = {}   # scores collected for the question currently in progress
current_question = {}   # payload of the question being displayed (loaded from example.json below)
user_id = "no_id"       # placeholder until the user logs in with an email

# MBE question ids available to the study; shuffled per user in load_user_data.
qIDs = ["mbe_46", "mbe_132", "mbe_287", "mbe_326", "mbe_334", "mbe_389", "mbe_563", "mbe_614", "mbe_642", "mbe_747", "mbe_779", "mbe_826", "mbe_845", "mbe_1042", "mbe_1134"]
# The two retrieval systems being compared; shown in random order per question.
mode_options = ["e5", "colbert"]

# step: index of the passage currently shown for the active question.
# mode: 0 while the first retriever's set is shown, 1 for the second.
step = 0
mode = 0
|
|
|
def load_user_data(id):
    """Load (or initialize) the persistent record for this user.

    If a record for *id* already exists in the dataset repo, it is downloaded
    and loaded into the module-global ``user_data``. Otherwise a fresh record
    is created — with a per-user shuffled question order and, per question, a
    shuffled ordering of the two retrieval modes — and uploaded to the repo.
    """
    global user_data
    # Emails contain characters that are awkward in repo paths; encode them.
    filename = id.replace('@', '_AT_').replace('.', '_DOT_')
    if file_exists("ebrowne/test-data", "users/" + filename + ".json"):
        print("File exists, downloading data.")
        # BUG FIX: the file was previously downloaded but never read, so a
        # returning user's saved progress was silently discarded. Load the
        # downloaded JSON into user_data.
        local_path = hf_hub_download(
            repo_id="ebrowne/test-data",
            token=os.getenv("HF_TOKEN"),
            filename="users/" + filename + ".json",
        )
        with open(local_path) as f:
            user_data = json.load(f)
    else:
        # First visit: randomize the question order and, for each question,
        # the order in which the two retrieval modes are presented.
        shuffle(qIDs)
        modes = []
        for i in range(len(qIDs)):
            temp = mode_options[:]
            shuffle(temp)
            modes.append(temp)
        user_data = {
            "user_id": id,
            "order": qIDs,
            "modes": modes,
            "current": 0,
            "responses": []
        }
        # Persist the fresh record so progress survives across sessions.
        update_huggingface(id)
|
|
|
|
|
# Bootstrap a default in-memory record at import time so the Gradio UI below
# (which indexes user_data["modes"] while being built) can be constructed
# before anyone logs in; submit_email() replaces this via load_user_data().
shuffle(qIDs)
modes = []
for i in range(len(qIDs)):
    temp = mode_options[:]
    shuffle(temp)
    modes.append(temp)

user_data = {
    # BUG FIX: this previously stored the *builtin* ``id`` function (there is
    # no ``id`` variable at module scope), which is not JSON-serializable.
    # Use the "no_id" placeholder instead.
    "user_id": user_id,
    "order": qIDs,
    "modes": modes,
    "current": 0,
    "responses": []
}
|
|
|
def update_huggingface(id):
    """Serialize the global ``user_data`` to a local JSON file and upload it
    to the ``users/`` folder of the ebrowne/test-data dataset repo."""
    global user_data
    filename = id.replace('@', '_AT_').replace('.', '_DOT_')
    local_path = filename + ".json"

    # Write the record locally first; upload_file needs a file on disk.
    with open(local_path, "w") as out:
        json.dump(user_data, out)

    HfApi().upload_file(
        path_or_fileobj=local_path,
        path_in_repo="users/" + filename + ".json",
        repo_id="ebrowne/test-data",
        repo_type="dataset",
        token=os.getenv("HF_TOKEN")
    )
|
|
|
def reset_current_response():
    """Re-initialize the global ``current_response`` to an empty scoring
    record for the currently signed-in user."""
    global current_response
    fresh = {
        "user_id": user_id,
        "question_id": "QID",
        "user_answer": 0,
    }
    # All score/passage/generation buckets start out as empty lists.
    for key in ("e5_scores", "e5_set", "e5_generation",
                "colbert_scores", "colbert_set", "colbert_generation",
                "gold_set", "gold_generation"):
        fresh[key] = []
    current_response = fresh
|
|
|
|
|
def commit_current_and_reset():
    """Append the finished response record as one JSONL row to the local
    shard (under the scheduler's lock, so a background commit cannot read a
    half-written line), then reset the working record."""
    with scheduler.lock, JSON_DATASET_PATH.open("a") as sink:
        json.dump(current_response, sink)
        sink.write("\n")
    reset_current_response()
|
|
|
|
|
|
|
# Load the demo question payload that the UI below renders at startup.
current_question = json.loads(Path("example.json").read_text())
|
|
|
|
|
|
|
# Shared Gradio theme: sky-blue accents on a slate neutral palette, Inter font
# with system-font fallbacks.
theme = gr.themes.Soft(
    primary_hue="sky",
    secondary_hue="sky",
    neutral_hue="slate",
    font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
)
|
|
|
|
|
|
|
with gr.Blocks(theme = theme) as user_eval:

    # Hidden textbox used purely as an event relay: next_g() writes a value
    # into it, and its .change event (bound to toggle() further down in the
    # original file) advances the study flow.
    forward_btn = gr.Textbox("unchanged", visible = False, elem_id = "togglebutton")
    gr.HTML("""
    <h1> Legal Retriever Evaluation Study </h1>
    <p> Score the passages based on the question and provided answer choices. Detailed instructions are found <a href="https://docs.google.com/document/d/1ReODJ0hlXz_M3kE2UG1cwSRVoyDLQo88OvG71Gt8lUQ/edit?usp=sharing" target="_blank">here</a>. </p>
    """)
    gr.Markdown("---")
|
|
|
|
|
    # Evaluation view: passage display (left) plus scoring columns (right).
    # Hidden until the user answers the multiple-choice question.
    with gr.Row(equal_height = False, visible = False) as evals:

        with gr.Column(scale = 2) as passages:
            # First retrieved passage for the active question, using whichever
            # retriever user_data["modes"][current][mode] selects.
            selection = gr.HTML("""
            <h2> Retrieved Passage </h2>
            <p> """ + current_question["top10_" + user_data["modes"][user_data["current"]][mode]][0] + "</p>")
            line = gr.Markdown("---")

            # Copy the answer list and mark the correct choice in bold with a
            # check mark so reviewers can score passages against it.
            new_answers = current_question["answers"].copy()
            new_answers[current_question["correct_answer_index"]] = "**" + current_question["answers"][current_question["correct_answer_index"]] + "** ✅"
            # Question plus the four answers as a markdown bullet list.
            passage_display = gr.Markdown("""
## Question and Answer
*""" + current_question["question"] +
"""* \n
+ """ + new_answers[0] +
""" \n
+ """ + new_answers[1] +
""" \n
+ """ + new_answers[2] +
""" \n
+ """ + new_answers[3])
|
|
|
|
|
|
|
        # Per-passage scoring controls: a yes/no gate plus three 1-5 sliders.
        with gr.Column(scale = 1) as scores_p:
            desc_0 = gr.Markdown("Does the passage describe **a legal rule?**")
            eval_0 = gr.Radio(["Yes", "No"], label = "Legal Rule?")
            desc_1 = gr.Markdown("How **relevant** is this passage to the question?")
            eval_1 = gr.Slider(1, 5, step = 0.5, label = "Relevance")
            desc_2 = gr.Markdown("How would you rate the passage's **quality** in terms of detail, clarity, and focus?")
            eval_2 = gr.Slider(1, 5, step = 0.5, label = "Quality")
            desc_3 = gr.Markdown("How effectively does the passage **lead you to the correct answer?**")
            eval_3 = gr.Slider(1, 5, step = 0.5, label = "Helpfulness")
            # Disabled until the radio has a value (enforced elsewhere by a
            # .change handler on eval_0).
            btn_p = gr.Button("Next", interactive = False)
|
|
|
def sanitize_score(rad): |
|
if rad == None: |
|
return {btn_p: gr.Button(interactive = False)} |
|
else: |
|
return {btn_p: gr.Button(interactive = True)} |
|
eval_0.change(fn = sanitize_score, inputs = [eval_0], outputs = [btn_p]) |
|
|
|
        # Generation-scoring controls; hidden until all passages are scored.
        with gr.Column(scale = 1, visible = False) as scores_g:
            helps = gr.Markdown("Does this information **help answer** the question?")
            eval_helps = gr.Slider(1, 5, step = 0.5, label = "Helpfulness")
            satisfied = gr.Markdown("How **satisfied** are you by this answer?")
            eval_satisfied = gr.Slider(1, 5, step = 0.5, label = "User Satisfaction")
            btn_g = gr.Button("Next")
|
|
|
        def next_p(e0, e1, e2, e3):
            """Handle the passage-scoring "Next" button.

            e0-e3 are the radio and three slider values for the passage just
            scored. Advances the global ``step``; while passages remain, the
            next one is shown and the controls reset. After the last passage,
            all passages are shown at once and the generation-scoring column
            is swapped in.

            NOTE(review): the submitted scores are only printed, never written
            into current_response — confirm whether persistence is missing.
            """
            global step
            global mode
            step += 1
            # Debug output of the submitted scores.
            print(e0)
            print(e1 + e2 + e3)
            if step == len(current_question["top10_" + user_data["modes"][user_data["current"]][mode]]):
                # Every retrieved passage has been scored: show the full list
                # so the user can judge the generated answer in context.
                collapsible_string = ""
                for i, passage in enumerate(current_question["top10_" + user_data["modes"][user_data["current"]][mode]]):
                    collapsible_string += """
                    <strong>Passage """ + str(i + 1) + """</strong>
                    <p> """ + passage + """ </p>
                    """
                return {
                    selection: gr.HTML(collapsible_string),
                    scores_p: gr.Column(visible = False),
                    scores_g: gr.Column(visible = True),
                    eval_0: gr.Radio(value = None),
                    eval_1: gr.Slider(value = 3),
                    eval_2: gr.Slider(value = 3),
                    eval_3: gr.Slider(value = 3)
                }
            else:
                # More passages to go: display the next one and reset the
                # controls to their neutral defaults.
                return {
                    selection: gr.HTML("""
                    <h2> Retrieved Passage </h2>
                    <p> """ + current_question["top10_" + user_data["modes"][user_data["current"]][mode]][step] + "</p>"),
                    eval_0: gr.Radio(value = None),
                    eval_1: gr.Slider(value = 3),
                    eval_2: gr.Slider(value = 3),
                    eval_3: gr.Slider(value = 3)
                }
|
|
|
        def next_g(e_h, e_s):
            """Handle the generation-scoring "Next" button.

            e_h / e_s are the helpfulness and satisfaction slider values
            (only printed — see the NOTE on next_p). After the passages, the
            flow is driven by ``step``: step 11 shows the retriever's
            autogenerated response; later steps optionally show the gold
            passage and its generation (only when the top-10 did NOT contain
            the gold passage), then either reload with the second retriever's
            set (mode 0) or signal advancement via forward_btn.

            NOTE(review): ``step == 11`` hard-codes a 10-passage retrieval
            set — confirm it stays in sync with len(top10_*).
            """
            global step
            global mode
            step += 1
            print(e_h + e_s)
            if step == 11:
                # First press after the passage list: show the RAG answer
                # generated from this retriever's passage set.
                return {
                    selection: gr.HTML("""
                    <h2> Autogenerated Response </h2>
                    <p> """ + current_question["generation_" + user_data["modes"][user_data["current"]][mode]] + "</p>"),
                    eval_helps: gr.Slider(value = 1),
                    eval_satisfied: gr.Slider(value = 1)
                }
            if step > 11 and not current_question["top10_contains_gold_passage"]:
                # The gold passage was NOT retrieved: show it and its
                # generation explicitly before moving on.
                if mode == 0:
                    # First retriever finished — writing a new value to
                    # forward_btn fires toggle(), restarting with set two.
                    return {
                        selection: gr.HTML("<p> Loading second set... </p>") ,
                        forward_btn: gr.Textbox("load new data"),
                        eval_helps: gr.Slider(value = 1),
                        eval_satisfied: gr.Slider(value = 1)
                    }
                if step == 12:
                    return {
                        selection: gr.HTML("""
                        <h2> Retrieved Passage </h2>
                        <p> """ + current_question["gold_passage"] + "</p>"),
                        forward_btn: gr.Textbox(),
                        eval_helps: gr.Slider(value = 1),
                        eval_satisfied: gr.Slider(value = 1)
                    }
                elif step == 13:
                    return {
                        selection: gr.HTML("""
                        <h2> Autogenerated Response </h2>
                        <p> """ + current_question["gold_passage_generation"] + "</p>"),
                        forward_btn: gr.Textbox(),
                        eval_helps: gr.Slider(value = 1),
                        eval_satisfied: gr.Slider(value = 1)
                    }
                else:
                    # Both gold views shown: signal toggle() to move on.
                    return {
                        selection: gr.Markdown("Advancing to the next question..."),
                        forward_btn: gr.Textbox("changed"),
                        eval_helps: gr.Slider(value = 1),
                        eval_satisfied: gr.Slider(value = 1)
                    }
            else:
                # Gold passage was already among the top-10: nothing extra to
                # show for this retriever.
                if mode == 0:
                    return {
                        selection: gr.HTML("<p> Loading second set... </p>") ,
                        forward_btn: gr.Textbox("load new data"),
                        eval_helps: gr.Slider(value = 1),
                        eval_satisfied: gr.Slider(value = 1)
                    }
                return {
                    selection: gr.Markdown("Advancing to the next question..."),
                    forward_btn: gr.Textbox("changed"),
                    eval_helps: gr.Slider(value = 1),
                    eval_satisfied: gr.Slider(value = 1)
                }
        btn_p.click(fn = next_p, inputs = [eval_0, eval_1, eval_2, eval_3], outputs = [selection, scores_p, scores_g, eval_0, eval_1, eval_2, eval_3])
        btn_g.click(fn = next_g, inputs = [eval_helps, eval_satisfied], outputs = [selection, forward_btn, eval_helps, eval_satisfied])
|
|
|
|
|
    # Multiple-choice view: the question with each answer as a button.
    with gr.Row(equal_height = False, visible = False) as question:
        with gr.Column():
            gr.Markdown("**Question**")
            gr.Markdown(current_question["question"])
            a = gr.Button(current_question["answers"][0])
            b = gr.Button(current_question["answers"][1])
            c = gr.Button(current_question["answers"][2])
            d = gr.Button(current_question["answers"][3])

        def answer():
            # Any choice hides the question row and reveals the scoring view.
            # NOTE(review): the chosen answer is not recorded here — confirm
            # whether current_response["user_answer"] should be set.
            return {
                question: gr.Row(visible = False),
                evals: gr.Row(visible = True)
            }

        a.click(fn = answer, outputs = [question, evals])
        b.click(fn = answer, outputs = [question, evals])
        c.click(fn = answer, outputs = [question, evals])
        d.click(fn = answer, outputs = [question, evals])
|
|
|
    def toggle():
        """Fired by any write to the hidden forward_btn textbox: reset the
        passage counter and either (mode 0 -> 1) restart scoring with the
        second retriever's passage set, or (mode 1 -> 0) return to the
        question view for the next question."""
        global step
        global mode
        step = 0
        if mode == 0:
            mode = 1
            print("Next set of passages for same question")
            return {
                scores_p: gr.Column(visible = True),
                scores_g: gr.Column(visible = False),
                evals: gr.Row(visible = True),
                question: gr.Row(visible = False),
            }
        else:
            mode = 0
            print("New question")
            return {
                scores_p: gr.Column(visible = True),
                scores_g: gr.Column(visible = False),
                evals: gr.Row(visible = False),
                question: gr.Row(visible = True),
            }

    forward_btn.change(fn = toggle, inputs = None, outputs = [scores_p, scores_g, evals, question])
|
|
|
    # Login view shown first: the email doubles as the persistent user id.
    with gr.Row() as login:
        with gr.Column():
            gr.Markdown("# Enter email to start")
            gr.Markdown("Thank you so much for your participation in our study! We're using emails to keep track of which questions you've answered and which you haven't seen. Use the same email every time to keep your progress saved. :)")
            email = gr.Textbox(label = "Email", placeholder = "[email protected]")
            # Start stays disabled until the email field is non-empty.
            s = gr.Button("Start!", interactive = False)
|
|
|
def sanitize_login(text): |
|
if text == "": |
|
return {s: gr.Button(interactive = False)} |
|
else: |
|
return {s: gr.Button(interactive = True)} |
|
email.change(fn = sanitize_login, inputs = [email], outputs = [s]) |
|
|
|
    def submit_email(email):
        """Start-button handler: store the entered email as the global user
        id, load or create that user's saved record, and swap the login row
        for the question view. (The parameter shadows the `email` Textbox —
        harmless here, since only the submitted value is needed.)"""
        global user_id
        user_id = email
        load_user_data(user_id)
        return {
            question: gr.Row(visible = True),
            login: gr.Row(visible = False)
        }

    s.click(fn = submit_email, inputs = [email], outputs = [question, login])

# Start the Gradio app (blocking call).
user_eval.launch()
|
|
|
|