import gradio as gr
import os

# PERSISTENT DATA STORAGE: used to upload user responses to a dataset
import json
from datetime import datetime
from pathlib import Path
from uuid import uuid4
from huggingface_hub import CommitScheduler

JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"train-{uuid4()}.json"

# Periodically commits everything in JSON_DATASET_DIR to the dataset repo
scheduler = CommitScheduler(
    repo_id="ebrowne/test-data",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="data",
    token=os.getenv("HF_TOKEN"),
)


def save_json(score1, score2):
    # Append one evaluation record per line; the lock keeps writes from racing
    # with the scheduler's background commits.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump(
                {
                    "relevance": score1,
                    "novelty": score2,
                    "datetime": datetime.now().isoformat(),
                },
                f,
            )
            f.write("\n")


# READING EXISTING DATA: used to read questions from the dataset
from datasets import load_dataset

qa_data = load_dataset("ebrowne/test-data", data_files="test.json")
q = qa_data["train"][0]  # loaded question data

# VARIABLES: will eventually be loaded with JSON from a dataset
"""
question_text = q["prompt"] + " " + q["question"]
answers_text = [q["a"], q["b"], q["c"], q["d"]]
"""
question_text = (
    "An act of Congress provides that \"no federal court shall order the implementation of a "
    "public school desegregation plan that would require the transportation of any student to a "
    "school other than the school closest or next closest to his place of residence.\" Which of "
    "the following is the strongest argument for the constitutionality of the act?"
)
answers_text = [
    "The Fourteenth Amendment authorizes Congress to define governmental conduct which violates the equal protection clause.",
    "Under Article III, Congress may restrict the jurisdiction of the federal courts.",
    "Transportation of students is subject to regulation by Congress because commerce is involved.",
    "Congress provides partial support for public education and is therefore entitled to establish conditions upon the expenditure of federal grants.",
]
answer_id = 1

# BLOCKS: main user interface
with gr.Blocks() as user_eval:
    # Title text introducing the study
    gr.Markdown("""
    # Legal Retriever Evaluation Study
    Thank you for your participation! Here are some basic instructions on how to complete the legal study.
    """)

    # Passages and the user's evaluations of them
    with gr.Row(equal_height=False, visible=False) as evals:
        # Passage text
        with gr.Column(scale=2) as passages:
            answers_text[answer_id] = "**" + answers_text[answer_id] + "**"  # bold the correct answer
            passage_display = gr.Markdown("""
            ### Question and Answer
            """ + question_text +
            """ \n """ + answers_text[0] +
            """ \n """ + answers_text[1] +
            """ \n """ + answers_text[2] +
            """ \n """ + answers_text[3] +
            """
            ### Relevant Passages
            - Dataset 1
            - Dataset 2
            - More text
            - More text
            - More text
            - More text

            ### Auto-Generated Summary
            This is a summary of the above legal passages, which imitates how a RAG system might \
            incorporate retrieved data into its context to give a better response to a certain query.
""") # Scoring box with gr.Column(scale = 1) as scores: desc_1 = gr.Markdown("How **relevant** is this passage to the question?") eval_1 = gr.Slider(1, 5, step = 0.5) desc_2 = gr.Markdown("How would you rate the passage's **quality** in terms of detail, clarity, and focus?") eval_2 = gr.Slider(1, 5, step = 0.5) desc_3 = gr.Markdown("How effectively does the passage **lead you to the correct answer?**") eval_3 = gr.Slider(1, 5, step = 0.5) btn = gr.Button("Next") def next(eval_1, eval_2, eval_3): print(eval_1 + eval_2 + eval_3) btn.click(fn = next, inputs = [eval_1, eval_2, eval_3]) # Question and answering dynamics with gr.Row() as question: with gr.Column(): gr.Markdown("---") gr.Markdown("**Question**") gr.Markdown(question_text) a = gr.Button(answers_text[0]) b = gr.Button(answers_text[1]) c = gr.Button(answers_text[2]) d = gr.Button(answers_text[3]) def answer(): return { question: gr.Row(visible = False), evals: gr.Row(visible = True) } a.click(fn = answer, outputs = [question, evals]) b.click(fn = answer, outputs = [question, evals]) c.click(fn = answer, outputs = [question, evals]) d.click(fn = answer, outputs = [question, evals]) # Starts on question, switches to evaluation after the user answers user_eval.launch()