"""Streamlit app that generates multiple-choice questions from uploaded PDFs.

Flow: upload one or more PDFs, press "Process", the extracted text is passed
to ``Questgen``'s ``main2.QGen().predict_mcq`` and the resulting questions are
rendered, offered as a CSV download, and optionally shown as raw output.
"""
import importlib
import importlib.util
import subprocess
import sys
from pprint import pprint  # noqa: F401  (kept: file-level import from the original)

SPACY_MODEL = "en_core_web_sm"
# Key under which the last generated output is kept across Streamlit reruns.
OUTPUT_STATE_KEY = "generated_mcq_output"
# Columns of the generator output that are not exported to the CSV.
CSV_DROPPED_COLUMNS = ['options_algorithm', 'extra_options', 'context', 'question_type']
# The answer is shown as "A"; the first three distractors get these letters.
OPTION_LETTERS = ("B", "C", "D")


def _ensure_spacy_model(model_name=SPACY_MODEL):
    """Download the spaCy model only if it is not already installed.

    Streamlit re-executes the whole script on every widget interaction, so an
    unconditional download would be repeated on each click.
    """
    if importlib.util.find_spec(model_name) is not None:
        return
    # sys.executable guarantees the model lands in the interpreter running
    # this app rather than whichever "python" happens to be first on PATH.
    cmd = [sys.executable, "-m", "spacy", "download", model_name]
    # Best effort, like the original: a failed download is not fatal here.
    subprocess.run(cmd, check=False)
    # Make the freshly installed package visible to this running process.
    importlib.invalidate_caches()


# Runs before the Questgen import, preserving the original ordering.
# NOTE(review): presumably Questgen needs the model at import/construct time.
_ensure_spacy_model()

import nltk  # noqa: E402
import pandas as pd  # noqa: E402
import streamlit as st  # noqa: E402
from PyPDF2 import PdfReader  # noqa: E402
from Questgen import main, main2  # noqa: E402,F401
from spacy.cli import download  # noqa: E402,F401
from transformers import pipeline  # noqa: E402,F401


def _ensure_nltk_punkt():
    """Fetch the NLTK 'punkt' tokenizer data only when it is missing."""
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')


_ensure_nltk_punkt()


def get_pdf_text(pdf_docs):
    """Return the concatenated text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of paths or file-like objects accepted by
            ``PdfReader``. ``None`` or an empty iterable yields ``""``.

    Returns:
        A single string with the page texts joined in document/page order.
    """
    page_texts = []
    for pdf in pdf_docs or []:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may yield None for pages without a text layer.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)


@st.cache_resource
def _load_question_generator():
    """Build the question generator once and reuse it across reruns.

    NOTE(review): assumes ``main2.QGen`` instances can be shared between
    calls/sessions (no per-request state) -- confirm against Questgen.
    """
    return main2.QGen()


def _render_questions(questions):
    """Render each question with its answer (A) and up to three options."""
    st.header(body='*Generated Questions are:*', divider='orange')
    for question in questions:
        st.subheader(
            body=f":orange[Q{question['id']}:] {question['question_statement']}",
            divider='blue',
        )
        st.markdown(f"A: {question['answer']}")
        # zip stops after three options, matching the B/C/D labels available.
        for letter, option in zip(OPTION_LETTERS, question['options']):
            st.markdown(f"{letter}: {option}")


def _render_csv_download(questions):
    """Offer the generated questions as a CSV file download."""
    df = pd.DataFrame(questions)
    # errors='ignore': do not fail if the generator omits one of the columns.
    df = df.drop(labels=CSV_DROPPED_COLUMNS, axis=1, errors='ignore')
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label='Download Data',
        data=csv,
        file_name='Generated MCQs.csv',
        mime='text/csv',
    )


st.title('Generate Questions from PDFs')
uploaded_files = st.file_uploader(label='Upload', accept_multiple_files=True)

if st.button(label='Process'):
    raw_text = get_pdf_text(uploaded_files)
    if not raw_text.strip():
        # Nothing to generate from: drop any stale result and tell the user.
        st.session_state.pop(OUTPUT_STATE_KEY, None)
        st.warning('No text could be extracted. Please upload at least one PDF containing text.')
    else:
        payload = {
            'input_text': raw_text,
        }
        st.session_state[OUTPUT_STATE_KEY] = _load_question_generator().predict_mcq(payload=payload)

# st.button is True only for the rerun caused by the click. Reading the result
# from session state keeps it visible when the download button or the toggle
# below trigger another rerun.
output = st.session_state.get(OUTPUT_STATE_KEY)

if output is not None:
    questions = output.get('questions') if isinstance(output, dict) else None
    if questions:
        _render_questions(questions)
        _render_csv_download(questions)
    else:
        st.info('No questions could be generated from the provided text.')

    if st.toggle(label='Show Raw Output'):
        st.write(output)