Spaces:

pseudolab
/

Balanced-News-Reading

App Files Files Community

Balanced-News-Reading / app.py

gabrielwithhappy

fix null example issue

d47bebc 12 months ago

raw

history blame contribute delete

4.18 kB

	import gradio as gr
	from newspaper import Article
	from newspaper import Config

	from transformers import pipeline
	import requests
	from bs4 import BeautifulSoup
	import re

	from bs4 import BeautifulSoup as bs
	import requests
	from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration

	# Load Model and Tokenize
	def get_summary(input_text):
	tokenizer = PreTrainedTokenizerFast.from_pretrained("ainize/kobart-news")
	summary_model = BartForConditionalGeneration.from_pretrained("ainize/kobart-news")
	input_ids = tokenizer.encode(input_text, return_tensors="pt")
	summary_text_ids = summary_model.generate(
	input_ids=input_ids,
	length_penalty=2,
	top_p=0.9,
	max_length=128,
	min_length=12,
	num_beams=2,
	)
	# "task_specific_params": {
	# "summarization": {
	# "length_penalty": 1.0,
	# "max_length": 128,
	# "min_length": 12,
	# "num_beams": 4
	# }
	return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)



	USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0'
	config = Config()
	config.browser_user_agent = USER_AGENT
	config.request_timeout = 10

	class news_collector:
	def __init__(self):
	self.examples_text = []


	def get_new_parser(self, url):
	article = Article(url, language='ko')
	article.download()
	article.parse()
	return article

	def get_news_links(self, page=''):
	url = "https://news.daum.net/breakingnews/economic"
	response = requests.get(url)
	html_text = response.text

	soup = bs(response.text, 'html.parser')
	news_titles = soup.select("a.link_txt")
	links = [item.attrs['href'] for item in news_titles ]
	https_links = [item for item in links if item.startswith('https') == True]
	https_links
	return https_links


	def update_news_examples(self):
	news_links = self.get_news_links()

	for news_url in news_links:
	article = self.get_new_parser(news_url)
	if article.text:
	self.examples_text.append([get_summary(article.text[:1500]), news_url])


	title = "균형잡힌 뉴스 읽기 (Balanced News Reading)"



	with gr.Blocks(theme='pseudolab/huggingface-korea-theme') as demo:

	collector = news_collector()
	collector.update_news_examples()

	with gr.Tab("소개"):
	gr.Markdown(
	"""
	# 균형잡힌 뉴스 읽기 (Balanced News Reading)

	긍정적인 기사와 부정적인 기사인지 확인하여 뉴스를 읽을 수 있습니다. 최근 경제뉴스기사를 가져와 Example에서 바로 확인할 수 있도록 구성했습니다.

	## 1. 사용방법
	Daum뉴스의 경제 기사를 가져와 내용을 요약하고 `Example`에 가져옵니다. 감정 분석을 하고 싶은 기사를 `Examples`에서 선택해서 `Submit`을 누르면 `Classification`에
	해당 기사의 감정 평가 결과가 표시됩니다. 감정평가는 각 상태의 확률 정보와 함께 `neutral`, `positive`, `negative` 3가지로 표시됩니다.

	## 2. 구조 설명
	뉴스기사를 크롤링 및 요약 모델을 이용한 기사 요약 >> 기사 요약정보 Example에 추가 >> 한국어 fine-tunning한 감정평가 모델을 이용해 입력된 기사에 대한 감정 평가 진행
	""")

	with gr.Tab("데모"):
	Link_TXT = gr.Textbox(label="뉴스 내용", placeholder = "뉴스 기사 내용을 입력하세요.")
	gr.load("models/gabrielyang/finance_news_classifier-KR_v7",
	# gr.load("models/Hyeonseo/ko-finance_news_classifier",
	inputs = Link_TXT)
	Link_URL = gr.Textbox(label="뉴스 URL")

	# diable due to dynamic loading
	# update_button = gr.Button(value="뉴스 데이터 업데이트")
	# update_button.click(fn=collector.update_news_examples_and_update, inputs=None, outputs=None)

	gr.Examples(
	collector.examples_text,
	[Link_TXT, Link_URL],
	)

	if __name__ == "__main__":
	demo.launch()