Spaces:

varun500
/

knowledge_graph

Runtime error

App Files Files Community

knowledge_graph / app.py

varun500

Update app.py

92d98fe over 1 year ago

raw

history blame contribute delete

4.46 kB

	import streamlit as st
	import networkx as nx
	import matplotlib.pyplot as plt
	import pandas as pd
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	from sklearn.cluster import KMeans


	def main():
	st.title("Financial Graph App")
	st.write("Enter a financial sentence and see its similarity to predefined keywords.")

	# User input
	financial_sentence = st.text_area("Enter the financial sentence", value="")

	# Check if the user entered a sentence
	if financial_sentence.strip() != "":
	# Predefined keywords
	keywords = [
	"Finance",
	"Fiscal",
	"Quarterly results",
	"Revenue",
	"Profit",
	"Loss",
	"Net income",
	"Gross margin",
	"Operating expenses",
	"Cash flow",
	"Shareholders",
	"Guidance",
	"Forecast",
	"Market performance",
	"Stock price",
	"Capital expenditures",
	"Acquisitions",
	"Mergers",
	"Debt",
	"Financial ratios",
	"Growth",
	"Outlook",
	"Visibility",
	"Revenue growth",
	"Market trends",
	"Profitability",
	"Efficiency",
	"Projections",
	"Performance"
	]

	# Load the pre-trained Sentence-Transformers model
	model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
	st.set_option('deprecation.showPyplotGlobalUse', False)
	# Generate word embeddings for the financial sentence and keywords
	sentence_embedding = model.encode([financial_sentence])
	keyword_embeddings = model.encode(keywords)

	# Calculate cosine similarity between the sentence embedding and keyword embeddings
	similarity_scores = cosine_similarity(sentence_embedding, keyword_embeddings)[0]

	# Create a graph
	G = nx.Graph()

	# Add the sentence embedding as a node to the graph
	G.add_node(financial_sentence, embedding=sentence_embedding[0])

	# Add the keyword embeddings as nodes to the graph
	for keyword, embedding, similarity in zip(keywords, keyword_embeddings, similarity_scores):
	G.add_node(keyword, embedding=embedding, similarity=similarity)

	# Add edges between the sentence and keywords with their similarity scores as weights
	for keyword, similarity in zip(keywords, similarity_scores):
	G.add_edge(financial_sentence, keyword, weight=similarity)

	# Perform KNN clustering on the keyword embeddings
	kmeans = KMeans(n_clusters=3)
	cluster_labels = kmeans.fit_predict(keyword_embeddings)

	# Add cluster labels as node attributes
	for node, cluster_label in zip(G.nodes, cluster_labels):
	G.nodes[node]["cluster"] = cluster_label

	# Set node positions using spring layout
	pos = nx.spring_layout(G)

	# Get unique cluster labels
	unique_clusters = set(cluster_labels)

	# Assign colors to clusters
	cluster_colors = ["lightblue", "lightgreen", "lightyellow"]

	# Draw nodes with cluster colors
	nx.draw_networkx_nodes(
	G,
	pos,
	node_color=[cluster_colors[G.nodes[node].get("cluster", 0)] for node in G.nodes],
	node_size=800,
	)

	# Draw edges
	nx.draw_networkx_edges(G, pos, edge_color="gray", width=1, alpha=0.7)

	# Draw labels
	nx.draw_networkx_labels(G, pos, font_size=10, font_weight="bold")

	# Draw edge labels (cosine similarity scores)
	edge_labels = nx.get_edge_attributes(G, "weight")
	nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)

	# Set plot attributes
	plt.title("Financial Context and Keywords")
	plt.axis("off")

	# Save the graph as an image
	plt.savefig("financial_graph.png")

	# Show the graph
	st.pyplot()

	# Save the similarity scores in a CSV file
	df = pd.DataFrame({"Keyword": keywords, "Cosine Similarity": similarity_scores})
	st.write("Similarity Scores:")
	st.dataframe(df)


	if __name__ == "__main__":
	main()