"""Streamlit app that graphs a financial sentence against predefined keywords.

The sentence and a fixed keyword list are embedded with a
Sentence-Transformers model. Cosine similarity between the sentence and
each keyword becomes an edge weight in a star-shaped graph, and the
keywords are grouped with KMeans so that each cluster gets its own colour.
"""

import streamlit as st
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

KEYWORDS = [
    "Finance", "Fiscal", "Quarterly results", "Revenue", "Profit", "Loss",
    "Net income", "Gross margin", "Operating expenses", "Cash flow",
    "Shareholders", "Guidance", "Forecast", "Market performance",
    "Stock price", "Capital expenditures", "Acquisitions", "Mergers", "Debt",
    "Financial ratios", "Growth", "Outlook", "Visibility", "Revenue growth",
    "Market trends", "Profitability", "Efficiency", "Projections",
    "Performance",
]

# One colour per keyword cluster; the number of clusters follows this list.
CLUSTER_COLORS = ["lightblue", "lightgreen", "lightyellow"]
# The sentence is not clustered, so it gets a colour of its own.
SENTENCE_COLOR = "salmon"
# Fixed seed so cluster colours and node positions are stable across reruns.
RANDOM_SEED = 0


@st.cache_resource
def _load_model():
    """Return the Sentence-Transformers model, constructed once per process.

    Streamlit re-executes the script on every interaction; caching keeps the
    model from being rebuilt each time.
    """
    return SentenceTransformer(MODEL_NAME)


def _build_graph(sentence, keywords, sentence_vector, keyword_vectors,
                 similarities, cluster_labels):
    """Build the star graph linking *sentence* to every keyword.

    Keyword nodes carry ``embedding``, ``similarity`` and ``cluster``
    attributes; every sentence-keyword edge carries the cosine similarity as
    ``weight``. The sequences *keywords*, *keyword_vectors*, *similarities*
    and *cluster_labels* are consumed in lockstep, one entry per keyword.
    """
    graph = nx.Graph()
    graph.add_node(sentence, embedding=sentence_vector)
    for keyword, vector, similarity, label in zip(
        keywords, keyword_vectors, similarities, cluster_labels
    ):
        # Labels are paired with the keywords they were computed from, not
        # with graph.nodes, whose first entry is the sentence node.
        graph.add_node(
            keyword,
            embedding=vector,
            similarity=similarity,
            cluster=int(label),
        )
        graph.add_edge(sentence, keyword, weight=similarity)
    return graph


def _render_graph(graph):
    """Draw *graph*, save it as ``financial_graph.png`` and show it in the app."""
    fig, ax = plt.subplots()
    pos = nx.spring_layout(graph, seed=RANDOM_SEED)

    # Nodes without a "cluster" attribute (the sentence) use SENTENCE_COLOR.
    node_colors = [
        CLUSTER_COLORS[attrs["cluster"]] if "cluster" in attrs else SENTENCE_COLOR
        for _, attrs in graph.nodes(data=True)
    ]
    nx.draw_networkx_nodes(
        graph, pos, node_color=node_colors, node_size=800, ax=ax
    )
    nx.draw_networkx_edges(
        graph, pos, edge_color="gray", width=1, alpha=0.7, ax=ax
    )
    nx.draw_networkx_labels(graph, pos, font_size=10, font_weight="bold", ax=ax)

    # Edge labels show the cosine similarity, rounded to keep the plot legible.
    edge_labels = {
        edge: f"{weight:.2f}"
        for edge, weight in nx.get_edge_attributes(graph, "weight").items()
    }
    nx.draw_networkx_edge_labels(
        graph, pos, edge_labels=edge_labels, font_size=8, ax=ax
    )

    ax.set_title("Financial Context and Keywords")
    ax.axis("off")

    fig.savefig("financial_graph.png")
    # Pass the figure explicitly instead of relying on pyplot's global state.
    st.pyplot(fig)
    # Release the figure; otherwise one figure per rerun stays registered.
    plt.close(fig)


def main():
    """Run the app: read a sentence, then show its keyword graph and scores.

    Nothing beyond the title and input box is rendered until the user has
    entered a non-blank sentence.
    """
    st.title("Financial Graph App")
    st.write(
        "Enter a financial sentence and see its similarity to predefined "
        "keywords."
    )

    financial_sentence = st.text_area("Enter the financial sentence", value="")
    if not financial_sentence.strip():
        return

    model = _load_model()

    # encode() is given a one-element list, so row 0 is the sentence vector.
    sentence_embedding = model.encode([financial_sentence])
    keyword_embeddings = model.encode(KEYWORDS)
    similarity_scores = cosine_similarity(
        sentence_embedding, keyword_embeddings
    )[0]

    # KMeans clustering of the keyword embeddings (the sentence is excluded).
    kmeans = KMeans(
        n_clusters=len(CLUSTER_COLORS), n_init=10, random_state=RANDOM_SEED
    )
    cluster_labels = kmeans.fit_predict(keyword_embeddings)

    graph = _build_graph(
        financial_sentence,
        KEYWORDS,
        sentence_embedding[0],
        keyword_embeddings,
        similarity_scores,
        cluster_labels,
    )
    _render_graph(graph)

    # Show the similarity scores as a table.
    df = pd.DataFrame(
        {"Keyword": KEYWORDS, "Cosine Similarity": similarity_scores}
    )
    st.write("Similarity Scores:")
    st.dataframe(df)


if __name__ == "__main__":
    main()