# priceprojection / app.py
# netflypsb's picture
# Update app.py
# bcf2525 verified
import streamlit as st
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from datetime import datetime, timedelta
import numpy as np
# Function to compute RSI
def compute_rsi(data, window):
    """Return a simple-moving-average Relative Strength Index for a price series.

    Args:
        data: pandas Series of prices (the script passes the 'Close' column).
        window: Number of periods used for the rolling mean of gains and losses.

    Returns:
        A pandas Series of RSI values. Its index is the index of ``data``
        without the labels whose period-over-period difference is NaN (at
        least the first one). A value is 100 when the window contains gains
        but no losses, and NaN when the window contains neither.
    """
    # Period-over-period change; the first element has no predecessor, so its
    # NaN difference is dropped along with any other undefined differences.
    diff = data.diff(1).dropna()
    # Split the changes into non-negative gain and loss magnitudes.
    gain = diff.where(diff > 0, 0)
    loss = -diff.where(diff < 0, 0)
    # min_periods=1 yields a value from the first difference onward instead of
    # waiting for a full window.
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    # A zero average loss gives rs = inf (RSI 100) when there are gains, and
    # 0/0 = NaN when the price did not move at all within the window.
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi
# Set up the Streamlit app
st.title("Stock Price Prediction")
st.write("This app uses historical data to predict future stock prices.")

# User input for stock ticker symbol. Surrounding whitespace is stripped so a
# stray space does not turn a valid symbol into a failed lookup.
ticker = st.text_input("Enter the stock ticker symbol:", value='AAPL').strip()
if not ticker:
    st.warning("Please enter a stock ticker symbol.")
    st.stop()

# Fetch stock historical data using yfinance (daily bars from 2020-01-01 up to today)
stock_data = yf.download(ticker, start='2020-01-01', end=datetime.today().strftime('%Y-%m-%d'))

# An unknown symbol or a failed request yields no rows; stop here with a clear
# message instead of failing further down on .iloc[-1] or inside scikit-learn.
if stock_data is None or stock_data.empty:
    st.error(f"No historical data found for '{ticker}'. Check the symbol and try again.")
    st.stop()

# Recent yfinance releases return (field, ticker) MultiIndex columns even for a
# single symbol; keep only the field level so 'Close', 'Open', ... are plain columns.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Move the date index into a regular 'Date' column
stock_data.reset_index(inplace=True)

# Display the historical data
st.write(f"Historical Data for {ticker}")
st.dataframe(stock_data.tail())
# Feature engineering: 10- and 50-period moving averages, 14-period RSI and
# the period-over-period return of the closing price
stock_data['MA_10'] = stock_data['Close'].rolling(window=10).mean()
stock_data['MA_50'] = stock_data['Close'].rolling(window=50).mean()
stock_data['RSI'] = compute_rsi(stock_data['Close'], window=14)
stock_data['Return'] = stock_data['Close'].pct_change()

# The rolling windows leave NaN in the leading rows (49 of them for MA_50);
# drop every row that is not fully populated.
stock_data.dropna(inplace=True)

# With too little history nothing (or a single row) survives the dropna and the
# train/test split below cannot be performed; stop with a readable message.
if len(stock_data) < 2:
    st.error("Not enough historical data to build features and train the models.")
    st.stop()

# Prepare features and target variable
X = stock_data[['Open', 'High', 'Low', 'Volume', 'MA_10', 'MA_50', 'RSI', 'Return']]
y = stock_data['Close']
# Replace any remaining gaps in the feature matrix with the column mean
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Hold out 20% of the rows using a seeded shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Standardise the features with statistics learned from the training rows only
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit both regressors on the scaled training rows
lr_model = LinearRegression().fit(X_train_scaled, y_train)
rf_model = RandomForestRegressor(
    random_state=42,
    n_estimators=100,
).fit(X_train_scaled, y_train)
# Predict future prices using ensemble method
feature_columns = ['Open', 'High', 'Low', 'Volume', 'MA_10', 'MA_50', 'RSI', 'Return']

# The next 14 calendar days after the last available row.
# NOTE(review): calendar days include weekends and holidays, when no close exists.
future_dates = [stock_data['Date'].iloc[-1] + timedelta(days=x) for x in range(1, 15)]
future_df = pd.DataFrame(index=future_dates, columns=stock_data.columns)

# Every future row carries the most recent observed value of each feature.
# NOTE(review): because all rows share identical features, both models return
# the same value for every date and the forecast is a flat line.
for column in feature_columns:
    future_df[column] = stock_data[column].iloc[-1]

# DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated in
# pandas 2.1+.
future_df = future_df.ffill()

# Handle missing values in future data, then apply the scaling fitted on the training rows
future_X = imputer.transform(future_df[feature_columns])
future_X_scaled = scaler.transform(future_X)
lr_predictions = lr_model.predict(future_X_scaled)
rf_predictions = rf_model.predict(future_X_scaled)

# Combine predictions (average)
combined_predictions = (lr_predictions + rf_predictions) / 2

# Display predictions
predictions_df = pd.DataFrame({'Date': future_dates, 'Predicted Close': combined_predictions})
predictions_df.set_index('Date', inplace=True)
st.write(f"Future Price Predictions for {ticker}")
st.dataframe(predictions_df)

# Plot the predictions
st.line_chart(predictions_df['Predicted Close'])