S&P 500 YTD Analysis

The code below does the following:

- Gets all of the tickers in the S&P 500 (an example for running the analysis on just a few tickers is also provided)
- Pulls YTD price data from Yahoo Finance and calculates YTD performance
- Counts and graphs how many tickers had positive returns and how many had negative returns
- Plots the YTD performance distribution as a box plot
- Plots the YTD performance distribution as a histogram
- Prints the top 10 and bottom 10 performing tickers

A copy of the jupyter notebook can be found here

The code below contains the library imports, the function that fetches stock data, and the ticker list:

# Import Libraries
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns

# Function to fetch historical stock prices
def get_stock_data(ticker, start_date, end_date):
    """Download price history for ``ticker`` via ``yf.download``.

    Args:
        ticker: Symbol passed straight through to ``yf.download``.
        start_date: Forwarded as the ``start`` argument.
        end_date: Forwarded as the ``end`` argument.

    Returns:
        Whatever ``yf.download`` returns; the rest of this script reads an
        'Adj Close' column from it.
    """
    return yf.download(ticker, start=start_date, end=end_date)
    
# Get Tickers
# Take the first table on the Wikipedia page; it is expected to expose a
# 'Symbol' column with one row per constituent.
sp_df = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
)[0]
# Swap every '.' in a symbol for '-' (presumably to match the ticker format
# Yahoo Finance expects -- confirm).
sp_df['Symbol'] = sp_df['Symbol'].str.replace('.', '-', regex=False)
tickers_list = sp_df['Symbol'].tolist()

# Smaller Test List
# tickers_list = ['AAPL', 'MSFT', 'GOOGL']

Below are the summarizing and plotting functions:

# Function to create and display a summary of returns
def summarize_returns(results_df):
    """Plot the YTD return distribution and print the best and worst tickers.

    Draws the positive/negative bar chart, the box plot and the histogram,
    then prints the 10 highest positive returns and the 10 most negative
    returns.

    Args:
        results_df: DataFrame with one row per ticker and a numeric
            'YTD Performance' column (percent).
    """
    # Display summary charts
    plot_bar_chart(results_df)
    plot_box_plot(results_df['YTD Performance'])
    plot_histogram(results_df['YTD Performance'])

    ytd = results_df['YTD Performance']

    # Best gainers first / worst losers first, so head(10) is the extreme
    # end of each group.
    positive_returns_df = results_df[ytd > 0].sort_values(by='YTD Performance', ascending=False)
    negative_returns_df = results_df[ytd < 0].sort_values(by='YTD Performance', ascending=True)

    # Display top 10 positive returns
    print("Top 10 Positive Returns:")
    print(positive_returns_df.head(10))

    # Display the 10 most negative returns. The frame is sorted ascending,
    # so these sit at the head; tail() would show the smallest losses.
    print("\nBottom 10 Negative Returns:")
    print(negative_returns_df.head(10))
    
# Function to create a bar chart of YTD returns
def plot_bar_chart(returns_df):
    """Show how many tickers fall on each side of a 0% YTD return.

    Counts are drawn as bars against the left axis; the same split expressed
    as a percentage is overlaid in translucent orange against a twin right
    axis.

    Args:
        returns_df: DataFrame with a numeric 'YTD Performance' column.
    """
    edges = [-float('inf'), 0, float('inf')]
    # pd.cut intervals are closed on the right by default, so a return of
    # exactly 0 is labelled 'Negative'.
    sign_labels = pd.cut(
        returns_df['YTD Performance'],
        bins=edges,
        labels=['Negative', 'Positive'],
    )
    counts = sign_labels.value_counts()
    percentages = sign_labels.value_counts(normalize=True).mul(100)

    fig, count_ax = plt.subplots()
    counts.plot.bar(ax=count_ax)
    count_ax.set_ylabel('Count')

    # Second y-axis sharing the same x-axis for the percentage scale
    pct_ax = count_ax.twinx()
    pct_ax.set_ylabel('Percentage')
    percentages.plot.bar(ax=pct_ax, color='orange', alpha=0.5)

    plt.show()

# Function to create a bubble chart of YTD returns
def plot_bubble_chart(results_df):
    """Plot one bubble per return range, sized by the number of tickers in it.

    Returns are bucketed into six fixed ranges (below -10%, -10% to -5%,
    -5% to 0%, 0% to 5%, 5% to 10%, above 10%). Each range gets a bubble
    whose area is proportional to its ticker count, and the legend lists
    every range with its count.

    The input DataFrame is not modified.

    Args:
        results_df: DataFrame with a numeric 'YTD Performance' column
            (percent).
    """
    # Local import: only needed to build the legend's proxy handles.
    from matplotlib.lines import Line2D

    # Create bins for returns
    bins = [-float('inf'), -10, -5, 0, 5, 10, float('inf')]
    bin_labels = ['-Inf to -10%', '-10% to -5%', '-5% to 0%', '0% to 5%', '5% to 10%', '10% and above']

    # Categorize returns into bins. Kept as a standalone Series rather than
    # written back as a column, so the caller's DataFrame is left untouched.
    return_bins = pd.cut(results_df['YTD Performance'], bins=bins, labels=bin_labels, include_lowest=True)

    # Count the companies in each bin. observed=False keeps empty bins (count
    # 0) in the result, so the counts line up one-to-one with bin_labels.
    return_counts = return_bins.groupby(return_bins, observed=False).size()

    # Create a bubble chart
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(bin_labels, [1] * len(bin_labels), s=return_counts.values * 10, alpha=0.5)

    # Add labels and title
    ax.set_xlabel('Return Bins')
    ax.set_title('Bubble Chart of YTD Returns')

    # Add a legend. The scatter call creates a single artist, so passing six
    # labels to ax.legend would show only the first; build one proxy handle
    # per bin instead.
    legend_handles = [
        Line2D([], [], linestyle='', marker='o', alpha=0.5, label=f'{bin_label}: {count}')
        for bin_label, count in zip(bin_labels, return_counts.values)
    ]
    ax.legend(handles=legend_handles, title='Return Range', loc='upper right', bbox_to_anchor=(1.3, 1))

    plt.show()
    
# Function to create a box plot of YTD Perf
def plot_box_plot(ytd_performance):
    """Draw a horizontal box plot of the YTD performance values.

    Args:
        ytd_performance: Values handed to ``sns.boxplot`` as ``x``.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(x=ytd_performance, ax=ax)
    ax.set_title('Box Plot for YTD Performance Distribution')
    plt.show()
  
# Function to create a histogram for YTD performance frequency
def plot_histogram(ytd_performance):
    """Draw a 20-bin histogram of the YTD performance values.

    Args:
        ytd_performance: Values to bin and plot.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    ax.hist(ytd_performance, bins=20, edgecolor='black')
    ax.set_title('Histogram for YTD Performance Frequency')
    ax.set_xlabel('YTD Performance')
    ax.set_ylabel('Frequency')
    plt.show()

And finally, the code that runs it all:

# Date window passed to get_stock_data for every ticker
START_DATE = '2023-01-01'
END_DATE = '2023-11-16'

# One dict per ticker; converted to a DataFrame in a single step after the
# loop instead of building and concatenating one DataFrame per ticker.
rows = []
skipped_tickers = []

# Loop through each company, fetch YTD performance, and store the results
for ticker in tickers_list:
    stock_data = get_stock_data(ticker, START_DATE, END_DATE)

    # A download that returns no rows would make .iloc[0] raise and abort
    # the whole run; record the ticker and move on instead.
    if stock_data is None or stock_data.empty:
        skipped_tickers.append(ticker)
        continue

    # NOTE(review): newer yfinance releases may default to auto_adjust=True,
    # in which case the 'Adj Close' column is absent -- confirm against the
    # installed version.
    adj_close = stock_data['Adj Close']
    start_price = adj_close.iloc[0]
    last_closed_price = adj_close.iloc[-1]

    rows.append({
        'Ticker': ticker,
        'Start Price': start_price,
        'Last Closed Price': last_closed_price,
        # Percent change from the first to the last adjusted close
        'YTD Performance': (last_closed_price / start_price - 1) * 100,
    })

if skipped_tickers:
    print(f"Skipped {len(skipped_tickers)} ticker(s) with no price data: {', '.join(skipped_tickers)}")

# Build the results table (explicit columns keep the schema even when empty)
# and sort it by YTD Performance, best first.
results_df = pd.DataFrame(
    rows,
    columns=['Ticker', 'Start Price', 'Last Closed Price', 'YTD Performance'],
).sort_values(by='YTD Performance', ascending=False)

# Call the plotting functions and summarize returns
if results_df.empty:
    print("No price data was downloaded; nothing to summarize.")
else:
    summarize_returns(results_df)