HW3 question2¶
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import numpy as np
Part A: Function to calculate number of weekdays¶
In [5]:
def nweekdays(date):
start_date = pd.Timestamp('2023-01-01')
end_date = pd.Timestamp(date)
dates = pd.date_range(start=start_date, end=end_date)
weekday_count = 0
for date in dates:
# weekday() returns 0-6 (Monday-Sunday)
if date.weekday() < 5: # 0-4 are weekdays
weekday_count += 1
return weekday_count
In [7]:
# Verification function using pd.bdate_range()
def verify_weekdays(date):
start_date = pd.Timestamp('2023-01-01')
end_date = pd.Timestamp(date)
return len(pd.bdate_range(start_date, end_date))
Part B: Import and process data¶
In [12]:
# Read the CSV file
df = pd.read_csv('vti.csv')
In [14]:
# Convert date column to datetime
df['date'] = pd.to_datetime(df['date'])
In [16]:
# Calculate weekdays for each date
df['cum_weekdays'] = df['date'].apply(nweekdays)
In [18]:
# Calculate number of weekdays in each month using diff()
df['weekdays_in_month'] = df['cum_weekdays'].diff()
In [20]:
# Fix first month manually
df.loc[0, 'weekdays_in_month'] = df.loc[0, 'cum_weekdays']
Part C: Calculate daily returns and create visualization¶
In [23]:
# Convert monthly returns to decimals
df['monthly_return'] = df['pct_return'] / 100
In [25]:
# Calculate daily returns using the compound formula
# R_m = (1 + R_d)^t - 1
# Solving for R_d: R_d = (1 + R_m)^(1/t) - 1
df['daily_return_pct'] = ((1 + df['monthly_return'])**(1/df['weekdays_in_month']) - 1) * 10000 # Convert to basis points
In [27]:
# Create bar plot
plt.figure(figsize=(12, 6))
plt.bar(df['date'], df['daily_return_pct'])
plt.title('Average Daily Returns by Month (in Basis Points)')
plt.xlabel('Date')
plt.ylabel('Average Daily Return (Basis Points)')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3)
plt.tight_layout()
In [29]:
# Display the results
print("\nFirst few rows of the processed data:")
print(df[['date', 'pct_return', 'weekdays_in_month', 'daily_return_pct']].head())
# Save the plot
plt.savefig('daily_returns.png')
plt.close()
First few rows of the processed data:
date pct_return weekdays_in_month daily_return_pct
0 2023-01-31 6.93 22.0 30.502895
1 2023-02-28 -2.40 20.0 -12.138973
2 2023-03-31 2.71 23.0 11.632542
3 2023-04-30 1.08 20.0 5.372491
4 2023-05-31 0.43 23.0 1.865731
In [ ]: