import pandas as pd
import matplotlib.pyplot as plt
df_acct = pd.read_csv('Accounting_Data.csv',parse_dates=['datadate'])
df_stock = pd.read_csv('Stock_Data.csv',parse_dates=['date'])
df_acct = df_acct[(df_acct['fic']=='USA') & (df_acct['oibdp'].notna()) & (df_acct['sale']>0)]
df_acct.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 51519 entries, 0 to 76105
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   GVKEY     51519 non-null  int64
 1   LPERMNO   51519 non-null  int64
 2   datadate  51519 non-null  datetime64[ns]
 3   fyear     51519 non-null  float64
 4   indfmt    51519 non-null  object
 5   consol    51519 non-null  object
 6   popsrc    51519 non-null  object
 7   datafmt   51519 non-null  object
 8   conm      51519 non-null  object
 9   curcd     51519 non-null  object
 10  oibdp     51519 non-null  float64
 11  sale      51519 non-null  float64
 12  costat    51519 non-null  object
 13  fic       51519 non-null  object
dtypes: datetime64[ns](1), float64(3), int64(2), object(8)
memory usage: 5.9+ MB
df_acct['fic'].unique()
array(['USA'], dtype=object)
df_stock['RET'] = pd.to_numeric(df_stock['RET'], errors='coerce')
df_stock = df_stock[df_stock['RET'].notna()]
df_stock.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1184089 entries, 0 to 1216805
Data columns (total 7 columns):
 #   Column  Non-Null Count    Dtype
---  ------  --------------    -----
 0   PERMNO  1184089 non-null  int64
 1   date    1184089 non-null  datetime64[ns]
 2   SHRCD   1184089 non-null  float64
 3   EXCHCD  1184089 non-null  float64
 4   PRC     1184089 non-null  float64
 5   RET     1184089 non-null  float64
 6   SHROUT  1184089 non-null  float64
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 72.3 MB
df_acct14 = df_acct[df_acct['datadate']=='2014-12-31'].copy()  # .copy() so later column assignments don't trigger SettingWithCopyWarning
df_acct14['operating_margin'] = df_acct14['oibdp'] / df_acct14['sale']
df_acct14
| | GVKEY | LPERMNO | datadate | fyear | indfmt | consol | popsrc | datafmt | conm | curcd | oibdp | sale | costat | fic | operating_margin |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16 | 1045 | 21020 | 2014-12-31 | 2014.0 | INDL | C | D | STD | AMERICAN AIRLINES GROUP INC | USD | 6585.000 | 42650.000 | A | USA | 0.154396 |
| 30 | 1050 | 11499 | 2014-12-31 | 2014.0 | INDL | C | D | STD | CECO ENVIRONMENTAL CORP | USD | 35.600 | 263.217 | A | USA | 0.135250 |
| 66 | 1075 | 27991 | 2014-12-31 | 2014.0 | INDL | C | D | STD | PINNACLE WEST CAPITAL CORP | USD | 1228.600 | 3491.632 | A | USA | 0.351870 |
| 80 | 1076 | 10517 | 2014-12-31 | 2014.0 | INDL | C | D | STD | PROG HOLDINGS INC | USD | 1194.234 | 2725.239 | A | USA | 0.438213 |
| 94 | 1078 | 20482 | 2014-12-31 | 2014.0 | INDL | C | D | STD | ABBOTT LABORATORIES | USD | 4475.000 | 20247.000 | A | USA | 0.221020 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75271 | 276731 | 13705 | 2014-12-31 | 2014.0 | INDL | C | D | STD | PLANET PAYMENT INC | USD | 8.141 | 47.369 | I | USA | 0.171863 |
| 75381 | 282553 | 82678 | 2014-12-31 | 2014.0 | INDL | C | D | STD | CDTI ADVANCED MATERIALS INC | USD | -5.752 | 41.231 | A | USA | -0.139507 |
| 75544 | 287462 | 12615 | 2014-12-31 | 2014.0 | INDL | C | D | STD | FUTUREFUEL CORP | USD | 65.134 | 341.838 | A | USA | 0.190541 |
| 75578 | 289735 | 14060 | 2014-12-31 | 2014.0 | INDL | C | D | STD | VISLINK TECHNOLOGIES INC | USD | -15.368 | 0.628 | A | USA | -24.471338 |
| 75824 | 315318 | 14406 | 2014-12-31 | 2014.0 | INDL | C | D | STD | ELEMENT SOLUTIONS INC | USD | 211.600 | 843.200 | A | USA | 0.250949 |
2959 rows × 15 columns
weak = df_acct14['operating_margin'].quantile(0.25)
robust = df_acct14['operating_margin'].quantile(0.75)
df_acct14['portfolio_2014'] = 'Neither'
df_acct14.loc[df_acct14['operating_margin']>=robust,'portfolio_2014'] = 'Robust'
df_acct14.loc[df_acct14['operating_margin']<=weak,'portfolio_2014'] = 'Weak'
df_acct14
| | GVKEY | LPERMNO | datadate | fyear | indfmt | consol | popsrc | datafmt | conm | curcd | oibdp | sale | costat | fic | operating_margin | portfolio_2014 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 16 | 1045 | 21020 | 2014-12-31 | 2014.0 | INDL | C | D | STD | AMERICAN AIRLINES GROUP INC | USD | 6585.000 | 42650.000 | A | USA | 0.154396 | Neither |
| 30 | 1050 | 11499 | 2014-12-31 | 2014.0 | INDL | C | D | STD | CECO ENVIRONMENTAL CORP | USD | 35.600 | 263.217 | A | USA | 0.135250 | Neither |
| 66 | 1075 | 27991 | 2014-12-31 | 2014.0 | INDL | C | D | STD | PINNACLE WEST CAPITAL CORP | USD | 1228.600 | 3491.632 | A | USA | 0.351870 | Robust |
| 80 | 1076 | 10517 | 2014-12-31 | 2014.0 | INDL | C | D | STD | PROG HOLDINGS INC | USD | 1194.234 | 2725.239 | A | USA | 0.438213 | Robust |
| 94 | 1078 | 20482 | 2014-12-31 | 2014.0 | INDL | C | D | STD | ABBOTT LABORATORIES | USD | 4475.000 | 20247.000 | A | USA | 0.221020 | Neither |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75271 | 276731 | 13705 | 2014-12-31 | 2014.0 | INDL | C | D | STD | PLANET PAYMENT INC | USD | 8.141 | 47.369 | I | USA | 0.171863 | Neither |
| 75381 | 282553 | 82678 | 2014-12-31 | 2014.0 | INDL | C | D | STD | CDTI ADVANCED MATERIALS INC | USD | -5.752 | 41.231 | A | USA | -0.139507 | Weak |
| 75544 | 287462 | 12615 | 2014-12-31 | 2014.0 | INDL | C | D | STD | FUTUREFUEL CORP | USD | 65.134 | 341.838 | A | USA | 0.190541 | Neither |
| 75578 | 289735 | 14060 | 2014-12-31 | 2014.0 | INDL | C | D | STD | VISLINK TECHNOLOGIES INC | USD | -15.368 | 0.628 | A | USA | -24.471338 | Weak |
| 75824 | 315318 | 14406 | 2014-12-31 | 2014.0 | INDL | C | D | STD | ELEMENT SOLUTIONS INC | USD | 211.600 | 843.200 | A | USA | 0.250949 | Neither |
2959 rows × 16 columns
robust_top10 = df_acct14[df_acct14['portfolio_2014'] == 'Robust'].sort_values(by='sale', ascending=False).head(10)
weak_top10 = df_acct14[df_acct14['portfolio_2014'] == 'Weak'].sort_values(by='sale', ascending=False).head(10)
print('The 10 companies with the highest sales among the "robust" firms are:')
for company in robust_top10['conm']:
print(company)
print('\nThe 10 companies with the highest sales among the "weak" firms are:')
for company in weak_top10['conm']:
print(company)
The 10 companies with the highest sales among the "robust" firms are:
JPMORGAN CHASE & CO
BANK OF AMERICA CORP
CITIGROUP INC
WELLS FARGO & CO
INTEL CORP
PFIZER INC
MERCK & CO
GOLDMAN SACHS GROUP INC
PHILIP MORRIS INTERNATIONAL
MCDONALD'S CORP

The 10 companies with the highest sales among the "weak" firms are:
PHILLIPS 66
AMAZON.COM INC
INGRAM MICRO INC
WORLD KINECT CORP
ARROW ELECTRONICS INC
MANPOWERGROUP
PBF ENERGY INC
HF SINCLAIR CORP
SUNOCO LOGISTICS PARTNERS LP
GLOBAL PARTNERS LP
Some of these match my expectations and some do not. For example, JPMorgan having the highest sales among the robust firms is no surprise, but I did not expect to see Amazon among the weak firms.
Large financial firms such as JPMorgan Chase, Bank of America, and Citigroup dominate the robust list, which makes sense given their high operating margins. There are also major healthcare companies such as Pfizer and Merck, along with firms from other sectors such as Intel (technology), McDonald's (fast food), and Philip Morris (tobacco). These companies all have strong pricing power and solid profitability, which is why they fall into this category.
The "weak" firms, on the other hand, include many energy companies such as Phillips 66 and PBF Energy, which might be due to the thin margins and challenges typical of the oil and gas industry. It is also surprising to see Amazon.com on this list, but it fits the company's strategy of reinvesting profits into growth rather than maximizing margins. Other weak firms, such as ManpowerGroup, are labor-intensive businesses that tend to run on low margins.
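As a quick check on this reading, the margins behind the two lists can be printed directly. This is a minimal sketch reusing the robust_top10 and weak_top10 frames built above (column names as in df_acct14); the exact display format is just one option.
# Inspect the operating margins driving the classification of the two top-10 lists.
print(robust_top10[['conm', 'sale', 'operating_margin']].to_string(index=False))
print(weak_top10[['conm', 'sale', 'operating_margin']].to_string(index=False))
print('Median margin, robust top 10:', round(robust_top10['operating_margin'].median(), 3))
print('Median margin, weak top 10:', round(weak_top10['operating_margin'].median(), 3))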
df_merged_2014 = df_stock.merge(df_acct14,left_on='PERMNO',right_on='LPERMNO',how='left')
df_merged_2014 = df_merged_2014[['PERMNO','date','SHRCD','EXCHCD','PRC','RET','SHROUT','portfolio_2014']] # keep only stock + portfolio
df_merged_2014
| | PERMNO | date | SHRCD | EXCHCD | PRC | RET | SHROUT | portfolio_2014 |
|---|---|---|---|---|---|---|---|---|
| 0 | 10001 | 2010-12-31 | 11.0 | 2.0 | 10.52000 | 0.052291 | 7834.0 | Neither |
| 1 | 10001 | 2011-01-31 | 11.0 | 2.0 | 10.78000 | 0.028992 | 7834.0 | Neither |
| 2 | 10001 | 2011-02-28 | 11.0 | 2.0 | 10.98000 | 0.022727 | 7834.0 | Neither |
| 3 | 10001 | 2011-03-31 | 11.0 | 2.0 | 11.73000 | 0.072404 | 7834.0 | Neither |
| 4 | 10001 | 2011-04-29 | 11.0 | 2.0 | 11.23000 | -0.038789 | 8151.0 | Neither |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1184084 | 93436 | 2023-08-31 | 11.0 | 3.0 | 258.07999 | -0.034962 | 3173994.0 | Weak |
| 1184085 | 93436 | 2023-09-29 | 11.0 | 3.0 | 250.22000 | -0.030456 | 3179000.0 | Weak |
| 1184086 | 93436 | 2023-10-31 | 11.0 | 3.0 | 200.84000 | -0.197346 | 3178921.0 | Weak |
| 1184087 | 93436 | 2023-11-30 | 11.0 | 3.0 | 240.08000 | 0.195379 | 3178921.0 | Weak |
| 1184088 | 93436 | 2023-12-29 | 11.0 | 3.0 | 248.48000 | 0.034988 | 3178921.0 | Weak |
1184089 rows × 8 columns
df_merged_2014['month'] = df_merged_2014['date'].dt.to_period('M')
robust_portfolio = df_merged_2014[df_merged_2014['portfolio_2014'] == 'Robust']
weak_portfolio = df_merged_2014[df_merged_2014['portfolio_2014'] == 'Weak']
robust_monthly_returns = robust_portfolio.groupby('month')['RET'].mean()
weak_monthly_returns = weak_portfolio.groupby('month')['RET'].mean()
print(f'Monthly returns on equal-weighted "robust" portfolios:\n{robust_monthly_returns}\n')
print(f'Monthly returns on equal-weighted "weak" portfolios:\n{weak_monthly_returns}')
Monthly returns on equal-weighted "robust" portfolios:
month
2010-12 0.086163
2011-01 0.023940
2011-02 0.038231
2011-03 0.000274
2011-04 0.005887
...
2023-08 -0.054656
2023-09 -0.042530
2023-10 -0.036356
2023-11 0.084919
2023-12 0.119861
Freq: M, Name: RET, Length: 157, dtype: float64
Monthly returns on equal-weighted "weak" portfolios:
month
2010-12 0.103697
2011-01 0.011388
2011-02 0.045315
2011-03 0.010604
2011-04 0.017101
...
2023-08 -0.082733
2023-09 -0.090763
2023-10 -0.100211
2023-11 0.065622
2023-12 0.118084
Freq: M, Name: RET, Length: 157, dtype: float64
robust_monthly_returns = robust_monthly_returns.reset_index(name='monthly_return')
weak_monthly_returns = weak_monthly_returns.reset_index(name='monthly_return')
robust_monthly_returns['gross_return'] = 1 + robust_monthly_returns['monthly_return']
weak_monthly_returns['gross_return'] = 1 + weak_monthly_returns['monthly_return']
robust_monthly_returns_filtered = robust_monthly_returns[robust_monthly_returns['month']>'2014-12'].copy()  # .copy() avoids SettingWithCopyWarning on the assignments below
weak_monthly_returns_filtered = weak_monthly_returns[weak_monthly_returns['month']>'2014-12'].copy()
robust_monthly_returns_filtered['cumulative_return'] = robust_monthly_returns_filtered['gross_return'].cumprod()
weak_monthly_returns_filtered['cumulative_return'] = weak_monthly_returns_filtered['gross_return'].cumprod()
print("robust monthly returns")
display(robust_monthly_returns_filtered.tail(1))
print("\nweak monthly returns")
display(weak_monthly_returns_filtered.tail(1))
robust monthly returns
| | month | monthly_return | gross_return | cumulative_return |
|---|---|---|---|---|
| 156 | 2023-12 | 0.119861 | 1.119861 | 2.126719 |
weak monthly returns
| | month | monthly_return | gross_return | cumulative_return |
|---|---|---|---|---|
| 156 | 2023-12 | 0.118084 | 1.118084 | 1.637034 |
print(f"The cumulative returns to represent the value of $1 invested at the end of 2014 in 'robust' portfolio \
over the 2014-2023 period is {robust_monthly_returns_filtered.loc[robust_monthly_returns_filtered['month']=='2023-12','cumulative_return'].values}\n")
print(f"The cumulative returns to represent the value of $1 invested at the end of 2014 in 'weak' portfolio \
over the 2014-2023 period is {weak_monthly_returns_filtered.loc[weak_monthly_returns_filtered['month']=='2023-12','cumulative_return'].values}")
The cumulative returns to represent the value of $1 invested at the end of 2014 in 'robust' portfolio over the 2014-2023 period is [2.1267191] The cumulative returns to represent the value of $1 invested at the end of 2014 in 'weak' portfolio over the 2014-2023 period is [1.63703419]
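For context, the same numbers can be expressed as rough annualized returns. This is a small back-of-the-envelope sketch using the cumulative values printed above and assuming a nine-year horizon (end-2014 to end-2023).
# Rough annualization of the printed cumulative returns (assumes a 9-year holding period).
years = 9
robust_cum, weak_cum = 2.1267191, 1.63703419
print(f"Robust: {robust_cum ** (1/years) - 1:.1%} per year")  # roughly 8.7% annualized
print(f"Weak:   {weak_cum ** (1/years) - 1:.1%} per year")    # roughly 5.6% annualized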
robust_monthly_returns_filtered['month'] = robust_monthly_returns_filtered['month'].dt.to_timestamp()
weak_monthly_returns_filtered['month'] = weak_monthly_returns_filtered['month'].dt.to_timestamp()
plt.figure(figsize=(12, 6))
plt.plot(robust_monthly_returns_filtered['month'], robust_monthly_returns_filtered['cumulative_return'],label="Robust Portfolio")
plt.plot(weak_monthly_returns_filtered['month'], weak_monthly_returns_filtered['cumulative_return'],label="Weak Portfolio")
plt.title('Cumulative Returns of Robust vs Weak Portfolios (2015-2023)')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.show()
Over the past decade, the Robust Portfolio has generally outperformed the Weak Portfolio in terms of cumulative returns.
However, during the pandemic period around 2020, the Weak Portfolio temporarily surpassed the Robust Portfolio, likely driven by short-term market dynamics or specific sector performance.
Despite this brief stretch of outperformance, the Robust Portfolio recovered and regained its lead. By the end of 2023 its cumulative return was clearly higher (about $2.13 versus $1.64 per $1 invested at the end of 2014), highlighting the long-term benefit of holding high-profitability firms even through periods of market volatility.
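To pin down exactly when the Weak portfolio was ahead, the two cumulative-return series can be compared month by month. A minimal sketch reusing the filtered frames plotted above:
# Months in which the Weak portfolio's cumulative return exceeds the Robust portfolio's.
comparison = robust_monthly_returns_filtered[['month', 'cumulative_return']].merge(
    weak_monthly_returns_filtered[['month', 'cumulative_return']],
    on='month', suffixes=('_robust', '_weak'))
ahead = comparison[comparison['cumulative_return_weak'] > comparison['cumulative_return_robust']]
print(ahead['month'].min(), 'to', ahead['month'].max())  # prints NaT if the Weak series never leads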
df_acct23 = df_acct[df_acct['datadate']=='2023-12-31'].copy()  # .copy() avoids SettingWithCopyWarning
df_acct23['operating_margin'] = df_acct23['oibdp'] / df_acct23['sale']
weak_2023 = df_acct23['operating_margin'].quantile(0.25)
robust_2023 = df_acct23['operating_margin'].quantile(0.75)
df_acct23['portfolio_2023'] = 'Neither'
df_acct23.loc[df_acct23['operating_margin']>=robust_2023,'portfolio_2023'] = 'Robust'
df_acct23.loc[df_acct23['operating_margin']<=weak_2023,'portfolio_2023'] = 'Weak'
df_merged_2014_2023 = df_merged_2014.merge(df_acct23, left_on='PERMNO',right_on='LPERMNO',how='left')
df_merged_2014_2023 = df_merged_2014_2023[['PERMNO','date','SHRCD','EXCHCD','PRC','RET','SHROUT','portfolio_2014','portfolio_2023']] # keep only stock + portfolio
df_merged_2014_2023['portfolio_2014'] = df_merged_2014_2023['portfolio_2014'].fillna("Doesn't Exist")
df_merged_2014_2023['portfolio_2023'] = df_merged_2014_2023['portfolio_2023'].fillna("Doesn't Exist")
df_merged_2014_2023_unique_firm = df_merged_2014_2023.drop_duplicates(subset=['PERMNO'])
df_merged_2014_2023_unique_firm
| | PERMNO | date | SHRCD | EXCHCD | PRC | RET | SHROUT | portfolio_2014 | portfolio_2023 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 10001 | 2010-12-31 | 11.0 | 2.0 | 10.52 | 0.052291 | 7834.0 | Neither | Doesn't Exist |
| 80 | 10002 | 2010-12-31 | 11.0 | 3.0 | 2.67 | 0.063745 | 17639.0 | Doesn't Exist | Doesn't Exist |
| 106 | 10025 | 2010-12-31 | 11.0 | 3.0 | 25.95 | 0.079002 | 6144.0 | Doesn't Exist | Doesn't Exist |
| 179 | 10026 | 2010-12-31 | 11.0 | 3.0 | 48.24 | 0.056995 | 18542.0 | Doesn't Exist | Doesn't Exist |
| 336 | 10028 | 2010-12-31 | 11.0 | 2.0 | 4.28 | 0.043903 | 9834.0 | Weak | Neither |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1183680 | 93432 | 2010-12-31 | 11.0 | 3.0 | 5.72 | -0.059211 | 12800.0 | Doesn't Exist | Doesn't Exist |
| 1183686 | 93433 | 2010-12-31 | 11.0 | 3.0 | 18.57 | -0.375588 | 43084.0 | Weak | Doesn't Exist |
| 1183758 | 93434 | 2010-12-31 | 11.0 | 3.0 | 3.30 | 0.067961 | 5800.0 | Doesn't Exist | Doesn't Exist |
| 1183915 | 93435 | 2010-12-31 | 11.0 | 3.0 | 6.61 | -0.027941 | 23452.0 | Doesn't Exist | Doesn't Exist |
| 1183932 | 93436 | 2010-12-31 | 11.0 | 3.0 | 26.63 | -0.246250 | 94908.0 | Weak | Neither |
15668 rows × 9 columns
cross_tab = df_merged_2014_2023_unique_firm.groupby(['portfolio_2014', 'portfolio_2023']).size().unstack()
cross_tab
| portfolio_2014 \ portfolio_2023 | Doesn't Exist | Neither | Robust | Weak |
|---|---|---|---|---|
| Doesn't Exist | 11386 | 525 | 205 | 593 |
| Neither | 581 | 729 | 149 | 20 |
| Robust | 303 | 59 | 372 | 6 |
| Weak | 407 | 181 | 23 | 129 |
169 stocks moved from 'Neither' to another category (not counting 'Doesn't Exist'), 65 moved from 'Robust', and 204 moved from 'Weak', so 438 stocks changed category in total.
729 firms that were 'Neither' in 2014 are still 'Neither' in 2023, 372 are still 'Robust', and 129 are still 'Weak', so 1,230 firms keep their 2014 classification.
1,323 firms that had no 2014 classification show up in the 2023 portfolios.
Portfolio composition therefore drifts considerably over time. Of the 1,668 firms classified in both years, 438 (about a quarter) switched category, with a number of "Robust" firms becoming "Weak" or "Neither" and vice versa. In addition, 1,291 of the 2,959 firms classified in 2014 have no 2023 classification at all, and 1,323 newly classified firms appear in 2023, so the universe turns over substantially and a buy-and-hold investor misses these new entrants. Without rebalancing, the portfolio becomes increasingly unrepresentative of the original strategy.
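The counts quoted above can be read directly off cross_tab; a short sketch (row and column labels as in the table above):
# Firms that kept or changed their category between 2014 and 2023, among those classified in both years.
existing = ['Neither', 'Robust', 'Weak']
switched = {p: int(cross_tab.loc[p, existing].sum() - cross_tab.loc[p, p]) for p in existing}
stayed = {p: int(cross_tab.loc[p, p]) for p in existing}
new_firms = int(cross_tab.loc["Doesn't Exist", existing].sum())
print('switched:', switched, '| total:', sum(switched.values()))  # 169 / 65 / 204 -> 438
print('stayed:  ', stayed, '| total:', sum(stayed.values()))      # 729 / 372 / 129 -> 1230
print('newly classified in 2023:', new_firms)                     # 1323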
df_5yrs = df_acct[(df_acct['fyear'] >= 2010) & (df_acct['fyear'] <= 2014)].copy()  # .copy() avoids SettingWithCopyWarning
df_5yrs['operating_margin'] = df_5yrs['oibdp'] / df_5yrs['sale']
non_missing_firms = df_5yrs.groupby('LPERMNO').size() == 5
valid_firms = non_missing_firms[non_missing_firms].index
df_5yrs = df_5yrs[df_5yrs['LPERMNO'].isin(valid_firms)]
average_margins = df_5yrs.groupby('LPERMNO')['operating_margin'].mean().reset_index()
average_margins.rename(columns={'operating_margin': 'avg_operating_margin'}, inplace=True)
weak_5yrs = average_margins['avg_operating_margin'].quantile(0.25)
robust_5yrs = average_margins['avg_operating_margin'].quantile(0.75)
average_margins['portfolio_5yrs'] = 'Neither'
average_margins.loc[average_margins['avg_operating_margin'] >= robust_5yrs, 'portfolio_5yrs'] = 'Robust'
average_margins.loc[average_margins['avg_operating_margin'] <= weak_5yrs, 'portfolio_5yrs'] = 'Weak'
combined_portfolios_5yrs = df_merged_2014_2023.merge(average_margins, left_on='PERMNO',right_on='LPERMNO',how='left')
combined_portfolios_5yrs = combined_portfolios_5yrs[['PERMNO','date','SHRCD','EXCHCD','PRC','RET','SHROUT','portfolio_2014','portfolio_2023','portfolio_5yrs']] # keep only stock + portfolio
combined_portfolios_5yrs['portfolio_5yrs'] = combined_portfolios_5yrs['portfolio_5yrs'].fillna('Doesn’t Exist')
combined_portfolios_5yrs_unique_firm = combined_portfolios_5yrs.drop_duplicates(subset=['PERMNO'])
combined_portfolios_5yrs_unique_firm
| | PERMNO | date | SHRCD | EXCHCD | PRC | RET | SHROUT | portfolio_2014 | portfolio_2023 | portfolio_5yrs |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10001 | 2010-12-31 | 11.0 | 2.0 | 10.52 | 0.052291 | 7834.0 | Neither | Doesn't Exist | Neither |
| 80 | 10002 | 2010-12-31 | 11.0 | 3.0 | 2.67 | 0.063745 | 17639.0 | Doesn't Exist | Doesn't Exist | Doesn’t Exist |
| 106 | 10025 | 2010-12-31 | 11.0 | 3.0 | 25.95 | 0.079002 | 6144.0 | Doesn't Exist | Doesn't Exist | Doesn’t Exist |
| 179 | 10026 | 2010-12-31 | 11.0 | 3.0 | 48.24 | 0.056995 | 18542.0 | Doesn't Exist | Doesn't Exist | Doesn’t Exist |
| 336 | 10028 | 2010-12-31 | 11.0 | 2.0 | 4.28 | 0.043903 | 9834.0 | Weak | Neither | Weak |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1183680 | 93432 | 2010-12-31 | 11.0 | 3.0 | 5.72 | -0.059211 | 12800.0 | Doesn't Exist | Doesn't Exist | Doesn’t Exist |
| 1183686 | 93433 | 2010-12-31 | 11.0 | 3.0 | 18.57 | -0.375588 | 43084.0 | Weak | Doesn't Exist | Weak |
| 1183758 | 93434 | 2010-12-31 | 11.0 | 3.0 | 3.30 | 0.067961 | 5800.0 | Doesn't Exist | Doesn't Exist | Doesn’t Exist |
| 1183915 | 93435 | 2010-12-31 | 11.0 | 3.0 | 6.61 | -0.027941 | 23452.0 | Doesn't Exist | Doesn't Exist | Doesn’t Exist |
| 1183932 | 93436 | 2010-12-31 | 11.0 | 3.0 | 26.63 | -0.246250 | 94908.0 | Weak | Neither | Weak |
15668 rows × 10 columns
cross_tab_5yrs = combined_portfolios_5yrs_unique_firm.groupby(['portfolio_2014', 'portfolio_5yrs']).size().unstack()
cross_tab_5yrs
| portfolio_2014 \ portfolio_5yrs | Doesn’t Exist | Neither | Robust | Weak |
|---|---|---|---|---|
| Doesn't Exist | 12401 | 184 | 23 | 101 |
| Neither | 262 | 974 | 101 | 142 |
| Robust | 169 | 59 | 501 | 11 |
| Weak | 326 | 37 | 3 | 374 |
The portfolios formed on 5-year average operating margins differ noticeably from those formed on 2014 margins alone. The "Robust" portfolio is the more stable of the two, with 501 firms classified as "Robust" under both methods; the 5-year averages also pull in 101 firms from "Neither" and 3 from "Weak," showing how multi-year averaging picks up firms with consistently strong profitability. The "Weak" portfolio is more variable: 374 firms are "Weak" under both methods, but many firms shift, including 142 "Neither" firms and 101 firms with no 2014 classification that end up "Weak." The "Neither" category overlaps substantially, with 974 firms, consistent with its middle-ground nature. Overall, using 5-year averages gives a more stable picture of profitability and reduces the impact of short-term anomalies.
However, the 5-year approach also leaves many firms unclassified: 13,158 firms in the stock file have no 5-year portfolio assignment (12,401 of them were unclassified in 2014 as well), because they either lack operating-margin data for all five years or never appear in the filtered accounting sample.
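The five-year data requirement can be checked directly against df_acct. A hedged sketch, re-deriving the year counts rather than reusing df_5yrs (which was already filtered to valid firms above):
# How many firms have accounting data for every fiscal year 2010-2014, and how many are dropped
# by the exactly-five-years rule? (Firms absent from the accounting file entirely are not counted here.)
counts_2010_2014 = df_acct[(df_acct['fyear'] >= 2010) & (df_acct['fyear'] <= 2014)].groupby('LPERMNO').size()
print('firms with all five fiscal years:', (counts_2010_2014 == 5).sum())
print('firms with only 1-4 years (dropped):', (counts_2010_2014 < 5).sum())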