This program takes the ticker of publically traded companies as input and returns the value of the companies by extracting the financial data from Yahoo Finance.
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
import matplotlib.dates
import numpy as np
import re
%matplotlib inline
def getData(xtable,xdata):
xRows = xtable[0].findAll('tr')
for tr in xRows:
cols = tr.findAll('td')
for td in cols:
text = td.find(text=True)
xdata.append(text)
return xdata
def addTickerCol(data,tickerCol):
for value in tickerCol:
data.append(value)
return data
def getCols(xData,numCols):
cols = [Series(xData[i:i+numCols+1]) for i in range(0, len(xData),numCols+1)]
return cols
def getDF(xCols):
x_df = pd.concat(xCols, axis=1)
x_df.columns = x_df.iloc[0]
x_df = x_df[1:]
return x_df
def is_int(s):
try:
int(s)
return True
except ValueError:
return False
def getType(df):
for col in df.columns.values:
for i, value in enumerate(df[col]):
if value =='-':
df[col][i+1] = '0'
if (col!='Period Ending') & (col!='Ticker'):
df[col] = df[col].astype(int)
elif col=='Period Ending':
df[col] = pd.to_datetime(df[col])
return df
ticker = raw_input('Insert Ticker(s) of the publically traded company(ies) you would like to know the value of. Separate Tickers using commas.').upper()
removeChars = [' ','!','@','#','$','%','^','&','*','(',')','-','+','=','{','}','[',']',':',';','"',"'",'<','>','.','?']
for character in removeChars:
ticker = ticker.replace(character,'')
tickerList = ticker.split(',')
tickerList = filter(None, tickerList)
tickerSet = set(tickerList) # remove duplicates in list
print tickerSet
for ticker in tickerSet:
try:
incomeStatementURL = 'https://finance.yahoo.com/quote/{a}/financials?p={a}'.format(a = ticker)
incomeStatementPage = requests.get(incomeStatementURL)
incomeStatementPageParsed = BeautifulSoup(incomeStatementPage.content,'html.parser')
incomeStatementPageSummary = incomeStatementPageParsed.find('div',{'class':'Mt(10px) Ovx(a) W(100%)'})
incomeStatementTable = incomeStatementPageSummary.find_all('table')
except:
print("Oops! {a} is an invalid Ticker value. Please try again.".format(a=ticker))
incomeStatementDF = pd.DataFrame()
balanceSheetDF = pd.DataFrame()
#cashFlowDF = pd.DataFrame
for ticker in tickerSet:
incomeStatementURL = 'https://finance.yahoo.com/quote/{a}/financials?p={a}'.format(a = ticker)
balanceSheetURL = 'https://finance.yahoo.com/quote/{a}/balance-sheet?p={a}'.format(a = ticker)
#cashFlowURL = 'https://finance.yahoo.com/quote/{a}/cash-flow?p={a}'.format(a = ticker)
incomeStatementPage = requests.get(incomeStatementURL)
balanceSheetPage = requests.get(balanceSheetURL)
#cashFlowPage = requests.get(cashFlowURL)
incomeStatementPageParsed = BeautifulSoup(incomeStatementPage.content,'html.parser')
balanceSheetPageParsed = BeautifulSoup(balanceSheetPage.content,'html.parser')
#cashFlowPageParsed = BeautifulSoup(cashFlowPage.content,'html.parser')
incomeStatementPageSummary = incomeStatementPageParsed.find('div',{'class':'Mt(10px) Ovx(a) W(100%)'})
incomeStatementTable = incomeStatementPageSummary.find_all('table')
balanceSheetPageSummary = balanceSheetPageParsed.find('div',{'class':'Mt(10px) Mb(50px) Ovx(a) W(100%)'})
balanceSheetTable = balanceSheetPageSummary.find_all('table')
#cashFlowPageSummary = cashFlowPageParsed.find('div',{'class':'Mt(10px) Mb(50px) Ovx(a) W(100%)'})
#cashFlowTable = cashFlowPageSummary.find_all('table')
incomeStatementData = []
balanceSheetData = []
#cashFlowData = []
incomeStatementData = getData(incomeStatementTable,incomeStatementData)
balanceSheetData = getData(balanceSheetTable,balanceSheetData)
#cashFlowData = getData(cashFlowTable,cashFlowData)
incomeStatementData.remove('Net Income')
incomeStatementData = [e for e in incomeStatementData if e not in ('Operating Expenses','Income from Continuing Operations','Non-recurring Events')]
balanceSheetData = [e for e in balanceSheetData if e not in('Current Assets','Current Liabilities',"Stockholders' Equity")]
#cashFlowData = [e for e in cashFlowData if e not in ("Operating Activities, Cash Flows Provided By or Used In",'Investing Activities, Cash Flows Provided By or Used In','Financing Activities, Cash Flows Provided By or Used In')]
incomeStatementData[0]='Period Ending'
incomeStatementData = [value.replace(',','') for value in incomeStatementData]
balanceSheetData = [value.replace(',','') for value in balanceSheetData]
#cashFlowData = [value.replace(',','') for value in cashFlowData]
colsNumber=-1
for value in incomeStatementData:
if value == 'Total Revenue':
break
else:
colsNumber +=1
tickerCol = ['Ticker']
for i in range(colsNumber):
tickerCol.append(str(ticker))
addTickerCol(incomeStatementData,tickerCol)
addTickerCol(balanceSheetData,tickerCol)
#addTickerCol(cashFlowData,tickerCol)
incomeStatementCols = getCols(incomeStatementData, colsNumber)
balanceSheetCols = getCols(balanceSheetData, colsNumber)
#cashFlowCols = getCols(cashFlowData, colsNumber)
incomeStatement_df = getDF(incomeStatementCols)
balanceSheet_df = getDF(balanceSheetCols)
#cashFlow_df = getDF(cashFlowCols)
incomeStatement_df = getType(incomeStatement_df)
balanceSheet_df = getType(balanceSheet_df)
#cashFlow_df = getType(cashFlow_df)
if balanceSheet_df['Total Current Assets'].any() == 0: #.all()
balanceSheet_df['Total Current Assets'] = balanceSheet_df['Cash And Cash Equivalents'] + balanceSheet_df['Short Term Investments'] + balanceSheet_df['Net Receivables'] + balanceSheet_df['Inventory'] + balanceSheet_df['Other Current Assets']
if balanceSheet_df['Total Current Liabilities'].any() == 0:
balanceSheet_df['Total Current Liabilities'] = balanceSheet_df['Accounts Payable'] + balanceSheet_df['Short/Current Long Term Debt'] + balanceSheet_df['Other Current Liabilities']
incomeStatementDF = pd.concat([incomeStatementDF,incomeStatement_df])
balanceSheetDF = pd.concat([balanceSheetDF,balanceSheet_df])
#cashFlowDF = pd.concat([cashFlowDF,cashFlow_df])
#incomeStatementDF['Period Ending'] = pd.to_datetime(incomeStatementDF['Period Ending'])
#balanceSheetDF['Period Ending'] = pd.to_datetime(balanceSheetDF['Period Ending'])
#cashFlowDF['Period Ending'] = pd.to_datetime(cashFlowDF['Period Ending'])
incomeStatementDF.info()
incomeStatementDF
balanceSheetDF
# Ratios
ratiosDF = pd.DataFrame()
ratiosDF['Period Ending'] = incomeStatementDF['Period Ending']
ratiosDF['Ticker'] = incomeStatementDF['Ticker']
ratiosDF['Current Ratio'] = balanceSheetDF['Total Current Assets']/balanceSheetDF['Total Current Liabilities']
ratiosDF['Quick Ratio'] = (balanceSheetDF['Cash And Cash Equivalents']+balanceSheetDF['Net Receivables'])/balanceSheetDF['Total Current Liabilities']
ratiosDF['L-T Debt to Book Value of Equity'] = balanceSheetDF['Long Term Debt']/balanceSheetDF['Total Stockholder Equity']
ratiosDF['Gross Margin'] = incomeStatementDF['Gross Profit']/incomeStatementDF['Total Revenue']*100
ratiosDF['EBITDA Margin'] = incomeStatementDF['Income Before Tax']/incomeStatementDF['Total Revenue']*100
ratiosDF['Pre-Tax Income Margin'] = incomeStatementDF['Earnings Before Interest and Taxes']/incomeStatementDF['Total Revenue']*100
ratiosDF
# Function for Looping through Specific Ticker Value Ratios
def plotValues(dataFrame,ax,yValue):
for ticker in tickerSet:
df = dataFrame[dataFrame['Ticker']==ticker]
ax.plot_date(df['Period Ending'],df[yValue], ls='-', marker='o')
fig = plt.figure(figsize=(8,30))
ax1 = fig.add_subplot(611)
ax1.set_title('Current Ratio')
ax1.set_ylabel('Ratio Value')
ax1.grid(True)
plotValues(ratiosDF,ax1,'Current Ratio')
ax2 = fig.add_subplot(612)
ax2.set_title('Quick Ratio')
ax2.set_ylabel('Ratio Value')
ax2.grid(True)
plotValues(ratiosDF,ax2,'Quick Ratio')
ax3 = fig.add_subplot(613)
ax3.set_title('L-T Debt to Book Value of Equity')
ax3.set_ylabel('Percent (%)')
ax3.grid(True)
plotValues(ratiosDF,ax3,'L-T Debt to Book Value of Equity')
ax4 = fig.add_subplot(614)
ax4.set_title('Gross Margin')
ax4.set_ylabel('Percent (%)')
ax4.grid(True)
plotValues(ratiosDF,ax4,'Gross Margin')
ax5 = fig.add_subplot(615)
ax5.set_title('EBITDA Margin')
ax5.set_ylabel('Percent (%)')
ax5.grid(True)
plotValues(ratiosDF,ax5,'EBITDA Margin')
ax6 = fig.add_subplot(616)
ax6.set_title('Pre-Tax Income Margin')
ax6.set_ylabel('Percent (%)')
ax6.grid(True)
plotValues(ratiosDF,ax6,'Pre-Tax Income Margin')
fig.autofmt_xdate(rotation=45)
plt.legend(list(tickerSet))
#plt.figure(figsize=(1,1))
plt.show()