Class Day 3 - Data Scraping and Data Manipulation

Day #3-Copy1

Day# 3 Notes

Data Scraping

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##"Magic Command"
%matplotlib inline
In [2]:
x = np.arange(0,20,1)
y = np.random.normal(10,1,20)
z = np.random.normal(10,2,20)
In [3]:
fig = plt.figure()
ax = fig.add_subplot(111)
# (111)
# (1) row, (1) column, (1) chart
ax.plot(x,y)
Out[3]:
[<matplotlib.lines.Line2D at 0x1b75c198>]
In [4]:
fig = plt.figure()
ax1 = fig.add_subplot(121)
# (1) row, (2) column, (1) chart
ax1.plot(x,y)
ax2 = fig.add_subplot(122)
# (1) row, (2) column, (2) chart
ax2.scatter(y,z)
Out[4]:
<matplotlib.collections.PathCollection at 0x1bbcd518>
In [5]:
fig = plt.figure()
ax1 = fig.add_subplot(223)
ax1.plot(x,y)
ax2 = fig.add_subplot(222)
ax2.scatter(y,z)
ax3 = fig.add_subplot(221)
ax4 = fig.add_subplot(224)
ax3.hist(y)
ax4.boxplot([y,z])
Out[5]:
{'boxes': [<matplotlib.lines.Line2D at 0x1c35ca58>,
  <matplotlib.lines.Line2D at 0x1c41e898>],
 'caps': [<matplotlib.lines.Line2D at 0x1c2ae668>,
  <matplotlib.lines.Line2D at 0x1c1b8eb8>,
  <matplotlib.lines.Line2D at 0x1c42d940>,
  <matplotlib.lines.Line2D at 0x1c42deb8>],
 'fliers': [<matplotlib.lines.Line2D at 0x1c41e358>,
  <matplotlib.lines.Line2D at 0x1c43a9e8>],
 'means': [],
 'medians': [<matplotlib.lines.Line2D at 0x1c405da0>,
  <matplotlib.lines.Line2D at 0x1c43a470>],
 'whiskers': [<matplotlib.lines.Line2D at 0x1c35cb38>,
  <matplotlib.lines.Line2D at 0x1c2b9c88>,
  <matplotlib.lines.Line2D at 0x1c41ee10>,
  <matplotlib.lines.Line2D at 0x1c42d3c8>]}
In [6]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,y, label = 'Thing #1')
ax.plot(x,y, label = 'Thing #2')
ax.legend()
Out[6]:
<matplotlib.legend.Legend at 0x811b0f0>
In [7]:
fig = plt.figure()
ax1 = fig.add_subplot(121)
ax1.plot(x,y)
ax1.set_title("Line")
ax2 = fig.add_subplot(122)
ax2.scatter(y,z)
ax2.set_title("Scatter")
fig.suptitle('Results')
Out[7]:
<matplotlib.text.Text at 0x1c9978d0>
In [8]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(x,y)
ax.set_ylabel('$$$ Money', fontsize=10)
ax.set_xlabel('Years', fontsize = 20)

# Save Figure to PNG File
fig.savefig('PNGfigure.png')

#Save Figure to JPG File
fig.savefig('JPGfigure.jpg')
In [9]:
# Here is an examples of what a JSON (JavaScript Object Notation) looks like:
json_obj = """
{
    "zoo_animal": "Lion",
    "food": ["Meat","Veggies","Honey"],
    "fur": "Golden",
    "clothes": null,
    "diet": [{"zoo_animal": "Gazelle", "food": "grass", "fur": "Brown"}]
}
"""
In [10]:
type(json_obj)
Out[10]:
str
In [11]:
import json

# Importing JSON Objection into our data
data = json.loads(json_obj)
type(data)
Out[11]:
dict
In [12]:
data
Out[12]:
{u'clothes': None,
 u'diet': [{u'food': u'grass', u'fur': u'Brown', u'zoo_animal': u'Gazelle'}],
 u'food': [u'Meat', u'Veggies', u'Honey'],
 u'fur': u'Golden',
 u'zoo_animal': u'Lion'}
In [13]:
# Convert Dictionary back into a JSON Object

json_saved = json.dumps(data)
In [14]:
# Create DataFrames using Pandas

import pandas as pd
dframe = pd.DataFrame(data['diet'])
dframe1 = pd.DataFrame(data['food'])
In [15]:
dframe
Out[15]:
food fur zoo_animal
0 grass Brown Gazelle
In [16]:
dframe1
Out[16]:
0
0 Meat
1 Veggies
2 Honey
In [17]:
#Concatinating the data frames. Adding the 2 data frames together

frames = [dframe,dframe1]
result = pd.concat(frames)
result
Out[17]:
0 food fur zoo_animal
0 NaN grass Brown Gazelle
0 Meat NaN NaN NaN
1 Veggies NaN NaN NaN
2 Honey NaN NaN NaN
In [18]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from pandas import Series, DataFrame
In [19]:
# 

url = 'http://www.ucop.edu/operating-budget/budgets-and-reports/legislative-reports/2013-14-legislative-session.html'
result = requests.get(url)
result
Out[19]:
<Response [200]>
In [20]:
result.headers['content-type']
Out[20]:
'text/html'
In [21]:
result.encoding
Out[21]:
'ISO-8859-1'
In [22]:
result.content
Out[22]:
'\n<!DOCTYPE html>\n<!--[if lt IE 9]><html class="lte-ie8 no-js"  lang="en"><![endif]-->\n<!--[if gt IE 8]><!--><html lang="en" class="no-js"><!--<![endif]-->\n<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">\n<head>\n<meta content="IE=edge" http-equiv="X-UA-Compatible" />\r\n<meta charset="utf-8" />\r\n<meta content="width=device-width, initial-scale=1.0" name="viewport" />    \n<meta content="" name="description"/>\n<meta content="" name="author"/>\n<title>Legislative reports | UCOP</title>\n<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->\n<!--[if lt IE 9]>\n      <script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>\n    <![endif]-->\n<!-- Le styles -->\n<!-- main.css - see /_common/files/css/main.less non-minified sources -->\n<link href="/_common/files/css/main.css?v=1.2" media="screen" rel="stylesheet"/>\n<link href="/_common/files/css/print.css" media="print" rel="stylesheet"/>\n<!-- Le fav and touch icons -->\n<link href="/_common/files/img/ico/favicon.ico" rel="shortcut icon"/>\n<!-- <link href="/files/img/ico/apple-touch-icon.png" rel="apple-touch-icon"/>\n<link href="/files/img/ico/apple-touch-icon-72x72.png" rel="apple-touch-icon" sizes="72x72"/>\n<link href="/files/img/ico/apple-touch-icon-114x114.png" rel="apple-touch-icon" sizes="114x114"/> -->\n<!-- Custom styles -->\n<!--[if gt IE 8]><!--><link href="/_common/files/css/ff-old.css" rel="stylesheet"/><!--<![endif]-->\n  \n \n<script src="/_common/files/js/protection.js"></script>  \n<![CDATA[]]>\n\n</head>\n<body><a class="skip-link" href="#content" title="skip to content">skip to content</a>\n<div id="bg-left"></div>\n<div id="bg-right"></div>\n<div id="wrapper">\n  <div class="container"> \n    <!-- Pills\n    ================================================== -->\n    <div class="row mast">\n      <div class="span8 main-nav offset4">\n        <div class="navbar">\n          <div class="navbar-inner">\n             <a class="btn btn-navbar" data-target=".nav-collapse" data-toggle="collapse"> Main Menu <b class="caret">\xc2\xa0</b> </a> \n            <div aria-labelledby="ariatopnav" class="nav-collapse" role="navigation">\n              <ul class="nav pull-right" id="ariatopnav">\n<li class=""><a href="http://www.ucop.edu/">Home</a></li>\n<li class=""><a href="http://www.ucop.edu/about">About</a></li>\n\n<li class="dropdown "> <a class="dropdown-toggle disabled" data-toggle="dropdown" href="http://www.ucop.edu/organization"> Organization <b class="caret">\xc2\xa0</b></a>\n<div class="dropdown-menu"> \n\n<!-- links -->              \n<div class="row">\n\n\n<div class="leftcol">\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/finance-office/index.html">Chief Financial Officer</a></li>\n<li><a href="http://www.ucop.edu/operating-budget/index.html">Budget Analysis &amp; Planning</a></li>\n<li><a href="http://www.ucop.edu/capital-asset-strategies-finance/index.html">Capital Asset Strategies &amp; Finance</a></li>\n<li><a href="http://www.ucop.edu/financial-accounting/index.html">Financial Accounting</a></li>\n<li><a href="http://www.ucop.edu/procurement-services/index.html">Procurement Services</a></li>\n<li><a href="http://www.ucop.edu/risk-services/index.html">Risk Services</a></li>\n</ul>\n\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/business-operations/index.html">Chief Operating Officer</a></li>\n<li><a href="http://www.ucop.edu/energy-sustainability/index.html">Energy &amp; Sustainability</a></li>\n<li><a href="http://www.ucop.edu/human-resources/index.html">Human Resources</a></li>\n<li><a href="http://www.ucop.edu/information-technology-services/index.html">Information Technology Services</a></li>\n<li><a href="http://www.ucop.edu/operational-services/index.html">Operational Services</a></li>\n<li><a href="http://www.ucop.edu/pmo/index.html">Strategy and Program Management Office</a></li>\n<li><a href="http://www.ucop.edu/ucop-operations/index.html">UCOP Operations</a></li>\n</ul>\n</div>\n\n<div class="midcol">\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/academic-affairs/index.html">Academic Affairs</a></li>\n<li><a href="http://www.ucop.edu/academic-personnel-programs/index.html">Academic Personnel &amp; Programs</a></li>\n<li><a href="http://www.ucop.edu/diversity-engagement/index.html">Diversity &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-research-academic-planning/index.html">Institutional Research &amp; Academic Planning</a></li>\n<li><a href="http://www.ucop.edu/research-graduate-studies/index.html">Research &amp; Graduate Studies</a></li>\n<li><a href="http://www.ucop.edu/student-affairs/index.html">Student Affairs</a></li>\n</ul>\n\n<ul>  \n<li><a class="mega-header" href="http://www.ucop.edu/government-relations/index.html">Government Relations</a></li>    \n<li><a href="http://www.ucop.edu/federal-governmental-relations/index.html">Federal Governmental Relations</a></li>\n<li><a href="http://www.ucop.edu/state-governmental-relations/index.html">State Governmental Relations</a></li>\n</ul>\n\n<ul>    \n<li><a class="mega-header" href="http://www.ucop.edu/public-affairs/index.html">Public Affairs</a></li>\n<li><a href="http://www.ucop.edu/alumni-constituent-affairs/index.html">Alumni &amp; Constituent Affairs</a></li>\n<li><a href="http://www.ucop.edu/executive-communications/index.html">Executive Communications &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-advancement/index.html">Institutional Advancement</a></li>\n<li><a href="http://www.ucop.edu/marketing-communications/index.html">Marketing Communications</a></li>\n</ul>\n</div>\n\n<div class="rightcol">\n<ul>\n<li><a class="mega-others" href="http://ucanr.edu/">Agriculture &amp; Natural Resources</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/investment-office/index.html">Chief Investment Officer</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/ethics-compliance-audit-services/index.html">Ethics, Compliance &amp; Audit Services</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/general-counsel/index.html">General Counsel</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/uc-health/index.html">UC Health</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/impac/index.html">Issues Management, Policy Analysis &amp; Coordination</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/media-relations/index.html">Media Relations</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/innovation-entrepreneurship/index.html">Office of Innovation &amp; Entrepreneurship</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/title-ix/index.html">Systemwide Title IX Office</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/laboratory-management/index.html">UC National Laboratories</a><br/></li>\n<li><a class="mega-others" href="http://senate.universityofcalifornia.edu/">Academic Senate</a></li>\n<li><a class="mega-others" href="http://regents.universityofcalifornia.edu/">Board of Regents</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/president/index.html">President Napolitano</a></li>\n</ul>\n</div>\n\n</div>\n</div>\n</li>\n<li class=""><a href="http://www.ucop.edu/initiatives">Initiatives</a></li>\n</ul> <ul class="nav pull-right" id="topmenu">\n<li><a href="http://jobs.universityofcalifornia.edu">Jobs</a></li>\n<li><a href="/directory-search/index.php" title="Search the UCOP directory">People</a></li>\n<li>\n                  <form action="/search" class="navbar-search" id="search-form" method="get">\n<!--\nThe search form\'s submit event is being handled\nby an inline script further down the page\n-->\n                    <label class="skip-link" for="query-field">search</label>\n                    <input class="search-query" id="query-field" name="q" placeholder="Search" type="text"/>\n                  </form>\n</li>\n</ul>\n            </div>\n          </div>\n        </div>\n      </div>\n      <div class="span4 logo">\n        <div id="logo"><a href="http://www.ucop.edu/index.html"><img alt="University of California" height="64" src="/_common/files/img/wordmark.png" width="240"/></a></div>\n      </div>\n    </div>\n    \n<div class="row">\n<div class="span12">         \n<ul class="breadcrumb">\n\t\t\t\t\t\t\t\t<li> <a href="http://www.ucop.edu/index.html">UCOP</a> <span class="divider">&gt;</span> </li>\n\t\t\t\t\t\t\t<li> <a href="/finance-office/index.html">CFO</a> <span class="divider">&gt;</span> </li>\n\t<li> <a href="../../index.html">Budget Analysis and Planning</a> <span class="divider">&gt;</span> </li>\n\t\t\t\t\t\t\t\t\t\t<li>\n\t <a href="../index.html">Budgets &amp; Reports</a>\n\t \t <span class="divider">&gt;</span> \n\t \t</li>\n\t\t\t\t\t\t\t\t\t\t<li>\n\t <a href="index.html">Legislative reports</a>\n\t \t <span class="divider">&gt;</span> \n\t \t</li>\n\t\t\t\t\t\t\t\t\t<li>\n\t <a href="2013-14-legislative-session.html">Legislative reports</a>\n\t \t</li>\n\t</ul>\n</div>\n</div>\n    <div class="list-land" id="content"> \n      <!-- Main hero unit for a primary marketing message or call to action -->\n      <div class="row">\n        <div class="span12">\n\n    <h1 class="page-header">Budget Analysis and Planning</h1>\n\n</div>\n                <div class="span12">\n        <ul class="nav nav-tabs sub-nav tab4">\n                                                        \n                <li class=""><a class="" href="../../index.html">Overview</a></li>\n                                            \n                    <li class=""><a class="" href="../../staff/index.html">Staff</a></li>\n                                            \n                    <li class="active"><a class="" href="../index.html">Budgets &amp; Reports</a></li>\n                                            \n                    <li class=""><a class="" href="../../fees-and-enrollments/index.html">Fees &amp; Enrollments</a></li>\n                    </ul>\n    </div>\n    \n      </div>\n      <!-- Example row of columns -->\n      \n        \n\n\n\n\n\n<div class="row">\n        <div class="span8 dotted-top" role="main">\n                          <h2>Legislative reports</h2>\n                            <h3 class="subhead">2013-2014</h3>\n                <table cellpadding="5" cellspacing="0" class="table-striped" id="report" summary="2009-10 Legislative Reports in a table with one level of column and row headers" width="100%">\n<tbody>\n<tr>\n<th scope="col"/><th scope="col">Date</th><th scope="col">Report title</th>\n</tr>\n<tr>\n<td scope="row">1</td>\n<td scope="row">08/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/2013-14cobcp.pdf">2013-14 (EDU 92495) Proposed Capital Outlay Projects (2013-14 only) (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">2</td>\n<td scope="row">09/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/2014-15cobcp.pdf">2014-15&#160; (EDU 92495) Proposed Capital Outlay Projects (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">3</td>\n<td scope="row">11/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/utilizationofclassroomsandresearchspace-11-1-13.pdf">Utilization of Classroom and Teaching Laboratories (pdf)</a>&#160;(<em>Final Report; biennial</em>)</td>\n</tr>\n<tr>\n<td scope="row">4</td>\n<td scope="row">11/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/instructionandresearchspacesummary-11-1-2013.pdf">Instruction and Research Space Summary &amp; Analysis (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">5</td>\n<td scope="row">11/15/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/seplegreport-1-21-14.pdf">Statewide Energy Partnership Program (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">6</td>\n<td scope="row">11/30/13</td>\n<td scope="row"><a href="http://ucop.edu/capital-planning/_files/capital/201323/_UC-Capital-Financial-Plan-2013.pdf">2013-23 Capital Financial Plan (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">7</td>\n<td scope="row">11/30/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/projectsavingsprogramlegrpt-11-2013.pdf">Projects Savings Funded from Capital Outlay Bond Funds (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">8</td>\n<td scope="row">12/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/streamlinedcapitaloutlayprojectslegrpt-12-13.pdf">Streamlined Capital Projects Funded from Capital (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">9</td>\n<td scope="row">01/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/annualgeneralobligationbondsaccountabilitylegrep-11-2013.pdf">Annual General Obligation Bonds Accountability (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">10</td>\n<td scope="row">01/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/2012-13sbulegrpt-3-31-14.pdf">Small Business Utilization (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">11</td>\n<td scope="row">01/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/instfinaid-prelimlegreport-2-10-14.pdf">Institutional Financial Aid Programs - Preliminary report (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">12</td>\n<td scope="row">01/10/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/summerenrollmentlegreport-2-18-2014.pdf">Summer Enrollment (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">13</td>\n<td scope="row">01/15/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/contractingoutforservicesatnewlydevelopedfacilitieslegrep-1-8-2014.pdf">Contracting Out for Services at Newly Developed Facilities (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">14</td>\n<td scope="row">03/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/performanceoutcomemeasureslegreport-March-2014.pdf">Performance Measures (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">15</td>\n<td scope="row">03/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/ELWRlegrpt-3-4-14.pdf">Entry Level Writing Requirement (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">16</td>\n<td scope="row">03/31/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/2012-13annualstudentsupportlegreport-4-1-14.pdf">Annual Report on Student&#160;Financial Support (pdf)</a><a href="../../_files/legreports/1213/2011-12uc-mexicofacilityresearchandacadPrgms-casa.pdf"></a></td>\n</tr>\n<tr>\n<td scope="row">17</td>\n<td scope="row">04/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/uniquestatewidepupilidentifierlegrpt-03-10-14.pdf">Unique Statewide Pupil Identifier (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">18</td>\n<td scope="row">04/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/progressreportucr-somlegrpt-4-2-14.pdf">Riverside School of Medicine (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">19</td>\n<td scope="row">04/01/14</td>\n<td scope="row">SAPEP Funds and Outcomes - N/A<br/></td>\n</tr>\n<tr>\n<td scope="row">20</td>\n<td scope="row">05/15/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/2012-13ReceiptandUseofLotteryFundsLegReport-5-15-14.pdf">Receipt and Use of Lottery Funds (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">21</td>\n<td scope="row">07/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/5thAmendedProposedSEPListtoState-10-31-13.pdf">Cogeneration and Energy Consv Major Capital Projects (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">\n</td>\n<td scope="row">\n</td>\n<td scope="row">\n</td>\n</tr>\n<tr>\n<td scope="row"><strong>&#160;</strong></td>\n<td scope="row"><strong>Future Reports</strong></td>\n<td scope="row">\n</td>\n</tr>\n<tr>\n<td scope="row">24</td>\n<td scope="row">12-<a></a>31-15</td>\n<td scope="row">Breast Cancer Research Fund</td>\n</tr>\n<tr>\n<td scope="row">25</td>\n<td scope="row">12-31-15</td>\n<td scope="row">Cigarette and Tobacco Products Surtax Research Program</td>\n</tr>\n<tr>\n<td scope="row">26</td>\n<td scope="row">01-01-16</td>\n<td scope="row">Best Value Program</td>\n</tr>\n<tr>\n<td scope="row">27</td>\n<td scope="row">01-01-16</td>\n<td scope="row">California Subject Matter Programs</td>\n</tr>\n<tr>\n<td scope="row">28</td>\n<td scope="row">04-01-16</td>\n<td scope="row">COSMOS Program Outcomes</td>\n</tr>\n</tbody>\n</table>\n        </div>\n        \n        <div class="span4 sidebar">\n                  <div class="row">\n            <div class="span4 blustripe">\n                                                <h5>Related resources</h5>\n            <ul class="prp-dots">\n                    <li>                    <a href="index.html">Back to state-mandated reports</a>\n</li>\n                    <li>                    <a href="../index.html">View all budgets and reports</a>\n</li>\n            </ul>\n                            </div>\n          </div>\n                </div>\n</div>\n        \n        \n\n\n\n\n\n\n\n<div class="row less3 novrflw">\n</div>\n\n    </div>\n    <!-- /content --> \n  </div>\n  <!-- /container -->   \n</div>\n<!-- /wrapper -->\n<footer><div><!-- wrapper div cause the include only wants to display one div -->\n<div class="row footfirst">\n<div class="span12 footfirst-inner">\n<h2>UCOP Divisions &amp; Departments</h2>\n<div class="row-fluid">\n<div class="span4">\n<p><a class="mega-header" href="http://www.ucop.edu/finance-office/index.html">Chief Financial Officer</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/operating-budget/index.html">Budget Analysis &amp; Planning</a></li>\n<li><a href="http://www.ucop.edu/capital-asset-strategies-finance/index.html">Capital Asset Strategies &amp; Finance</a></li>\n<li><a href="http://www.ucop.edu/financial-accounting/index.html">Financial Accounting</a></li>\n<li><a href="http://www.ucop.edu/procurement-services/index.html">Procurement Services</a></li>\n<li><a href="http://www.ucop.edu/risk-services/index.html">Risk Services</a></li>\n</ul>\n<p><a class="mega-header" href="http://www.ucop.edu/business-operations/index.html">Chief Operating Officer</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/energy-sustainability/index.html">Energy &amp; Sustainability</a></li>\n<li><a href="http://www.ucop.edu/human-resources/index.html">Human Resources</a></li>\n<li><a href="http://www.ucop.edu/information-technology-services/index.html">Information Technology Services</a></li>\n<li><a href="http://www.ucop.edu/operational-services/index.html">Operational Services</a></li>\n<li><a href="http://www.ucop.edu/pmo/index.html">Strategy and Program Management Office</a></li>\n<li><a href="http://www.ucop.edu/ucop-operations/index.html">UCOP Operations</a></li>\n</ul>\n</div>\n<div class="span4">\n<p><a class="mega-header" href="http://www.ucop.edu/academic-affairs/index.html">Academic Affairs</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/academic-personnel-programs/index.html">Academic Personnel &amp; Programs</a></li>\n<li><a href="http://www.ucop.edu/diversity-engagement/index.html">Diversity &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-research-academic-planning/index.html">Institutional Research &amp; Academic Planning</a></li>\n<li><a href="http://www.ucop.edu/research-graduate-studies/index.html">Research &amp; Graduate Studies</a></li>\n<li><a href="http://www.ucop.edu/student-affairs/index.html">Student Affairs</a></li>\n</ul>\n<p><a class="mega-header" href="http://www.ucop.edu/government-relations/index.html">Government Relations</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/federal-governmental-relations/index.html">Federal Governmental Relations</a></li>\n<li><a href="http://www.ucop.edu/state-governmental-relations/index.html">State Governmental Relations</a></li>\n</ul>\n<p><a class="mega-header" href="http://www.ucop.edu/public-affairs/index.html">Public Affairs</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/alumni-constituent-affairs/index.html">Alumni &amp; Constituent Affairs</a></li>\n<li><a href="http://www.ucop.edu/executive-communications/index.html">Executive Communications &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-advancement/index.html">Institutional Advancement</a></li>\n<li><a href="http://www.ucop.edu/marketing-communications/index.html">Marketing Communications</a></li>\n</ul>\n</div>\n<div class="span4">\n<p><a class="mega-others" href="http://ucanr.edu/">Agriculture &amp; Natural Resources</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/investment-office/index.html">Chief Investment Officer</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/ethics-compliance-audit-services/index.html">Ethics, Compliance &amp; Audit Services</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/general-counsel/index.html">General Counsel</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/uc-health/index.html">UC Health</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/impac/index.html">Issues Management, Policy Analysis &amp; Coordination</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/media-relations/index.html">Media Relations</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/innovation-entrepreneurship/index.html">Office of Innovation &amp; Entrepreneurship</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/laboratory-management/index.html">UC National Laboratories</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/title-ix/index.html">Systemwide Title IX Office</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/president/index.html">President Napolitano</a></p>\n</div>\n</div>\n</div>\n</div>\n<div class="row footsec">\n<div class="span12 footsec-inner">\n<div class="row-fluid">\n<div class="span4">\n<ul>\n<li><strong><a href="http://www.universityofcalifornia.edu/" target="_blank">University of California</a></strong></li>\n<li><strong><a href="http://senate.universityofcalifornia.edu/" target="_blank">Academic Senate</a></strong></li>\n<li><strong><a href="http://regents.universityofcalifornia.edu/" target="_blank">Board of Regents</a></strong></li>\n<li><strong><a href="http://ucnet.universityofcalifornia.edu" target="_blank">UCnet</a></strong></li>\n</ul>\n</div>\n<div class="span4">\n<ul>\n<li><a href="http://www.ucop.edu/accessibility/index.html">Accessibility</a></li>\n<li><a href="http://www.ucop.edu/terms/index.html">Terms of Use</a></li>\n</ul>\n</div>\n<div class="span4">\n<ul class="pull-right">\n<li><a href="http://www.facebook.com/universityofcalifornia" target="_blank"><img alt="UC on Facebook" height="25" src="/_common/files/img/icon_fb_yel_blu.gif" width="25"/></a> <a href="https://twitter.com/UC_Newsroom" target="_blank"><img alt="UC on Twitter" height="25" src="/_common/files/img/icon_twtr_yel_blu.gif" width="25"/></a></li>\n</ul>\n</div>\n</div>\n<p><br/>\n Copyright &#169; Regents of the University of California</p>\n</div>\n</div>\n</div></footer>  \n<!-- Le javascript ================================================== --> \n\n<!-- jQuery & Bootstrap via CDN, fallback to local copy if CDN fails/blocked -->\n<script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>\n<script>/*<![CDATA[*/window.jQuery || document.write(\'<script src="/_common/files/js/jquery.1.7.1.min.js"><\\/script>\')/* ]]>*/</script>\n\n<script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.0.4/js/bootstrap.min.js"></script>\n<script>/*<![CDATA[*/$.fn.modal || document.write(\'<script src="/_common/files/js/bootstrap.2.0.4.min.js"><\\/script>\')/* ]]>*/</script>\n\n<!--[if gt IE 8]><!--><script src="/_common/files/js/modernizr.js"></script><!--<![endif]-->\n<!--[if lt IE 9]><script src="/files/js/selectivizr-min.js"></script><![endif]--> \n\n<script src="/_common/files/js/plugins.js"></script>\n<script src="/_common/files/js/main.js?v=1.2.2"></script>\n\n\n<script>\n  (function(i,s,o,g,r,a,m){i[\'GoogleAnalyticsObject\']=r;i[r]=i[r]||function(){\n  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),\n  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)\n  })(window,document,\'script\',\'https://www.google-analytics.com/analytics.js\',\'ga\');\n\n  ga(\'create\', \'UA-18163990-2\', \'auto\');\n\n// autotrack plugins \n  ga(\'require\', \'outboundLinkTracker\');\n  \n  \n  ga(\'send\', \'pageview\');\n\n</script>\n<script async="" src="/_common/files/js/autotrack.js"></script>\n</body>\n</html>'
In [23]:
c = result.content
In [24]:
soup = BeautifulSoup(c,"html.parser")
soup
Out[24]:
\n<!DOCTYPE html>\n\n<!--[if lt IE 9]><html class="lte-ie8 no-js"  lang="en"><![endif]-->\n<!--[if gt IE 8]><!--><html class="no-js" lang="en"><!--<![endif]-->\n<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">\n<head>\n<meta content="IE=edge" http-equiv="X-UA-Compatible"/>\n<meta charset="unicode-escape"/>\n<meta content="width=device-width, initial-scale=1.0" name="viewport"/>\n<meta content="" name="description"/>\n<meta content="" name="author"/>\n<title>Legislative reports | UCOP</title>\n<!-- Le HTML5 shim, for IE6-8 support of HTML elements -->\n<!--[if lt IE 9]>\n      <script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>\n    <![endif]-->\n<!-- Le styles -->\n<!-- main.css - see /_common/files/css/main.less non-minified sources -->\n<link href="/_common/files/css/main.css?v=1.2" media="screen" rel="stylesheet"/>\n<link href="/_common/files/css/print.css" media="print" rel="stylesheet"/>\n<!-- Le fav and touch icons -->\n<link href="/_common/files/img/ico/favicon.ico" rel="shortcut icon"/>\n<!-- <link href="/files/img/ico/apple-touch-icon.png" rel="apple-touch-icon"/>\n<link href="/files/img/ico/apple-touch-icon-72x72.png" rel="apple-touch-icon" sizes="72x72"/>\n<link href="/files/img/ico/apple-touch-icon-114x114.png" rel="apple-touch-icon" sizes="114x114"/> -->\n<!-- Custom styles -->\n<!--[if gt IE 8]><!--><link href="/_common/files/css/ff-old.css" rel="stylesheet"/><!--<![endif]-->\n<script src="/_common/files/js/protection.js"></script>\n<![CDATA[ ]]>\n</head>\n<body><a class="skip-link" href="#content" title="skip to content">skip to content</a>\n<div id="bg-left"></div>\n<div id="bg-right"></div>\n<div id="wrapper">\n<div class="container">\n<!-- Pills\n    ================================================== -->\n<div class="row mast">\n<div class="span8 main-nav offset4">\n<div class="navbar">\n<div class="navbar-inner">\n<a class="btn btn-navbar" data-target=".nav-collapse" data-toggle="collapse"> Main Menu <b class="caret">\xa0</b> </a>\n<div aria-labelledby="ariatopnav" class="nav-collapse" role="navigation">\n<ul class="nav pull-right" id="ariatopnav">\n<li class=""><a href="http://www.ucop.edu/">Home</a></li>\n<li class=""><a href="http://www.ucop.edu/about">About</a></li>\n<li class="dropdown "> <a class="dropdown-toggle disabled" data-toggle="dropdown" href="http://www.ucop.edu/organization"> Organization <b class="caret">\xa0</b></a>\n<div class="dropdown-menu">\n<!-- links -->\n<div class="row">\n<div class="leftcol">\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/finance-office/index.html">Chief Financial Officer</a></li>\n<li><a href="http://www.ucop.edu/operating-budget/index.html">Budget Analysis &amp; Planning</a></li>\n<li><a href="http://www.ucop.edu/capital-asset-strategies-finance/index.html">Capital Asset Strategies &amp; Finance</a></li>\n<li><a href="http://www.ucop.edu/financial-accounting/index.html">Financial Accounting</a></li>\n<li><a href="http://www.ucop.edu/procurement-services/index.html">Procurement Services</a></li>\n<li><a href="http://www.ucop.edu/risk-services/index.html">Risk Services</a></li>\n</ul>\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/business-operations/index.html">Chief Operating Officer</a></li>\n<li><a href="http://www.ucop.edu/energy-sustainability/index.html">Energy &amp; Sustainability</a></li>\n<li><a href="http://www.ucop.edu/human-resources/index.html">Human Resources</a></li>\n<li><a href="http://www.ucop.edu/information-technology-services/index.html">Information Technology Services</a></li>\n<li><a href="http://www.ucop.edu/operational-services/index.html">Operational Services</a></li>\n<li><a href="http://www.ucop.edu/pmo/index.html">Strategy and Program Management Office</a></li>\n<li><a href="http://www.ucop.edu/ucop-operations/index.html">UCOP Operations</a></li>\n</ul>\n</div>\n<div class="midcol">\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/academic-affairs/index.html">Academic Affairs</a></li>\n<li><a href="http://www.ucop.edu/academic-personnel-programs/index.html">Academic Personnel &amp; Programs</a></li>\n<li><a href="http://www.ucop.edu/diversity-engagement/index.html">Diversity &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-research-academic-planning/index.html">Institutional Research &amp; Academic Planning</a></li>\n<li><a href="http://www.ucop.edu/research-graduate-studies/index.html">Research &amp; Graduate Studies</a></li>\n<li><a href="http://www.ucop.edu/student-affairs/index.html">Student Affairs</a></li>\n</ul>\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/government-relations/index.html">Government Relations</a></li>\n<li><a href="http://www.ucop.edu/federal-governmental-relations/index.html">Federal Governmental Relations</a></li>\n<li><a href="http://www.ucop.edu/state-governmental-relations/index.html">State Governmental Relations</a></li>\n</ul>\n<ul>\n<li><a class="mega-header" href="http://www.ucop.edu/public-affairs/index.html">Public Affairs</a></li>\n<li><a href="http://www.ucop.edu/alumni-constituent-affairs/index.html">Alumni &amp; Constituent Affairs</a></li>\n<li><a href="http://www.ucop.edu/executive-communications/index.html">Executive Communications &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-advancement/index.html">Institutional Advancement</a></li>\n<li><a href="http://www.ucop.edu/marketing-communications/index.html">Marketing Communications</a></li>\n</ul>\n</div>\n<div class="rightcol">\n<ul>\n<li><a class="mega-others" href="http://ucanr.edu/">Agriculture &amp; Natural Resources</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/investment-office/index.html">Chief Investment Officer</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/ethics-compliance-audit-services/index.html">Ethics, Compliance &amp; Audit Services</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/general-counsel/index.html">General Counsel</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/uc-health/index.html">UC Health</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/impac/index.html">Issues Management, Policy Analysis &amp; Coordination</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/media-relations/index.html">Media Relations</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/innovation-entrepreneurship/index.html">Office of Innovation &amp; Entrepreneurship</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/title-ix/index.html">Systemwide Title IX Office</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/laboratory-management/index.html">UC National Laboratories</a><br/></li>\n<li><a class="mega-others" href="http://senate.universityofcalifornia.edu/">Academic Senate</a></li>\n<li><a class="mega-others" href="http://regents.universityofcalifornia.edu/">Board of Regents</a></li>\n<li><a class="mega-others" href="http://www.ucop.edu/president/index.html">President Napolitano</a></li>\n</ul>\n</div>\n</div>\n</div>\n</li>\n<li class=""><a href="http://www.ucop.edu/initiatives">Initiatives</a></li>\n</ul> <ul class="nav pull-right" id="topmenu">\n<li><a href="http://jobs.universityofcalifornia.edu">Jobs</a></li>\n<li><a href="/directory-search/index.php" title="Search the UCOP directory">People</a></li>\n<li>\n<form action="/search" class="navbar-search" id="search-form" method="get">\n<!--\nThe search form's submit event is being handled\nby an inline script further down the page\n-->\n<label class="skip-link" for="query-field">search</label>\n<input class="search-query" id="query-field" name="q" placeholder="Search" type="text"/>\n</form>\n</li>\n</ul>\n</div>\n</div>\n</div>\n</div>\n<div class="span4 logo">\n<div id="logo"><a href="http://www.ucop.edu/index.html"><img alt="University of California" height="64" src="/_common/files/img/wordmark.png" width="240"/></a></div>\n</div>\n</div>\n<div class="row">\n<div class="span12">\n<ul class="breadcrumb">\n<li> <a href="http://www.ucop.edu/index.html">UCOP</a> <span class="divider">&gt;</span> </li>\n<li> <a href="/finance-office/index.html">CFO</a> <span class="divider">&gt;</span> </li>\n<li> <a href="../../index.html">Budget Analysis and Planning</a> <span class="divider">&gt;</span> </li>\n<li>\n<a href="../index.html">Budgets &amp; Reports</a>\n<span class="divider">&gt;</span>\n</li>\n<li>\n<a href="index.html">Legislative reports</a>\n<span class="divider">&gt;</span>\n</li>\n<li>\n<a href="2013-14-legislative-session.html">Legislative reports</a>\n</li>\n</ul>\n</div>\n</div>\n<div class="list-land" id="content">\n<!-- Main hero unit for a primary marketing message or call to action -->\n<div class="row">\n<div class="span12">\n<h1 class="page-header">Budget Analysis and Planning</h1>\n</div>\n<div class="span12">\n<ul class="nav nav-tabs sub-nav tab4">\n<li class=""><a class="" href="../../index.html">Overview</a></li>\n<li class=""><a class="" href="../../staff/index.html">Staff</a></li>\n<li class="active"><a class="" href="../index.html">Budgets &amp; Reports</a></li>\n<li class=""><a class="" href="../../fees-and-enrollments/index.html">Fees &amp; Enrollments</a></li>\n</ul>\n</div>\n</div>\n<!-- Example row of columns -->\n<div class="row">\n<div class="span8 dotted-top" role="main">\n<h2>Legislative reports</h2>\n<h3 class="subhead">2013-2014</h3>\n<table cellpadding="5" cellspacing="0" class="table-striped" id="report" summary="2009-10 Legislative Reports in a table with one level of column and row headers" width="100%">\n<tbody>\n<tr>\n<th scope="col"></th><th scope="col">Date</th><th scope="col">Report title</th>\n</tr>\n<tr>\n<td scope="row">1</td>\n<td scope="row">08/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/2013-14cobcp.pdf">2013-14 (EDU 92495) Proposed Capital Outlay Projects (2013-14 only) (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">2</td>\n<td scope="row">09/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/2014-15cobcp.pdf">2014-15\xa0 (EDU 92495) Proposed Capital Outlay Projects (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">3</td>\n<td scope="row">11/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/utilizationofclassroomsandresearchspace-11-1-13.pdf">Utilization of Classroom and Teaching Laboratories (pdf)</a>\xa0(<em>Final Report; biennial</em>)</td>\n</tr>\n<tr>\n<td scope="row">4</td>\n<td scope="row">11/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/instructionandresearchspacesummary-11-1-2013.pdf">Instruction and Research Space Summary &amp; Analysis (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">5</td>\n<td scope="row">11/15/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/seplegreport-1-21-14.pdf">Statewide Energy Partnership Program (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">6</td>\n<td scope="row">11/30/13</td>\n<td scope="row"><a href="http://ucop.edu/capital-planning/_files/capital/201323/_UC-Capital-Financial-Plan-2013.pdf">2013-23 Capital Financial Plan (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">7</td>\n<td scope="row">11/30/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/projectsavingsprogramlegrpt-11-2013.pdf">Projects Savings Funded from Capital Outlay Bond Funds (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">8</td>\n<td scope="row">12/01/13</td>\n<td scope="row"><a href="../../_files/legreports/1314/streamlinedcapitaloutlayprojectslegrpt-12-13.pdf">Streamlined Capital Projects Funded from Capital (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">9</td>\n<td scope="row">01/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/annualgeneralobligationbondsaccountabilitylegrep-11-2013.pdf">Annual General Obligation Bonds Accountability (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">10</td>\n<td scope="row">01/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/2012-13sbulegrpt-3-31-14.pdf">Small Business Utilization (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">11</td>\n<td scope="row">01/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/instfinaid-prelimlegreport-2-10-14.pdf">Institutional Financial Aid Programs - Preliminary report (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">12</td>\n<td scope="row">01/10/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/summerenrollmentlegreport-2-18-2014.pdf">Summer Enrollment (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">13</td>\n<td scope="row">01/15/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/contractingoutforservicesatnewlydevelopedfacilitieslegrep-1-8-2014.pdf">Contracting Out for Services at Newly Developed Facilities (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">14</td>\n<td scope="row">03/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/performanceoutcomemeasureslegreport-March-2014.pdf">Performance Measures (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">15</td>\n<td scope="row">03/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/ELWRlegrpt-3-4-14.pdf">Entry Level Writing Requirement (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">16</td>\n<td scope="row">03/31/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/2012-13annualstudentsupportlegreport-4-1-14.pdf">Annual Report on Student\xa0Financial Support (pdf)</a><a href="../../_files/legreports/1213/2011-12uc-mexicofacilityresearchandacadPrgms-casa.pdf"></a></td>\n</tr>\n<tr>\n<td scope="row">17</td>\n<td scope="row">04/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/uniquestatewidepupilidentifierlegrpt-03-10-14.pdf">Unique Statewide Pupil Identifier (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">18</td>\n<td scope="row">04/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/progressreportucr-somlegrpt-4-2-14.pdf">Riverside School of Medicine (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">19</td>\n<td scope="row">04/01/14</td>\n<td scope="row">SAPEP Funds and Outcomes - N/A<br/></td>\n</tr>\n<tr>\n<td scope="row">20</td>\n<td scope="row">05/15/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/2012-13ReceiptandUseofLotteryFundsLegReport-5-15-14.pdf">Receipt and Use of Lottery Funds (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">21</td>\n<td scope="row">07/01/14</td>\n<td scope="row"><a href="../../_files/legreports/1314/5thAmendedProposedSEPListtoState-10-31-13.pdf">Cogeneration and Energy Consv Major Capital Projects (pdf)</a></td>\n</tr>\n<tr>\n<td scope="row">\n</td>\n<td scope="row">\n</td>\n<td scope="row">\n</td>\n</tr>\n<tr>\n<td scope="row"><strong>\xa0</strong></td>\n<td scope="row"><strong>Future Reports</strong></td>\n<td scope="row">\n</td>\n</tr>\n<tr>\n<td scope="row">24</td>\n<td scope="row">12-<a></a>31-15</td>\n<td scope="row">Breast Cancer Research Fund</td>\n</tr>\n<tr>\n<td scope="row">25</td>\n<td scope="row">12-31-15</td>\n<td scope="row">Cigarette and Tobacco Products Surtax Research Program</td>\n</tr>\n<tr>\n<td scope="row">26</td>\n<td scope="row">01-01-16</td>\n<td scope="row">Best Value Program</td>\n</tr>\n<tr>\n<td scope="row">27</td>\n<td scope="row">01-01-16</td>\n<td scope="row">California Subject Matter Programs</td>\n</tr>\n<tr>\n<td scope="row">28</td>\n<td scope="row">04-01-16</td>\n<td scope="row">COSMOS Program Outcomes</td>\n</tr>\n</tbody>\n</table>\n</div>\n<div class="span4 sidebar">\n<div class="row">\n<div class="span4 blustripe">\n<h5>Related resources</h5>\n<ul class="prp-dots">\n<li> <a href="index.html">Back to state-mandated reports</a>\n</li>\n<li> <a href="../index.html">View all budgets and reports</a>\n</li>\n</ul>\n</div>\n</div>\n</div>\n</div>\n<div class="row less3 novrflw">\n</div>\n</div>\n<!-- /content -->\n</div>\n<!-- /container -->\n</div>\n<!-- /wrapper -->\n<footer><div><!-- wrapper div cause the include only wants to display one div -->\n<div class="row footfirst">\n<div class="span12 footfirst-inner">\n<h2>UCOP Divisions &amp; Departments</h2>\n<div class="row-fluid">\n<div class="span4">\n<p><a class="mega-header" href="http://www.ucop.edu/finance-office/index.html">Chief Financial Officer</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/operating-budget/index.html">Budget Analysis &amp; Planning</a></li>\n<li><a href="http://www.ucop.edu/capital-asset-strategies-finance/index.html">Capital Asset Strategies &amp; Finance</a></li>\n<li><a href="http://www.ucop.edu/financial-accounting/index.html">Financial Accounting</a></li>\n<li><a href="http://www.ucop.edu/procurement-services/index.html">Procurement Services</a></li>\n<li><a href="http://www.ucop.edu/risk-services/index.html">Risk Services</a></li>\n</ul>\n<p><a class="mega-header" href="http://www.ucop.edu/business-operations/index.html">Chief Operating Officer</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/energy-sustainability/index.html">Energy &amp; Sustainability</a></li>\n<li><a href="http://www.ucop.edu/human-resources/index.html">Human Resources</a></li>\n<li><a href="http://www.ucop.edu/information-technology-services/index.html">Information Technology Services</a></li>\n<li><a href="http://www.ucop.edu/operational-services/index.html">Operational Services</a></li>\n<li><a href="http://www.ucop.edu/pmo/index.html">Strategy and Program Management Office</a></li>\n<li><a href="http://www.ucop.edu/ucop-operations/index.html">UCOP Operations</a></li>\n</ul>\n</div>\n<div class="span4">\n<p><a class="mega-header" href="http://www.ucop.edu/academic-affairs/index.html">Academic Affairs</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/academic-personnel-programs/index.html">Academic Personnel &amp; Programs</a></li>\n<li><a href="http://www.ucop.edu/diversity-engagement/index.html">Diversity &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-research-academic-planning/index.html">Institutional Research &amp; Academic Planning</a></li>\n<li><a href="http://www.ucop.edu/research-graduate-studies/index.html">Research &amp; Graduate Studies</a></li>\n<li><a href="http://www.ucop.edu/student-affairs/index.html">Student Affairs</a></li>\n</ul>\n<p><a class="mega-header" href="http://www.ucop.edu/government-relations/index.html">Government Relations</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/federal-governmental-relations/index.html">Federal Governmental Relations</a></li>\n<li><a href="http://www.ucop.edu/state-governmental-relations/index.html">State Governmental Relations</a></li>\n</ul>\n<p><a class="mega-header" href="http://www.ucop.edu/public-affairs/index.html">Public Affairs</a></p>\n<ul>\n<li><a href="http://www.ucop.edu/alumni-constituent-affairs/index.html">Alumni &amp; Constituent Affairs</a></li>\n<li><a href="http://www.ucop.edu/executive-communications/index.html">Executive Communications &amp; Engagement</a></li>\n<li><a href="http://www.ucop.edu/institutional-advancement/index.html">Institutional Advancement</a></li>\n<li><a href="http://www.ucop.edu/marketing-communications/index.html">Marketing Communications</a></li>\n</ul>\n</div>\n<div class="span4">\n<p><a class="mega-others" href="http://ucanr.edu/">Agriculture &amp; Natural Resources</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/investment-office/index.html">Chief Investment Officer</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/ethics-compliance-audit-services/index.html">Ethics, Compliance &amp; Audit Services</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/general-counsel/index.html">General Counsel</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/uc-health/index.html">UC Health</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/impac/index.html">Issues Management, Policy Analysis &amp; Coordination</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/media-relations/index.html">Media Relations</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/innovation-entrepreneurship/index.html">Office of Innovation &amp; Entrepreneurship</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/laboratory-management/index.html">UC National Laboratories</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/title-ix/index.html">Systemwide Title IX Office</a></p>\n<p><a class="mega-others" href="http://www.ucop.edu/president/index.html">President Napolitano</a></p>\n</div>\n</div>\n</div>\n</div>\n<div class="row footsec">\n<div class="span12 footsec-inner">\n<div class="row-fluid">\n<div class="span4">\n<ul>\n<li><strong><a href="http://www.universityofcalifornia.edu/" target="_blank">University of California</a></strong></li>\n<li><strong><a href="http://senate.universityofcalifornia.edu/" target="_blank">Academic Senate</a></strong></li>\n<li><strong><a href="http://regents.universityofcalifornia.edu/" target="_blank">Board of Regents</a></strong></li>\n<li><strong><a href="http://ucnet.universityofcalifornia.edu" target="_blank">UCnet</a></strong></li>\n</ul>\n</div>\n<div class="span4">\n<ul>\n<li><a href="http://www.ucop.edu/accessibility/index.html">Accessibility</a></li>\n<li><a href="http://www.ucop.edu/terms/index.html">Terms of Use</a></li>\n</ul>\n</div>\n<div class="span4">\n<ul class="pull-right">\n<li><a href="http://www.facebook.com/universityofcalifornia" target="_blank"><img alt="UC on Facebook" height="25" src="/_common/files/img/icon_fb_yel_blu.gif" width="25"/></a> <a href="https://twitter.com/UC_Newsroom" target="_blank"><img alt="UC on Twitter" height="25" src="/_common/files/img/icon_twtr_yel_blu.gif" width="25"/></a></li>\n</ul>\n</div>\n</div>\n<p><br/>\n Copyright \xa9 Regents of the University of California</p>\n</div>\n</div>\n</div></footer>\n<!-- Le javascript ================================================== -->\n<!-- jQuery & Bootstrap via CDN, fallback to local copy if CDN fails/blocked -->\n<script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>\n<script>/*<![CDATA[*/window.jQuery || document.write('<script src="/_common/files/js/jquery.1.7.1.min.js"><\\/script>')/* ]]>*/</script>\n<script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.0.4/js/bootstrap.min.js"></script>\n<script>/*<![CDATA[*/$.fn.modal || document.write('<script src="/_common/files/js/bootstrap.2.0.4.min.js"><\\/script>')/* ]]>*/</script>\n<!--[if gt IE 8]><!--><script src="/_common/files/js/modernizr.js"></script><!--<![endif]-->\n<!--[if lt IE 9]><script src="/files/js/selectivizr-min.js"></script><![endif]-->\n<script src="/_common/files/js/plugins.js"></script>\n<script src="/_common/files/js/main.js?v=1.2.2"></script>\n<script>\n  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){\n  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),\n  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)\n  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');\n\n  ga('create', 'UA-18163990-2', 'auto');\n\n// autotrack plugins \n  ga('require', 'outboundLinkTracker');\n  \n  \n  ga('send', 'pageview');\n\n</script>\n<script async="" src="/_common/files/js/autotrack.js"></script>\n</body>\n</html></html>
In [25]:
print (soup.prettify())
<!DOCTYPE html>
<!--[if lt IE 9]><html class="lte-ie8 no-js"  lang="en"><![endif]-->
<!--[if gt IE 8]><!-->
<html class="no-js" lang="en">
 <!--<![endif]-->
 <html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
  <head>
   <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
   <meta charset="utf-8"/>
   <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
   <meta content="" name="description"/>
   <meta content="" name="author"/>
   <title>
    Legislative reports | UCOP
   </title>
   <!-- Le HTML5 shim, for IE6-8 support of HTML elements -->
   <!--[if lt IE 9]>
      <script src="//html5shim.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
   <!-- Le styles -->
   <!-- main.css - see /_common/files/css/main.less non-minified sources -->
   <link href="/_common/files/css/main.css?v=1.2" media="screen" rel="stylesheet"/>
   <link href="/_common/files/css/print.css" media="print" rel="stylesheet"/>
   <!-- Le fav and touch icons -->
   <link href="/_common/files/img/ico/favicon.ico" rel="shortcut icon"/>
   <!-- <link href="/files/img/ico/apple-touch-icon.png" rel="apple-touch-icon"/>
<link href="/files/img/ico/apple-touch-icon-72x72.png" rel="apple-touch-icon" sizes="72x72"/>
<link href="/files/img/ico/apple-touch-icon-114x114.png" rel="apple-touch-icon" sizes="114x114"/> -->
   <!-- Custom styles -->
   <!--[if gt IE 8]><!-->
   <link href="/_common/files/css/ff-old.css" rel="stylesheet"/>
   <!--<![endif]-->
   <script src="/_common/files/js/protection.js">
   </script>
   <![CDATA[ ]]>
  </head>
  <body>
   <a class="skip-link" href="#content" title="skip to content">
    skip to content
   </a>
   <div id="bg-left">
   </div>
   <div id="bg-right">
   </div>
   <div id="wrapper">
    <div class="container">
     <!-- Pills
    ================================================== -->
     <div class="row mast">
      <div class="span8 main-nav offset4">
       <div class="navbar">
        <div class="navbar-inner">
         <a class="btn btn-navbar" data-target=".nav-collapse" data-toggle="collapse">
          Main Menu
          <b class="caret">
          </b>
         </a>
         <div aria-labelledby="ariatopnav" class="nav-collapse" role="navigation">
          <ul class="nav pull-right" id="ariatopnav">
           <li class="">
            <a href="http://www.ucop.edu/">
             Home
            </a>
           </li>
           <li class="">
            <a href="http://www.ucop.edu/about">
             About
            </a>
           </li>
           <li class="dropdown ">
            <a class="dropdown-toggle disabled" data-toggle="dropdown" href="http://www.ucop.edu/organization">
             Organization
             <b class="caret">
             </b>
            </a>
            <div class="dropdown-menu">
             <!-- links -->
             <div class="row">
              <div class="leftcol">
               <ul>
                <li>
                 <a class="mega-header" href="http://www.ucop.edu/finance-office/index.html">
                  Chief Financial Officer
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/operating-budget/index.html">
                  Budget Analysis &amp; Planning
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/capital-asset-strategies-finance/index.html">
                  Capital Asset Strategies &amp; Finance
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/financial-accounting/index.html">
                  Financial Accounting
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/procurement-services/index.html">
                  Procurement Services
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/risk-services/index.html">
                  Risk Services
                 </a>
                </li>
               </ul>
               <ul>
                <li>
                 <a class="mega-header" href="http://www.ucop.edu/business-operations/index.html">
                  Chief Operating Officer
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/energy-sustainability/index.html">
                  Energy &amp; Sustainability
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/human-resources/index.html">
                  Human Resources
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/information-technology-services/index.html">
                  Information Technology Services
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/operational-services/index.html">
                  Operational Services
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/pmo/index.html">
                  Strategy and Program Management Office
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/ucop-operations/index.html">
                  UCOP Operations
                 </a>
                </li>
               </ul>
              </div>
              <div class="midcol">
               <ul>
                <li>
                 <a class="mega-header" href="http://www.ucop.edu/academic-affairs/index.html">
                  Academic Affairs
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/academic-personnel-programs/index.html">
                  Academic Personnel &amp; Programs
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/diversity-engagement/index.html">
                  Diversity &amp; Engagement
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/institutional-research-academic-planning/index.html">
                  Institutional Research &amp; Academic Planning
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/research-graduate-studies/index.html">
                  Research &amp; Graduate Studies
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/student-affairs/index.html">
                  Student Affairs
                 </a>
                </li>
               </ul>
               <ul>
                <li>
                 <a class="mega-header" href="http://www.ucop.edu/government-relations/index.html">
                  Government Relations
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/federal-governmental-relations/index.html">
                  Federal Governmental Relations
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/state-governmental-relations/index.html">
                  State Governmental Relations
                 </a>
                </li>
               </ul>
               <ul>
                <li>
                 <a class="mega-header" href="http://www.ucop.edu/public-affairs/index.html">
                  Public Affairs
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/alumni-constituent-affairs/index.html">
                  Alumni &amp; Constituent Affairs
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/executive-communications/index.html">
                  Executive Communications &amp; Engagement
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/institutional-advancement/index.html">
                  Institutional Advancement
                 </a>
                </li>
                <li>
                 <a href="http://www.ucop.edu/marketing-communications/index.html">
                  Marketing Communications
                 </a>
                </li>
               </ul>
              </div>
              <div class="rightcol">
               <ul>
                <li>
                 <a class="mega-others" href="http://ucanr.edu/">
                  Agriculture &amp; Natural Resources
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/investment-office/index.html">
                  Chief Investment Officer
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/ethics-compliance-audit-services/index.html">
                  Ethics, Compliance &amp; Audit Services
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/general-counsel/index.html">
                  General Counsel
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/uc-health/index.html">
                  UC Health
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/impac/index.html">
                  Issues Management, Policy Analysis &amp; Coordination
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/media-relations/index.html">
                  Media Relations
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/innovation-entrepreneurship/index.html">
                  Office of Innovation &amp; Entrepreneurship
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/title-ix/index.html">
                  Systemwide Title IX Office
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/laboratory-management/index.html">
                  UC National Laboratories
                 </a>
                 <br/>
                </li>
                <li>
                 <a class="mega-others" href="http://senate.universityofcalifornia.edu/">
                  Academic Senate
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://regents.universityofcalifornia.edu/">
                  Board of Regents
                 </a>
                </li>
                <li>
                 <a class="mega-others" href="http://www.ucop.edu/president/index.html">
                  President Napolitano
                 </a>
                </li>
               </ul>
              </div>
             </div>
            </div>
           </li>
           <li class="">
            <a href="http://www.ucop.edu/initiatives">
             Initiatives
            </a>
           </li>
          </ul>
          <ul class="nav pull-right" id="topmenu">
           <li>
            <a href="http://jobs.universityofcalifornia.edu">
             Jobs
            </a>
           </li>
           <li>
            <a href="/directory-search/index.php" title="Search the UCOP directory">
             People
            </a>
           </li>
           <li>
            <form action="/search" class="navbar-search" id="search-form" method="get">
             <!--
The search form's submit event is being handled
by an inline script further down the page
-->
             <label class="skip-link" for="query-field">
              search
             </label>
             <input class="search-query" id="query-field" name="q" placeholder="Search" type="text"/>
            </form>
           </li>
          </ul>
         </div>
        </div>
       </div>
      </div>
      <div class="span4 logo">
       <div id="logo">
        <a href="http://www.ucop.edu/index.html">
         <img alt="University of California" height="64" src="/_common/files/img/wordmark.png" width="240"/>
        </a>
       </div>
      </div>
     </div>
     <div class="row">
      <div class="span12">
       <ul class="breadcrumb">
        <li>
         <a href="http://www.ucop.edu/index.html">
          UCOP
         </a>
         <span class="divider">
          &gt;
         </span>
        </li>
        <li>
         <a href="/finance-office/index.html">
          CFO
         </a>
         <span class="divider">
          &gt;
         </span>
        </li>
        <li>
         <a href="../../index.html">
          Budget Analysis and Planning
         </a>
         <span class="divider">
          &gt;
         </span>
        </li>
        <li>
         <a href="../index.html">
          Budgets &amp; Reports
         </a>
         <span class="divider">
          &gt;
         </span>
        </li>
        <li>
         <a href="index.html">
          Legislative reports
         </a>
         <span class="divider">
          &gt;
         </span>
        </li>
        <li>
         <a href="2013-14-legislative-session.html">
          Legislative reports
         </a>
        </li>
       </ul>
      </div>
     </div>
     <div class="list-land" id="content">
      <!-- Main hero unit for a primary marketing message or call to action -->
      <div class="row">
       <div class="span12">
        <h1 class="page-header">
         Budget Analysis and Planning
        </h1>
       </div>
       <div class="span12">
        <ul class="nav nav-tabs sub-nav tab4">
         <li class="">
          <a class="" href="../../index.html">
           Overview
          </a>
         </li>
         <li class="">
          <a class="" href="../../staff/index.html">
           Staff
          </a>
         </li>
         <li class="active">
          <a class="" href="../index.html">
           Budgets &amp; Reports
          </a>
         </li>
         <li class="">
          <a class="" href="../../fees-and-enrollments/index.html">
           Fees &amp; Enrollments
          </a>
         </li>
        </ul>
       </div>
      </div>
      <!-- Example row of columns -->
      <div class="row">
       <div class="span8 dotted-top" role="main">
        <h2>
         Legislative reports
        </h2>
        <h3 class="subhead">
         2013-2014
        </h3>
        <table cellpadding="5" cellspacing="0" class="table-striped" id="report" summary="2009-10 Legislative Reports in a table with one level of column and row headers" width="100%">
         <tbody>
          <tr>
           <th scope="col">
           </th>
           <th scope="col">
            Date
           </th>
           <th scope="col">
            Report title
           </th>
          </tr>
          <tr>
           <td scope="row">
            1
           </td>
           <td scope="row">
            08/01/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/2013-14cobcp.pdf">
             2013-14 (EDU 92495) Proposed Capital Outlay Projects (2013-14 only) (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            2
           </td>
           <td scope="row">
            09/01/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/2014-15cobcp.pdf">
             2014-15  (EDU 92495) Proposed Capital Outlay Projects (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            3
           </td>
           <td scope="row">
            11/01/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/utilizationofclassroomsandresearchspace-11-1-13.pdf">
             Utilization of Classroom and Teaching Laboratories (pdf)
            </a>
            (
            <em>
             Final Report; biennial
            </em>
            )
           </td>
          </tr>
          <tr>
           <td scope="row">
            4
           </td>
           <td scope="row">
            11/01/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/instructionandresearchspacesummary-11-1-2013.pdf">
             Instruction and Research Space Summary &amp; Analysis (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            5
           </td>
           <td scope="row">
            11/15/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/seplegreport-1-21-14.pdf">
             Statewide Energy Partnership Program (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            6
           </td>
           <td scope="row">
            11/30/13
           </td>
           <td scope="row">
            <a href="http://ucop.edu/capital-planning/_files/capital/201323/_UC-Capital-Financial-Plan-2013.pdf">
             2013-23 Capital Financial Plan (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            7
           </td>
           <td scope="row">
            11/30/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/projectsavingsprogramlegrpt-11-2013.pdf">
             Projects Savings Funded from Capital Outlay Bond Funds (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            8
           </td>
           <td scope="row">
            12/01/13
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/streamlinedcapitaloutlayprojectslegrpt-12-13.pdf">
             Streamlined Capital Projects Funded from Capital (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            9
           </td>
           <td scope="row">
            01/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/annualgeneralobligationbondsaccountabilitylegrep-11-2013.pdf">
             Annual General Obligation Bonds Accountability (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            10
           </td>
           <td scope="row">
            01/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/2012-13sbulegrpt-3-31-14.pdf">
             Small Business Utilization (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            11
           </td>
           <td scope="row">
            01/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/instfinaid-prelimlegreport-2-10-14.pdf">
             Institutional Financial Aid Programs - Preliminary report (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            12
           </td>
           <td scope="row">
            01/10/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/summerenrollmentlegreport-2-18-2014.pdf">
             Summer Enrollment (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            13
           </td>
           <td scope="row">
            01/15/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/contractingoutforservicesatnewlydevelopedfacilitieslegrep-1-8-2014.pdf">
             Contracting Out for Services at Newly Developed Facilities (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            14
           </td>
           <td scope="row">
            03/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/performanceoutcomemeasureslegreport-March-2014.pdf">
             Performance Measures (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            15
           </td>
           <td scope="row">
            03/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/ELWRlegrpt-3-4-14.pdf">
             Entry Level Writing Requirement (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            16
           </td>
           <td scope="row">
            03/31/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/2012-13annualstudentsupportlegreport-4-1-14.pdf">
             Annual Report on Student Financial Support (pdf)
            </a>
            <a href="../../_files/legreports/1213/2011-12uc-mexicofacilityresearchandacadPrgms-casa.pdf">
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            17
           </td>
           <td scope="row">
            04/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/uniquestatewidepupilidentifierlegrpt-03-10-14.pdf">
             Unique Statewide Pupil Identifier (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            18
           </td>
           <td scope="row">
            04/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/progressreportucr-somlegrpt-4-2-14.pdf">
             Riverside School of Medicine (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            19
           </td>
           <td scope="row">
            04/01/14
           </td>
           <td scope="row">
            SAPEP Funds and Outcomes - N/A
            <br/>
           </td>
          </tr>
          <tr>
           <td scope="row">
            20
           </td>
           <td scope="row">
            05/15/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/2012-13ReceiptandUseofLotteryFundsLegReport-5-15-14.pdf">
             Receipt and Use of Lottery Funds (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
            21
           </td>
           <td scope="row">
            07/01/14
           </td>
           <td scope="row">
            <a href="../../_files/legreports/1314/5thAmendedProposedSEPListtoState-10-31-13.pdf">
             Cogeneration and Energy Consv Major Capital Projects (pdf)
            </a>
           </td>
          </tr>
          <tr>
           <td scope="row">
           </td>
           <td scope="row">
           </td>
           <td scope="row">
           </td>
          </tr>
          <tr>
           <td scope="row">
            <strong>
            </strong>
           </td>
           <td scope="row">
            <strong>
             Future Reports
            </strong>
           </td>
           <td scope="row">
           </td>
          </tr>
          <tr>
           <td scope="row">
            24
           </td>
           <td scope="row">
            12-
            <a>
            </a>
            31-15
           </td>
           <td scope="row">
            Breast Cancer Research Fund
           </td>
          </tr>
          <tr>
           <td scope="row">
            25
           </td>
           <td scope="row">
            12-31-15
           </td>
           <td scope="row">
            Cigarette and Tobacco Products Surtax Research Program
           </td>
          </tr>
          <tr>
           <td scope="row">
            26
           </td>
           <td scope="row">
            01-01-16
           </td>
           <td scope="row">
            Best Value Program
           </td>
          </tr>
          <tr>
           <td scope="row">
            27
           </td>
           <td scope="row">
            01-01-16
           </td>
           <td scope="row">
            California Subject Matter Programs
           </td>
          </tr>
          <tr>
           <td scope="row">
            28
           </td>
           <td scope="row">
            04-01-16
           </td>
           <td scope="row">
            COSMOS Program Outcomes
           </td>
          </tr>
         </tbody>
        </table>
       </div>
       <div class="span4 sidebar">
        <div class="row">
         <div class="span4 blustripe">
          <h5>
           Related resources
          </h5>
          <ul class="prp-dots">
           <li>
            <a href="index.html">
             Back to state-mandated reports
            </a>
           </li>
           <li>
            <a href="../index.html">
             View all budgets and reports
            </a>
           </li>
          </ul>
         </div>
        </div>
       </div>
      </div>
      <div class="row less3 novrflw">
      </div>
     </div>
     <!-- /content -->
    </div>
    <!-- /container -->
   </div>
   <!-- /wrapper -->
   <footer>
    <div>
     <!-- wrapper div cause the include only wants to display one div -->
     <div class="row footfirst">
      <div class="span12 footfirst-inner">
       <h2>
        UCOP Divisions &amp; Departments
       </h2>
       <div class="row-fluid">
        <div class="span4">
         <p>
          <a class="mega-header" href="http://www.ucop.edu/finance-office/index.html">
           Chief Financial Officer
          </a>
         </p>
         <ul>
          <li>
           <a href="http://www.ucop.edu/operating-budget/index.html">
            Budget Analysis &amp; Planning
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/capital-asset-strategies-finance/index.html">
            Capital Asset Strategies &amp; Finance
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/financial-accounting/index.html">
            Financial Accounting
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/procurement-services/index.html">
            Procurement Services
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/risk-services/index.html">
            Risk Services
           </a>
          </li>
         </ul>
         <p>
          <a class="mega-header" href="http://www.ucop.edu/business-operations/index.html">
           Chief Operating Officer
          </a>
         </p>
         <ul>
          <li>
           <a href="http://www.ucop.edu/energy-sustainability/index.html">
            Energy &amp; Sustainability
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/human-resources/index.html">
            Human Resources
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/information-technology-services/index.html">
            Information Technology Services
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/operational-services/index.html">
            Operational Services
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/pmo/index.html">
            Strategy and Program Management Office
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/ucop-operations/index.html">
            UCOP Operations
           </a>
          </li>
         </ul>
        </div>
        <div class="span4">
         <p>
          <a class="mega-header" href="http://www.ucop.edu/academic-affairs/index.html">
           Academic Affairs
          </a>
         </p>
         <ul>
          <li>
           <a href="http://www.ucop.edu/academic-personnel-programs/index.html">
            Academic Personnel &amp; Programs
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/diversity-engagement/index.html">
            Diversity &amp; Engagement
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/institutional-research-academic-planning/index.html">
            Institutional Research &amp; Academic Planning
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/research-graduate-studies/index.html">
            Research &amp; Graduate Studies
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/student-affairs/index.html">
            Student Affairs
           </a>
          </li>
         </ul>
         <p>
          <a class="mega-header" href="http://www.ucop.edu/government-relations/index.html">
           Government Relations
          </a>
         </p>
         <ul>
          <li>
           <a href="http://www.ucop.edu/federal-governmental-relations/index.html">
            Federal Governmental Relations
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/state-governmental-relations/index.html">
            State Governmental Relations
           </a>
          </li>
         </ul>
         <p>
          <a class="mega-header" href="http://www.ucop.edu/public-affairs/index.html">
           Public Affairs
          </a>
         </p>
         <ul>
          <li>
           <a href="http://www.ucop.edu/alumni-constituent-affairs/index.html">
            Alumni &amp; Constituent Affairs
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/executive-communications/index.html">
            Executive Communications &amp; Engagement
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/institutional-advancement/index.html">
            Institutional Advancement
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/marketing-communications/index.html">
            Marketing Communications
           </a>
          </li>
         </ul>
        </div>
        <div class="span4">
         <p>
          <a class="mega-others" href="http://ucanr.edu/">
           Agriculture &amp; Natural Resources
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/investment-office/index.html">
           Chief Investment Officer
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/ethics-compliance-audit-services/index.html">
           Ethics, Compliance &amp; Audit Services
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/general-counsel/index.html">
           General Counsel
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/uc-health/index.html">
           UC Health
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/impac/index.html">
           Issues Management, Policy Analysis &amp; Coordination
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/media-relations/index.html">
           Media Relations
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/innovation-entrepreneurship/index.html">
           Office of Innovation &amp; Entrepreneurship
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/laboratory-management/index.html">
           UC National Laboratories
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/title-ix/index.html">
           Systemwide Title IX Office
          </a>
         </p>
         <p>
          <a class="mega-others" href="http://www.ucop.edu/president/index.html">
           President Napolitano
          </a>
         </p>
        </div>
       </div>
      </div>
     </div>
     <div class="row footsec">
      <div class="span12 footsec-inner">
       <div class="row-fluid">
        <div class="span4">
         <ul>
          <li>
           <strong>
            <a href="http://www.universityofcalifornia.edu/" target="_blank">
             University of California
            </a>
           </strong>
          </li>
          <li>
           <strong>
            <a href="http://senate.universityofcalifornia.edu/" target="_blank">
             Academic Senate
            </a>
           </strong>
          </li>
          <li>
           <strong>
            <a href="http://regents.universityofcalifornia.edu/" target="_blank">
             Board of Regents
            </a>
           </strong>
          </li>
          <li>
           <strong>
            <a href="http://ucnet.universityofcalifornia.edu" target="_blank">
             UCnet
            </a>
           </strong>
          </li>
         </ul>
        </div>
        <div class="span4">
         <ul>
          <li>
           <a href="http://www.ucop.edu/accessibility/index.html">
            Accessibility
           </a>
          </li>
          <li>
           <a href="http://www.ucop.edu/terms/index.html">
            Terms of Use
           </a>
          </li>
         </ul>
        </div>
        <div class="span4">
         <ul class="pull-right">
          <li>
           <a href="http://www.facebook.com/universityofcalifornia" target="_blank">
            <img alt="UC on Facebook" height="25" src="/_common/files/img/icon_fb_yel_blu.gif" width="25"/>
           </a>
           <a href="https://twitter.com/UC_Newsroom" target="_blank">
            <img alt="UC on Twitter" height="25" src="/_common/files/img/icon_twtr_yel_blu.gif" width="25"/>
           </a>
          </li>
         </ul>
        </div>
       </div>
       <p>
        <br/>
        Copyright © Regents of the University of California
       </p>
      </div>
     </div>
    </div>
   </footer>
   <!-- Le javascript ================================================== -->
   <!-- jQuery & Bootstrap via CDN, fallback to local copy if CDN fails/blocked -->
   <script src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js">
   </script>
   <script>
    /*<![CDATA[*/window.jQuery || document.write('<script src="/_common/files/js/jquery.1.7.1.min.js"><\/script>')/* ]]>*/
   </script>
   <script src="//netdna.bootstrapcdn.com/twitter-bootstrap/2.0.4/js/bootstrap.min.js">
   </script>
   <script>
    /*<![CDATA[*/$.fn.modal || document.write('<script src="/_common/files/js/bootstrap.2.0.4.min.js"><\/script>')/* ]]>*/
   </script>
   <!--[if gt IE 8]><!-->
   <script src="/_common/files/js/modernizr.js">
   </script>
   <!--<![endif]-->
   <!--[if lt IE 9]><script src="/files/js/selectivizr-min.js"></script><![endif]-->
   <script src="/_common/files/js/plugins.js">
   </script>
   <script src="/_common/files/js/main.js?v=1.2.2">
   </script>
   <script>
    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');

  ga('create', 'UA-18163990-2', 'auto');

// autotrack plugins
  ga('require', 'outboundLinkTracker');


  ga('send', 'pageview');
   </script>
   <script async="" src="/_common/files/js/autotrack.js">
   </script>
  </body>
 </html>
</html>
In [26]:
# Finding the Table through Tags
summary = soup.find("div",{'class':'list-land','id':'content'})
In [27]:
tables = summary.find_all('table')
len(tables)
Out[27]:
1
In [28]:
data = []
rows = tables[0].findAll('tr')
In [29]:
for tr in rows:
    cols = tr.findAll('td')
    # Check to see if the text is in the row
    for td in cols:
        text = td.find(text=True)
        print text
        data.append(text)
1
08/01/13
2013-14 (EDU 92495) Proposed Capital Outlay Projects (2013-14 only) (pdf)
2
09/01/13
2014-15  (EDU 92495) Proposed Capital Outlay Projects (pdf)
3
11/01/13
Utilization of Classroom and Teaching Laboratories (pdf)
4
11/01/13
Instruction and Research Space Summary & Analysis (pdf)
5
11/15/13
Statewide Energy Partnership Program (pdf)
6
11/30/13
2013-23 Capital Financial Plan (pdf)
7
11/30/13
Projects Savings Funded from Capital Outlay Bond Funds (pdf)
8
12/01/13
Streamlined Capital Projects Funded from Capital (pdf)
9
01/01/14
Annual General Obligation Bonds Accountability (pdf)
10
01/01/14
Small Business Utilization (pdf)
11
01/01/14
Institutional Financial Aid Programs - Preliminary report (pdf)
12
01/10/14
Summer Enrollment (pdf)
13
01/15/14
Contracting Out for Services at Newly Developed Facilities (pdf)
14
03/01/14
Performance Measures (pdf)
15
03/01/14
Entry Level Writing Requirement (pdf)
16
03/31/14
Annual Report on Student Financial Support (pdf)
17
04/01/14
Unique Statewide Pupil Identifier (pdf)
18
04/01/14
Riverside School of Medicine (pdf)
19
04/01/14
SAPEP Funds and Outcomes - N/A
20
05/15/14
Receipt and Use of Lottery Funds (pdf)
21
07/01/14
Cogeneration and Energy Consv Major Capital Projects (pdf)






 
Future Reports


24
12-
Breast Cancer Research Fund
25
12-31-15
Cigarette and Tobacco Products Surtax Research Program
26
01-01-16
Best Value Program
27
01-01-16
California Subject Matter Programs
28
04-01-16
COSMOS Program Outcomes
In [30]:
# Extracting the data
# Set up enpty lists
reports = []
date = []

# Set index counter
index = 0
# Go find the PDF cells
for index, item in enumerate(data): # enumerate function will set index counter
    if 'pdf' in item:
        #Add the date and reports
        date.append(data[index-1])

        #get rid of /xa0
        reports.append(item.replace(u'/xa0',u' '))
In [31]:
print reports
print data
[u'2013-14 (EDU 92495) Proposed Capital Outlay Projects (2013-14 only) (pdf)', u'2014-15\xa0 (EDU 92495) Proposed Capital Outlay Projects (pdf)', u'Utilization of Classroom and Teaching Laboratories (pdf)', u'Instruction and Research Space Summary & Analysis (pdf)', u'Statewide Energy Partnership Program (pdf)', u'2013-23 Capital Financial Plan (pdf)', u'Projects Savings Funded from Capital Outlay Bond Funds (pdf)', u'Streamlined Capital Projects Funded from Capital (pdf)', u'Annual General Obligation Bonds Accountability (pdf)', u'Small Business Utilization (pdf)', u'Institutional Financial Aid Programs - Preliminary report (pdf)', u'Summer Enrollment (pdf)', u'Contracting Out for Services at Newly Developed Facilities (pdf)', u'Performance Measures (pdf)', u'Entry Level Writing Requirement (pdf)', u'Annual Report on Student\xa0Financial Support (pdf)', u'Unique Statewide Pupil Identifier (pdf)', u'Riverside School of Medicine (pdf)', u'Receipt and Use of Lottery Funds (pdf)', u'Cogeneration and Energy Consv Major Capital Projects (pdf)']
[u'1', u'08/01/13', u'2013-14 (EDU 92495) Proposed Capital Outlay Projects (2013-14 only) (pdf)', u'2', u'09/01/13', u'2014-15\xa0 (EDU 92495) Proposed Capital Outlay Projects (pdf)', u'3', u'11/01/13', u'Utilization of Classroom and Teaching Laboratories (pdf)', u'4', u'11/01/13', u'Instruction and Research Space Summary & Analysis (pdf)', u'5', u'11/15/13', u'Statewide Energy Partnership Program (pdf)', u'6', u'11/30/13', u'2013-23 Capital Financial Plan (pdf)', u'7', u'11/30/13', u'Projects Savings Funded from Capital Outlay Bond Funds (pdf)', u'8', u'12/01/13', u'Streamlined Capital Projects Funded from Capital (pdf)', u'9', u'01/01/14', u'Annual General Obligation Bonds Accountability (pdf)', u'10', u'01/01/14', u'Small Business Utilization (pdf)', u'11', u'01/01/14', u'Institutional Financial Aid Programs - Preliminary report (pdf)', u'12', u'01/10/14', u'Summer Enrollment (pdf)', u'13', u'01/15/14', u'Contracting Out for Services at Newly Developed Facilities (pdf)', u'14', u'03/01/14', u'Performance Measures (pdf)', u'15', u'03/01/14', u'Entry Level Writing Requirement (pdf)', u'16', u'03/31/14', u'Annual Report on Student\xa0Financial Support (pdf)', u'17', u'04/01/14', u'Unique Statewide Pupil Identifier (pdf)', u'18', u'04/01/14', u'Riverside School of Medicine (pdf)', u'19', u'04/01/14', u'SAPEP Funds and Outcomes - N/A', u'20', u'05/15/14', u'Receipt and Use of Lottery Funds (pdf)', u'21', u'07/01/14', u'Cogeneration and Energy Consv Major Capital Projects (pdf)', u'\n', u'\n', u'\n', u'\xa0', u'Future Reports', u'\n', u'24', u'12-', u'Breast Cancer Research Fund', u'25', u'12-31-15', u'Cigarette and Tobacco Products Surtax Research Program', u'26', u'01-01-16', u'Best Value Program', u'27', u'01-01-16', u'California Subject Matter Programs', u'28', u'04-01-16', u'COSMOS Program Outcomes']
In [32]:
date = Series(date)
reports = Series(reports)
print date
print reports
0     08/01/13
1     09/01/13
2     11/01/13
3     11/01/13
4     11/15/13
5     11/30/13
6     11/30/13
7     12/01/13
8     01/01/14
9     01/01/14
10    01/01/14
11    01/10/14
12    01/15/14
13    03/01/14
14    03/01/14
15    03/31/14
16    04/01/14
17    04/01/14
18    05/15/14
19    07/01/14
dtype: object
0     2013-14 (EDU 92495) Proposed Capital Outlay Pr...
1     2014-15  (EDU 92495) Proposed Capital Outlay P...
2     Utilization of Classroom and Teaching Laborato...
3     Instruction and Research Space Summary & Analy...
4            Statewide Energy Partnership Program (pdf)
5                  2013-23 Capital Financial Plan (pdf)
6     Projects Savings Funded from Capital Outlay Bo...
7     Streamlined Capital Projects Funded from Capit...
8     Annual General Obligation Bonds Accountability...
9                      Small Business Utilization (pdf)
10    Institutional Financial Aid Programs - Prelimi...
11                              Summer Enrollment (pdf)
12    Contracting Out for Services at Newly Develope...
13                           Performance Measures (pdf)
14                Entry Level Writing Requirement (pdf)
15     Annual Report on Student Financial Support (pdf)
16              Unique Statewide Pupil Identifier (pdf)
17                   Riverside School of Medicine (pdf)
18               Receipt and Use of Lottery Funds (pdf)
19    Cogeneration and Energy Consv Major Capital Pr...
dtype: object
In [33]:
legislative_df = pd.concat([date,reports],axis=1)
legislative_df
Out[33]:
0 1
0 08/01/13 2013-14 (EDU 92495) Proposed Capital Outlay Pr...
1 09/01/13 2014-15  (EDU 92495) Proposed Capital Outlay P...
2 11/01/13 Utilization of Classroom and Teaching Laborato...
3 11/01/13 Instruction and Research Space Summary & Analy...
4 11/15/13 Statewide Energy Partnership Program (pdf)
5 11/30/13 2013-23 Capital Financial Plan (pdf)
6 11/30/13 Projects Savings Funded from Capital Outlay Bo...
7 12/01/13 Streamlined Capital Projects Funded from Capit...
8 01/01/14 Annual General Obligation Bonds Accountability...
9 01/01/14 Small Business Utilization (pdf)
10 01/01/14 Institutional Financial Aid Programs - Prelimi...
11 01/10/14 Summer Enrollment (pdf)
12 01/15/14 Contracting Out for Services at Newly Develope...
13 03/01/14 Performance Measures (pdf)
14 03/01/14 Entry Level Writing Requirement (pdf)
15 03/31/14 Annual Report on Student Financial Support (pdf)
16 04/01/14 Unique Statewide Pupil Identifier (pdf)
17 04/01/14 Riverside School of Medicine (pdf)
18 05/15/14 Receipt and Use of Lottery Funds (pdf)
19 07/01/14 Cogeneration and Energy Consv Major Capital Pr...
In [34]:
legislative_df.columns = ['Date','Reports']
legislative_df
Out[34]:
Date Reports
0 08/01/13 2013-14 (EDU 92495) Proposed Capital Outlay Pr...
1 09/01/13 2014-15  (EDU 92495) Proposed Capital Outlay P...
2 11/01/13 Utilization of Classroom and Teaching Laborato...
3 11/01/13 Instruction and Research Space Summary & Analy...
4 11/15/13 Statewide Energy Partnership Program (pdf)
5 11/30/13 2013-23 Capital Financial Plan (pdf)
6 11/30/13 Projects Savings Funded from Capital Outlay Bo...
7 12/01/13 Streamlined Capital Projects Funded from Capit...
8 01/01/14 Annual General Obligation Bonds Accountability...
9 01/01/14 Small Business Utilization (pdf)
10 01/01/14 Institutional Financial Aid Programs - Prelimi...
11 01/10/14 Summer Enrollment (pdf)
12 01/15/14 Contracting Out for Services at Newly Develope...
13 03/01/14 Performance Measures (pdf)
14 03/01/14 Entry Level Writing Requirement (pdf)
15 03/31/14 Annual Report on Student Financial Support (pdf)
16 04/01/14 Unique Statewide Pupil Identifier (pdf)
17 04/01/14 Riverside School of Medicine (pdf)
18 05/15/14 Receipt and Use of Lottery Funds (pdf)
19 07/01/14 Cogeneration and Energy Consv Major Capital Pr...

Using Pandas to Explore/Manipulate Data

In [35]:
import pandas as pd
import numpy as np

sales = pd.read_excel('sales-estimate.xlsx')
sales.head(20)
Out[35]:
Account Name State Rep Manager Current_Price Quantity New_Product_Price
0 714466 Trantow-Barrows MN Craig Booker Debra Henley 500 100 550
1 737550 Fritsch, Russel and Anderson MN Craig Booker Debra Henley 600 90 725
2 146832 Kiehn-Spinka TX Daniel Hilton Debra Henley 225 475 255
3 218895 Kulas Inc TX Daniel Hilton Debra Henley 290 375 300
4 412290 Jerde-Hilpert WI John Smith Debra Henley 375 400 400
5 740150 Barton LLC WI John Smith Debra Henley 550 100 600
6 141962 Herman LLC CA Cedric Moss Fred Anderson 400 200 425
7 163416 Purdy-Kunde CA Cedric Moss Fred Anderson 450 150 475
8 239344 Stokes LLC WA Cedric Moss Fred Anderson 550 75 610
9 307599 Kassulke, Ondricka and Metz NV Wendy Yule Fred Anderson 275 450 300
10 688981 Keeling LLC NV Wendy Yule Fred Anderson 300 250 350
11 729833 Koepp Ltd NV Wendy Yule Fred Anderson 350 100 375
In [36]:
print sales['Current_Price'].mean(), sales['New_Product_Price'].mean()
405.416666667 447.083333333
In [37]:
total_shoes = sales['Quantity'].sum()
print total_shoes
2765
In [38]:
weightedAvg = (sales['Current_Price'] * sales['Quantity']).sum()/float(total_shoes)
print weightedAvg
342.540687161
In [39]:
weightAveNew = (sales['New_Product_Price'] * sales['Quantity']).sum()/float(sales['Quantity'].sum())
print weightAveNew
374.638336347
In [40]:
# GroupBy function allows you to group the data by items within a column

sales.groupby('Manager')[['Current_Price','New_Product_Price']].mean()
Out[40]:
Current_Price New_Product_Price
Manager
Debra Henley 423.333333 471.666667
Fred Anderson 387.500000 422.500000
In [41]:
def wavg(group,avg_name,weight_name):
    d = group[avg_name]
    w = group[weight_name]

    try:
        return (d*w).sum()/float(w.sum())
    except ZeroDivisionError:
        return np.nan
In [42]:
print wavg(sales,'Current_Price','Quantity'), wavg(sales,'New_Product_Price','Quantity')
342.540687161 374.638336347
In [43]:
print sales.groupby('Manager').apply(wavg,'Current_Price','Quantity')
print ''
print sales.groupby('Rep').apply(wavg,'Current_Price','Quantity')
Manager
Debra Henley     340.665584
Fred Anderson    344.897959
dtype: float64

Rep
Cedric Moss      444.117647
Craig Booker     547.368421
Daniel Hilton    253.676471
John Smith       410.000000
Wendy Yule       292.187500
dtype: float64
In [44]:
print sales.groupby(['Manager','State']).apply(wavg,'Current_Price','Quantity')
Manager        State
Debra Henley   MN       547.368421
               TX       253.676471
               WI       410.000000
Fred Anderson  CA       421.428571
               NV       292.187500
               WA       550.000000
dtype: float64
In [45]:
f = {'New_Product_Price':['mean'],'Current_Price':['median'],'Quantity':['sum','mean']}
sales.groupby('Manager').agg(f)
Out[45]:
Current_Price New_Product_Price Quantity
median mean sum mean
Manager
Debra Henley 437.5 471.666667 1540 256.666667
Fred Anderson 375.0 422.500000 1225 204.166667
In [46]:
data_1 = sales.groupby('Manager').apply(wavg,'New_Product_Price','Quantity')
data_2 = sales.groupby('Manager').apply(wavg,'Current_Price','Quantity')
print data_1
print data_2
Manager
Debra Henley     372.646104
Fred Anderson    377.142857
dtype: float64
Manager
Debra Henley     340.665584
Fred Anderson    344.897959
dtype: float64
In [47]:
summary = pd.DataFrame(data=dict(s1=data_1,s2=data_2))
summary.head()
Out[47]:
s1 s2
Manager
Debra Henley 372.646104 340.665584
Fred Anderson 377.142857 344.897959
In [48]:
print summary.columns
summary.columns = ['New Product Price','Current Product Price']
summary.head()
Index([u's1', u's2'], dtype='object')
Out[48]:
New Product Price Current Product Price
Manager
Debra Henley 372.646104 340.665584
Fred Anderson 377.142857 344.897959
In [49]:
# Numpy has a built-in weighted average function

np.average(sales['Current_Price'],weights=sales['Quantity'])
Out[49]:
342.54068716094031
In [50]:
sales.groupby('Manager').apply(lambda x: np.average(x['New_Product_Price'], weights = x['Quantity']))
Out[50]:
Manager
Debra Henley     372.646104
Fred Anderson    377.142857
dtype: float64
In [51]:
import pandas as pd

df = pd.read_excel('excel-comp-data.xlsx')
df.head()
Out[51]:
account name street city state postal-code Jan Feb Mar
0 211829 Kerluke, Koepp and Hilpert 34456 Sean Highway New Jaycob Texas 28752 10000 62000 35000
1 320563 Walter-Trantow 1311 Alvis Tunnel Port Khadijah NorthCarolina 38365 95000 45000 35000
2 648336 Bashirian, Kunde and Price 62184 Schamberger Underpass Apt. 231 New Lilianland Iowa 76517 91000 120000 35000
3 109996 D'Amore, Gleichner and Bode 155 Fadel Crescent Apt. 144 Hyattburgh Maine 46021 45000 120000 10000
4 121213 Bauch-Goldner 7274 Marissa Common Shanahanchester California 49681 162000 120000 35000
In [52]:
df['Total'] = df['Jan']+df['Feb']+df['Mar']
df.head()
Out[52]:
account name street city state postal-code Jan Feb Mar Total
0 211829 Kerluke, Koepp and Hilpert 34456 Sean Highway New Jaycob Texas 28752 10000 62000 35000 107000
1 320563 Walter-Trantow 1311 Alvis Tunnel Port Khadijah NorthCarolina 38365 95000 45000 35000 175000
2 648336 Bashirian, Kunde and Price 62184 Schamberger Underpass Apt. 231 New Lilianland Iowa 76517 91000 120000 35000 246000
3 109996 D'Amore, Gleichner and Bode 155 Fadel Crescent Apt. 144 Hyattburgh Maine 46021 45000 120000 10000 175000
4 121213 Bauch-Goldner 7274 Marissa Common Shanahanchester California 49681 162000 120000 35000 317000
In [53]:
sum_row = df[['Jan','Feb','Mar','Total']].sum()
print sum_row
Jan      1462000
Feb      1507000
Mar       717000
Total    3686000
dtype: int64
In [54]:
df.describe()
Out[54]:
account postal-code Jan Feb Mar Total
count 15.000000 15.000000 15.000000 15.000000 15.000000 15.000000
mean 236509.466667 46400.600000 97466.666667 100466.666667 47800.000000 245733.333333
std 128420.806565 17343.118919 51159.228558 34450.516618 34996.326338 67072.952465
min 109996.000000 18008.000000 10000.000000 10000.000000 10000.000000 107000.000000
25% 175142.500000 31667.000000 58500.000000 95000.000000 35000.000000 205000.000000
50% 212303.000000 46308.000000 91000.000000 120000.000000 35000.000000 246000.000000
75% 255561.500000 58123.000000 150000.000000 120000.000000 45000.000000 311000.000000
max 648336.000000 76517.000000 162000.000000 120000.000000 162000.000000 340000.000000
In [55]:
df_sum = pd.DataFrame(data=sum_row).T #.T allows you to transpose your matrix
df_sum
Out[55]:
Jan Feb Mar Total
0 1462000 1507000 717000 3686000
In [56]:
df_sum = df_sum.reindex(columns=df.columns)
df_sum
Out[56]:
account name street city state postal-code Jan Feb Mar Total
0 NaN NaN NaN NaN NaN NaN 1462000 1507000 717000 3686000
In [57]:
df_final = df.append(df_sum,ignore_index=True)
df_final.tail()
Out[57]:
account name street city state postal-code Jan Feb Mar Total
11 231907.0 Hahn-Moore 18115 Olivine Throughway Norbertomouth NorthDakota 31415.0 150000 10000 162000 322000
12 242368.0 Frami, Anderson and Donnelly 182 Bertie Road East Davian Iowa 72686.0 162000 120000 35000 317000
13 268755.0 Walsh-Haley 2624 Beatty Parkways Goodwinmouth RhodeIsland 31919.0 55000 120000 35000 210000
14 273274.0 McDermott PLC 8917 Bergstrom Meadow Kathryneborough Delaware 27933.0 150000 120000 70000 340000
15 NaN NaN NaN NaN NaN NaN 1462000 1507000 717000 3686000
In [58]:
import pandas as pd

df = pd.read_excel('sample-salesv3.xlsx')
df.head()
Out[58]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
1 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16 2014-01-01 10:00:47
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2014-01-01 15:05:22
4 412290 Jerde-Hilpert S2-34077 6 83.21 499.26 2014-01-01 23:26:55
In [59]:
# Find Data Types
df.info()
df.dtypes
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1500 entries, 0 to 1499
Data columns (total 7 columns):
account number    1500 non-null int64
name              1500 non-null object
sku               1500 non-null object
quantity          1500 non-null int64
unit price        1500 non-null float64
ext price         1500 non-null float64
date              1500 non-null object
dtypes: float64(2), int64(2), object(3)
memory usage: 82.1+ KB
Out[59]:
account number      int64
name               object
sku                object
quantity            int64
unit price        float64
ext price         float64
date               object
dtype: object
In [60]:
# Creating DateTime Variables
df['date'] = pd.to_datetime(df['date'])
df.head()
df.dtypes
Out[60]:
account number             int64
name                      object
sku                       object
quantity                   int64
unit price               float64
ext price                float64
date              datetime64[ns]
dtype: object
In [61]:
# Filtering the data
df[(df['account number']==307599) & (df['quantity']>20)].head()
Out[61]:
account number name sku quantity unit price ext price date
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2014-01-01 15:05:22
34 307599 Kassulke, Ondricka and Metz S2-78676 35 33.04 1156.40 2014-01-10 05:26:31
58 307599 Kassulke, Ondricka and Metz B1-20000 22 37.87 833.14 2014-01-15 16:22:22
70 307599 Kassulke, Ondricka and Metz S2-10342 44 96.79 4258.76 2014-01-18 06:32:31
143 307599 Kassulke, Ondricka and Metz S1-27722 32 95.66 3061.12 2014-02-03 11:27:34
In [62]:
# Startswith function allows you to choose specific variables
# Since we are filtering within a single column or "series" we use the .map function to manipulate the data
df[df['sku'].map(lambda x: x.startswith('B1'))].head()
Out[62]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
6 218895 Kulas Inc B1-65551 2 31.10 62.20 2014-01-02 10:57:23
14 737550 Fritsch, Russel and Anderson B1-53102 23 71.56 1645.88 2014-01-04 08:57:48
17 239344 Stokes LLC B1-50809 14 16.23 227.22 2014-01-04 22:14:32
In [63]:
df[(df['sku'].map(lambda x: x.startswith('B1'))) & (df['quantity']>22)].head()
# Filtering data for 
Out[63]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
14 737550 Fritsch, Russel and Anderson B1-53102 23 71.56 1645.88 2014-01-04 08:57:48
26 737550 Fritsch, Russel and Anderson B1-53636 42 42.06 1766.52 2014-01-08 00:02:11
31 714466 Trantow-Barrows B1-33087 32 19.56 625.92 2014-01-09 10:16:32
In [64]:
df[df['account number'].isin([737550,740150])].head()
Out[64]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
9 737550 Fritsch, Russel and Anderson S2-82423 14 81.92 1146.88 2014-01-03 19:07:37
14 737550 Fritsch, Russel and Anderson B1-53102 23 71.56 1645.88 2014-01-04 08:57:48
26 737550 Fritsch, Russel and Anderson B1-53636 42 42.06 1766.52 2014-01-08 00:02:11
32 737550 Fritsch, Russel and Anderson S1-27722 20 29.54 590.80 2014-01-09 13:20:40
In [65]:
df[df['name'].str.contains('Inc')].head(10)
Out[65]:
account number name sku quantity unit price ext price date
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
6 218895 Kulas Inc B1-65551 2 31.10 62.20 2014-01-02 10:57:23
33 218895 Kulas Inc S1-06532 3 22.36 67.08 2014-01-09 23:58:27
36 218895 Kulas Inc S2-34077 16 73.04 1168.64 2014-01-10 12:07:30
43 218895 Kulas Inc B1-50809 43 47.21 2030.03 2014-01-12 01:54:37
47 218895 Kulas Inc S1-47412 1 94.01 94.01 2014-01-13 08:36:18
65 218895 Kulas Inc S1-93683 43 21.72 933.96 2014-01-17 16:18:29
88 218895 Kulas Inc S2-83881 41 78.27 3209.07 2014-01-20 09:37:58
95 218895 Kulas Inc S2-16558 20 57.96 1159.20 2014-01-21 18:16:45
102 218895 Kulas Inc S1-93683 21 49.59 1041.39 2014-01-22 23:49:39
In [66]:
df.query('name == ["Kulas Inc","Barton LLC"]').head(10)
Out[66]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
6 218895 Kulas Inc B1-65551 2 31.10 62.20 2014-01-02 10:57:23
33 218895 Kulas Inc S1-06532 3 22.36 67.08 2014-01-09 23:58:27
36 218895 Kulas Inc S2-34077 16 73.04 1168.64 2014-01-10 12:07:30
43 218895 Kulas Inc B1-50809 43 47.21 2030.03 2014-01-12 01:54:37
47 218895 Kulas Inc S1-47412 1 94.01 94.01 2014-01-13 08:36:18
65 218895 Kulas Inc S1-93683 43 21.72 933.96 2014-01-17 16:18:29
85 740150 Barton LLC B1-50809 8 19.60 156.80 2014-01-20 01:48:47
88 218895 Kulas Inc S2-83881 41 78.27 3209.07 2014-01-20 09:37:58
In [67]:
df = df.sort_values('date')
df.head(10)
Out[67]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
1 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16 2014-01-01 10:00:47
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2014-01-01 15:05:22
4 412290 Jerde-Hilpert S2-34077 6 83.21 499.26 2014-01-01 23:26:55
5 714466 Trantow-Barrows S2-77896 17 87.63 1489.71 2014-01-02 10:07:15
6 218895 Kulas Inc B1-65551 2 31.10 62.20 2014-01-02 10:57:23
7 729833 Koepp Ltd S1-30248 8 33.25 266.00 2014-01-03 06:32:11
8 714466 Trantow-Barrows S1-50961 22 84.09 1849.98 2014-01-03 11:29:02
9 737550 Fritsch, Russel and Anderson S2-82423 14 81.92 1146.88 2014-01-03 19:07:37
In [68]:
df[df['date'] >= '10-10-2014'].head(10)
# Since our dates are in datetime format, you can read in many formats of date and time
Out[68]:
account number name sku quantity unit price ext price date
1174 257198 Cronin, Oberbrunner and Spencer S2-34077 13 12.24 159.12 2014-10-10 02:59:06
1175 740150 Barton LLC S1-65481 28 53.00 1484.00 2014-10-10 15:08:53
1176 146832 Kiehn-Spinka S1-27722 15 64.39 965.85 2014-10-10 18:24:01
1177 257198 Cronin, Oberbrunner and Spencer S2-16558 3 35.34 106.02 2014-10-11 01:48:13
1178 737550 Fritsch, Russel and Anderson B1-53636 10 56.95 569.50 2014-10-11 10:25:53
1179 737550 Fritsch, Russel and Anderson S1-06532 36 40.42 1455.12 2014-10-11 20:04:09
1180 729833 Koepp Ltd B1-53102 6 39.89 239.34 2014-10-11 20:43:21
1181 163416 Purdy-Kunde S1-06532 34 79.87 2715.58 2014-10-12 01:21:27
1182 737550 Fritsch, Russel and Anderson B1-33087 8 66.92 535.36 2014-10-12 07:35:14
1183 239344 Stokes LLC S1-93683 3 17.43 52.29 2014-10-12 08:56:05
In [69]:
df[(df['date'] >= '2014-04-01') & (df['date'] <= '2014-06-01')].head(10)
Out[69]:
account number name sku quantity unit price ext price date
384 383080 Will LLC S2-34077 19 52.21 991.99 2014-04-01 02:27:22
385 714466 Trantow-Barrows S2-00301 8 65.91 527.28 2014-04-01 17:59:55
386 307599 Kassulke, Ondricka and Metz B1-65551 48 43.44 2085.12 2014-04-01 20:48:35
387 307599 Kassulke, Ondricka and Metz S2-10342 37 18.44 682.28 2014-04-01 21:27:10
388 218895 Kulas Inc S2-77896 5 52.86 264.30 2014-04-01 22:57:04
389 218895 Kulas Inc S1-47412 42 93.63 3932.46 2014-04-02 02:27:06
390 729833 Koepp Ltd S1-30248 2 68.02 136.04 2014-04-02 07:32:39
391 604255 Halvorson, Crona and Champlin S1-06532 24 11.21 269.04 2014-04-02 19:25:34
392 786968 Frami, Hills and Schmidt S2-16558 10 44.95 449.50 2014-04-03 05:19:42
393 672390 Kuhn-Gusikowski S2-11481 37 33.20 1228.40 2014-04-03 14:06:35
In [70]:
df2 = df.set_index(['date'])
df2.head()
Out[70]:
account number name sku quantity unit price ext price
date
2014-01-01 07:21:51 740150 Barton LLC B1-20000 39 86.69 3380.91
2014-01-01 10:00:47 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16
2014-01-01 13:24:58 218895 Kulas Inc B1-69924 23 90.70 2086.10
2014-01-01 15:05:22 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05
2014-01-01 23:26:55 412290 Jerde-Hilpert S2-34077 6 83.21 499.26
In [71]:
df2['2014-01-01':'2014-02-10'].head(8)
Out[71]:
account number name sku quantity unit price ext price
date
2014-01-01 07:21:51 740150 Barton LLC B1-20000 39 86.69 3380.91
2014-01-01 10:00:47 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16
2014-01-01 13:24:58 218895 Kulas Inc B1-69924 23 90.70 2086.10
2014-01-01 15:05:22 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05
2014-01-01 23:26:55 412290 Jerde-Hilpert S2-34077 6 83.21 499.26
2014-01-02 10:07:15 714466 Trantow-Barrows S2-77896 17 87.63 1489.71
2014-01-02 10:57:23 218895 Kulas Inc B1-65551 2 31.10 62.20
2014-01-03 06:32:11 729833 Koepp Ltd S1-30248 8 33.25 266.00
In [72]:
df['name'].nunique()
df['name'].unique()
Out[72]:
array([u'Barton LLC', u'Trantow-Barrows', u'Kulas Inc',
       u'Kassulke, Ondricka and Metz', u'Jerde-Hilpert', u'Koepp Ltd',
       u'Fritsch, Russel and Anderson', u'Kiehn-Spinka', u'Keeling LLC',
       u'Frami, Hills and Schmidt', u'Stokes LLC', u'Kuhn-Gusikowski',
       u'Herman LLC', u'White-Trantow', u'Sanford and Sons',
       u'Pollich LLC', u'Will LLC', u'Cronin, Oberbrunner and Spencer',
       u'Halvorson, Crona and Champlin', u'Purdy-Kunde'], dtype=object)
In [73]:
df['account number'].nunique()
Out[73]:
20
In [74]:
df.drop_duplicates(subset=['account number','name'])
Out[74]:
account number name sku quantity unit price ext price date
0 740150 Barton LLC B1-20000 39 86.69 3380.91 2014-01-01 07:21:51
1 714466 Trantow-Barrows S2-77896 -1 63.16 -63.16 2014-01-01 10:00:47
2 218895 Kulas Inc B1-69924 23 90.70 2086.10 2014-01-01 13:24:58
3 307599 Kassulke, Ondricka and Metz S1-65481 41 21.05 863.05 2014-01-01 15:05:22
4 412290 Jerde-Hilpert S2-34077 6 83.21 499.26 2014-01-01 23:26:55
7 729833 Koepp Ltd S1-30248 8 33.25 266.00 2014-01-03 06:32:11
9 737550 Fritsch, Russel and Anderson S2-82423 14 81.92 1146.88 2014-01-03 19:07:37
10 146832 Kiehn-Spinka S2-82423 15 67.74 1016.10 2014-01-03 19:39:53
11 688981 Keeling LLC S2-00301 7 20.26 141.82 2014-01-04 00:02:36
12 786968 Frami, Hills and Schmidt S2-23246 6 61.31 367.86 2014-01-04 06:51:53
15 239344 Stokes LLC S1-06532 34 71.51 2431.34 2014-01-04 11:34:58
16 672390 Kuhn-Gusikowski S1-30248 14 72.75 1018.50 2014-01-04 19:59:02
18 141962 Herman LLC S1-82801 10 94.30 943.00 2014-01-05 15:12:16
20 424914 White-Trantow S2-00301 45 96.95 4362.75 2014-01-06 08:20:27
21 527099 Sanford and Sons S1-30248 24 50.29 1206.96 2014-01-06 14:14:00
30 642753 Pollich LLC S1-93683 4 87.18 348.72 2014-01-09 00:57:12
37 383080 Will LLC S2-23246 23 59.98 1379.54 2014-01-10 13:44:08
51 257198 Cronin, Oberbrunner and Spencer B1-05914 8 23.05 184.40 2014-01-14 01:57:35
67 604255 Halvorson, Crona and Champlin B1-65551 34 62.86 2137.24 2014-01-17 19:00:16
106 163416 Purdy-Kunde S1-47412 31 54.32 1683.92 2014-01-24 04:14:33
In [75]:
data = pd.read_csv('combine.csv')
data.describe()
Out[75]:
year heightfeet heightinches heightinchestotal weight arms hands fortyyd twentyyd tenyd twentyss threecone vertical broad bench round pickround picktotal wonderlic nflgrade
count 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000 4947.000000
mean 2007.132201 5.805337 4.371437 74.035476 245.579745 7.640243 2.252426 4.610386 0.073734 0.129149 3.298106 1.503002 28.741257 95.944006 15.723873 2.435415 11.318981 71.879523 1.144532 0.700627
std 5.029664 0.395981 3.279223 2.614778 45.639366 13.801035 4.070448 0.974087 0.432330 0.436941 1.907526 2.929683 11.596749 41.826340 10.840896 2.476746 12.262220 79.417514 5.524047 1.813870
min 1999.000000 5.000000 0.000000 65.000000 155.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 2003.000000 6.000000 2.000000 73.000000 208.000000 0.000000 0.000000 4.530000 0.000000 0.000000 3.835000 0.000000 28.000000 101.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
50% 2007.000000 6.000000 4.000000 74.000000 237.000000 0.000000 0.000000 4.690000 0.000000 0.000000 4.240000 0.000000 32.500000 112.000000 18.000000 2.000000 7.000000 42.000000 0.000000 0.000000
75% 2012.000000 6.000000 6.000000 76.000000 289.000000 0.000000 0.000000 4.990000 0.000000 0.000000 4.470000 0.000000 35.500000 119.000000 24.000000 5.000000 21.000000 134.000000 0.000000 0.000000
max 2015.000000 6.000000 11.875000 82.000000 386.000000 37.750000 11.375000 6.050000 2.980000 1.920000 5.560000 8.310000 46.000000 147.000000 51.000000 8.000000 53.000000 262.000000 48.000000 7.500000
In [76]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4947 entries, 0 to 4946
Data columns (total 26 columns):
year                 4947 non-null int64
name                 4947 non-null object
firstname            4947 non-null object
lastname             4947 non-null object
position             4947 non-null object
heightfeet           4947 non-null int64
heightinches         4947 non-null float64
heightinchestotal    4947 non-null float64
weight               4947 non-null int64
arms                 4947 non-null float64
hands                4947 non-null float64
fortyyd              4947 non-null float64
twentyyd             4947 non-null float64
tenyd                4947 non-null float64
twentyss             4947 non-null float64
threecone            4947 non-null float64
vertical             4947 non-null float64
broad                4947 non-null int64
bench                4947 non-null int64
round                4947 non-null int64
college              3477 non-null object
pick                 3156 non-null object
pickround            4947 non-null int64
picktotal            4947 non-null int64
wonderlic            4947 non-null int64
nflgrade             4947 non-null float64
dtypes: float64(11), int64(9), object(6)
memory usage: 1004.9+ KB
In [77]:
data.isnull().sum()
# Tells you where you have null values in the dataset
Out[77]:
year                    0
name                    0
firstname               0
lastname                0
position                0
heightfeet              0
heightinches            0
heightinchestotal       0
weight                  0
arms                    0
hands                   0
fortyyd                 0
twentyyd                0
tenyd                   0
twentyss                0
threecone               0
vertical                0
broad                   0
bench                   0
round                   0
college              1470
pick                 1791
pickround               0
picktotal               0
wonderlic               0
nflgrade                0
dtype: int64
In [78]:
print data['college'].nunique()
print data['college'].unique()
284
['Nebraska' 'USC' 'Boise St.' 'LSU' 'West Virginia' 'Penn St.' 'Utah'
 'Stanford' 'South Carolina' 'Clemson' 'Oregon' 'Auburn' 'Florida'
 'N.C. State' 'Oklahoma' 'Southeastern Louisiana' 'Ohio St.' 'Iowa St.'
 'Memphis' 'Virginia Tech' 'Duke' 'UNLV' 'Delaware' 'South Alabama'
 'Notre Dame' 'Northern Illinois' 'Louisville' 'Texas' 'Rutgers'
 'Northwestern' 'East Carolina' 'Navy' 'TCU' 'Western Michigan' 'Miami'
 'Michigan' 'Texas A&M' 'Pittsburgh' 'Minnesota' 'Tennessee' 'Indiana'
 'Alabama' 'Georgia' 'Washington St.' 'Rice' 'Mississippi St.'
 'North Dakota St.' 'Maryland' 'Florida St.' 'Colorado St.' 'Iowa'
 'Connecticut' 'Central Michigan' 'Fresno St.' 'Towson' 'Arkansas' 'Tulane'
 'Arizona State' 'Michigan St.' 'Kentucky' 'Ball St.' 'Syracuse' 'Nevada'
 'Kansas St.' 'BYU' 'Boston College' 'Central Florida' 'Missouri'
 'Mississippi' 'Baylor' 'Wisconsin' 'Oregon St.' 'Coastal Carolina'
 'Virginia' 'Kansas' 'Harvard' 'UCLA' 'Hawaii' 'Northern Iowa'
 'Stephen F. Austin' 'Wake Forest' 'Texas St.' 'Washington'
 'Central Arkansas' 'Portland St.' 'Tennessee-Chattanooga'
 'Prairie View A&M' 'Harding' 'Toledo' 'Louisiana-Monroe' 'James Madison'
 'Hobart & William Smith' 'Houston' 'William & Mary' 'Eastern Washington'
 'Tennessee St.' 'UAB' 'Southern Miss' 'Wyoming' 'San Diego St.'
 'Southern Illinois' 'Newberry' 'Miami (OH)' 'Purdue' 'Lafayette'
 'Massachusetts' 'Northwestern St. (LA)' 'Georgia Tech' 'Florida Atlantic'
 'Samford' 'Texas-El Paso' 'Norfolk St.' 'Yale' 'Montana'
 'South Dakota St.' 'Utah St.' 'Texas Tech' 'Western Kentucky' 'Kent St.'
 'Grand Valley St.' 'San Jose St.' 'North Carolina' 'Vanderbilt'
 'Pittsburg St. (KS)' 'Illinois' 'Arizona' 'Arkansas State' 'Ohio'
 'California' 'Alabama St.' 'Lindenwood' 'Northwest Missouri St.' 'Furman'
 'Dixie St.' 'Louisiana Tech' 'Bloomsburg' 'Sam Houston St.'
 'New Mexico St.' 'Eastern Illinois' 'Oklahoma St.' 'McNeese St.' 'Maine'
 'Saginaw Valley St.' 'Shepherd' 'Missouri Western' 'South Florida'
 'Buffalo' 'Cornell' 'Georgia Southern' 'Concordia (MN)'
 'Colorado State-Pueblo' 'Murray St.' 'Princeton' 'Colorado' 'South Dakota'
 'West Texas A&M' 'Georgia State' 'Richmond' nan 'North Carolina State'
 'Arkansas-Pine Bluff' 'Mississippi State' 'Michigan State'
 'Jacksonville State' 'Kansas State' 'East Central Oklahoma'
 'Florida International' 'Utah State' 'Florida State' 'San Diego State'
 'Ohio State' 'Chadron State' 'Florida A&M' 'Penn State' 'Tennessee-Martin'
 'Southern Methodist' 'Miami (FL)' 'Grand Valley State' 'Iowa State'
 'Valdosta State' 'Appalachian State' 'Elon' 'UCF' 'Oregon State'
 'San Jose State' 'Oklahoma State' 'Southern Utah' 'Boise State'
 'Fresno State' 'Missouri Southern State' 'Washington State' 'Kent State'
 'Louisiana-Lafayette' 'Presbyterian' 'Chattanooga' 'Marshall' 'Regina'
 'Cincinnati' 'North Alabama' 'Troy' 'Temple' 'Midwestern State'
 'South Carolina State' 'Citadel' 'Missouri State' 'Tulsa' 'Hampton'
 'Idaho' 'Slippery Rock' 'Abilene Christian' 'New Mexico State'
 'Mount Union' 'Portland State' 'New Mexico' 'Murray State' 'Indiana (PA)'
 'Brigham Young' 'Fordham' 'Colorado State' 'Cal Poly' 'Sam Houston State'
 'Ball State' 'Norfolk State' 'Liberty' 'McNeese State' 'Nicholls State'
 'Tennessee State' 'Northwestern State' 'Army' 'UTEP' 'Weber State'
 'Jackson State' 'San Diego' 'Eastern Michigan' 'Bowling Green'
 'Eastern Kentucky' 'Northwest Missouri State' 'Whitworth' 'Akron'
 'Western Oregon' 'Alabama State' 'Brown' 'Lane' 'Illinois State' 'Howard'
 'Hofstra' 'Northern Colorado' 'Grambling State' 'North Dakota'
 'Central Missouri State' 'Michigan Tech' 'Pearl River (JC)'
 'Bethune-Cookman' 'North Carolina A&T' 'North Dakota State'
 'Tennessee Tech' 'Idaho State' 'Sacramento State' 'Tusculum'
 'Northern Arizona' 'North Texas' 'Montana State' 'Mars Hill'
 'Middle Tennessee State' 'Tuskegee' 'Gustavus Adolphus' 'Morgan State'
 'Clarion' 'Texas State University' 'Wisc. Stout' 'UC Davis' 'Villanova'
 'Nebraska-Omaha' 'Texas A&M-Kingsville' 'Western Illinois'
 'Kentucky State' 'Texas A&M-Commerce' 'New Hampshire' 'Tarleton State'
 'Pennsylvania' 'Air Force' 'South Dakota State' 'Saginaw Valley'
 'Southern University' 'Fort Valley State']
In [79]:
data[data.college == 'UCLA']
Out[79]:
year name firstname lastname position heightfeet heightinches heightinchestotal weight arms ... vertical broad bench round college pick pickround picktotal wonderlic nflgrade
161 2015 Brett Hundley Brett Hundley QB 6 3.00 75.00 226 0.00 ... 36.0 120 0 0 UCLA NaN 0 0 0 5.5
169 2015 Anthony Jefferson Anthony Jefferson CB 6 1.00 73.00 198 0.00 ... 33.5 116 15 0 UCLA NaN 0 0 0 4.9
179 2015 Eric Kendricks Eric Kendricks ILB 6 0.00 72.00 232 0.00 ... 38.0 124 19 0 UCLA NaN 0 0 0 5.8
209 2015 Ellis McCarthy Ellis McCarthy DT 6 5.00 77.00 338 0.00 ... 32.0 109 24 0 UCLA NaN 0 0 0 5.0
224 2015 Owamagbe Odighizuwa Owamagbe Odighizuwa DE 6 3.00 75.00 267 0.00 ... 39.0 127 25 0 UCLA NaN 0 0 0 5.7
333 2014 Anthony Barr Anthony Barr OLB 6 5.00 77.00 255 0.00 ... 34.5 119 15 1 UCLA 0 9 9 0 6.6
408 2014 Shaq Evans Shaq Evans WR 6 1.00 73.00 213 0.00 ... 34.5 122 13 4 UCLA 0 19 115 0 5.3
516 2014 Cassius Marsh Cassius Marsh DE 6 4.00 76.00 252 0.00 ... 32.0 108 14 4 UCLA 0 12 108 0 5.4
612 2014 Xavier Su'a-Filo Xavier Su'a-Filo OG 6 4.00 76.00 307 0.00 ... 25.0 102 25 2 UCLA 0 1 33 0 5.6
656 2014 Jordan Zumwalt Jordan Zumwalt ILB 6 4.00 76.00 235 0.00 ... 33.0 116 0 6 UCLA 0 32 192 0 5.3
667 2013 Jeff Baca Jeff Baca OG 6 3.00 75.00 302 34.25 ... 26.5 100 0 6 UCLA 28(196) 28 196 0 0.0
726 2013 Johnathan Franklin Johnathan Franklin RB 5 10.00 70.00 205 30.00 ... 31.5 115 18 4 UCLA 28(125) 28 125 0 0.0
787 2013 Datone Jones Datone Jones DE 6 4.25 76.25 283 32.75 ... 31.5 112 29 1 UCLA 26(26) 26 26 0 0.0
1268 2011 Akeem Ayers Akeem Ayers OLB 6 3.00 75.00 254 33.00 ... 31.0 116 18 2 UCLA 7(39) 7 39 0 0.0
1441 2011 Rahim Moore Rahim Moore FS 6 1.00 73.00 202 30.25 ... 35.0 115 11 2 UCLA 13(45) 13 45 0 0.0
1765 2010 Brian Price Brian Price DT 6 1.00 73.00 303 32.25 ... 28.5 96 34 2 UCLA 3(35) 3 35 0 0.0
1818 2010 Alterraun Verner Alterraun Verner CB 5 10.00 70.00 189 31.00 ... 32.0 116 11 4 UCLA 6(104) 6 104 0 0.0
2192 2008 Bruce Davis Bruce Davis OLB 6 3.00 75.00 252 0.00 ... 29.0 109 19 3 UCLA 25(88) 25 88 0 0.0
2257 2008 Chris Horton Chris Horton SS 6 1.00 73.00 212 0.00 ... 28.5 109 14 7 UCLA 42(249) 42 249 0 0.0
2814 2006 Marcedes Lewis Marcedes Lewis TE 6 7.00 79.00 261 0.00 ... 37.0 118 23 1 UCLA 28(28) 28 28 0 0.0
2866 2006 Jarrad Page Jarrad Page SS 6 1.00 73.00 239 0.00 ... 38.0 127 0 7 UCLA 20(228) 20 228 0 0.0
2985 2005 Craig Bragg Craig Bragg WR 6 1.00 73.00 196 32.00 ... 36.0 122 0 6 UCLA 21(195) 21 195 0 0.0
3041 2005 Ben Emanuel Ben Emanuel FS 6 3.00 75.00 212 0.00 ... 0.0 0 21 5 UCLA 35(171) 35 171 0 0.0
3165 2005 Tab Perry Tab Perry WR 6 3.00 75.00 219 0.00 ... 34.0 116 0 6 UCLA 16(190) 16 190 0 0.0
3254 2004 Dave Ball Dave Ball DE 6 6.00 78.00 277 0.00 ... 0.0 0 24 5 UCLA 1(133) 1 133 0 0.0
3280 2004 Brandon Chillar Brandon Chillar OLB 6 3.00 75.00 253 0.00 ... 33.0 115 23 4 UCLA 34(130) 34 130 0 0.0
3488 2004 Matt Ware Matt Ware FS 6 3.00 75.00 209 0.00 ... 0.0 0 15 3 UCLA 26(89) 26 89 0 0.0
3715 2003 Mike Seidman Mike Seidman TE 6 5.00 77.00 261 0.00 ... 0.0 0 20 3 UCLA 12(76) 12 76 0 0.0
3783 2002 Marques Anderson Marques Anderson SS 5 11.00 71.00 213 0.00 ... 39.5 127 18 3 UCLA 27(92) 27 92 0 0.0
3825 2002 Kenyon Coleman Kenyon Coleman DE 6 5.00 77.00 284 0.00 ... 0.0 0 32 5 UCLA 12(147) 12 147 0 0.0
3863 2002 DeShaun Foster DeShaun Foster RB 6 1.00 73.00 222 0.00 ... 35.5 119 20 2 UCLA 2(34) 2 34 0 0.0
4018 2002 Robert Thomas Robert Thomas OLB 6 1.00 73.00 229 0.00 ... 34.0 116 21 1 UCLA 31(31) 31 31 0 0.0
4206 2001 Freddie Mitchell Freddie Mitchell WR 6 1.00 73.00 185 0.00 ... 39.5 120 0 1 UCLA 25(25) 25 25 0 0.0
4665 1999 Kris Farris Kris Farris OT 6 8.00 80.00 313 0.00 ... 24.0 95 20 3 UCLA 13(74) 13 74 0 0.0
4795 1999 Cade McNown Cade McNown QB 6 1.00 73.00 213 0.00 ... 33.5 113 0 1 UCLA 12(12) 12 12 28 0.0

35 rows × 26 columns

In [80]:
data.college.fillna('No College',inplace=True)
data.isnull().sum()
Out[80]:
year                    0
name                    0
firstname               0
lastname                0
position                0
heightfeet              0
heightinches            0
heightinchestotal       0
weight                  0
arms                    0
hands                   0
fortyyd                 0
twentyyd                0
tenyd                   0
twentyss                0
threecone               0
vertical                0
broad                   0
bench                   0
round                   0
college                 0
pick                 1791
pickround               0
picktotal               0
wonderlic               0
nflgrade                0
dtype: int64
In [81]:
data.college.value_counts()
Out[81]:
No College                1470
Georgia                     81
Florida                     80
USC                         77
Alabama                     75
LSU                         74
Oklahoma                    66
Notre Dame                  64
Florida State               63
Nebraska                    62
Texas                       60
Clemson                     57
Ohio State                  57
Tennessee                   56
Miami (FL)                  56
Virginia Tech               55
Iowa                        54
Auburn                      54
Wisconsin                   54
Oregon                      52
Michigan                    52
Texas A&M                   50
North Carolina              49
South Carolina              48
California                  47
Stanford                    46
Arkansas                    44
Louisville                  43
Arizona State               40
Penn State                  38
                          ...
Kent St.                     1
Prairie View A&M             1
Slippery Rock                1
Hobart & William Smith       1
Texas A&M-Kingsville         1
Presbyterian                 1
Texas-El Paso                1
Lindenwood                   1
Morgan State                 1
Northwest Missouri St.       1
South Dakota St.             1
Pittsburg St. (KS)           1
North Texas                  1
Tennessee Tech               1
Murray State                 1
Fordham                      1
Sacramento State             1
Villanova                    1
Army                         1
Florida A&M                  1
Concordia (MN)               1
Liberty                      1
Saginaw Valley St.           1
Valdosta State               1
Southern Utah                1
Murray St.                   1
Whitworth                    1
Shepherd                     1
San Diego                    1
Western Oregon               1
Name: college, Length: 285, dtype: int64
In [82]:
print data.pick.nunique()
len(data['pick'].unique())
1301
Out[82]:
1302
In [83]:
# Drop Null values from a speific columns 'pick'

data_after_drop = data.dropna(subset=['pick'])
In [84]:
data_dropped = data.drop('pick', axis=1)
data_dropped.head()
Out[84]:
year name firstname lastname position heightfeet heightinches heightinchestotal weight arms ... threecone vertical broad bench round college pickround picktotal wonderlic nflgrade
0 2015 Ameer Abdullah Ameer Abdullah RB 5 9.0 69.0 205 0.0 ... 6.79 42.5 130 24 0 Nebraska 0 0 0 5.9
1 2015 Nelson Agholor Nelson Agholor WR 6 0.0 72.0 198 0.0 ... 0.00 0.0 0 12 0 USC 0 0 0 5.6
2 2015 Jay Ajayi Jay Ajayi RB 6 0.0 72.0 221 0.0 ... 7.10 39.0 121 19 0 Boise St. 0 0 0 6.0
3 2015 Kwon Alexander Kwon Alexander OLB 6 1.0 73.0 227 0.0 ... 7.14 36.0 121 24 0 LSU 0 0 0 5.4
4 2015 Mario Alford Mario Alford WR 5 8.0 68.0 180 0.0 ... 6.64 34.0 121 13 0 West Virginia 0 0 0 5.3

5 rows × 25 columns

In [85]:
data['round'].unique()
Out[85]:
array([0, 6, 2, 4, 5, 1, 8, 3, 7], dtype=int64)
In [86]:
print data['position'].nunique()
data['position'].unique()
20
Out[86]:
array(['RB', 'WR', 'OLB', 'FS', 'DE', 'TE', 'ILB', 'DT', 'P', 'QB', 'OG',
       'OT', 'K', 'FB', 'SS', 'LS', 'CB', 'C', 'NT', 'OC'], dtype=object)
In [87]:
# Replace Values to make them easier to interpret for the coder

data.position.replace(['QB','RB','WR'],['Quarterback','Running Back','Wide Receiver'],inplace=True)
In [88]:
data[data['position'] == 'Quarterback']
Out[88]:
year name firstname lastname position heightfeet heightinches heightinchestotal weight arms ... vertical broad bench round college pick pickround picktotal wonderlic nflgrade
19 2015 Bryan Bennett Bryan Bennett Quarterback 6 2.0 74.0 211 0.0 ... 37.0 125 0 0 Southeastern Louisiana NaN 0 0 0 0.0
25 2015 Anthony Boone Anthony Boone Quarterback 6 0.0 72.0 231 0.0 ... 26.5 100 0 0 Duke NaN 0 0 0 5.0
28 2015 Brandon Bridge Brandon Bridge Quarterback 6 4.0 76.0 229 0.0 ... 33.0 110 0 0 South Alabama NaN 0 0 0 5.0
39 2015 Shane Carden Shane Carden Quarterback 6 2.0 74.0 218 0.0 ... 29.5 104 0 0 East Carolina NaN 0 0 0 5.1
101 2015 Cody Fajardo Cody Fajardo Quarterback 6 1.0 73.0 223 0.0 ... 32.5 118 0 0 Nevada NaN 0 0 0 5.0
126 2015 Garrett Grayson Garrett Grayson Quarterback 6 2.0 74.0 213 0.0 ... 0.0 0 0 0 Colorado St. NaN 0 0 0 5.4
138 2015 Connor Halliday Connor Halliday Quarterback 6 3.0 75.0 196 0.0 ... 0.0 0 0 0 Washington St. NaN 0 0 0 5.1
161 2015 Brett Hundley Brett Hundley Quarterback 6 3.0 75.0 226 0.0 ... 36.0 120 0 0 UCLA NaN 0 0 0 5.5
192 2015 Jerry Lovelocke Jerry Lovelocke Quarterback 6 4.0 76.0 248 0.0 ... 31.5 113 0 0 Prairie View A&M NaN 0 0 0 0.0
197 2015 Sean Mannion Sean Mannion Quarterback 6 6.0 78.0 229 0.0 ... 31.0 105 0 0 Oregon St. NaN 0 0 0 5.3
199 2015 Marcus Mariota Marcus Mariota Quarterback 6 4.0 76.0 222 0.0 ... 36.0 121 0 0 Oregon NaN 0 0 0 6.4
202 2015 Nick Marshall Nick Marshall Quarterback 6 1.0 73.0 207 0.0 ... 37.5 124 12 0 Auburn NaN 0 0 0 5.2
236 2015 Bryce Petty Bryce Petty Quarterback 6 3.0 75.0 230 0.0 ... 34.0 121 0 0 Baylor NaN 0 0 0 5.3
273 2015 Blake Sims Blake Sims Quarterback 5 11.0 71.0 218 0.0 ... 30.5 115 0 0 Alabama NaN 0 0 0 5.2
317 2015 Jameis Winston Jameis Winston Quarterback 6 4.0 76.0 231 0.0 ... 28.5 103 0 0 Florida St. NaN 0 0 0 6.5
347 2014 Blake Bortles Blake Bortles Quarterback 6 5.0 77.0 232 0.0 ... 32.5 115 0 1 Central Florida 0 3 3 0 6.4
351 2014 Tajh Boyd Tajh Boyd Quarterback 6 1.0 73.0 222 0.0 ... 30.5 106 0 7 Clemson 0 21 213 0 5.4
354 2014 Teddy Bridgewater Teddy Bridgewater Quarterback 6 2.0 74.0 214 0.0 ... 30.0 113 0 1 Louisville 0 32 32 0 6.3
369 2014 Derek Carr Derek Carr Quarterback 6 2.0 74.0 214 0.0 ... 34.5 110 0 2 Fresno St. 0 4 36 0 5.8
410 2014 David Fales David Fales Quarterback 6 2.0 74.0 212 0.0 ... 28.0 103 0 6 San Jose St. 0 23 183 0 5.2
429 2014 Jimmy Garoppolo Jimmy Garoppolo Quarterback 6 2.0 74.0 226 0.0 ... 30.5 110 0 2 Eastern Illinois 0 30 62 0 5.4
511 2014 Jordan Lynch Jordan Lynch Quarterback 6 0.0 72.0 217 0.0 ... 29.5 109 0 0 Northern Illinois 0 0 0 0 5.0
515 2014 Johnny Manziel Johnny Manziel Quarterback 6 0.0 72.0 207 0.0 ... 31.5 113 0 1 Texas A&M 0 22 22 0 5.9
521 2014 Jeff Mathews Jeff Mathews Quarterback 6 4.0 76.0 223 0.0 ... 25.5 105 0 0 Cornell 0 0 0 0 5.2
525 2014 AJ McCarron AJ McCarron Quarterback 6 3.0 75.0 220 0.0 ... 28.0 99 0 6 Alabama 0 4 164 0 5.6
530 2014 Zach Mettenberger Zach Mettenberger Quarterback 6 5.0 77.0 224 0.0 ... 0.0 0 0 6 LSU 0 18 178 0 5.3
538 2014 Stephen Morris Stephen Morris Quarterback 6 2.0 74.0 213 0.0 ... 30.0 111 0 0 Miami 0 0 0 0 5.3
544 2014 Aaron Murray Aaron Murray Quarterback 6 1.0 73.0 207 0.0 ... 0.0 0 0 6 Georgia 0 3 163 0 5.2
571 2014 Bryn Renner Bryn Renner Quarterback 6 3.0 75.0 228 0.0 ... 25.5 108 0 0 North Carolina 0 0 0 0 5.1
587 2014 Tom Savage Tom Savage Quarterback 6 4.0 76.0 228 0.0 ... 27.0 105 0 5 Pittsburgh 0 7 135 0 5.2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4445 2000 Sean Keenan Sean Keenan Quarterback 6 3.0 75.0 202 0.0 ... 25.5 95 0 0 No College NaN 0 0 32 0.0
4457 2000 Tim Lester Tim Lester Quarterback 6 1.0 73.0 202 0.0 ... 27.0 97 0 0 No College NaN 0 0 26 0.0
4464 2000 Tee Martin Tee Martin Quarterback 6 2.0 74.0 227 0.0 ... 34.5 113 0 5 Tennessee 34(163) 34 163 11 0.0
4494 2000 Chad Pennington Chad Pennington Quarterback 6 4.0 76.0 229 0.0 ... 33.5 111 0 1 Marshall 18(18) 18 18 25 0.0
4500 2000 Tim Rattay Tim Rattay Quarterback 6 1.0 73.0 215 0.0 ... 25.5 100 0 7 Louisiana Tech 6(212) 6 212 27 0.0
4502 2000 Chris Redman Chris Redman Quarterback 6 3.0 75.0 222 0.0 ... 26.5 98 0 3 Louisville 13(75) 13 75 16 0.0
4567 2000 Spergon Wynn Spergon Wynn Quarterback 6 4.0 76.0 229 0.0 ... 34.0 108 0 6 Texas State University 17(183) 17 183 25 0.0
4578 1999 Todd Bandhaur Todd Bandhaur Quarterback 6 3.0 75.0 236 0.0 ... 31.0 103 0 0 No College NaN 0 0 31 0.0
4586 1999 Michael Bishop Michael Bishop Quarterback 6 1.0 73.0 205 0.0 ... 33.0 113 0 7 Kansas State 21(227) 21 227 10 0.0
4594 1999 Aaron Brooks Aaron Brooks Quarterback 6 4.0 76.0 203 0.0 ... 35.5 120 0 4 Virginia 36(131) 36 131 17 0.0
4630 1999 Mike Cook Mike Cook Quarterback 6 1.0 73.0 207 0.0 ... 33.5 112 0 0 No College NaN 0 0 22 0.0
4634 1999 Tim Couch Tim Couch Quarterback 6 4.0 76.0 225 0.0 ... 31.0 0 0 1 Kentucky 1(1) 1 1 22 0.0
4635 1999 Scott Covington Scott Covington Quarterback 6 2.0 74.0 219 0.0 ... 34.0 109 0 7 Miami (FL) 39(245) 39 245 21 0.0
4637 1999 Daunte Culpepper Daunte Culpepper Quarterback 6 4.0 76.0 255 0.0 ... 39.0 122 0 1 UCF 11(11) 11 11 18 0.0
4640 1999 Kevin Daft Kevin Daft Quarterback 6 2.0 74.0 203 0.0 ... 32.5 114 0 5 UC Davis 18(151) 18 151 32 0.0
4645 1999 Oscar Davenport Oscar Davenport Quarterback 6 4.0 76.0 199 0.0 ... 31.0 109 0 0 No College NaN 0 0 6 0.0
4656 1999 Scott Dreisbach Scott Dreisbach Quarterback 6 3.0 75.0 216 0.0 ... 33.0 112 0 0 No College NaN 0 0 20 0.0
4681 1999 Mark Garcia Mark Garcia Quarterback 6 3.0 75.0 220 0.0 ... 33.5 108 0 0 No College NaN 0 0 21 0.0
4686 1999 Joe Germaine Joe Germaine Quarterback 6 1.0 73.0 211 0.0 ... 32.0 105 0 4 Ohio State 6(101) 6 101 25 0.0
4696 1999 Chris Greisen Chris Greisen Quarterback 6 3.0 75.0 223 0.0 ... 30.5 108 0 7 Northwest Missouri State 33(239) 33 239 27 0.0
4718 1999 Brock Huard Brock Huard Quarterback 6 5.0 77.0 227 0.0 ... 30.5 107 0 3 Washington 16(77) 16 77 25 0.0
4759 1999 Shaun King Shaun King Quarterback 6 1.0 73.0 221 0.0 ... 33.0 113 0 2 Tulane 19(50) 19 50 25 0.0
4764 1999 Brian Kuklick Brian Kuklick Quarterback 6 2.0 74.0 204 0.0 ... 31.0 107 0 0 No College NaN 0 0 29 0.0
4768 1999 Graham Leigh Graham Leigh Quarterback 6 2.0 74.0 212 0.0 ... 32.5 116 0 0 No College NaN 0 0 22 0.0
4775 1999 Jason Maas Jason Maas Quarterback 6 2.0 74.0 210 0.0 ... 31.5 103 0 0 No College NaN 0 0 43 0.0
4794 1999 Donovan McNabb Donovan McNabb Quarterback 6 2.0 74.0 223 0.0 ... 33.0 117 0 1 Syracuse 2(2) 2 2 14 0.0
4795 1999 Cade McNown Cade McNown Quarterback 6 1.0 73.0 213 0.0 ... 33.5 113 0 1 UCLA 12(12) 12 12 28 0.0
4873 1999 Akili Smith Akili Smith Quarterback 6 3.0 75.0 227 0.0 ... 34.0 114 0 1 Oregon 3(3) 3 3 26 0.0
4927 1999 Ted White Ted White Quarterback 6 3.0 75.0 226 0.0 ... 30.5 106 0 0 No College NaN 0 0 12 0.0
4941 1999 Anthony Wright Anthony Wright Quarterback 6 2.0 74.0 195 0.0 ... 31.0 116 0 0 No College NaN 0 0 16 0.0

304 rows × 26 columns

In [89]:
data_dropped = data.drop(['arms','wonderlic'],axis=1)
In [90]:
data_dropped.head(10)
Out[90]:
year name firstname lastname position heightfeet heightinches heightinchestotal weight hands ... threecone vertical broad bench round college pick pickround picktotal nflgrade
0 2015 Ameer Abdullah Ameer Abdullah Running Back 5 9.0 69.0 205 0.0 ... 6.79 42.5 130 24 0 Nebraska NaN 0 0 5.9
1 2015 Nelson Agholor Nelson Agholor Wide Receiver 6 0.0 72.0 198 0.0 ... 0.00 0.0 0 12 0 USC NaN 0 0 5.6
2 2015 Jay Ajayi Jay Ajayi Running Back 6 0.0 72.0 221 0.0 ... 7.10 39.0 121 19 0 Boise St. NaN 0 0 6.0
3 2015 Kwon Alexander Kwon Alexander OLB 6 1.0 73.0 227 0.0 ... 7.14 36.0 121 24 0 LSU NaN 0 0 5.4
4 2015 Mario Alford Mario Alford Wide Receiver 5 8.0 68.0 180 0.0 ... 6.64 34.0 121 13 0 West Virginia NaN 0 0 5.3
5 2015 Javorius Allen Javorius Allen Running Back 6 0.0 72.0 221 0.0 ... 6.96 35.5 121 11 0 USC NaN 0 0 5.3
6 2015 Adrian Amos Adrian Amos FS 6 0.0 72.0 218 0.0 ... 7.09 35.5 122 0 0 Penn St. NaN 0 0 5.5
7 2015 Dres Anderson Dres Anderson Wide Receiver 6 1.0 73.0 187 0.0 ... 0.00 0.0 0 13 0 Utah NaN 0 0 5.5
8 2015 Henry Anderson Henry Anderson DE 6 6.0 78.0 294 0.0 ... 7.20 30.0 111 0 0 Stanford NaN 0 0 5.6
9 2015 Rory 'Busta' Anderson Rory 'Busta' Anderson TE 6 5.0 77.0 244 0.0 ... 0.00 0.0 0 0 0 South Carolina NaN 0 0 5.5

10 rows × 24 columns

In [91]:
# Creating Dummy Variables
# In order to pass values, such as position, into an algorithm we have to pass numerical values into the algorithm
# Therefore, we created dummy variables using pd.get_dummies

data_positions = pd.get_dummies(data_dropped.position, prefix='Pos',drop_first=True)
data_positions
Out[91]:
Pos_CB Pos_DE Pos_DT Pos_FB Pos_FS Pos_ILB Pos_K Pos_LS Pos_NT Pos_OC Pos_OG Pos_OLB Pos_OT Pos_P Pos_Quarterback Pos_Running Back Pos_SS Pos_TE Pos_Wide Receiver
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
6 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
8 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
10 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
11 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
13 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
14 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
18 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
19 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
20 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
22 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
24 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
25 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
26 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
27 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
29 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4917 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
4918 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4919 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4920 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4921 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
4922 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
4923 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
4924 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
4925 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
4926 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4927 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
4928 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4929 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
4930 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
4931 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
4932 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
4933 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
4934 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4935 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4936 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
4937 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
4938 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
4939 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
4940 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
4941 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
4942 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4943 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
4944 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
4945 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4946 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0

4947 rows × 19 columns

In [92]:
data_positions.shape
Out[92]:
(4947, 19)
In [93]:
data_dropped.shape
Out[93]:
(4947, 24)
In [94]:
# Now that we have dummy variables, we are going to merge the dummy variables into our dataset

data = pd.merge(data_dropped, data_positions, left_index=True, right_index = True)
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4947 entries, 0 to 4946
Data columns (total 43 columns):
year                 4947 non-null int64
name                 4947 non-null object
firstname            4947 non-null object
lastname             4947 non-null object
position             4947 non-null object
heightfeet           4947 non-null int64
heightinches         4947 non-null float64
heightinchestotal    4947 non-null float64
weight               4947 non-null int64
hands                4947 non-null float64
fortyyd              4947 non-null float64
twentyyd             4947 non-null float64
tenyd                4947 non-null float64
twentyss             4947 non-null float64
threecone            4947 non-null float64
vertical             4947 non-null float64
broad                4947 non-null int64
bench                4947 non-null int64
round                4947 non-null int64
college              4947 non-null object
pick                 3156 non-null object
pickround            4947 non-null int64
picktotal            4947 non-null int64
nflgrade             4947 non-null float64
Pos_CB               4947 non-null uint8
Pos_DE               4947 non-null uint8
Pos_DT               4947 non-null uint8
Pos_FB               4947 non-null uint8
Pos_FS               4947 non-null uint8
Pos_ILB              4947 non-null uint8
Pos_K                4947 non-null uint8
Pos_LS               4947 non-null uint8
Pos_NT               4947 non-null uint8
Pos_OC               4947 non-null uint8
Pos_OG               4947 non-null uint8
Pos_OLB              4947 non-null uint8
Pos_OT               4947 non-null uint8
Pos_P                4947 non-null uint8
Pos_Quarterback      4947 non-null uint8
Pos_Running Back     4947 non-null uint8
Pos_SS               4947 non-null uint8
Pos_TE               4947 non-null uint8
Pos_Wide Receiver    4947 non-null uint8
dtypes: float64(10), int64(8), object(6), uint8(19)
memory usage: 1019.4+ KB
In [95]:
data.head(20)
Out[95]:
year name firstname lastname position heightfeet heightinches heightinchestotal weight hands ... Pos_OC Pos_OG Pos_OLB Pos_OT Pos_P Pos_Quarterback Pos_Running Back Pos_SS Pos_TE Pos_Wide Receiver
0 2015 Ameer Abdullah Ameer Abdullah Running Back 5 9.0 69.0 205 0.0 ... 0 0 0 0 0 0 1 0 0 0
1 2015 Nelson Agholor Nelson Agholor Wide Receiver 6 0.0 72.0 198 0.0 ... 0 0 0 0 0 0 0 0 0 1
2 2015 Jay Ajayi Jay Ajayi Running Back 6 0.0 72.0 221 0.0 ... 0 0 0 0 0 0 1 0 0 0
3 2015 Kwon Alexander Kwon Alexander OLB 6 1.0 73.0 227 0.0 ... 0 0 1 0 0 0 0 0 0 0
4 2015 Mario Alford Mario Alford Wide Receiver 5 8.0 68.0 180 0.0 ... 0 0 0 0 0 0 0 0 0 1
5 2015 Javorius Allen Javorius Allen Running Back 6 0.0 72.0 221 0.0 ... 0 0 0 0 0 0 1 0 0 0
6 2015 Adrian Amos Adrian Amos FS 6 0.0 72.0 218 0.0 ... 0 0 0 0 0 0 0 0 0 0
7 2015 Dres Anderson Dres Anderson Wide Receiver 6 1.0 73.0 187 0.0 ... 0 0 0 0 0 0 0 0 0 1
8 2015 Henry Anderson Henry Anderson DE 6 6.0 78.0 294 0.0 ... 0 0 0 0 0 0 0 0 0 0
9 2015 Rory 'Busta' Anderson Rory 'Busta' Anderson TE 6 5.0 77.0 244 0.0 ... 0 0 0 0 0 0 0 0 1 0
10 2015 Stephone Anthony Stephone Anthony ILB 6 3.0 75.0 243 0.0 ... 0 0 0 0 0 0 0 0 0 0
11 2015 Arik Armstead Arik Armstead DT 6 7.0 79.0 292 0.0 ... 0 0 0 0 0 0 0 0 0 0
12 2015 Cameron Artis-Payne Cameron Artis-Payne Running Back 5 10.0 70.0 212 0.0 ... 0 0 0 0 0 0 1 0 0 0
13 2015 Neiron Ball Neiron Ball OLB 6 2.0 74.0 236 0.0 ... 0 0 1 0 0 0 0 0 0 0
14 2015 Tavaris Barnes Tavaris Barnes DE 6 4.0 76.0 282 0.0 ... 0 0 0 0 0 0 0 0 0 0
15 2015 Wil Baumann Wil Baumann P 6 6.0 78.0 187 0.0 ... 0 0 0 0 1 0 0 0 0 0
16 2015 Vic Beasley Vic Beasley OLB 6 3.0 75.0 246 0.0 ... 0 0 1 0 0 0 0 0 0 0
17 2015 Blake Bell Blake Bell TE 6 6.0 78.0 252 0.0 ... 0 0 0 0 0 0 0 0 1 0
18 2015 Kenny Bell Kenny Bell Wide Receiver 6 1.0 73.0 197 0.0 ... 0 0 0 0 0 0 0 0 0 1
19 2015 Bryan Bennett Bryan Bennett Quarterback 6 2.0 74.0 211 0.0 ... 0 0 0 0 0 1 0 0 0 0

20 rows × 43 columns

In [ ]:

rss facebook twitter github youtube mail spotify lastfm instagram linkedin google google-plus pinterest medium vimeo stackoverflow reddit quora quora