Class Day 2 - Python Basics

Day #2 - Class Notes
In [1]:
# Import the numpy library. Gives you tools for manipulating data in arrays and more

import numpy as np
In [2]:
# Create a 1X3 numpy array

np.array([1,2,3])
Out[2]:
array([1, 2, 3])
In [3]:
# Create a 3X2 numpy array

np.array([[1,2],[3,4],[4,5]])
Out[3]:
array([[1, 2],
       [3, 4],
       [4, 5]])
In [4]:
np.zeros([2,3])
Out[4]:
array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])
In [5]:
np.ones([3,2])
Out[5]:
array([[ 1.,  1.],
       [ 1.,  1.],
       [ 1.,  1.]])
In [6]:
np.diag(np.ones(4))
Out[6]:
array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])
In [7]:
# Create a numpy array filled with values from 0 to 10 with a jump of 2

np.arange(0,10,2)
Out[7]:
array([0, 2, 4, 6, 8])
In [8]:
np.linspace(0,10,5)
Out[8]:
array([  0. ,   2.5,   5. ,   7.5,  10. ])
In [9]:
# Create a numpy array filled with (4) evenly spaced values from 0 to 1

np.linspace(0,1,4)
Out[9]:
array([ 0.        ,  0.33333333,  0.66666667,  1.        ])
In [10]:
np.linspace(0,1,5)
Out[10]:
array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])
In [11]:
np.linspace(0,1,27)
Out[11]:
array([ 0.        ,  0.03846154,  0.07692308,  0.11538462,  0.15384615,
        0.19230769,  0.23076923,  0.26923077,  0.30769231,  0.34615385,
        0.38461538,  0.42307692,  0.46153846,  0.5       ,  0.53846154,
        0.57692308,  0.61538462,  0.65384615,  0.69230769,  0.73076923,
        0.76923077,  0.80769231,  0.84615385,  0.88461538,  0.92307692,
        0.96153846,  1.        ])

Array Math

In [12]:
# Cannont add 10 to all values because this is a list

x = [1,2,3,4,5]
print x+10
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-12-6d60e3c030e8> in <module>()
      2
      3 x = [1,2,3,4,5]
----> 4 print x+10

TypeError: can only concatenate list (not "int") to list
In [13]:
# Add to to all values using a numpy array

x = [1,2,3,4,5]
x =  np.array(x)
print x +10
[11 12 13 14 15]
In [14]:
y = np.array([1,2,3,4,True,"A"])
print y
# Prints out all the items as strings
# Arrays are homogeneous in that they convert all the items to the same data type
['1' '2' '3' '4' 'True' 'A']
In [15]:
z = np.delete(y,3)
print z
['1' '2' '3' 'True' 'A']
In [16]:
a = np.arange(0,11,1)
a
Out[16]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])
In [17]:
# Using numpy array indexing

x = np.arange(4)
print x
print x[0]
print x[-1]
[0 1 2 3]
0
3
In [18]:
x = np.arange(6).reshape(2,3)
print x
#The number of rows and columns must multiply to the number of items correctly
#Else the reshape will not work
[[0 1 2]
 [3 4 5]]
In [19]:
print x[1,2]
5
In [20]:
print x[-1]
#Print last row of the array
[3 4 5]
In [21]:
print x[-1,-1]
print x[-2,-2]
print x[-2,-3]
#Printing numbers of an array working backwards
# array[row,column]
5
1
0
In [22]:
a = np.arange(10)
print a[:7]
print a[-3:]
[0 1 2 3 4 5 6]
[7 8 9]
In [23]:
y = np.arange(9).reshape(3,3)
y[:2,1:2]
Out[23]:
array([[1],
       [4]])
In [24]:
array1 = np.arange(25).reshape(5,5)
array1
Out[24]:
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])
In [25]:
z = array1.ravel()
z
#Flatten an array to a single row
Out[25]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])
In [26]:
# Array z with all values arranged in opposite order

z[::-1]
Out[26]:
array([24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10,  9,  8,
        7,  6,  5,  4,  3,  2,  1,  0])
In [27]:
# Array z with every 3rd values in opposite order

z[::-3]
Out[27]:
array([24, 21, 18, 15, 12,  9,  6,  3,  0])
In [28]:
children = np.array([0,1,2,2,5])
In [29]:
house_sum = sum(children)
num_of_fam = len(children)
print "The mean of children in a house is", house_sum/num_of_fam
The mean of children in a house is 2
In [30]:
print "The mean of children in a house is", np.mean(children)
The mean of children in a house is 2.0
In [31]:
m = int(len(children)/2)
print "The median of children in a house is", children[m]
The median of children in a house is 2
In [32]:
bill = 10000
ted = 24640
cyrus = 30000
john = 25000
trump = 100000000

income = [bill, ted, cyrus, john, trump]
sorted_income = sorted(income)
print "Median of sorted income is",sorted_income[int(len(sorted_income)/2)]
Median of sorted income is 25000
In [33]:
print "Median of sorted income is",int(np.median(sorted_income))
Median of sorted income is 25000
In [34]:
# Median is less affected by outliers
# Mean is more affected by outliers. 
# When filling in empty data spaces, may want to use median when data has lots of outliers
In [35]:
income = np.random.normal(24000,15000,10000)
income_with_trump = np.append(income,trump)

# Created a random normal distribution with:
# Median = 24000
# Standard Deviation = 15000
# Number of Points = 10000
In [36]:
print "Mean:",np.mean(income),"Median:", np.median(income),"Max:", max(income),"Min:", min(income)
Mean: 23954.0333672 Median: 23708.6545259 Max: 78162.1620809 Min: -33396.110874
In [37]:
print "Mean:",np.mean(income_with_trump),"Median:", np.median(income_with_trump),"Max:", max(income_with_trump),"Min:", min(income_with_trump)
Mean: 33950.6383034 Median: 23709.6828088 Max: 100000000.0 Min: -33396.110874
In [38]:
import matplotlib.pyplot as plt
%matplotlib inline

plt.hist(income,100)
# Plot (data, number of rectangular bars/histograms)
plt.show()
In [39]:
print np.mode(income)
# numby does not have mode. Will create an error message
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-39-acc4db92557f> in <module>()
----> 1 print np.mode(income)
      2 # numby does not have mode. Will create an error message

AttributeError: 'module' object has no attribute 'mode'
In [ ]:
ages = np.random.randint(2,high=25, size=50)
ages
In [ ]:
from scipy import stats
print stats.mode(ages)
In [ ]:
d = np.array([1,4,5,4,8])
m = np.mean(d)
differ = (d-m)**2
sum_differ = sum(differ)
var = sum_differ/len(d)
std_dev = var**1/2
print var, std_dev
In [ ]:
income = np.random.normal(100,20,10000)
print min(income), max(income)

PANDAS

In [40]:
import pandas as pd
df = pd.DataFrame({"name":["Bob","Jen","Tim","Jacob"],"age":[20,30,40,50],"pet":["cat","dog","bird","hamster"]})
df
Out[40]:
age name pet
0 20 Bob cat
1 30 Jen dog
2 40 Tim bird
3 50 Jacob hamster
In [41]:
df.columns
df.index
Out[41]:
RangeIndex(start=0, stop=4, step=1)
In [42]:
df[1:4]
# Not displaying the first index
Out[42]:
age name pet
1 30 Jen dog
2 40 Tim bird
3 50 Jacob hamster
In [43]:
df["name"]
Out[43]:
0      Bob
1      Jen
2      Tim
3    Jacob
Name: name, dtype: object
In [44]:
df["pet"]
Out[44]:
0        cat
1        dog
2       bird
3    hamster
Name: pet, dtype: object
In [45]:
df[["name","pet"]]
# In order to display just 2 columns, first make a list of the columns you want to display.
# Thus you are passing just a single list object
Out[45]:
name pet
0 Bob cat
1 Jen dog
2 Tim bird
3 Jacob hamster
In [46]:
df.name
Out[46]:
0      Bob
1      Jen
2      Tim
3    Jacob
Name: name, dtype: object
In [47]:
df['pet']
Out[47]:
0        cat
1        dog
2       bird
3    hamster
Name: pet, dtype: object
In [48]:
df.ix[1:3]
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.
Out[48]:
age name pet
1 30 Jen dog
2 40 Tim bird
3 50 Jacob hamster
In [49]:
df = pd.DataFrame({"name":["Bob","Jen","Tim","Jacob"],"age":[20,30,40,50],"pet":["cat","dog","bird","hamster"]})
df.sort_values('pet',inplace=True)
df
Out[49]:
age name pet
2 40 Tim bird
0 20 Bob cat
1 30 Jen dog
3 50 Jacob hamster
In [50]:
df.ix[0]
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.
Out[50]:
age      20
name    Bob
pet     cat
Name: 0, dtype: object
In [51]:
df.iloc[0,2]
Out[51]:
'bird'
In [52]:
df.ix[0:1]
Out[52]:
age name pet
0 20 Bob cat
1 30 Jen dog
In [53]:
df.reset_index(inplace=True)
df
Out[53]:
index age name pet
0 2 40 Tim bird
1 0 20 Bob cat
2 1 30 Jen dog
3 3 50 Jacob hamster
In [54]:
df.drop('index', axis=1, inplace=True)
df
Out[54]:
age name pet
0 40 Tim bird
1 20 Bob cat
2 30 Jen dog
3 50 Jacob hamster
In [55]:
df.iloc[:,0:2]
Out[55]:
age name
0 40 Tim
1 20 Bob
2 30 Jen
3 50 Jacob
In [56]:
df.loc[:,'name':'pet']
# Display just columns you desired by filtering by column header names
Out[56]:
name pet
0 Tim bird
1 Bob cat
2 Jen dog
3 Jacob hamster
In [57]:
data = pd.read_csv('test_pandas.csv',header=None)
data
# Read a CSV file
# If header !=None, the first excel row will become the header
Out[57]:
0 1 2 3
0 0 1 cat 1.1
1 1 2 dog 2.2
2 2 3 bird 3.3
In [58]:
data.to_csv('test_pandas_no_header.csv',header=False,index=False)
#write to a csv file
In [59]:
data = pd.read_excel('test_pandas.xlsm','Sheet1')
data
# reading an excel file
Out[59]:
Column A Column B Column C
0 1 cat 1.1
1 2 dog 2.2
2 3 bird 3.3
In [60]:
writer = pd.ExcelWriter('test_sheets.xlsx')
data.to_excel(writer,'Original')
data.to_excel(writer,'Copy')
writer.save()

# writing to an excel file
In [70]:
import sqlite3
conn = sqlite3.connect('test_pandas.db')

# Create connection to data base via sql
In [71]:
sqlite3.version
Out[71]:
'2.6.0'
In [72]:
tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")

for name in tables:
    print name[0]

# reading the names of the tables
test
new_test
new_test1
In [73]:
sql_guery = "SELECT * FROM test"
data = pd.read_sql(sql_guery,conn)
data

# Reading the sql file
# * Means choose all
# Choosing to read all the data
Out[73]:
id city mascot
0 1 San Francisco 49ers
1 2 Oakland Raiders
2 3 Seattle Seahawks
3 4 Chicago Bears
4 5 NYC Jets
5 6 Milwaukee Cheesehead
6 7 New England Lions
7 6 Milwaukee Cheesehead
8 7 New England Lions
9 6 Milwaukee Cheesehead
10 7 New England Lions
In [74]:
new_data = pd.DataFrame({'id':[6,7],'city':['Milwaukee','New England'],'mascot':['Cheesehead','Lions']})
new_data = new_data[['id','city','mascot']]
new_data
Out[74]:
id city mascot
0 6 Milwaukee Cheesehead
1 7 New England Lions
In [75]:
new_data.to_sql('new_test2',conn)

# Writing data to sql
In [76]:
new_data.to_sql('test',conn,if_exists='append', index=False)
In [77]:
conn.close()

http://sqlitebrowser.org/

Follow Link^ if you would like to see/visit the data base itself

In [78]:
from xml.etree import ElementTree as et

#parses the file
doc = et.parse('cars.xml')
print doc.find('CAR/MODEL').text
print doc.find('CAR[2]/MODEL').text

for element in doc.findall('CAR'):
    print element.find('MAKE').text + " " + element.find('MODEL').text + ", " + element.find('COST').text
Focus
Civic
Ford Focus, 15000
Honda Civic, 20000
Toyota Camry, 25000
Honda Accord, 22000
In [79]:
import requests

#retrieve an xml document from a web server
xml = requests.get("http://www.w3schools.com/xml/cd_catalog.xml")

print xml.content
<?xml version="1.0" encoding="UTF-8"?>
<CATALOG>
  <CD>
    <TITLE>Empire Burlesque</TITLE>
    <ARTIST>Bob Dylan</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Columbia</COMPANY>
    <PRICE>10.90</PRICE>
    <YEAR>1985</YEAR>
  </CD>
  <CD>
    <TITLE>Hide your heart</TITLE>
    <ARTIST>Bonnie Tyler</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS Records</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
  <CD>
    <TITLE>Greatest Hits</TITLE>
    <ARTIST>Dolly Parton</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>RCA</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1982</YEAR>
  </CD>
  <CD>
    <TITLE>Still got the blues</TITLE>
    <ARTIST>Gary Moore</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Virgin records</COMPANY>
    <PRICE>10.20</PRICE>
    <YEAR>1990</YEAR>
  </CD>
  <CD>
    <TITLE>Eros</TITLE>
    <ARTIST>Eros Ramazzotti</ARTIST>
    <COUNTRY>EU</COUNTRY>
    <COMPANY>BMG</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1997</YEAR>
  </CD>
  <CD>
    <TITLE>One night only</TITLE>
    <ARTIST>Bee Gees</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Polydor</COMPANY>
    <PRICE>10.90</PRICE>
    <YEAR>1998</YEAR>
  </CD>
  <CD>
    <TITLE>Sylvias Mother</TITLE>
    <ARTIST>Dr.Hook</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>CBS</COMPANY>
    <PRICE>8.10</PRICE>
    <YEAR>1973</YEAR>
  </CD>
  <CD>
    <TITLE>Maggie May</TITLE>
    <ARTIST>Rod Stewart</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Pickwick</COMPANY>
    <PRICE>8.50</PRICE>
    <YEAR>1990</YEAR>
  </CD>
  <CD>
    <TITLE>Romanza</TITLE>
    <ARTIST>Andrea Bocelli</ARTIST>
    <COUNTRY>EU</COUNTRY>
    <COMPANY>Polydor</COMPANY>
    <PRICE>10.80</PRICE>
    <YEAR>1996</YEAR>
  </CD>
  <CD>
    <TITLE>When a man loves a woman</TITLE>
    <ARTIST>Percy Sledge</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Atlantic</COMPANY>
    <PRICE>8.70</PRICE>
    <YEAR>1987</YEAR>
  </CD>
  <CD>
    <TITLE>Black angel</TITLE>
    <ARTIST>Savage Rose</ARTIST>
    <COUNTRY>EU</COUNTRY>
    <COMPANY>Mega</COMPANY>
    <PRICE>10.90</PRICE>
    <YEAR>1995</YEAR>
  </CD>
  <CD>
    <TITLE>1999 Grammy Nominees</TITLE>
    <ARTIST>Many</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Grammy</COMPANY>
    <PRICE>10.20</PRICE>
    <YEAR>1999</YEAR>
  </CD>
  <CD>
    <TITLE>For the good times</TITLE>
    <ARTIST>Kenny Rogers</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Mucik Master</COMPANY>
    <PRICE>8.70</PRICE>
    <YEAR>1995</YEAR>
  </CD>
  <CD>
    <TITLE>Big Willie style</TITLE>
    <ARTIST>Will Smith</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Columbia</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1997</YEAR>
  </CD>
  <CD>
    <TITLE>Tupelo Honey</TITLE>
    <ARTIST>Van Morrison</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Polydor</COMPANY>
    <PRICE>8.20</PRICE>
    <YEAR>1971</YEAR>
  </CD>
  <CD>
    <TITLE>Soulsville</TITLE>
    <ARTIST>Jorn Hoel</ARTIST>
    <COUNTRY>Norway</COUNTRY>
    <COMPANY>WEA</COMPANY>
    <PRICE>7.90</PRICE>
    <YEAR>1996</YEAR>
  </CD>
  <CD>
    <TITLE>The very best of</TITLE>
    <ARTIST>Cat Stevens</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Island</COMPANY>
    <PRICE>8.90</PRICE>
    <YEAR>1990</YEAR>
  </CD>
  <CD>
    <TITLE>Stop</TITLE>
    <ARTIST>Sam Brown</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>A and M</COMPANY>
    <PRICE>8.90</PRICE>
    <YEAR>1988</YEAR>
  </CD>
  <CD>
    <TITLE>Bridge of Spies</TITLE>
    <ARTIST>T'Pau</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Siren</COMPANY>
    <PRICE>7.90</PRICE>
    <YEAR>1987</YEAR>
  </CD>
  <CD>
    <TITLE>Private Dancer</TITLE>
    <ARTIST>Tina Turner</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>Capitol</COMPANY>
    <PRICE>8.90</PRICE>
    <YEAR>1983</YEAR>
  </CD>
  <CD>
    <TITLE>Midt om natten</TITLE>
    <ARTIST>Kim Larsen</ARTIST>
    <COUNTRY>EU</COUNTRY>
    <COMPANY>Medley</COMPANY>
    <PRICE>7.80</PRICE>
    <YEAR>1983</YEAR>
  </CD>
  <CD>
    <TITLE>Pavarotti Gala Concert</TITLE>
    <ARTIST>Luciano Pavarotti</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>DECCA</COMPANY>
    <PRICE>9.90</PRICE>
    <YEAR>1991</YEAR>
  </CD>
  <CD>
    <TITLE>The dock of the bay</TITLE>
    <ARTIST>Otis Redding</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>Stax Records</COMPANY>
    <PRICE>7.90</PRICE>
    <YEAR>1968</YEAR>
  </CD>
  <CD>
    <TITLE>Picture book</TITLE>
    <ARTIST>Simply Red</ARTIST>
    <COUNTRY>EU</COUNTRY>
    <COMPANY>Elektra</COMPANY>
    <PRICE>7.20</PRICE>
    <YEAR>1985</YEAR>
  </CD>
  <CD>
    <TITLE>Red</TITLE>
    <ARTIST>The Communards</ARTIST>
    <COUNTRY>UK</COUNTRY>
    <COMPANY>London</COMPANY>
    <PRICE>7.80</PRICE>
    <YEAR>1987</YEAR>
  </CD>
  <CD>
    <TITLE>Unchain my heart</TITLE>
    <ARTIST>Joe Cocker</ARTIST>
    <COUNTRY>USA</COUNTRY>
    <COMPANY>EMI</COMPANY>
    <PRICE>8.20</PRICE>
    <YEAR>1987</YEAR>
  </CD>
</CATALOG>

In [80]:
#Writing the code back to a local file

with open('test.xml','wb') as code:
    code.write(xml.content)
In [81]:
doc = et.parse("test.xml")

#outputs the album, artist, and year of each CD to the screen
for element in doc.findall('CD'):
    print 'Album: ', element.find('TITLE').text
    print 'Artists: ', element.find('ARTIST').text
    print 'Year: ', element.find('YEAR').text, "\n"
Album:  Empire Burlesque
Artists:  Bob Dylan
Year:  1985

Album:  Hide your heart
Artists:  Bonnie Tyler
Year:  1988

Album:  Greatest Hits
Artists:  Dolly Parton
Year:  1982

Album:  Still got the blues
Artists:  Gary Moore
Year:  1990

Album:  Eros
Artists:  Eros Ramazzotti
Year:  1997

Album:  One night only
Artists:  Bee Gees
Year:  1998

Album:  Sylvias Mother
Artists:  Dr.Hook
Year:  1973

Album:  Maggie May
Artists:  Rod Stewart
Year:  1990

Album:  Romanza
Artists:  Andrea Bocelli
Year:  1996

Album:  When a man loves a woman
Artists:  Percy Sledge
Year:  1987

Album:  Black angel
Artists:  Savage Rose
Year:  1995

Album:  1999 Grammy Nominees
Artists:  Many
Year:  1999

Album:  For the good times
Artists:  Kenny Rogers
Year:  1995

Album:  Big Willie style
Artists:  Will Smith
Year:  1997

Album:  Tupelo Honey
Artists:  Van Morrison
Year:  1971

Album:  Soulsville
Artists:  Jorn Hoel
Year:  1996

Album:  The very best of
Artists:  Cat Stevens
Year:  1990

Album:  Stop
Artists:  Sam Brown
Year:  1988

Album:  Bridge of Spies
Artists:  T'Pau
Year:  1987

Album:  Private Dancer
Artists:  Tina Turner
Year:  1983

Album:  Midt om natten
Artists:  Kim Larsen
Year:  1983

Album:  Pavarotti Gala Concert
Artists:  Luciano Pavarotti
Year:  1991

Album:  The dock of the bay
Artists:  Otis Redding
Year:  1968

Album:  Picture book
Artists:  Simply Red
Year:  1985

Album:  Red
Artists:  The Communards
Year:  1987

Album:  Unchain my heart
Artists:  Joe Cocker
Year:  1987

In [82]:
titanicData = pd.read_csv('train.csv')
titanicData
Out[82]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S
11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S
14 15 0 3 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 0 350406 7.8542 NaN S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S
18 19 0 3 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 0 345763 18.0000 NaN S
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.0 0 0 330923 8.0292 NaN Q
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S
24 25 0 3 Palsson, Miss. Torborg Danira female 8.0 3 1 349909 21.0750 NaN S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 5 347077 31.3875 NaN S
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S
... ... ... ... ... ... ... ... ... ... ... ... ...
861 862 0 2 Giles, Mr. Frederick Edward male 21.0 1 0 28134 11.5000 NaN S
862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 0 17466 25.9292 D17 S
863 864 0 3 Sage, Miss. Dorothy Edith "Dolly" female NaN 8 2 CA. 2343 69.5500 NaN S
864 865 0 2 Gill, Mr. John William male 24.0 0 0 233866 13.0000 NaN S
865 866 1 2 Bystrom, Mrs. (Karolina) female 42.0 0 0 236852 13.0000 NaN S
866 867 1 2 Duran y More, Miss. Asuncion female 27.0 1 0 SC/PARIS 2149 13.8583 NaN C
867 868 0 1 Roebling, Mr. Washington Augustus II male 31.0 0 0 PC 17590 50.4958 A24 S
868 869 0 3 van Melkebeke, Mr. Philemon male NaN 0 0 345777 9.5000 NaN S
869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 NaN S
870 871 0 3 Balkic, Mr. Cerin male 26.0 0 0 349248 7.8958 NaN S
871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 1 11751 52.5542 D35 S
872 873 0 1 Carlsson, Mr. Frans Olof male 33.0 0 0 695 5.0000 B51 B53 B55 S
873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 NaN S
874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 0 P/PP 3381 24.0000 NaN C
875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.0 0 0 2667 7.2250 NaN C
876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.0 0 0 7534 9.8458 NaN S
877 878 0 3 Petroff, Mr. Nedelio male 19.0 0 0 349212 7.8958 NaN S
878 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 NaN S
879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 C50 C
880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 1 230433 26.0000 NaN S
881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 NaN S
882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.0 0 0 7552 10.5167 NaN S
883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN S
884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S
885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns

In [83]:
titanicData.info()
#This gives you the info for your data set
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
PassengerId    891 non-null int64
Survived       891 non-null int64
Pclass         891 non-null int64
Name           891 non-null object
Sex            891 non-null object
Age            714 non-null float64
SibSp          891 non-null int64
Parch          891 non-null int64
Ticket         891 non-null object
Fare           891 non-null float64
Cabin          204 non-null object
Embarked       889 non-null object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.6+ KB
In [84]:
titanicData.isnull().sum()
Out[84]:
PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64
In [85]:
titanicData.head(10)
Out[85]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 0 237736 30.0708 NaN C
In [86]:
titanicData.tail(10)
Out[86]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 NaN S
882 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22.0 0 0 7552 10.5167 NaN S
883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN S
884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S
885 886 0 3 Rice, Mrs. William (Margaret Norton) female 39.0 0 5 382652 29.1250 NaN Q
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen "Carrie" female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q
In [87]:
# nunique = number of unique values
print titanicData['Pclass'].nunique()

# provides the unique values
titanicData['Pclass'].unique()
3
Out[87]:
array([3, 1, 2], dtype=int64)
In [88]:
titanicData['Sex'].unique()
Out[88]:
array(['male', 'female'], dtype=object)
In [89]:
# provides the number counts of each value
titanicData['Pclass'].value_counts()
Out[89]:
3    491
1    216
2    184
Name: Pclass, dtype: int64
In [90]:
titanicData.describe()
Out[90]:
PassengerId Survived Pclass Age SibSp Parch Fare
count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000
mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208
std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429
min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000
25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400
50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200
75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000
max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200
In [91]:
a = titanicData[['Survived','Name']]
a
Out[91]:
Survived Name
0 0 Braund, Mr. Owen Harris
1 1 Cumings, Mrs. John Bradley (Florence Briggs Th...
2 1 Heikkinen, Miss. Laina
3 1 Futrelle, Mrs. Jacques Heath (Lily May Peel)
4 0 Allen, Mr. William Henry
5 0 Moran, Mr. James
6 0 McCarthy, Mr. Timothy J
7 0 Palsson, Master. Gosta Leonard
8 1 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)
9 1 Nasser, Mrs. Nicholas (Adele Achem)
10 1 Sandstrom, Miss. Marguerite Rut
11 1 Bonnell, Miss. Elizabeth
12 0 Saundercock, Mr. William Henry
13 0 Andersson, Mr. Anders Johan
14 0 Vestrom, Miss. Hulda Amanda Adolfina
15 1 Hewlett, Mrs. (Mary D Kingcome)
16 0 Rice, Master. Eugene
17 1 Williams, Mr. Charles Eugene
18 0 Vander Planke, Mrs. Julius (Emelia Maria Vande...
19 1 Masselmani, Mrs. Fatima
20 0 Fynney, Mr. Joseph J
21 1 Beesley, Mr. Lawrence
22 1 McGowan, Miss. Anna "Annie"
23 1 Sloper, Mr. William Thompson
24 0 Palsson, Miss. Torborg Danira
25 1 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...
26 0 Emir, Mr. Farred Chehab
27 0 Fortune, Mr. Charles Alexander
28 1 O'Dwyer, Miss. Ellen "Nellie"
29 0 Todoroff, Mr. Lalio
... ... ...
861 0 Giles, Mr. Frederick Edward
862 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba...
863 0 Sage, Miss. Dorothy Edith "Dolly"
864 0 Gill, Mr. John William
865 1 Bystrom, Mrs. (Karolina)
866 1 Duran y More, Miss. Asuncion
867 0 Roebling, Mr. Washington Augustus II
868 0 van Melkebeke, Mr. Philemon
869 1 Johnson, Master. Harold Theodor
870 0 Balkic, Mr. Cerin
871 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny)
872 0 Carlsson, Mr. Frans Olof
873 0 Vander Cruyssen, Mr. Victor
874 1 Abelson, Mrs. Samuel (Hannah Wizosky)
875 1 Najib, Miss. Adele Kiamie "Jane"
876 0 Gustafsson, Mr. Alfred Ossian
877 0 Petroff, Mr. Nedelio
878 0 Laleff, Mr. Kristo
879 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)
880 1 Shelley, Mrs. William (Imanita Parrish Hall)
881 0 Markun, Mr. Johann
882 0 Dahlberg, Miss. Gerda Ulrika
883 0 Banfield, Mr. Frederick James
884 0 Sutehall, Mr. Henry Jr
885 0 Rice, Mrs. William (Margaret Norton)
886 0 Montvila, Rev. Juozas
887 1 Graham, Miss. Margaret Edith
888 0 Johnston, Miss. Catherine Helen "Carrie"
889 1 Behr, Mr. Karl Howell
890 0 Dooley, Mr. Patrick

891 rows × 2 columns

In [92]:
titanicData.loc[:5,'PassengerId':'Name']
Out[92]:
PassengerId Survived Pclass Name
0 1 0 3 Braund, Mr. Owen Harris
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th...
2 3 1 3 Heikkinen, Miss. Laina
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel)
4 5 0 3 Allen, Mr. William Henry
5 6 0 3 Moran, Mr. James
In [93]:
print titanicData['Sex'].unique()
titanicData['Sex'].value_counts()
['male' 'female']
Out[93]:
male      577
female    314
Name: Sex, dtype: int64
In [94]:
titanicData[titanicData.Sex=='male']
Out[94]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
5 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S
7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S
12 13 0 3 Saundercock, Mr. William Henry male 20.0 0 0 A/5. 2151 8.0500 NaN S
13 14 0 3 Andersson, Mr. Anders Johan male 39.0 1 5 347082 31.2750 NaN S
16 17 0 3 Rice, Master. Eugene male 2.0 4 1 382652 29.1250 NaN Q
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S
20 21 0 2 Fynney, Mr. Joseph J male 35.0 0 0 239865 26.0000 NaN S
21 22 1 2 Beesley, Mr. Lawrence male 34.0 0 0 248698 13.0000 D56 S
23 24 1 1 Sloper, Mr. William Thompson male 28.0 0 0 113788 35.5000 A6 S
26 27 0 3 Emir, Mr. Farred Chehab male NaN 0 0 2631 7.2250 NaN C
27 28 0 1 Fortune, Mr. Charles Alexander male 19.0 3 2 19950 263.0000 C23 C25 C27 S
29 30 0 3 Todoroff, Mr. Lalio male NaN 0 0 349216 7.8958 NaN S
30 31 0 1 Uruchurtu, Don. Manuel E male 40.0 0 0 PC 17601 27.7208 NaN C
33 34 0 2 Wheadon, Mr. Edward H male 66.0 0 0 C.A. 24579 10.5000 NaN S
34 35 0 1 Meyer, Mr. Edgar Joseph male 28.0 1 0 PC 17604 82.1708 NaN C
35 36 0 1 Holverson, Mr. Alexander Oskar male 42.0 1 0 113789 52.0000 NaN S
36 37 1 3 Mamee, Mr. Hanna male NaN 0 0 2677 7.2292 NaN C
37 38 0 3 Cann, Mr. Ernest Charles male 21.0 0 0 A./5. 2152 8.0500 NaN S
42 43 0 3 Kraeff, Mr. Theodor male NaN 0 0 349253 7.8958 NaN C
45 46 0 3 Rogers, Mr. William John male NaN 0 0 S.C./A.4. 23567 8.0500 NaN S
46 47 0 3 Lennon, Mr. Denis male NaN 1 0 370371 15.5000 NaN Q
48 49 0 3 Samaan, Mr. Youssef male NaN 2 0 2662 21.6792 NaN C
50 51 0 3 Panula, Master. Juha Niilo male 7.0 4 1 3101295 39.6875 NaN S
51 52 0 3 Nosworthy, Mr. Richard Cater male 21.0 0 0 A/4. 39886 7.8000 NaN S
54 55 0 1 Ostby, Mr. Engelhart Cornelius male 65.0 0 1 113509 61.9792 B30 C
55 56 1 1 Woolner, Mr. Hugh male NaN 0 0 19947 35.5000 C52 S
57 58 0 3 Novel, Mr. Mansouer male 28.5 0 0 2697 7.2292 NaN C
... ... ... ... ... ... ... ... ... ... ... ... ...
840 841 0 3 Alhomaki, Mr. Ilmari Rudolf male 20.0 0 0 SOTON/O2 3101287 7.9250 NaN S
841 842 0 2 Mudd, Mr. Thomas Charles male 16.0 0 0 S.O./P.P. 3 10.5000 NaN S
843 844 0 3 Lemberopolous, Mr. Peter L male 34.5 0 0 2683 6.4375 NaN C
844 845 0 3 Culumovic, Mr. Jeso male 17.0 0 0 315090 8.6625 NaN S
845 846 0 3 Abbing, Mr. Anthony male 42.0 0 0 C.A. 5547 7.5500 NaN S
846 847 0 3 Sage, Mr. Douglas Bullen male NaN 8 2 CA. 2343 69.5500 NaN S
847 848 0 3 Markoff, Mr. Marin male 35.0 0 0 349213 7.8958 NaN C
848 849 0 2 Harper, Rev. John male 28.0 0 1 248727 33.0000 NaN S
850 851 0 3 Andersson, Master. Sigvard Harald Elias male 4.0 4 2 347082 31.2750 NaN S
851 852 0 3 Svensson, Mr. Johan male 74.0 0 0 347060 7.7750 NaN S
857 858 1 1 Daly, Mr. Peter Denis male 51.0 0 0 113055 26.5500 E17 S
859 860 0 3 Razi, Mr. Raihed male NaN 0 0 2629 7.2292 NaN C
860 861 0 3 Hansen, Mr. Claus Peter male 41.0 2 0 350026 14.1083 NaN S
861 862 0 2 Giles, Mr. Frederick Edward male 21.0 1 0 28134 11.5000 NaN S
864 865 0 2 Gill, Mr. John William male 24.0 0 0 233866 13.0000 NaN S
867 868 0 1 Roebling, Mr. Washington Augustus II male 31.0 0 0 PC 17590 50.4958 A24 S
868 869 0 3 van Melkebeke, Mr. Philemon male NaN 0 0 345777 9.5000 NaN S
869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 NaN S
870 871 0 3 Balkic, Mr. Cerin male 26.0 0 0 349248 7.8958 NaN S
872 873 0 1 Carlsson, Mr. Frans Olof male 33.0 0 0 695 5.0000 B51 B53 B55 S
873 874 0 3 Vander Cruyssen, Mr. Victor male 47.0 0 0 345765 9.0000 NaN S
876 877 0 3 Gustafsson, Mr. Alfred Ossian male 20.0 0 0 7534 9.8458 NaN S
877 878 0 3 Petroff, Mr. Nedelio male 19.0 0 0 349212 7.8958 NaN S
878 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 NaN S
881 882 0 3 Markun, Mr. Johann male 33.0 0 0 349257 7.8958 NaN S
883 884 0 2 Banfield, Mr. Frederick James male 28.0 0 0 C.A./SOTON 34068 10.5000 NaN S
884 885 0 3 Sutehall, Mr. Henry Jr male 25.0 0 0 SOTON/OQ 392076 7.0500 NaN S
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

577 rows × 12 columns

In [95]:
titanicData[(titanicData.Sex == 'male') & (titanicData.Age > 18)]
len(titanicData[(titanicData.Sex == 'male') & (titanicData.Age > 18)])
Out[95]:
382
In [96]:
print len(titanicData[(titanicData.Survived == 1)])
print len(titanicData[(titanicData.Survived == 0)])
titanicData[(titanicData.Survived == 1)]
342
549
Out[96]:
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.00 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.00 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.00 1 0 113803 53.1000 C123 S
8 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.00 0 2 347742 11.1333 NaN S
9 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14.00 1 0 237736 30.0708 NaN C
10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.00 1 1 PP 9549 16.7000 G6 S
11 12 1 1 Bonnell, Miss. Elizabeth female 58.00 0 0 113783 26.5500 C103 S
15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.00 0 0 248706 16.0000 NaN S
17 18 1 2 Williams, Mr. Charles Eugene male NaN 0 0 244373 13.0000 NaN S
19 20 1 3 Masselmani, Mrs. Fatima female NaN 0 0 2649 7.2250 NaN C
21 22 1 2 Beesley, Mr. Lawrence male 34.00 0 0 248698 13.0000 D56 S
22 23 1 3 McGowan, Miss. Anna "Annie" female 15.00 0 0 330923 8.0292 NaN Q
23 24 1 1 Sloper, Mr. William Thompson male 28.00 0 0 113788 35.5000 A6 S
25 26 1 3 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.00 1 5 347077 31.3875 NaN S
28 29 1 3 O'Dwyer, Miss. Ellen "Nellie" female NaN 0 0 330959 7.8792 NaN Q
31 32 1 1 Spencer, Mrs. William Augustus (Marie Eugenie) female NaN 1 0 PC 17569 146.5208 B78 C
32 33 1 3 Glynn, Miss. Mary Agatha female NaN 0 0 335677 7.7500 NaN Q
36 37 1 3 Mamee, Mr. Hanna male NaN 0 0 2677 7.2292 NaN C
39 40 1 3 Nicola-Yarred, Miss. Jamila female 14.00 1 0 2651 11.2417 NaN C
43 44 1 2 Laroche, Miss. Simonne Marie Anne Andree female 3.00 1 2 SC/Paris 2123 41.5792 NaN C
44 45 1 3 Devaney, Miss. Margaret Delia female 19.00 0 0 330958 7.8792 NaN Q
47 48 1 3 O'Driscoll, Miss. Bridget female NaN 0 0 14311 7.7500 NaN Q
52 53 1 1 Harper, Mrs. Henry Sleeper (Myna Haxtun) female 49.00 1 0 PC 17572 76.7292 D33 C
53 54 1 2 Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkin... female 29.00 1 0 2926 26.0000 NaN S
55 56 1 1 Woolner, Mr. Hugh male NaN 0 0 19947 35.5000 C52 S
56 57 1 2 Rugg, Miss. Emily female 21.00 0 0 C.A. 31026 10.5000 NaN S
58 59 1 2 West, Miss. Constance Mirium female 5.00 1 2 C.A. 34651 27.7500 NaN S
61 62 1 1 Icard, Miss. Amelie female 38.00 0 0 113572 80.0000 B28 NaN
65 66 1 3 Moubarek, Master. Gerios male NaN 1 1 2661 15.2458 NaN C
66 67 1 2 Nye, Mrs. (Elizabeth Ramell) female 29.00 0 0 C.A. 29395 10.5000 F33 S
... ... ... ... ... ... ... ... ... ... ... ... ...
809 810 1 1 Chambers, Mrs. Norman Campbell (Bertha Griggs) female 33.00 1 0 113806 53.1000 E8 S
820 821 1 1 Hays, Mrs. Charles Melville (Clara Jennings Gr... female 52.00 1 1 12749 93.5000 B69 S
821 822 1 3 Lulic, Mr. Nikola male 27.00 0 0 315098 8.6625 NaN S
823 824 1 3 Moor, Mrs. (Beila) female 27.00 0 1 392096 12.4750 E121 S
827 828 1 2 Mallet, Master. Andre male 1.00 0 2 S.C./PARIS 2079 37.0042 NaN C
828 829 1 3 McCormack, Mr. Thomas Joseph male NaN 0 0 367228 7.7500 NaN Q
829 830 1 1 Stone, Mrs. George Nelson (Martha Evelyn) female 62.00 0 0 113572 80.0000 B28 NaN
830 831 1 3 Yasbeck, Mrs. Antoni (Selini Alexander) female 15.00 1 0 2659 14.4542 NaN C
831 832 1 2 Richards, Master. George Sibley male 0.83 1 1 29106 18.7500 NaN S
835 836 1 1 Compton, Miss. Sara Rebecca female 39.00 1 1 PC 17756 83.1583 E49 C
838 839 1 3 Chip, Mr. Chang male 32.00 0 0 1601 56.4958 NaN S
839 840 1 1 Marechal, Mr. Pierre male NaN 0 0 11774 29.7000 C47 C
842 843 1 1 Serepeca, Miss. Augusta female 30.00 0 0 113798 31.0000 NaN C
849 850 1 1 Goldenberg, Mrs. Samuel L (Edwiga Grabowska) female NaN 1 0 17453 89.1042 C92 C
853 854 1 1 Lines, Miss. Mary Conover female 16.00 0 1 PC 17592 39.4000 D28 S
855 856 1 3 Aks, Mrs. Sam (Leah Rosen) female 18.00 0 1 392091 9.3500 NaN S
856 857 1 1 Wick, Mrs. George Dennick (Mary Hitchcock) female 45.00 1 1 36928 164.8667 NaN S
857 858 1 1 Daly, Mr. Peter Denis male 51.00 0 0 113055 26.5500 E17 S
858 859 1 3 Baclini, Mrs. Solomon (Latifa Qurban) female 24.00 0 3 2666 19.2583 NaN C
862 863 1 1 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.00 0 0 17466 25.9292 D17 S
865 866 1 2 Bystrom, Mrs. (Karolina) female 42.00 0 0 236852 13.0000 NaN S
866 867 1 2 Duran y More, Miss. Asuncion female 27.00 1 0 SC/PARIS 2149 13.8583 NaN C
869 870 1 3 Johnson, Master. Harold Theodor male 4.00 1 1 347742 11.1333 NaN S
871 872 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.00 1 1 11751 52.5542 D35 S
874 875 1 2 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.00 1 0 P/PP 3381 24.0000 NaN C
875 876 1 3 Najib, Miss. Adele Kiamie "Jane" female 15.00 0 0 2667 7.2250 NaN C
879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.00 0 1 11767 83.1583 C50 C
880 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25.00 0 1 230433 26.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.00 0 0 112053 30.0000 B42 S
889 890 1 1 Behr, Mr. Karl Howell male 26.00 0 0 111369 30.0000 C148 C

342 rows × 12 columns

In [97]:
titanicData[titanicData.Sex=='male']

num_men_survived = float(len(titanicData[(titanicData.Survived == 1) & (titanicData.Sex == 'male')]))
num_women_survived = float(len(titanicData[(titanicData.Survived == 1) & (titanicData.Sex == 'female')]))
total_men = float(len(titanicData[titanicData.Sex == 'male']))
total_women = float(len(titanicData[titanicData.Sex == 'female']))

men_survival_rate = round(num_men_survived/total_men*100,2)
women_survival_rate = round(num_women_survived/total_women*100,2)

print "Male Survival Rate:",men_survival_rate,"percent. Female Survival Rate:", women_survival_rate,"percent."
Male Survival Rate: 18.89 percent. Female Survival Rate: 74.2 percent.
In [ ]:

rss facebook twitter github youtube mail spotify lastfm instagram linkedin google google-plus pinterest medium vimeo stackoverflow reddit quora quora