Goal: use the given data to build a regression model that accurately predicts the sale prices of homes.
# Read in Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# train_test_split moved to sklearn.model_selection in scikit-learn 0.18 and
# the old sklearn.cross_validation module is slated for removal in 0.20.
# Prefer the new location; fall back only on installations that predate it.
try:
    from sklearn.model_selection import train_test_split as tts
except ImportError:
    from sklearn.cross_validation import train_test_split as tts
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning)
# Read in Data
dfTrain = pd.read_csv('train.csv')  # 1460 rows
dfTest = pd.read_csv('test.csv')  # 1459 rows
# Stack the training rows on top of the test rows.
# ignore_index=True renumbers the combined rows 0..2918 (train 0-1459,
# test 1460-2918); without it the test rows keep their own 0-1458 labels and
# the combined index contains duplicate labels.
# sort_index(axis=1) keeps the columns in alphabetical order whatever the
# installed pandas version does by default when the column sets differ.
df = pd.concat([dfTrain, dfTest], ignore_index=True).sort_index(axis=1)
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print('%d %d %d' % (len(dfTrain), len(dfTest), len(df)))
1460 1459 2919
# Preview the first rows of the combined train + test frame.
df.head()
1stFlrSF | 2ndFlrSF | 3SsnPorch | Alley | BedroomAbvGr | BldgType | BsmtCond | BsmtExposure | BsmtFinSF1 | BsmtFinSF2 | ... | SaleType | ScreenPorch | Street | TotRmsAbvGrd | TotalBsmtSF | Utilities | WoodDeckSF | YearBuilt | YearRemodAdd | YrSold | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 856 | 854 | 0 | NaN | 3 | 1Fam | TA | No | 706.0 | 0.0 | ... | WD | 0 | Pave | 8 | 856.0 | AllPub | 0 | 2003 | 2003 | 2008 |
1 | 1262 | 0 | 0 | NaN | 3 | 1Fam | TA | Gd | 978.0 | 0.0 | ... | WD | 0 | Pave | 6 | 1262.0 | AllPub | 298 | 1976 | 1976 | 2007 |
2 | 920 | 866 | 0 | NaN | 3 | 1Fam | TA | Mn | 486.0 | 0.0 | ... | WD | 0 | Pave | 6 | 920.0 | AllPub | 0 | 2001 | 2002 | 2008 |
3 | 961 | 756 | 0 | NaN | 3 | 1Fam | Gd | No | 216.0 | 0.0 | ... | WD | 0 | Pave | 7 | 756.0 | AllPub | 0 | 1915 | 1970 | 2006 |
4 | 1145 | 1053 | 0 | NaN | 4 | 1Fam | TA | Av | 655.0 | 0.0 | ... | WD | 0 | Pave | 9 | 1145.0 | AllPub | 192 | 2000 | 2000 | 2008 |
5 rows × 81 columns
# Preview the last rows of the combined frame (these come from the test set).
df.tail()
1stFlrSF | 2ndFlrSF | 3SsnPorch | Alley | BedroomAbvGr | BldgType | BsmtCond | BsmtExposure | BsmtFinSF1 | BsmtFinSF2 | ... | SaleType | ScreenPorch | Street | TotRmsAbvGrd | TotalBsmtSF | Utilities | WoodDeckSF | YearBuilt | YearRemodAdd | YrSold | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1454 | 546 | 546 | 0 | NaN | 3 | Twnhs | TA | No | 0.0 | 0.0 | ... | WD | 0 | Pave | 5 | 546.0 | AllPub | 0 | 1970 | 1970 | 2006 |
1455 | 546 | 546 | 0 | NaN | 3 | TwnhsE | TA | No | 252.0 | 0.0 | ... | WD | 0 | Pave | 6 | 546.0 | AllPub | 0 | 1970 | 1970 | 2006 |
1456 | 1224 | 0 | 0 | NaN | 4 | 1Fam | TA | No | 1224.0 | 0.0 | ... | WD | 0 | Pave | 7 | 1224.0 | AllPub | 474 | 1960 | 1996 | 2006 |
1457 | 970 | 0 | 0 | NaN | 3 | 1Fam | TA | Av | 337.0 | 0.0 | ... | WD | 0 | Pave | 6 | 912.0 | AllPub | 80 | 1992 | 1992 | 2006 |
1458 | 996 | 1004 | 0 | NaN | 3 | 1Fam | TA | Av | 758.0 | 0.0 | ... | WD | 0 | Pave | 9 | 996.0 | AllPub | 190 | 1993 | 1994 | 2006 |
5 rows × 81 columns
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; a count below 2919 flags a column with missing values.
df.describe()
1stFlrSF | 2ndFlrSF | 3SsnPorch | BedroomAbvGr | BsmtFinSF1 | BsmtFinSF2 | BsmtFullBath | BsmtHalfBath | BsmtUnfSF | EnclosedPorch | ... | OverallQual | PoolArea | SalePrice | ScreenPorch | TotRmsAbvGrd | TotalBsmtSF | WoodDeckSF | YearBuilt | YearRemodAdd | YrSold | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 2919.000000 | 2919.000000 | 2919.000000 | 2919.000000 | 2918.000000 | 2918.000000 | 2917.000000 | 2917.000000 | 2918.000000 | 2919.000000 | ... | 2919.000000 | 2919.000000 | 1460.000000 | 2919.000000 | 2919.000000 | 2918.000000 | 2919.000000 | 2919.000000 | 2919.000000 | 2919.000000 |
mean | 1159.581706 | 336.483727 | 2.602261 | 2.860226 | 441.423235 | 49.582248 | 0.429894 | 0.061364 | 560.772104 | 23.098321 | ... | 6.089072 | 2.251799 | 180921.195890 | 16.062350 | 6.451524 | 1051.777587 | 93.709832 | 1971.312778 | 1984.264474 | 2007.792737 |
std | 392.362079 | 428.701456 | 25.188169 | 0.822693 | 455.610826 | 169.205611 | 0.524736 | 0.245687 | 439.543659 | 64.244246 | ... | 1.409947 | 35.663946 | 79442.502883 | 56.184365 | 1.569379 | 440.766258 | 126.526589 | 30.291442 | 20.894344 | 1.314964 |
min | 334.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 1.000000 | 0.000000 | 34900.000000 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 1872.000000 | 1950.000000 | 2006.000000 |
25% | 876.000000 | 0.000000 | 0.000000 | 2.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 220.000000 | 0.000000 | ... | 5.000000 | 0.000000 | 129975.000000 | 0.000000 | 5.000000 | 793.000000 | 0.000000 | 1953.500000 | 1965.000000 | 2007.000000 |
50% | 1082.000000 | 0.000000 | 0.000000 | 3.000000 | 368.500000 | 0.000000 | 0.000000 | 0.000000 | 467.000000 | 0.000000 | ... | 6.000000 | 0.000000 | 163000.000000 | 0.000000 | 6.000000 | 989.500000 | 0.000000 | 1973.000000 | 1993.000000 | 2008.000000 |
75% | 1387.500000 | 704.000000 | 0.000000 | 3.000000 | 733.000000 | 0.000000 | 1.000000 | 0.000000 | 805.500000 | 0.000000 | ... | 7.000000 | 0.000000 | 214000.000000 | 0.000000 | 7.000000 | 1302.000000 | 168.000000 | 2001.000000 | 2004.000000 | 2009.000000 |
max | 5095.000000 | 2065.000000 | 508.000000 | 8.000000 | 5644.000000 | 1526.000000 | 3.000000 | 2.000000 | 2336.000000 | 1012.000000 | ... | 10.000000 | 800.000000 | 755000.000000 | 576.000000 | 15.000000 | 6110.000000 | 1424.000000 | 2010.000000 | 2010.000000 | 2010.000000 |
8 rows × 38 columns
# Pairwise correlation matrix of the numeric columns, shown as a table.
df.corr()
1stFlrSF | 2ndFlrSF | 3SsnPorch | BedroomAbvGr | BsmtFinSF1 | BsmtFinSF2 | BsmtFullBath | BsmtHalfBath | BsmtUnfSF | EnclosedPorch | ... | OverallQual | PoolArea | SalePrice | ScreenPorch | TotRmsAbvGrd | TotalBsmtSF | WoodDeckSF | YearBuilt | YearRemodAdd | YrSold | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1stFlrSF | 1.000000 | -0.249823 | 0.044086 | 0.108418 | 0.458092 | 0.084330 | 0.261816 | 0.011074 | 0.296600 | -0.065796 | ... | 0.479152 | 0.121900 | 0.605852 | 0.098381 | 0.391782 | 0.801670 | 0.227347 | 0.310814 | 0.242245 | -0.013442 |
2ndFlrSF | -0.249823 | 1.000000 | -0.032458 | 0.503506 | -0.162301 | -0.097744 | -0.161468 | -0.060282 | -0.000376 | 0.054645 | ... | 0.245596 | 0.044503 | 0.319334 | 0.011070 | 0.584586 | -0.205665 | 0.089922 | 0.017588 | 0.158985 | -0.019229 |
3SsnPorch | 0.044086 | -0.032458 | 1.000000 | -0.048279 | 0.050908 | -0.023279 | 0.027378 | 0.026861 | -0.005810 | -0.032822 | ... | 0.018715 | -0.006525 | 0.044584 | -0.029546 | -0.025764 | 0.037892 | -0.003935 | 0.015958 | 0.037433 | 0.022560 |
BedroomAbvGr | 0.108418 | 0.503506 | -0.048279 | 1.000000 | -0.113547 | -0.031223 | -0.155200 | 0.018848 | 0.183259 | 0.049940 | ... | 0.073075 | 0.036544 | 0.168213 | 0.007254 | 0.669737 | 0.053393 | 0.031644 | -0.053101 | -0.021912 | -0.020453 |
BsmtFinSF1 | 0.458092 | -0.162301 | 0.050908 | -0.113547 | 1.000000 | -0.055045 | 0.638847 | 0.078361 | -0.477404 | -0.099712 | ... | 0.281810 | 0.084462 | 0.386420 | 0.096823 | 0.052141 | 0.536467 | 0.223492 | 0.279581 | 0.152126 | 0.022556 |
BsmtFinSF2 | 0.084330 | -0.097744 | -0.023279 | -0.031223 | -0.055045 | 1.000000 | 0.162835 | 0.099485 | -0.238241 | 0.032740 | ... | -0.042771 | 0.044524 | -0.011378 | 0.063302 | -0.048423 | 0.089410 | 0.098399 | -0.027595 | -0.062153 | 0.008883 |
BsmtFullBath | 0.261816 | -0.161468 | 0.027378 | -0.155200 | 0.638847 | 0.162835 | 1.000000 | -0.148832 | -0.398678 | -0.068393 | ... | 0.164543 | 0.043970 | 0.227122 | 0.053119 | -0.038916 | 0.325623 | 0.186107 | 0.211580 | 0.134947 | 0.045255 |
BsmtHalfBath | 0.011074 | -0.060282 | 0.026861 | 0.018848 | 0.078361 | 0.099485 | -0.148832 | 1.000000 | -0.107121 | -0.009675 | ... | -0.040732 | 0.066851 | -0.016844 | 0.042069 | -0.049610 | 0.012396 | 0.051549 | -0.030282 | -0.046285 | -0.020028 |
BsmtUnfSF | 0.296600 | -0.000376 | -0.005810 | 0.183259 | -0.477404 | -0.238241 | -0.398678 | -0.107121 | 1.000000 | 0.005006 | ... | 0.275175 | -0.032273 | 0.214479 | -0.049158 | 0.247505 | 0.412285 | -0.039302 | 0.130473 | 0.165175 | -0.038015 |
EnclosedPorch | -0.065796 | 0.054645 | -0.032822 | 0.049940 | -0.099712 | 0.032740 | -0.068393 | -0.009675 | 0.005006 | 1.000000 | ... | -0.139256 | 0.092534 | -0.128578 | -0.064377 | 0.015345 | -0.085510 | -0.119114 | -0.374073 | -0.220456 | -0.001134 |
Fireplaces | 0.407545 | 0.169991 | 0.018852 | 0.086793 | 0.293089 | 0.065625 | 0.169580 | 0.039265 | 0.004814 | 0.000971 | ... | 0.390753 | 0.099123 | 0.466929 | 0.169784 | 0.311260 | 0.332953 | 0.227608 | 0.170680 | 0.134157 | -0.006752 |
FullBath | 0.373077 | 0.402865 | 0.015192 | 0.356125 | 0.081525 | -0.075432 | -0.018815 | -0.047046 | 0.273207 | -0.118983 | ... | 0.528483 | 0.028091 | 0.560664 | -0.015910 | 0.527337 | 0.327762 | 0.181266 | 0.471169 | 0.457980 | -0.004882 |
GarageArea | 0.492011 | 0.128570 | 0.029440 | 0.073912 | 0.310449 | 0.003139 | 0.184738 | -0.021445 | 0.164409 | -0.106417 | ... | 0.565122 | 0.053049 | 0.623431 | 0.062389 | 0.328687 | 0.486067 | 0.238075 | 0.480735 | 0.376765 | -0.013451 |
GarageCars | 0.440540 | 0.182452 | 0.023383 | 0.092876 | 0.255482 | -0.014827 | 0.161709 | -0.033462 | 0.180099 | -0.132846 | ... | 0.600744 | 0.030424 | 0.640409 | 0.043112 | 0.358053 | 0.437997 | 0.240715 | 0.538074 | 0.426022 | -0.022918 |
GarageYrBlt | 0.260450 | 0.086094 | 0.020699 | -0.045364 | 0.194270 | -0.068549 | 0.150492 | -0.058379 | 0.171588 | -0.300751 | ... | 0.571803 | -0.014467 | 0.486362 | -0.062320 | 0.162684 | 0.348408 | 0.222584 | 0.834812 | 0.652365 | -0.004543 |
GrLivArea | 0.562538 | 0.655085 | 0.006268 | 0.517058 | 0.211669 | -0.017872 | 0.060531 | -0.043947 | 0.233988 | 0.003274 | ... | 0.575126 | 0.135441 | 0.708624 | 0.086284 | 0.808354 | 0.445276 | 0.251017 | 0.242666 | 0.316972 | -0.026919 |
HalfBath | -0.104141 | 0.611362 | -0.023417 | 0.245318 | -0.007311 | -0.032448 | -0.033468 | -0.058438 | -0.035803 | -0.081978 | ... | 0.272668 | 0.001424 | 0.284108 | 0.035622 | 0.345572 | -0.055718 | 0.116701 | 0.269743 | 0.211430 | 0.001587 |
Id | -0.008678 | -0.022252 | -0.046538 | 0.003074 | -0.016947 | 0.018251 | 0.000145 | 0.010387 | -0.014453 | 0.021609 | ... | -0.029771 | 0.014332 | -0.021917 | 0.022208 | -0.029368 | -0.024924 | -0.007056 | -0.016581 | -0.050438 | -0.256050 |
KitchenAbvGr | 0.076071 | 0.069032 | -0.021462 | 0.241182 | -0.086354 | -0.037779 | -0.017931 | -0.064916 | 0.064987 | 0.027639 | ... | -0.159325 | -0.013116 | -0.135907 | -0.056573 | 0.294569 | -0.038959 | -0.087399 | -0.137614 | -0.142431 | 0.035173 |
LotArea | 0.332460 | 0.031515 | 0.015995 | 0.132801 | 0.194031 | 0.084059 | 0.128349 | 0.026292 | 0.021362 | 0.020974 | ... | 0.100541 | 0.093708 | 0.263843 | 0.054375 | 0.213802 | 0.254138 | 0.158045 | 0.024128 | 0.021612 | -0.024234 |
LotFrontage | 0.458247 | 0.026545 | 0.028289 | 0.234892 | 0.219408 | 0.047431 | 0.113245 | -0.025629 | 0.113714 | 0.011509 | ... | 0.217645 | 0.174119 | 0.351799 | 0.075858 | 0.349513 | 0.354822 | 0.122070 | 0.122811 | 0.091557 | -0.007917 |
LowQualFinSF | -0.012704 | 0.017803 | -0.004545 | 0.070365 | -0.066028 | -0.004923 | -0.047099 | -0.013500 | 0.046913 | 0.087212 | ... | -0.048393 | 0.035177 | -0.025606 | 0.006835 | 0.102059 | -0.023359 | -0.015622 | -0.144191 | -0.060371 | -0.002257 |
MSSubClass | -0.248641 | 0.309309 | -0.037529 | -0.008796 | -0.064311 | -0.072530 | 0.009950 | -0.001878 | -0.125994 | -0.020867 | ... | 0.033638 | -0.003080 | -0.084284 | -0.049181 | 0.040509 | -0.219965 | -0.017654 | 0.034409 | 0.043315 | -0.015028 |
MasVnrArea | 0.395834 | 0.121014 | 0.013612 | 0.078126 | 0.303490 | -0.015645 | 0.141593 | 0.015006 | 0.090163 | -0.111499 | ... | 0.432947 | 0.004512 | 0.477493 | 0.065209 | 0.278228 | 0.397240 | 0.166200 | 0.314051 | 0.196875 | -0.018510 |
MiscVal | 0.093062 | -0.005299 | -0.000788 | 0.000245 | 0.093295 | -0.005139 | -0.004629 | 0.036902 | -0.010492 | 0.008654 | ... | 0.005562 | 0.011921 | -0.021190 | 0.007067 | 0.060903 | 0.084002 | 0.056883 | -0.010886 | -0.003124 | 0.008445 |
MoSold | 0.040143 | 0.014185 | 0.027391 | 0.055997 | -0.000942 | -0.009593 | -0.003568 | 0.023014 | 0.022607 | -0.020976 | ... | 0.030405 | -0.042230 | 0.046432 | 0.028535 | 0.045137 | 0.017888 | 0.018120 | 0.013938 | 0.017693 | -0.153895 |
OpenPorchSF | 0.238502 | 0.185387 | -0.009392 | 0.086212 | 0.124163 | -0.005876 | 0.081265 | -0.034917 | 0.119764 | -0.059672 | ... | 0.298084 | 0.064212 | 0.315856 | 0.047777 | 0.238156 | 0.245521 | 0.038457 | 0.198554 | 0.242182 | -0.037377 |
OverallCond | -0.157418 | 0.005494 | 0.043739 | -0.008477 | -0.050418 | 0.041501 | -0.042133 | 0.084181 | -0.138202 | 0.071044 | ... | -0.093847 | -0.016876 | -0.077856 | 0.043713 | -0.092027 | -0.174002 | 0.020123 | -0.368477 | 0.047654 | 0.030102 |
OverallQual | 0.479152 | 0.245596 | 0.018715 | 0.073075 | 0.281810 | -0.042771 | 0.164543 | -0.040732 | 0.275175 | -0.139256 | ... | 1.000000 | 0.030740 | 0.790982 | 0.042910 | 0.389761 | 0.549294 | 0.255317 | 0.597554 | 0.571532 | -0.019614 |
PoolArea | 0.121900 | 0.044503 | -0.006525 | 0.036544 | 0.084462 | 0.044524 | 0.043970 | 0.066851 | -0.032273 | 0.092534 | ... | 0.030740 | 1.000000 | 0.092404 | 0.026319 | 0.072061 | 0.072216 | 0.094231 | 0.002304 | -0.011407 | -0.052816 |
SalePrice | 0.605852 | 0.319334 | 0.044584 | 0.168213 | 0.386420 | -0.011378 | 0.227122 | -0.016844 | 0.214479 | -0.128578 | ... | 0.790982 | 0.092404 | 1.000000 | 0.111447 | 0.533723 | 0.613581 | 0.324413 | 0.522897 | 0.507101 | -0.028923 |
ScreenPorch | 0.098381 | 0.011070 | -0.029546 | 0.007254 | 0.096823 | 0.063302 | 0.053119 | 0.042069 | -0.049158 | -0.064377 | ... | 0.042910 | 0.026319 | 0.111447 | 1.000000 | 0.032324 | 0.075363 | -0.052134 | -0.041046 | -0.046878 | -0.006634 |
TotRmsAbvGrd | 0.391782 | 0.584586 | -0.025764 | 0.669737 | 0.052141 | -0.048423 | -0.038916 | -0.049610 | 0.247505 | 0.015345 | ... | 0.389761 | 0.072061 | 0.533723 | 0.032324 | 1.000000 | 0.282126 | 0.156543 | 0.114280 | 0.198250 | -0.032354 |
TotalBsmtSF | 0.801670 | -0.205665 | 0.037892 | 0.053393 | 0.536467 | 0.089410 | 0.325623 | 0.012396 | 0.412285 | -0.085510 | ... | 0.549294 | 0.072216 | 0.613581 | 0.075363 | 0.282126 | 1.000000 | 0.229600 | 0.408515 | 0.298107 | -0.011184 |
WoodDeckSF | 0.227347 | 0.089922 | -0.003935 | 0.031644 | 0.223492 | 0.098399 | 0.186107 | 0.051549 | -0.039302 | -0.119114 | ... | 0.255317 | 0.094231 | 0.324413 | -0.052134 | 0.156543 | 0.229600 | 1.000000 | 0.229426 | 0.218513 | -0.000180 |
YearBuilt | 0.310814 | 0.017588 | 0.015958 | -0.053101 | 0.279581 | -0.027595 | 0.211580 | -0.030282 | 0.130473 | -0.374073 | ... | 0.597554 | 0.002304 | 0.522897 | -0.041046 | 0.114280 | 0.408515 | 0.229426 | 1.000000 | 0.612235 | -0.012344 |
YearRemodAdd | 0.242245 | 0.158985 | 0.037433 | -0.021912 | 0.152126 | -0.062153 | 0.134947 | -0.046285 | 0.165175 | -0.220456 | ... | 0.571532 | -0.011407 | 0.507101 | -0.046878 | 0.198250 | 0.298107 | 0.218513 | 0.612235 | 1.000000 | 0.033203 |
YrSold | -0.013442 | -0.019229 | 0.022560 | -0.020453 | 0.022556 | 0.008883 | 0.045255 | -0.020028 | -0.038015 | -0.001134 | ... | -0.019614 | -0.052816 | -0.028923 | -0.006634 | -0.032354 | -0.011184 | -0.000180 | -0.012344 | 0.033203 | 1.000000 |
38 rows × 38 columns
# Draw the pairwise correlations of df's numeric columns as a heatmap, with
# every column name used as a tick label on both axes.
corr = df.corr()
axis_labels = corr.columns.values
fig, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(
    corr,
    xticklabels=axis_labels,
    yticklabels=axis_labels,
    ax=ax
)
<matplotlib.axes._subplots.AxesSubplot at 0x1fab8c50>
# Rank every numeric column by its correlation with SalePrice once, then show
# the five highest entries (SalePrice itself comes first with 1.0) and the
# five lowest ones.
ranked = corr['SalePrice'].sort_values(ascending=False)
# Single-argument print(...) produces the same text on Python 2 and Python 3;
# the ' \n' suffix reproduces the blank separator line between the two lists.
print('%s \n' % (ranked.head(5),))
print(ranked.tail(5))
SalePrice 1.000000 OverallQual 0.790982 GrLivArea 0.708624 GarageCars 0.640409 GarageArea 0.623431 Name: SalePrice, dtype: float64 YrSold -0.028923 OverallCond -0.077856 MSSubClass -0.084284 EnclosedPorch -0.128578 KitchenAbvGr -0.135907 Name: SalePrice, dtype: float64
# List every column with its non-null count and dtype to spot missing data.
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2919 entries, 0 to 1458 Data columns (total 81 columns): 1stFlrSF 2919 non-null int64 2ndFlrSF 2919 non-null int64 3SsnPorch 2919 non-null int64 Alley 198 non-null object BedroomAbvGr 2919 non-null int64 BldgType 2919 non-null object BsmtCond 2837 non-null object BsmtExposure 2837 non-null object BsmtFinSF1 2918 non-null float64 BsmtFinSF2 2918 non-null float64 BsmtFinType1 2840 non-null object BsmtFinType2 2839 non-null object BsmtFullBath 2917 non-null float64 BsmtHalfBath 2917 non-null float64 BsmtQual 2838 non-null object BsmtUnfSF 2918 non-null float64 CentralAir 2919 non-null object Condition1 2919 non-null object Condition2 2919 non-null object Electrical 2918 non-null object EnclosedPorch 2919 non-null int64 ExterCond 2919 non-null object ExterQual 2919 non-null object Exterior1st 2918 non-null object Exterior2nd 2918 non-null object Fence 571 non-null object FireplaceQu 1499 non-null object Fireplaces 2919 non-null int64 Foundation 2919 non-null object FullBath 2919 non-null int64 Functional 2917 non-null object GarageArea 2918 non-null float64 GarageCars 2918 non-null float64 GarageCond 2760 non-null object GarageFinish 2760 non-null object GarageQual 2760 non-null object GarageType 2762 non-null object GarageYrBlt 2760 non-null float64 GrLivArea 2919 non-null int64 HalfBath 2919 non-null int64 Heating 2919 non-null object HeatingQC 2919 non-null object HouseStyle 2919 non-null object Id 2919 non-null int64 KitchenAbvGr 2919 non-null int64 KitchenQual 2918 non-null object LandContour 2919 non-null object LandSlope 2919 non-null object LotArea 2919 non-null int64 LotConfig 2919 non-null object LotFrontage 2433 non-null float64 LotShape 2919 non-null object LowQualFinSF 2919 non-null int64 MSSubClass 2919 non-null int64 MSZoning 2915 non-null object MasVnrArea 2896 non-null float64 MasVnrType 2895 non-null object MiscFeature 105 non-null object MiscVal 2919 non-null int64 MoSold 2919 non-null int64 
Neighborhood 2919 non-null object OpenPorchSF 2919 non-null int64 OverallCond 2919 non-null int64 OverallQual 2919 non-null int64 PavedDrive 2919 non-null object PoolArea 2919 non-null int64 PoolQC 10 non-null object RoofMatl 2919 non-null object RoofStyle 2919 non-null object SaleCondition 2919 non-null object SalePrice 1460 non-null float64 SaleType 2918 non-null object ScreenPorch 2919 non-null int64 Street 2919 non-null object TotRmsAbvGrd 2919 non-null int64 TotalBsmtSF 2918 non-null float64 Utilities 2917 non-null object WoodDeckSF 2919 non-null int64 YearBuilt 2919 non-null int64 YearRemodAdd 2919 non-null int64 YrSold 2919 non-null int64 dtypes: float64(12), int64(26), object(43) memory usage: 1.8+ MB
# Drop rows that are identical across every column, then print how many rows
# remain; a count unchanged from the 2919 loaded rows means nothing was dropped.
# NOTE(review): the frame still contains the `Id` column. If Ids are unique
# per row, no two rows can ever compare equal here - confirm whether the check
# should ignore `Id`.
df = df.drop_duplicates()
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print(len(df))  # No Duplicates
2919
# Print each column name followed by its sorted unique values (and a blank
# line) to eyeball category levels, missing-value markers and odd entries.
for col in df:
    unique_values = np.sort(df[col].unique())
    # Single-argument print(...) produces the same text on Python 2 and 3.
    print('%s %s \n' % (col, unique_values))
# MSSubClass - No 150 Value
# MSZoning - No 'A', 'I', 'RP'
# Alley - Hard Code: nan = 0, Grvl = 1, Pave = 2
# MasVnrArea - fill NaN with 0 & convert to ints
# BsmtQual, BsmtCond, BsmtExposure, BsmtFinType1, BsmtFinSF1, BsmtFinType2, BsmtFinSF2, BsmtUnfSF - guessing 'nan' equals No Basement
# Electrical - has nulls (not sure how to fill them)
# MiscVal - will have to predict house price and then add on that feature afterwards
1stFlrSF [ 334 372 407 ..., 3820 4692 5095] 2ndFlrSF [ 0 110 125 144 167 180 182 185 192 208 213 218 220 224 228 240 245 252 272 297 299 304 308 312 316 319 320 322 325 328 330 332 336 341 343 348 349 351 356 358 360 363 368 370 371 375 376 378 380 384 390 396 400 403 406 408 412 420 423 424 425 428 430 432 434 436 438 439 441 442 444 445 448 450 453 454 455 456 457 462 464 465 466 467 468 472 473 475 476 477 482 486 488 492 493 494 495 496 498 499 500 501 502 504 505 510 511 512 514 516 517 518 520 521 523 524 525 526 527 528 530 531 532 533 534 536 537 539 540 541 544 545 546 547 548 549 550 551 552 554 556 557 558 560 561 563 564 566 567 568 570 571 573 574 576 580 581 582 583 584 585 586 587 588 589 590 591 592 594 595 596 600 601 602 604 605 606 608 610 611 612 614 615 616 620 622 623 624 625 626 628 629 630 631 634 636 638 639 640 642 644 645 646 648 649 650 651 653 656 658 659 660 661 662 663 664 665 668 670 671 672 673 674 676 677 678 679 680 682 683 684 685 686 687 688 689 690 691 694 695 697 698 700 701 702 703 704 707 708 709 711 712 713 714 715 716 717 718 720 725 726 727 728 729 730 732 734 735 736 738 739 741 742 743 744 745 747 748 750 752 753 754 755 756 757 760 761 762 764 765 766 767 768 769 770 772 775 776 778 779 780 782 783 784 785 786 787 788 790 792 793 794 795 796 797 798 800 804 806 807 808 809 810 811 812 813 814 815 816 817 818 821 823 825 826 827 828 829 830 831 832 833 834 836 838 839 840 841 842 843 844 845 846 848 850 851 854 855 857 858 860 861 862 863 864 866 867 868 869 870 871 872 873 874 875 876 878 880 881 882 883 884 885 886 887 888 890 892 893 895 896 897 898 899 900 901 902 903 904 908 910 912 913 914 915 916 917 918 919 920 923 924 925 926 927 928 929 930 932 933 936 939 940 941 942 943 950 954 955 956 957 959 960 966 967 971 972 973 975 976 977 978 979 980 981 983 984 985 988 989 990 992 994 995 998 1000 1001 1004 1005 1007 1008 1012 1015 1017 1020 1028 1029 1031 1032 1036 1037 1038 1039 1040 1042 1044 1045 1048 1051 1053 
1054 1060 1061 1063 1066 1067 1070 1072 1074 1075 1080 1081 1087 1088 1089 1092 1093 1096 1097 1098 1099 1100 1101 1103 1104 1106 1111 1112 1114 1116 1120 1121 1122 1124 1126 1128 1129 1131 1133 1134 1139 1140 1141 1142 1150 1151 1152 1157 1158 1160 1162 1164 1168 1169 1171 1174 1175 1177 1178 1182 1185 1189 1194 1196 1198 1200 1203 1208 1209 1215 1216 1218 1221 1230 1232 1234 1237 1238 1240 1242 1243 1246 1248 1250 1254 1257 1259 1274 1275 1276 1277 1281 1285 1286 1288 1295 1296 1304 1306 1309 1312 1315 1319 1320 1321 1323 1325 1330 1332 1335 1336 1340 1342 1345 1347 1349 1357 1358 1360 1362 1368 1369 1370 1371 1377 1384 1392 1402 1405 1407 1414 1420 1426 1427 1440 1479 1518 1519 1523 1538 1540 1567 1589 1611 1619 1629 1721 1778 1788 1796 1818 1836 1862 1872 2065] 3SsnPorch [ 0 23 86 96 120 130 140 144 150 153 162 168 174 176 180 182 196 216 219 224 225 238 245 255 290 304 320 323 360 407 508] Alley [nan 'Grvl' 'Pave'] BedroomAbvGr [0 1 2 3 4 5 6 8] BldgType ['1Fam' '2fmCon' 'Duplex' 'Twnhs' 'TwnhsE'] BsmtCond [nan 'Fa' 'Gd' 'Po' 'TA'] BsmtExposure [nan 'Av' 'Gd' 'Mn' 'No'] BsmtFinSF1 [ 0.00000000e+00 2.00000000e+00 1.60000000e+01 2.00000000e+01 2.40000000e+01 2.50000000e+01 2.70000000e+01 2.80000000e+01 3.20000000e+01 3.30000000e+01 3.50000000e+01 3.60000000e+01 4.00000000e+01 4.10000000e+01 4.20000000e+01 4.80000000e+01 4.90000000e+01 5.00000000e+01 5.10000000e+01 5.20000000e+01 5.30000000e+01 5.40000000e+01 5.50000000e+01 5.60000000e+01 5.70000000e+01 6.00000000e+01 6.30000000e+01 6.40000000e+01 6.50000000e+01 6.80000000e+01 7.00000000e+01 7.20000000e+01 7.30000000e+01 7.50000000e+01 7.60000000e+01 7.70000000e+01 7.80000000e+01 8.00000000e+01 8.10000000e+01 8.50000000e+01 8.80000000e+01 9.40000000e+01 9.60000000e+01 1.00000000e+02 1.04000000e+02 1.08000000e+02 1.10000000e+02 1.11000000e+02 1.13000000e+02 1.14000000e+02 1.16000000e+02 1.19000000e+02 1.20000000e+02 1.21000000e+02 1.22000000e+02 1.25000000e+02 1.26000000e+02 1.28000000e+02 1.29000000e+02 
1.30000000e+02 1.31000000e+02 1.32000000e+02 1.33000000e+02 1.34000000e+02 1.38000000e+02 1.40000000e+02 1.41000000e+02 1.43000000e+02 1.44000000e+02 1.48000000e+02 1.49000000e+02 1.50000000e+02 1.52000000e+02 1.54000000e+02 1.55000000e+02 1.56000000e+02 1.62000000e+02 1.65000000e+02 1.67000000e+02 1.68000000e+02 1.69000000e+02 1.70000000e+02 1.72000000e+02 1.73000000e+02 1.75000000e+02 1.76000000e+02 1.79000000e+02 1.80000000e+02 1.81000000e+02 1.82000000e+02 1.85000000e+02 1.86000000e+02 1.87000000e+02 1.88000000e+02 1.89000000e+02 1.90000000e+02 1.91000000e+02 1.92000000e+02 1.93000000e+02 1.94000000e+02 1.96000000e+02 1.97000000e+02 1.98000000e+02 2.00000000e+02 2.01000000e+02 2.03000000e+02 2.04000000e+02 2.05000000e+02 2.06000000e+02 2.07000000e+02 2.08000000e+02 2.09000000e+02 2.10000000e+02 2.12000000e+02 2.13000000e+02 2.15000000e+02 2.16000000e+02 2.18000000e+02 2.19000000e+02 2.20000000e+02 2.21000000e+02 2.22000000e+02 2.23000000e+02 2.24000000e+02 2.25000000e+02 2.26000000e+02 2.28000000e+02 2.30000000e+02 2.31000000e+02 2.34000000e+02 2.36000000e+02 2.38000000e+02 2.39000000e+02 2.40000000e+02 2.41000000e+02 2.42000000e+02 2.44000000e+02 2.46000000e+02 2.47000000e+02 2.48000000e+02 2.49000000e+02 2.50000000e+02 2.51000000e+02 2.52000000e+02 2.54000000e+02 2.56000000e+02 2.57000000e+02 2.58000000e+02 2.59000000e+02 2.60000000e+02 2.61000000e+02 2.62000000e+02 2.63000000e+02 2.64000000e+02 2.66000000e+02 2.67000000e+02 2.70000000e+02 2.71000000e+02 2.73000000e+02 2.74000000e+02 2.75000000e+02 2.76000000e+02 2.78000000e+02 2.79000000e+02 2.80000000e+02 2.81000000e+02 2.82000000e+02 2.83000000e+02 2.84000000e+02 2.85000000e+02 2.86000000e+02 2.88000000e+02 2.90000000e+02 2.92000000e+02 2.94000000e+02 2.96000000e+02 2.97000000e+02 2.98000000e+02 2.99000000e+02 3.00000000e+02 3.01000000e+02 3.05000000e+02 3.06000000e+02 3.08000000e+02 3.09000000e+02 3.10000000e+02 3.11000000e+02 3.12000000e+02 3.14000000e+02 3.15000000e+02 3.16000000e+02 3.17000000e+02 
3.18000000e+02 3.19000000e+02 3.20000000e+02 3.21000000e+02 3.22000000e+02 3.24000000e+02 3.26000000e+02 3.28000000e+02 3.29000000e+02 3.30000000e+02 3.31000000e+02 3.32000000e+02 3.34000000e+02 3.35000000e+02 3.36000000e+02 3.37000000e+02 3.38000000e+02 3.39000000e+02 3.40000000e+02 3.41000000e+02 3.42000000e+02 3.43000000e+02 3.44000000e+02 3.46000000e+02 3.47000000e+02 3.48000000e+02 3.49000000e+02 3.50000000e+02 3.51000000e+02 3.52000000e+02 3.53000000e+02 3.54000000e+02 3.55000000e+02 3.56000000e+02 3.58000000e+02 3.60000000e+02 3.61000000e+02 3.62000000e+02 3.63000000e+02 3.64000000e+02 3.65000000e+02 3.66000000e+02 3.68000000e+02 3.69000000e+02 3.70000000e+02 3.71000000e+02 3.72000000e+02 3.73000000e+02 3.74000000e+02 3.75000000e+02 3.76000000e+02 3.77000000e+02 3.78000000e+02 3.79000000e+02 3.80000000e+02 3.81000000e+02 3.82000000e+02 3.83000000e+02 3.84000000e+02 3.85000000e+02 3.86000000e+02 3.87000000e+02 3.88000000e+02 3.89000000e+02 3.90000000e+02 3.92000000e+02 3.93000000e+02 3.94000000e+02 3.97000000e+02 3.98000000e+02 3.99000000e+02 4.00000000e+02 4.02000000e+02 4.03000000e+02 4.04000000e+02 4.05000000e+02 4.06000000e+02 4.08000000e+02 4.10000000e+02 4.12000000e+02 4.14000000e+02 4.15000000e+02 4.16000000e+02 4.17000000e+02 4.19000000e+02 4.20000000e+02 4.21000000e+02 4.22000000e+02 4.23000000e+02 4.24000000e+02 4.25000000e+02 4.26000000e+02 4.27000000e+02 4.28000000e+02 4.29000000e+02 4.30000000e+02 4.32000000e+02 4.33000000e+02 4.34000000e+02 4.35000000e+02 4.36000000e+02 4.37000000e+02 4.38000000e+02 4.39000000e+02 4.40000000e+02 4.41000000e+02 4.42000000e+02 4.43000000e+02 4.44000000e+02 4.45000000e+02 4.48000000e+02 4.50000000e+02 4.52000000e+02 4.53000000e+02 4.54000000e+02 4.55000000e+02 4.56000000e+02 4.57000000e+02 4.58000000e+02 4.59000000e+02 4.60000000e+02 4.62000000e+02 4.64000000e+02 4.65000000e+02 4.66000000e+02 4.67000000e+02 4.68000000e+02 4.69000000e+02 4.71000000e+02 4.72000000e+02 4.74000000e+02 4.75000000e+02 4.76000000e+02 
4.77000000e+02 4.80000000e+02 4.81000000e+02 4.82000000e+02 4.83000000e+02 4.84000000e+02 4.85000000e+02 4.86000000e+02 4.88000000e+02 4.89000000e+02 4.90000000e+02 4.91000000e+02 4.92000000e+02 4.93000000e+02 4.94000000e+02 4.95000000e+02 4.96000000e+02 4.97000000e+02 4.98000000e+02 5.00000000e+02 5.01000000e+02 5.02000000e+02 5.03000000e+02 5.04000000e+02 5.05000000e+02 5.06000000e+02 5.07000000e+02 5.09000000e+02 5.10000000e+02 5.12000000e+02 5.13000000e+02 5.14000000e+02 5.15000000e+02 5.16000000e+02 5.18000000e+02 5.20000000e+02 5.21000000e+02 5.22000000e+02 5.23000000e+02 5.24000000e+02 5.26000000e+02 5.27000000e+02 5.28000000e+02 5.31000000e+02 5.32000000e+02 5.33000000e+02 5.34000000e+02 5.35000000e+02 5.36000000e+02 5.37000000e+02 5.38000000e+02 5.39000000e+02 5.40000000e+02 5.43000000e+02 5.44000000e+02 5.46000000e+02 5.47000000e+02 5.48000000e+02 5.49000000e+02 5.50000000e+02 5.51000000e+02 5.52000000e+02 5.53000000e+02 5.54000000e+02 5.56000000e+02 5.57000000e+02 5.59000000e+02 5.60000000e+02 5.62000000e+02 5.64000000e+02 5.65000000e+02 5.66000000e+02 5.67000000e+02 5.68000000e+02 5.69000000e+02 5.70000000e+02 5.72000000e+02 5.73000000e+02 5.74000000e+02 5.75000000e+02 5.76000000e+02 5.77000000e+02 5.78000000e+02 5.79000000e+02 5.80000000e+02 5.83000000e+02 5.84000000e+02 5.85000000e+02 5.86000000e+02 5.87000000e+02 5.88000000e+02 5.90000000e+02 5.92000000e+02 5.93000000e+02 5.94000000e+02 5.95000000e+02 5.96000000e+02 5.99000000e+02 6.00000000e+02 6.01000000e+02 6.02000000e+02 6.03000000e+02 6.04000000e+02 6.05000000e+02 6.06000000e+02 6.07000000e+02 6.08000000e+02 6.09000000e+02 6.11000000e+02 6.12000000e+02 6.14000000e+02 6.15000000e+02 6.16000000e+02 6.17000000e+02 6.19000000e+02 6.20000000e+02 6.21000000e+02 6.22000000e+02 6.23000000e+02 6.24000000e+02 6.25000000e+02 6.26000000e+02 6.30000000e+02 6.31000000e+02 6.32000000e+02 6.33000000e+02 6.34000000e+02 6.36000000e+02 6.37000000e+02 6.38000000e+02 6.39000000e+02 6.41000000e+02 6.42000000e+02 
6.43000000e+02 6.44000000e+02 6.45000000e+02 6.46000000e+02 6.47000000e+02 6.48000000e+02 6.49000000e+02 6.50000000e+02 6.51000000e+02 6.52000000e+02 6.54000000e+02 6.55000000e+02 6.56000000e+02 6.58000000e+02 6.59000000e+02 6.60000000e+02 6.61000000e+02 6.62000000e+02 6.63000000e+02 6.64000000e+02 6.66000000e+02 6.68000000e+02 6.69000000e+02 6.70000000e+02 6.71000000e+02 6.72000000e+02 6.73000000e+02 6.74000000e+02 6.78000000e+02 6.79000000e+02 6.80000000e+02 6.81000000e+02 6.82000000e+02 6.83000000e+02 6.84000000e+02 6.85000000e+02 6.86000000e+02 6.87000000e+02 6.88000000e+02 6.89000000e+02 6.90000000e+02 6.91000000e+02 6.92000000e+02 6.94000000e+02 6.95000000e+02 6.96000000e+02 6.97000000e+02 6.98000000e+02 6.99000000e+02 7.00000000e+02 7.01000000e+02 7.02000000e+02 7.04000000e+02 7.05000000e+02 7.06000000e+02 7.08000000e+02 7.09000000e+02 7.10000000e+02 7.12000000e+02 7.13000000e+02 7.14000000e+02 7.16000000e+02 7.17000000e+02 7.18000000e+02 7.19000000e+02 7.20000000e+02 7.22000000e+02 7.24000000e+02 7.25000000e+02 7.26000000e+02 7.27000000e+02 7.28000000e+02 7.29000000e+02 7.31000000e+02 7.32000000e+02 7.33000000e+02 7.34000000e+02 7.35000000e+02 7.36000000e+02 7.37000000e+02 7.38000000e+02 7.39000000e+02 7.40000000e+02 7.41000000e+02 7.42000000e+02 7.44000000e+02 7.45000000e+02 7.46000000e+02 7.47000000e+02 7.48000000e+02 7.49000000e+02 7.50000000e+02 7.51000000e+02 7.55000000e+02 7.56000000e+02 7.58000000e+02 7.59000000e+02 7.60000000e+02 7.62000000e+02 7.63000000e+02 7.64000000e+02 7.65000000e+02 7.66000000e+02 7.67000000e+02 7.69000000e+02 7.70000000e+02 7.71000000e+02 7.72000000e+02 7.73000000e+02 7.74000000e+02 7.75000000e+02 7.76000000e+02 7.77000000e+02 7.78000000e+02 7.79000000e+02 7.80000000e+02 7.81000000e+02 7.82000000e+02 7.83000000e+02 7.84000000e+02 7.85000000e+02 7.86000000e+02 7.87000000e+02 7.88000000e+02 7.89000000e+02 7.90000000e+02 7.91000000e+02 7.92000000e+02 7.93000000e+02 7.94000000e+02 7.95000000e+02 7.96000000e+02 7.97000000e+02 
7.99000000e+02 8.00000000e+02 8.03000000e+02 8.04000000e+02 8.06000000e+02 8.08000000e+02 8.09000000e+02 8.10000000e+02 8.11000000e+02 8.12000000e+02 8.13000000e+02 8.14000000e+02 8.16000000e+02 8.19000000e+02 8.20000000e+02 8.21000000e+02 8.22000000e+02 8.24000000e+02 8.26000000e+02 8.27000000e+02 8.28000000e+02 8.30000000e+02 8.31000000e+02 8.32000000e+02 8.33000000e+02 8.36000000e+02 8.37000000e+02 8.38000000e+02 8.40000000e+02 8.41000000e+02 8.42000000e+02 8.44000000e+02 8.46000000e+02 8.47000000e+02 8.48000000e+02 8.50000000e+02 8.51000000e+02 8.52000000e+02 8.53000000e+02 8.54000000e+02 8.56000000e+02 8.59000000e+02 8.60000000e+02 8.62000000e+02 8.63000000e+02 8.64000000e+02 8.65000000e+02 8.66000000e+02 8.67000000e+02 8.70000000e+02 8.71000000e+02 8.72000000e+02 8.73000000e+02 8.74000000e+02 8.76000000e+02 8.80000000e+02 8.81000000e+02 8.83000000e+02 8.85000000e+02 8.88000000e+02 8.90000000e+02 8.92000000e+02 8.93000000e+02 8.94000000e+02 8.95000000e+02 8.96000000e+02 8.97000000e+02 8.99000000e+02 9.00000000e+02 9.01000000e+02 9.02000000e+02 9.03000000e+02 9.04000000e+02 9.05000000e+02 9.06000000e+02 9.08000000e+02 9.09000000e+02 9.10000000e+02 9.12000000e+02 9.13000000e+02 9.14000000e+02 9.15000000e+02 9.16000000e+02 9.18000000e+02 9.19000000e+02 9.20000000e+02 9.22000000e+02 9.23000000e+02 9.24000000e+02 9.25000000e+02 9.26000000e+02 9.29000000e+02 9.30000000e+02 9.31000000e+02 9.32000000e+02 9.35000000e+02 9.36000000e+02 9.37000000e+02 9.38000000e+02 9.39000000e+02 9.41000000e+02 9.43000000e+02 9.44000000e+02 9.45000000e+02 9.46000000e+02 9.49000000e+02 9.50000000e+02 9.51000000e+02 9.52000000e+02 9.53000000e+02 9.54000000e+02 9.55000000e+02 9.56000000e+02 9.58000000e+02 9.60000000e+02 9.62000000e+02 9.64000000e+02 9.65000000e+02 9.67000000e+02 9.68000000e+02 9.69000000e+02 9.70000000e+02 9.73000000e+02 9.75000000e+02 9.76000000e+02 9.78000000e+02 9.80000000e+02 9.82000000e+02 9.83000000e+02 9.84000000e+02 9.85000000e+02 9.86000000e+02 9.87000000e+02 
9.88000000e+02 9.90000000e+02 9.91000000e+02 9.94000000e+02 9.96000000e+02 9.98000000e+02 1.00000000e+03 1.00100000e+03 1.00200000e+03 1.00300000e+03 1.00400000e+03 1.00500000e+03 1.01000000e+03 1.01100000e+03 1.01200000e+03 1.01300000e+03 1.01400000e+03 1.01500000e+03 1.01600000e+03 1.01800000e+03 1.02100000e+03 1.02200000e+03 1.02300000e+03 1.02400000e+03 1.02600000e+03 1.02700000e+03 1.02900000e+03 1.03000000e+03 1.03200000e+03 1.03300000e+03 1.03400000e+03 1.03500000e+03 1.03600000e+03 1.03700000e+03 1.03800000e+03 1.03900000e+03 1.04000000e+03 1.04400000e+03 1.04600000e+03 1.04700000e+03 1.04800000e+03 1.05100000e+03 1.05300000e+03 1.05600000e+03 1.05900000e+03 1.06400000e+03 1.06500000e+03 1.07000000e+03 1.07100000e+03 1.07300000e+03 1.07400000e+03 1.07500000e+03 1.07800000e+03 1.07900000e+03 1.08000000e+03 1.08200000e+03 1.08400000e+03 1.08500000e+03 1.08600000e+03 1.08700000e+03 1.08800000e+03 1.09000000e+03 1.09200000e+03 1.09400000e+03 1.09600000e+03 1.09700000e+03 1.09800000e+03 1.10100000e+03 1.10400000e+03 1.10600000e+03 1.11000000e+03 1.11100000e+03 1.11200000e+03 1.11500000e+03 1.11600000e+03 1.11800000e+03 1.12100000e+03 1.12200000e+03 1.12400000e+03 1.12600000e+03 1.12700000e+03 1.12800000e+03 1.12900000e+03 1.13600000e+03 1.13700000e+03 1.13800000e+03 1.14100000e+03 1.14200000e+03 1.14800000e+03 1.14900000e+03 1.15000000e+03 1.15100000e+03 1.15200000e+03 1.15300000e+03 1.15400000e+03 1.15700000e+03 1.15800000e+03 1.15900000e+03 1.16200000e+03 1.16300000e+03 1.16500000e+03 1.17000000e+03 1.17200000e+03 1.17300000e+03 1.17800000e+03 1.18000000e+03 1.18100000e+03 1.18200000e+03 1.18600000e+03 1.18800000e+03 1.19100000e+03 1.19400000e+03 1.19600000e+03 1.19800000e+03 1.20000000e+03 1.20100000e+03 1.20400000e+03 1.20600000e+03 1.21300000e+03 1.21600000e+03 1.21800000e+03 1.21900000e+03 1.22000000e+03 1.22300000e+03 1.22400000e+03 1.22500000e+03 1.23000000e+03 1.23100000e+03 1.23200000e+03 1.23400000e+03 1.23600000e+03 1.23700000e+03 1.23800000e+03 
1.23900000e+03 1.24300000e+03 1.24600000e+03 1.24700000e+03 1.24900000e+03 1.25200000e+03 1.25800000e+03 1.25900000e+03 1.26000000e+03 1.26100000e+03 1.26200000e+03 1.27000000e+03 1.27100000e+03 1.27400000e+03 1.27700000e+03 1.28000000e+03 1.28200000e+03 1.28300000e+03 1.28500000e+03 1.28800000e+03 1.29000000e+03 1.29400000e+03 1.29800000e+03 1.30000000e+03 1.30200000e+03 1.30400000e+03 1.30800000e+03 1.30900000e+03 1.31200000e+03 1.31400000e+03 1.31900000e+03 1.32000000e+03 1.32400000e+03 1.32800000e+03 1.32900000e+03 1.33000000e+03 1.33200000e+03 1.33300000e+03 1.33400000e+03 1.33600000e+03 1.33700000e+03 1.33800000e+03 1.34100000e+03 1.34600000e+03 1.35000000e+03 1.35100000e+03 1.35800000e+03 1.35900000e+03 1.36000000e+03 1.36100000e+03 1.36900000e+03 1.37300000e+03 1.37500000e+03 1.38300000e+03 1.38600000e+03 1.38700000e+03 1.39000000e+03 1.39200000e+03 1.39800000e+03 1.40000000e+03 1.40600000e+03 1.41000000e+03 1.41200000e+03 1.41400000e+03 1.41600000e+03 1.42000000e+03 1.42200000e+03 1.43000000e+03 1.43300000e+03 1.43600000e+03 1.44000000e+03 1.44100000e+03 1.44300000e+03 1.44500000e+03 1.44700000e+03 1.45500000e+03 1.45600000e+03 1.46000000e+03 1.46400000e+03 1.47000000e+03 1.47400000e+03 1.47600000e+03 1.47800000e+03 1.48000000e+03 1.50000000e+03 1.50500000e+03 1.51300000e+03 1.51800000e+03 1.53100000e+03 1.53800000e+03 1.54000000e+03 1.55700000e+03 1.55800000e+03 1.56200000e+03 1.56400000e+03 1.56700000e+03 1.57100000e+03 1.57200000e+03 1.57300000e+03 1.57600000e+03 1.59300000e+03 1.60600000e+03 1.61900000e+03 1.63200000e+03 1.63600000e+03 1.64000000e+03 1.64600000e+03 1.66000000e+03 1.68200000e+03 1.68400000e+03 1.69600000e+03 1.72100000e+03 1.72800000e+03 1.73200000e+03 1.73300000e+03 1.75800000e+03 1.76700000e+03 1.81000000e+03 1.81200000e+03 1.83600000e+03 1.88000000e+03 1.90400000e+03 1.96500000e+03 1.97200000e+03 2.08500000e+03 2.09600000e+03 2.14600000e+03 2.15800000e+03 2.18800000e+03 2.25700000e+03 2.26000000e+03 2.28800000e+03 4.01000000e+03 
5.64400000e+03 nan] BsmtFinSF2 [ 0. 6. 12. 28. 32. 35. 38. 40. 41. 42. 46. 48. 52. 60. 63. 64. 66. 68. 72. 76. 78. 80. 81. 92. 93. 95. 96. 102. 105. 106. 108. 110. 113. 116. 117. 119. 120. 121. 123. 125. 127. 128. 132. 136. 138. 139. 144. 147. 149. 150. 153. 154. 156. 159. 162. 163. 165. 167. 168. 169. 173. 174. 175. 177. 180. 181. 182. 184. 186. 193. 196. 201. 202. 206. 208. 210. 211. 215. 216. 219. 227. 228. 230. 232. 239. 240. 243. 247. 250. 252. 258. 259. 262. 263. 264. 270. 273. 276. 278. 279. 281. 284. 286. 287. 288. 290. 294. 297. 306. 308. 311. 319. 321. 324. 334. 336. 337. 344. 345. 350. 351. 352. 354. 360. 362. 364. 373. 374. 375. 377. 380. 382. 387. 391. 393. 396. 398. 400. 402. 404. 411. 417. 419. 420. 432. 435. 438. 441. 442. 448. 449. 450. 453. 456. 465. 466. 468. 469. 472. 474. 479. 480. 483. 486. 488. 491. 492. 495. 497. 499. 500. 506. 507. 512. 522. 529. 530. 531. 532. 539. 543. 544. 546. 547. 551. 555. 557. 580. 590. 596. 600. 604. 606. 608. 612. 613. 619. 620. 622. 624. 627. 630. 634. 645. 661. 668. 670. 679. 682. 684. 688. 690. 691. 692. 694. 712. 713. 722. 723. 748. 750. 755. 761. 764. 768. 774. 791. 799. 811. 820. 823. 826. 829. 831. 841. 842. 850. 852. 859. 869. 872. 873. 875. 884. 891. 904. 906. 912. 915. 955. 956. 972. 981. 982. 1020. 1029. 1031. 1037. 1039. 1057. 1061. 1063. 1073. 1080. 1083. 1085. 1120. 1127. 1164. 1393. 1474. 1526. nan] BsmtFinType1 [nan 'ALQ' 'BLQ' 'GLQ' 'LwQ' 'Rec' 'Unf'] BsmtFinType2 [nan 'ALQ' 'BLQ' 'GLQ' 'LwQ' 'Rec' 'Unf'] BsmtFullBath [ 0. 1. 2. 3. nan] BsmtHalfBath [ 0. 1. 2. nan] BsmtQual [nan 'Ex' 'Fa' 'Gd' 'TA'] BsmtUnfSF [ 0. 14. 15. ..., 2153. 2336. 
nan] CentralAir ['N' 'Y'] Condition1 ['Artery' 'Feedr' 'Norm' 'PosA' 'PosN' 'RRAe' 'RRAn' 'RRNe' 'RRNn'] Condition2 ['Artery' 'Feedr' 'Norm' 'PosA' 'PosN' 'RRAe' 'RRAn' 'RRNn'] Electrical [nan 'FuseA' 'FuseF' 'FuseP' 'Mix' 'SBrkr'] EnclosedPorch [ 0 16 18 19 20 23 24 25 26 28 30 32 34 35 36 37 39 40 41 42 43 44 45 48 50 51 52 54 55 56 57 60 64 66 67 68 70 72 75 77 78 80 81 84 86 87 88 90 91 92 94 96 98 99 100 101 102 104 105 108 109 112 113 114 115 116 117 120 121 122 123 126 128 129 130 132 133 134 135 136 137 138 139 140 143 144 145 148 150 154 156 158 160 161 162 164 165 167 168 169 170 172 174 175 176 177 180 183 184 185 186 189 190 192 194 196 198 200 202 203 204 205 207 208 209 210 211 212 213 214 216 218 219 220 221 222 224 225 226 228 230 231 234 236 238 239 240 242 244 246 248 249 252 254 256 259 260 264 265 268 272 275 280 286 288 290 291 293 294 296 301 318 324 330 334 364 368 386 429 432 552 584 1012] ExterCond ['Ex' 'Fa' 'Gd' 'Po' 'TA'] ExterQual ['Ex' 'Fa' 'Gd' 'TA'] Exterior1st [nan 'AsbShng' 'AsphShn' 'BrkComm' 'BrkFace' 'CBlock' 'CemntBd' 'HdBoard' 'ImStucc' 'MetalSd' 'Plywood' 'Stone' 'Stucco' 'VinylSd' 'Wd Sdng' 'WdShing'] Exterior2nd [nan 'AsbShng' 'AsphShn' 'Brk Cmn' 'BrkFace' 'CBlock' 'CmentBd' 'HdBoard' 'ImStucc' 'MetalSd' 'Other' 'Plywood' 'Stone' 'Stucco' 'VinylSd' 'Wd Sdng' 'Wd Shng'] Fence [nan 'GdPrv' 'GdWo' 'MnPrv' 'MnWw'] FireplaceQu [nan 'Ex' 'Fa' 'Gd' 'Po' 'TA'] Fireplaces [0 1 2 3 4] Foundation ['BrkTil' 'CBlock' 'PConc' 'Slab' 'Stone' 'Wood'] FullBath [0 1 2 3 4] Functional [nan 'Maj1' 'Maj2' 'Min1' 'Min2' 'Mod' 'Sev' 'Typ'] GarageArea [ 0. 100. 160. 162. 164. 180. 184. 185. 186. 189. 192. 195. 198. 200. 205. 207. 208. 209. 210. 213. 215. 216. 217. 220. 224. 225. 226. 228. 230. 231. 234. 240. 242. 244. 246. 248. 249. 250. 252. 253. 254. 255. 256. 257. 258. 260. 261. 263. 264. 265. 266. 267. 270. 271. 272. 273. 275. 276. 280. 281. 282. 283. 284. 286. 287. 288. 290. 292. 293. 294. 295. 296. 297. 299. 300. 301. 303. 304. 305. 306. 
307. 308. 309. 310. 311. 312. 313. 315. 316. 317. 318. 319. 320. 322. 323. 324. 325. 326. 327. 328. 330. 331. 332. 336. 338. 340. 342. 343. 344. 345. 349. 350. 351. 352. 353. 354. 355. 356. 357. 358. 360. 363. 364. 365. 366. 367. 368. 369. 370. 371. 372. 373. 374. 375. 377. 378. 379. 380. 384. 386. 388. 389. 390. 392. 393. 394. 396. 397. 398. 399. 400. 401. 402. 403. 404. 405. 406. 408. 409. 410. 412. 413. 414. 416. 418. 420. 422. 423. 424. 425. 426. 427. 428. 429. 430. 431. 432. 433. 434. 435. 436. 437. 438. 439. 440. 441. 442. 443. 444. 445. 447. 449. 450. 451. 452. 453. 454. 455. 456. 457. 458. 459. 460. 461. 462. 463. 464. 465. 466. 467. 468. 469. 470. 471. 472. 473. 474. 475. 476. 477. 478. 479. 480. 481. 482. 483. 484. 485. 486. 487. 488. 489. 490. 492. 493. 494. 495. 496. 497. 498. 499. 500. 501. 502. 504. 505. 506. 508. 509. 510. 511. 512. 513. 514. 515. 516. 517. 518. 520. 521. 522. 523. 524. 525. 526. 527. 528. 529. 530. 531. 532. 533. 534. 535. 538. 539. 540. 541. 542. 543. 544. 545. 546. 548. 549. 550. 551. 552. 554. 555. 556. 557. 558. 559. 560. 561. 562. 564. 565. 566. 567. 568. 569. 570. 571. 572. 573. 574. 575. 576. 577. 578. 579. 580. 581. 582. 583. 584. 585. 586. 588. 590. 591. 592. 594. 595. 596. 597. 598. 599. 600. 601. 602. 603. 604. 605. 606. 608. 609. 610. 611. 612. 613. 614. 615. 616. 617. 618. 619. 620. 621. 622. 623. 624. 625. 626. 627. 628. 630. 631. 632. 636. 638. 639. 640. 641. 642. 644. 645. 646. 647. 648. 649. 650. 656. 658. 660. 662. 663. 665. 666. 667. 668. 670. 671. 672. 673. 674. 675. 676. 678. 680. 682. 683. 684. 685. 686. 687. 688. 689. 690. 691. 692. 693. 694. 696. 698. 700. 701. 702. 704. 706. 708. 711. 712. 713. 714. 715. 716. 719. 720. 721. 722. 724. 725. 726. 728. 729. 730. 732. 736. 738. 739. 740. 741. 744. 746. 747. 748. 749. 750. 751. 752. 753. 754. 756. 757. 758. 760. 762. 765. 766. 768. 770. 772. 773. 774. 776. 779. 780. 782. 783. 784. 786. 787. 788. 789. 791. 792. 795. 796. 800. 803. 804. 806. 807. 808. 810. 811. 812. 
813. 814. 815. 816. 818. 820. 824. 825. 826. 828. 830. 831. 832. 833. 834. 836. 839. 840. 841. 842. 843. 844. 845. 846. 848. 850. 851. 852. 853. 856. 857. 858. 859. 860. 862. 864. 865. 866. 868. 869. 870. 871. 872. 874. 876. 878. 880. 884. 885. 886. 888. 889. 890. 892. 894. 895. 896. 898. 899. 900. 902. 904. 905. 907. 908. 912. 916. 920. 923. 924. 925. 927. 928. 932. 933. 936. 938. 944. 947. 949. 954. 958. 959. 962. 963. 968. 972. 983. 984. 995. 1003. 1008. 1014. 1017. 1020. 1025. 1040. 1041. 1043. 1052. 1053. 1069. 1085. 1092. 1105. 1110. 1134. 1138. 1150. 1154. 1166. 1174. 1184. 1200. 1220. 1231. 1248. 1314. 1348. 1356. 1390. 1418. 1488. nan] GarageCars [ 0. 1. 2. 3. 4. 5. nan] GarageCond [nan 'Ex' 'Fa' 'Gd' 'Po' 'TA'] GarageFinish [nan 'Fin' 'RFn' 'Unf'] GarageQual [nan 'Ex' 'Fa' 'Gd' 'Po' 'TA'] GarageType [nan '2Types' 'Attchd' 'Basment' 'BuiltIn' 'CarPort' 'Detchd'] GarageYrBlt [ 1895. 1896. 1900. 1906. 1908. 1910. 1914. 1915. 1916. 1917. 1918. 1919. 1920. 1921. 1922. 1923. 1924. 1925. 1926. 1927. 1928. 1929. 1930. 1931. 1932. 1933. 1934. 1935. 1936. 1937. 1938. 1939. 1940. 1941. 1942. 1943. 1945. 1946. 1947. 1948. 1949. 1950. 1951. 1952. 1953. 1954. 1955. 1956. 1957. 1958. 1959. 1960. 1961. 1962. 1963. 1964. 1965. 1966. 1967. 1968. 1969. 1970. 1971. 1972. 1973. 1974. 1975. 1976. 1977. 1978. 1979. 1980. 1981. 1982. 1983. 1984. 1985. 1986. 1987. 1988. 1989. 1990. 1991. 1992. 1993. 1994. 1995. 1996. 1997. 1998. 1999. 2000. 2001. 2002. 2003. 2004. 2005. 2006. 2007. 2008. 2009. 2010. 2207. 
nan] GrLivArea [ 334 407 438 ..., 4676 5095 5642] HalfBath [0 1 2] Heating ['Floor' 'GasA' 'GasW' 'Grav' 'OthW' 'Wall'] HeatingQC ['Ex' 'Fa' 'Gd' 'Po' 'TA'] HouseStyle ['1.5Fin' '1.5Unf' '1Story' '2.5Fin' '2.5Unf' '2Story' 'SFoyer' 'SLvl'] Id [ 1 2 3 ..., 2917 2918 2919] KitchenAbvGr [0 1 2 3] KitchenQual [nan 'Ex' 'Fa' 'Gd' 'TA'] LandContour ['Bnk' 'HLS' 'Low' 'Lvl'] LandSlope ['Gtl' 'Mod' 'Sev'] LotArea [ 1300 1470 1476 ..., 159000 164660 215245] LotConfig ['Corner' 'CulDSac' 'FR2' 'FR3' 'Inside'] LotFrontage [ 21. 22. 24. 25. 26. 28. 30. 31. 32. 33. 34. 35. 36. 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50. 51. 52. 53. 54. 55. 56. 57. 58. 59. 60. 61. 62. 63. 64. 65. 66. 67. 68. 69. 70. 71. 72. 73. 74. 75. 76. 77. 78. 79. 80. 81. 82. 83. 84. 85. 86. 87. 88. 89. 90. 91. 92. 93. 94. 95. 96. 97. 98. 99. 100. 101. 102. 103. 104. 105. 106. 107. 108. 109. 110. 111. 112. 113. 114. 115. 116. 117. 118. 119. 120. 121. 122. 123. 124. 125. 126. 128. 129. 130. 131. 133. 134. 135. 136. 137. 138. 140. 141. 144. 149. 150. 152. 153. 155. 160. 168. 174. 182. 195. 200. 313. 
nan] LotShape ['IR1' 'IR2' 'IR3' 'Reg'] LowQualFinSF [ 0 53 80 108 114 120 140 144 156 205 232 234 259 312 360 362 371 384 390 392 397 420 431 436 450 473 479 481 512 513 514 515 528 572 697 1064] MSSubClass [ 20 30 40 45 50 60 70 75 80 85 90 120 150 160 180 190] MSZoning [nan 'C (all)' 'FV' 'RH' 'RL' 'RM'] MasVnrArea [ 0.00000000e+00 1.00000000e+00 3.00000000e+00 1.10000000e+01 1.40000000e+01 1.60000000e+01 1.80000000e+01 2.00000000e+01 2.20000000e+01 2.30000000e+01 2.40000000e+01 2.70000000e+01 2.80000000e+01 3.00000000e+01 3.10000000e+01 3.20000000e+01 3.40000000e+01 3.60000000e+01 3.80000000e+01 3.90000000e+01 4.00000000e+01 4.10000000e+01 4.20000000e+01 4.40000000e+01 4.50000000e+01 4.60000000e+01 4.70000000e+01 4.80000000e+01 5.00000000e+01 5.10000000e+01 5.20000000e+01 5.30000000e+01 5.40000000e+01 5.60000000e+01 5.70000000e+01 5.80000000e+01 6.00000000e+01 6.20000000e+01 6.30000000e+01 6.40000000e+01 6.50000000e+01 6.60000000e+01 6.70000000e+01 6.80000000e+01 6.90000000e+01 7.00000000e+01 7.20000000e+01 7.40000000e+01 7.50000000e+01 7.60000000e+01 8.00000000e+01 8.10000000e+01 8.20000000e+01 8.40000000e+01 8.50000000e+01 8.60000000e+01 8.70000000e+01 8.80000000e+01 8.90000000e+01 9.00000000e+01 9.10000000e+01 9.20000000e+01 9.40000000e+01 9.50000000e+01 9.60000000e+01 9.70000000e+01 9.80000000e+01 9.90000000e+01 1.00000000e+02 1.01000000e+02 1.02000000e+02 1.04000000e+02 1.05000000e+02 1.06000000e+02 1.08000000e+02 1.09000000e+02 1.10000000e+02 1.12000000e+02 1.13000000e+02 1.14000000e+02 1.15000000e+02 1.16000000e+02 1.17000000e+02 1.18000000e+02 1.19000000e+02 1.20000000e+02 1.21000000e+02 1.22000000e+02 1.23000000e+02 1.24000000e+02 1.25000000e+02 1.26000000e+02 1.27000000e+02 1.28000000e+02 1.30000000e+02 1.32000000e+02 1.34000000e+02 1.35000000e+02 1.36000000e+02 1.37000000e+02 1.38000000e+02 1.40000000e+02 1.41000000e+02 1.42000000e+02 1.43000000e+02 1.44000000e+02 1.45000000e+02 1.46000000e+02 1.47000000e+02 1.48000000e+02 1.49000000e+02 
1.50000000e+02 1.51000000e+02 1.53000000e+02 1.54000000e+02 1.56000000e+02 1.57000000e+02 1.58000000e+02 1.60000000e+02 1.61000000e+02 1.62000000e+02 1.63000000e+02 1.64000000e+02 1.65000000e+02 1.66000000e+02 1.67000000e+02 1.68000000e+02 1.69000000e+02 1.70000000e+02 1.71000000e+02 1.72000000e+02 1.74000000e+02 1.75000000e+02 1.76000000e+02 1.77000000e+02 1.78000000e+02 1.79000000e+02 1.80000000e+02 1.82000000e+02 1.83000000e+02 1.84000000e+02 1.86000000e+02 1.87000000e+02 1.88000000e+02 1.89000000e+02 1.90000000e+02 1.92000000e+02 1.94000000e+02 1.96000000e+02 1.97000000e+02 1.98000000e+02 1.99000000e+02 2.00000000e+02 2.02000000e+02 2.03000000e+02 2.04000000e+02 2.05000000e+02 2.06000000e+02 2.07000000e+02 2.08000000e+02 2.09000000e+02 2.10000000e+02 2.12000000e+02 2.14000000e+02 2.15000000e+02 2.16000000e+02 2.17000000e+02 2.18000000e+02 2.19000000e+02 2.20000000e+02 2.21000000e+02 2.22000000e+02 2.23000000e+02 2.24000000e+02 2.25000000e+02 2.26000000e+02 2.27000000e+02 2.28000000e+02 2.29000000e+02 2.30000000e+02 2.32000000e+02 2.33000000e+02 2.34000000e+02 2.35000000e+02 2.36000000e+02 2.37000000e+02 2.38000000e+02 2.40000000e+02 2.42000000e+02 2.43000000e+02 2.44000000e+02 2.45000000e+02 2.46000000e+02 2.47000000e+02 2.48000000e+02 2.50000000e+02 2.51000000e+02 2.52000000e+02 2.53000000e+02 2.54000000e+02 2.55000000e+02 2.56000000e+02 2.57000000e+02 2.58000000e+02 2.59000000e+02 2.60000000e+02 2.61000000e+02 2.62000000e+02 2.63000000e+02 2.64000000e+02 2.65000000e+02 2.66000000e+02 2.68000000e+02 2.70000000e+02 2.72000000e+02 2.74000000e+02 2.75000000e+02 2.76000000e+02 2.78000000e+02 2.79000000e+02 2.80000000e+02 2.81000000e+02 2.83000000e+02 2.84000000e+02 2.85000000e+02 2.86000000e+02 2.87000000e+02 2.88000000e+02 2.89000000e+02 2.90000000e+02 2.91000000e+02 2.92000000e+02 2.93000000e+02 2.94000000e+02 2.95000000e+02 2.96000000e+02 2.97000000e+02 2.98000000e+02 2.99000000e+02 3.00000000e+02 3.02000000e+02 3.04000000e+02 3.05000000e+02 3.06000000e+02 
3.08000000e+02 3.09000000e+02 3.10000000e+02 3.12000000e+02 3.15000000e+02 3.18000000e+02 3.20000000e+02 3.22000000e+02 3.23000000e+02 3.24000000e+02 3.27000000e+02 3.28000000e+02 3.32000000e+02 3.33000000e+02 3.35000000e+02 3.36000000e+02 3.37000000e+02 3.38000000e+02 3.40000000e+02 3.42000000e+02 3.44000000e+02 3.48000000e+02 3.50000000e+02 3.51000000e+02 3.52000000e+02 3.53000000e+02 3.55000000e+02 3.56000000e+02 3.59000000e+02 3.60000000e+02 3.61000000e+02 3.62000000e+02 3.64000000e+02 3.65000000e+02 3.66000000e+02 3.68000000e+02 3.70000000e+02 3.71000000e+02 3.72000000e+02 3.75000000e+02 3.76000000e+02 3.78000000e+02 3.79000000e+02 3.80000000e+02 3.81000000e+02 3.82000000e+02 3.83000000e+02 3.85000000e+02 3.87000000e+02 3.88000000e+02 3.91000000e+02 3.94000000e+02 3.96000000e+02 3.97000000e+02 3.99000000e+02 4.00000000e+02 4.02000000e+02 4.05000000e+02 4.06000000e+02 4.08000000e+02 4.10000000e+02 4.12000000e+02 4.15000000e+02 4.18000000e+02 4.20000000e+02 4.22000000e+02 4.23000000e+02 4.24000000e+02 4.25000000e+02 4.26000000e+02 4.28000000e+02 4.30000000e+02 4.32000000e+02 4.34000000e+02 4.35000000e+02 4.36000000e+02 4.38000000e+02 4.40000000e+02 4.42000000e+02 4.43000000e+02 4.44000000e+02 4.48000000e+02 4.50000000e+02 4.51000000e+02 4.52000000e+02 4.56000000e+02 4.59000000e+02 4.64000000e+02 4.66000000e+02 4.68000000e+02 4.70000000e+02 4.72000000e+02 4.73000000e+02 4.79000000e+02 4.80000000e+02 4.81000000e+02 4.91000000e+02 4.92000000e+02 4.95000000e+02 5.00000000e+02 5.01000000e+02 5.02000000e+02 5.04000000e+02 5.06000000e+02 5.09000000e+02 5.10000000e+02 5.13000000e+02 5.14000000e+02 5.15000000e+02 5.18000000e+02 5.19000000e+02 5.22000000e+02 5.25000000e+02 5.26000000e+02 5.28000000e+02 5.30000000e+02 5.32000000e+02 5.41000000e+02 5.49000000e+02 5.50000000e+02 5.54000000e+02 5.62000000e+02 5.64000000e+02 5.67000000e+02 5.68000000e+02 5.71000000e+02 5.72000000e+02 5.73000000e+02 5.76000000e+02 5.79000000e+02 5.84000000e+02 5.94000000e+02 6.00000000e+02 
6.03000000e+02 6.04000000e+02 6.15000000e+02 6.16000000e+02 6.21000000e+02 6.30000000e+02 6.32000000e+02 6.34000000e+02 6.40000000e+02 6.47000000e+02 6.50000000e+02 6.51000000e+02 6.52000000e+02 6.53000000e+02 6.57000000e+02 6.60000000e+02 6.62000000e+02 6.64000000e+02 6.68000000e+02 6.73000000e+02 6.74000000e+02 6.80000000e+02 6.92000000e+02 7.05000000e+02 7.10000000e+02 7.14000000e+02 7.24000000e+02 7.26000000e+02 7.30000000e+02 7.31000000e+02 7.34000000e+02 7.38000000e+02 7.48000000e+02 7.54000000e+02 7.60000000e+02 7.62000000e+02 7.66000000e+02 7.68000000e+02 7.71000000e+02 7.72000000e+02 7.88000000e+02 7.96000000e+02 8.16000000e+02 8.60000000e+02 8.70000000e+02 8.77000000e+02 8.86000000e+02 8.94000000e+02 9.02000000e+02 9.21000000e+02 9.22000000e+02 9.45000000e+02 9.70000000e+02 9.75000000e+02 1.03100000e+03 1.04700000e+03 1.05000000e+03 1.09500000e+03 1.11000000e+03 1.11500000e+03 1.12900000e+03 1.15900000e+03 1.17000000e+03 1.22400000e+03 1.29000000e+03 1.37800000e+03 1.60000000e+03 nan] MasVnrType [nan 'BrkCmn' 'BrkFace' 'None' 'Stone'] MiscFeature [nan 'Gar2' 'Othr' 'Shed' 'TenC'] MiscVal [ 0 54 80 300 350 400 420 450 455 460 480 490 500 560 600 620 650 700 750 800 900 1000 1150 1200 1300 1400 1500 1512 2000 2500 3000 3500 4500 6500 8300 12500 15500 17000] MoSold [ 1 2 3 4 5 6 7 8 9 10 11 12] Neighborhood ['Blmngtn' 'Blueste' 'BrDale' 'BrkSide' 'ClearCr' 'CollgCr' 'Crawfor' 'Edwards' 'Gilbert' 'IDOTRR' 'MeadowV' 'Mitchel' 'NAmes' 'NPkVill' 'NWAmes' 'NoRidge' 'NridgHt' 'OldTown' 'SWISU' 'Sawyer' 'SawyerW' 'Somerst' 'StoneBr' 'Timber' 'Veenker'] OpenPorchSF [ 0 4 6 8 10 11 12 15 16 17 18 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 72 73 74 75 76 77 78 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 
137 138 139 140 141 142 144 146 148 150 151 152 153 154 155 156 158 159 160 162 164 165 166 168 169 170 171 172 173 174 175 176 177 178 180 182 183 184 185 187 188 189 190 191 192 193 194 195 197 198 199 200 201 203 204 205 207 208 210 211 213 214 215 216 217 222 224 225 226 228 229 230 231 234 235 236 238 240 241 243 244 245 246 247 250 251 252 253 254 256 258 260 262 263 265 266 267 273 274 276 278 282 285 287 288 291 292 299 304 312 319 324 341 364 365 368 372 382 406 418 444 484 502 523 547 570 742] OverallCond [1 2 3 4 5 6 7 8 9] OverallQual [ 1 2 3 4 5 6 7 8 9 10] PavedDrive ['N' 'P' 'Y'] PoolArea [ 0 144 228 368 444 480 512 519 555 561 576 648 738 800] PoolQC [nan 'Ex' 'Fa' 'Gd'] RoofMatl ['ClyTile' 'CompShg' 'Membran' 'Metal' 'Roll' 'Tar&Grv' 'WdShake' 'WdShngl'] RoofStyle ['Flat' 'Gable' 'Gambrel' 'Hip' 'Mansard' 'Shed'] SaleCondition ['Abnorml' 'AdjLand' 'Alloca' 'Family' 'Normal' 'Partial'] SalePrice [ 34900. 35311. 37900. 39300. 40000. 52000. 52500. 55000. 55993. 58500. 60000. 61000. 62383. 64500. 66500. 67000. 68400. 68500. 72500. 73000. 75000. 75500. 76000. 76500. 78000. 79000. 79500. 79900. 80000. 80500. 81000. 82000. 82500. 83000. 83500. 84000. 84500. 84900. 85000. 85400. 85500. 86000. 87000. 87500. 88000. 89000. 89471. 89500. 90000. 90350. 91000. 91300. 91500. 92000. 92900. 93000. 93500. 94000. 94500. 94750. 95000. 96500. 97000. 97500. 98000. 98300. 98600. 99500. 99900. 100000. 101000. 101800. 102000. 102776. 103000. 103200. 103600. 104000. 104900. 105000. 105500. 105900. 106000. 106250. 106500. 107000. 107400. 107500. 107900. 108000. 108480. 108500. 108959. 109000. 109008. 109500. 109900. 110000. 110500. 111000. 111250. 112000. 112500. 113000. 114500. 114504. 115000. 116000. 116050. 116500. 116900. 117000. 117500. 118000. 118400. 118500. 118858. 118964. 119000. 119200. 119500. 119750. 119900. 120000. 120500. 121000. 121500. 121600. 122000. 122500. 122900. 123000. 123500. 123600. 124000. 124500. 124900. 125000. 125500. 126000. 126175. 126500. 
127000. 127500. 128000. 128200. 128500. 128900. 128950. 129000. 129500. 129900. 130000. 130250. 130500. 131000. 131400. 131500. 132000. 132250. 132500. 133000. 133500. 133700. 133900. 134000. 134432. 134450. 134500. 134800. 134900. 135000. 135500. 135750. 135900. 135960. 136000. 136500. 136900. 136905. 137000. 137450. 137500. 137900. 138000. 138500. 138800. 138887. 139000. 139400. 139500. 139600. 139900. 139950. 140000. 140200. 141000. 141500. 142000. 142125. 142500. 142600. 142953. 143000. 143250. 143500. 143750. 143900. 144000. 144152. 144500. 144900. 145000. 145250. 145500. 145900. 146000. 146500. 146800. 147000. 147400. 147500. 148000. 148500. 148800. 149000. 149300. 149350. 149500. 149700. 149900. 150000. 150500. 150750. 150900. 151000. 151400. 151500. 152000. 153000. 153337. 153500. 153575. 153900. 154000. 154300. 154500. 154900. 155000. 155835. 155900. 156000. 156500. 156932. 157000. 157500. 157900. 158000. 158500. 158900. 159000. 159434. 159500. 159895. 159950. 160000. 160200. 161000. 161500. 161750. 162000. 162500. 162900. 163000. 163500. 163900. 163990. 164000. 164500. 164700. 164900. 164990. 165000. 165150. 165400. 165500. 165600. 166000. 167000. 167240. 167500. 167900. 168000. 168500. 169000. 169500. 169900. 169990. 170000. 171000. 171500. 171750. 171900. 172000. 172400. 172500. 172785. 173000. 173500. 173733. 173900. 174000. 174500. 174900. 175000. 175500. 175900. 176000. 176432. 176485. 176500. 177000. 177500. 178000. 178400. 178740. 178900. 179000. 179200. 179400. 179500. 179540. 179600. 179665. 179900. 180000. 180500. 181000. 181134. 181500. 181900. 182000. 182900. 183000. 183200. 183500. 183900. 184000. 184100. 184750. 184900. 185000. 185500. 185750. 185850. 185900. 186000. 186500. 186700. 187000. 187100. 187500. 187750. 188000. 188500. 188700. 189000. 189950. 190000. 191000. 192000. 192140. 192500. 193000. 193500. 193879. 194000. 194201. 194500. 194700. 195000. 195400. 196000. 196500. 197000. 197500. 197900. 198500. 198900. 199900. 200000. 200100. 
200141. 200500. 200624. 201000. 201800. 202500. 202665. 202900. 203000. 204000. 204750. 204900. 205000. 205950. 206000. 206300. 206900. 207000. 207500. 208300. 208500. 208900. 209500. 210000. 211000. 212000. 212900. 213000. 213250. 213490. 213500. 214000. 214500. 214900. 215000. 215200. 216000. 216500. 216837. 217000. 217500. 218000. 219210. 219500. 220000. 221000. 221500. 222000. 222500. 223000. 223500. 224000. 224500. 224900. 225000. 226000. 226700. 227000. 227680. 227875. 228000. 228500. 228950. 229000. 229456. 230000. 230500. 231500. 232000. 232600. 233000. 233170. 233230. 234000. 235000. 235128. 236000. 236500. 237000. 237500. 238000. 239000. 239500. 239686. 239799. 239900. 240000. 241000. 241500. 242000. 243000. 244000. 244400. 244600. 245000. 245350. 245500. 246578. 248000. 248328. 248900. 249700. 250000. 250580. 251000. 252000. 252678. 253000. 253293. 254000. 254900. 255000. 255500. 255900. 256000. 256300. 257000. 257500. 258000. 259000. 259500. 260000. 260400. 261500. 262000. 262280. 262500. 263000. 263435. 264132. 264561. 265000. 265900. 265979. 266000. 266500. 267000. 268000. 269500. 269790. 270000. 271000. 271900. 272000. 274000. 274300. 274725. 274900. 274970. 275000. 275500. 276000. 277000. 277500. 278000. 279500. 280000. 281000. 281213. 282922. 283463. 284000. 285000. 286000. 287000. 287090. 289000. 290000. 293077. 294000. 295000. 295493. 297000. 299800. 301000. 301500. 302000. 303477. 305000. 305900. 306000. 307000. 309000. 310000. 311500. 311872. 312500. 313000. 314813. 315000. 315500. 315750. 316600. 317000. 318000. 318061. 319000. 319900. 320000. 324000. 325000. 325300. 325624. 326000. 328000. 328900. 333168. 335000. 336000. 337000. 337500. 339750. 340000. 341000. 342643. 345000. 348000. 350000. 354000. 359100. 360000. 361919. 367294. 369900. 370878. 372402. 372500. 374000. 375000. 377426. 377500. 378500. 380000. 381000. 383970. 385000. 386250. 392000. 392500. 394432. 394617. 395000. 395192. 402000. 402861. 403000. 410000. 412500. 415298. 423000. 
424870. 426000. 430000. 437154. 438780. 440000. 446261. 451950. 465000. 466500. 475000. 485000. 501837. 538000. 555000. 556581. 582933. 611657. 625000. 745000. 755000. nan] SaleType [nan 'COD' 'CWD' 'Con' 'ConLD' 'ConLI' 'ConLw' 'New' 'Oth' 'WD'] ScreenPorch [ 0 40 53 60 63 64 80 84 88 90 92 94 95 99 100 104 108 109 110 111 112 113 115 116 117 119 120 121 122 123 126 128 130 135 138 140 141 142 143 144 145 147 148 150 152 153 154 155 156 160 161 162 163 164 165 166 168 170 171 174 175 176 178 180 182 184 185 189 190 192 195 196 197 198 200 201 204 208 210 216 217 220 221 222 224 225 227 228 231 233 234 240 252 255 256 259 260 263 264 265 266 270 271 273 276 280 287 288 291 312 322 342 348 374 385 396 410 440 480 490 576] Street ['Grvl' 'Pave'] TotRmsAbvGrd [ 2 3 4 5 6 7 8 9 10 11 12 13 14 15] TotalBsmtSF [ 0. 105. 160. ..., 5095. 6110. nan] Utilities [nan 'AllPub' 'NoSeWa'] WoodDeckSF [ 0 4 12 14 16 20 22 23 24 25 26 27 28 30 32 33 35 36 38 40 42 44 45 48 49 50 51 52 53 54 55 56 57 58 60 63 64 66 68 70 71 72 73 74 75 76 77 78 80 81 84 85 86 87 88 89 90 92 94 95 96 97 98 99 100 102 103 104 105 106 108 110 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 138 139 140 141 142 143 144 145 146 147 148 149 150 152 153 154 155 156 157 158 159 160 161 162 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 218 219 220 221 222 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 250 252 253 255 256 257 259 260 261 262 263 264 265 266 268 269 270 272 274 275 276 277 278 279 280 282 283 284 285 286 287 288 289 290 291 292 294 295 296 297 298 300 301 302 303 304 305 306 307 308 309 311 312 314 315 316 318 319 320 321 322 324 325 326 327 328 329 330 331 333 335 336 339 340 342 344 345 349 351 352 353 354 355 356 358 360 361 
362 364 365 366 367 370 371 375 379 380 382 384 385 386 390 392 393 394 400 402 403 404 405 406 407 409 411 413 414 416 418 421 424 426 428 431 432 436 439 441 444 448 450 453 455 460 462 466 467 468 474 476 483 486 490 495 496 500 501 502 503 509 511 517 519 520 521 530 536 546 550 574 576 586 631 635 641 646 657 668 670 684 690 728 736 857 870 1424] YearBuilt [1872 1875 1879 1880 1882 1885 1890 1892 1893 1895 1896 1898 1900 1901 1902 1904 1905 1906 1907 1908 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1934 1935 1936 1937 1938 1939 1940 1941 1942 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010] YearRemodAdd [1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010] YrSold [2006 2007 2008 2009 2010]
# Count the missing values in every feature and keep the 40 features with the
# most nulls, presented as a one-column table indexed by feature name.
nulls = (
    df.isnull()
    .sum()
    .sort_values(ascending=False)
    .head(40)
    .to_frame('Null Count')
)
nulls.index.name = 'Feature'
nulls
Null Count | |
---|---|
Feature | |
PoolQC | 2909 |
MiscFeature | 2814 |
Alley | 2721 |
Fence | 2348 |
SalePrice | 1459 |
FireplaceQu | 1420 |
LotFrontage | 486 |
GarageFinish | 159 |
GarageCond | 159 |
GarageQual | 159 |
GarageYrBlt | 159 |
GarageType | 157 |
BsmtCond | 82 |
BsmtExposure | 82 |
BsmtQual | 81 |
BsmtFinType2 | 80 |
BsmtFinType1 | 79 |
MasVnrType | 24 |
MasVnrArea | 23 |
MSZoning | 4 |
BsmtFullBath | 2 |
BsmtHalfBath | 2 |
Utilities | 2 |
Functional | 2 |
Electrical | 1 |
Exterior2nd | 1 |
KitchenQual | 1 |
Exterior1st | 1 |
GarageCars | 1 |
TotalBsmtSF | 1 |
GarageArea | 1 |
BsmtUnfSF | 1 |
BsmtFinSF2 | 1 |
BsmtFinSF1 | 1 |
SaleType | 1 |
Condition2 | 0 |
FullBath | 0 |
2ndFlrSF | 0 |
3SsnPorch | 0 |
BedroomAbvGr | 0 |
# Keep only the columns that select_dtypes does not classify as np.number,
# i.e. the non-numeric (categorical) features of the combined frame.
categoricals = df.select_dtypes(exclude=[np.number])
# Summarise them; the rendered output shows count / unique / top / freq rows.
categoricals.describe()
Alley | BldgType | BsmtCond | BsmtExposure | BsmtFinType1 | BsmtFinType2 | BsmtQual | CentralAir | Condition1 | Condition2 | ... | MiscFeature | Neighborhood | PavedDrive | PoolQC | RoofMatl | RoofStyle | SaleCondition | SaleType | Street | Utilities | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 198 | 2919 | 2837 | 2837 | 2840 | 2839 | 2838 | 2919 | 2919 | 2919 | ... | 105 | 2919 | 2919 | 10 | 2919 | 2919 | 2919 | 2918 | 2919 | 2917 |
unique | 2 | 5 | 4 | 4 | 6 | 6 | 4 | 2 | 9 | 8 | ... | 4 | 25 | 3 | 3 | 8 | 6 | 6 | 9 | 2 | 2 |
top | Grvl | 1Fam | TA | No | Unf | Unf | TA | Y | Norm | Norm | ... | Shed | NAmes | Y | Ex | CompShg | Gable | Normal | WD | Pave | AllPub |
freq | 120 | 2425 | 2606 | 1904 | 851 | 2493 | 1283 | 2723 | 2511 | 2889 | ... | 95 | 443 | 2641 | 4 | 2876 | 2310 | 2402 | 2525 | 2907 | 2916 |
4 rows × 43 columns
# Filling Null Values
# NOTE: every fill is written back with ``df[col] = df[col].fillna(...)``.
# Calling ``fillna(..., inplace=True)`` on an attribute-accessed column
# (``df.col``) is chained assignment, which pandas does not guarantee will
# update ``df`` itself.

# Features where a null is assumed to mean the house lacks the feature.
absent_feature_fills = {
    # Alley: assuming Null values means no alley access
    'Alley': 'None',
    # Basement features: assuming Null values means no basement
    'BsmtCond': 'None',
    'BsmtExposure': 'None',
    'BsmtFinSF1': 0,
    'BsmtFinSF2': 0,
    'BsmtFinType1': 'None',
    'BsmtFinType2': 'None',
    'BsmtFullBath': 0,
    'BsmtHalfBath': 0,
    'BsmtQual': 'None',
    'BsmtUnfSF': 0,
    # Fence: assuming Null values means no fence
    'Fence': 'None',
    # FireplaceQu: assuming Null values means no fireplace
    'FireplaceQu': 'None',
    # Garage features: assuming Null values means no garage
    'GarageArea': 0,
    'GarageCars': 0,
    'GarageCond': 'None',
    'GarageFinish': 'None',
    'GarageQual': 'None',
    'GarageType': 'None',
    # GarageYrBlt: assuming Null values means no garage. Therefore making the
    # value here very old and therefore less valuable
    'GarageYrBlt': 0,
}
for absent_col, absent_fill in absent_feature_fills.items():
    df[absent_col] = df[absent_col].fillna(absent_fill)

# Features with only a handful of missing entries (Electrical has 1 missing
# value): show the value counts, then fill with the most common value.  The
# mode is computed from the data instead of being copied by hand from the
# printed counts.
# Each print passes a single parenthesised string so it emits the same text
# whether it is executed as a Python 2 print statement or a Python 3 call.
for sparse_col in ['Electrical', 'Exterior1st', 'Exterior2nd', 'Functional']:
    print(str(df[sparse_col].value_counts()) + ' \n')
    df[sparse_col] = df[sparse_col].fillna(df[sparse_col].mode()[0])

# KitchenQual
# Check kitchen quality values for homes without kitchens above ground
print(str(df[df.KitchenAbvGr == 0]['KitchenQual']) + ' \n')
print(str(df.KitchenQual.value_counts()) + ' \n')
df['KitchenQual'] = df['KitchenQual'].fillna(df['KitchenQual'].mode()[0])  # Fill with most common value
# LotFrontage
df.LotFrontage.fillna(0,inplace=True) # Assuming Null values means no front lot
# MasVnrArea & MasVnrType
df.MasVnrType.fillna('None',inplace=True) # Assuming Null values means no masonary veneer
df.MasVnrArea.fillna(0,inplace=True) # Assuming Null values means no masonary veneer
# MiscFeature
df.MiscFeature.fillna('None',inplace=True) # Assuming Null values means no misc feature
# MSZoning
print df.MSZoning.value_counts(), '\n'
df.MSZoning.fillna('RL',inplace=True) # Fill with most common value
# PoolQC
df.PoolQC.fillna('None', inplace=True) # Assuming Null values means no pool
# SaleType
print df.SaleType.value_counts(), '\n'
df.SaleType.fillna('WD',inplace=True) # Fill with most common value
# TotalBsmtSF
df.TotalBsmtSF.fillna(0,inplace=True) # Assuming Null values mean no basement
# Utilities
print df.Utilities.value_counts(), '\n'
df.Utilities.fillna('AllPub',inplace=True) # Fill in with most common value
SBrkr 2671 FuseA 188 FuseF 50 FuseP 8 Mix 1 Name: Electrical, dtype: int64 VinylSd 1025 MetalSd 450 HdBoard 442 Wd Sdng 411 Plywood 221 CemntBd 126 BrkFace 87 WdShing 56 AsbShng 44 Stucco 43 BrkComm 6 AsphShn 2 Stone 2 CBlock 2 ImStucc 1 Name: Exterior1st, dtype: int64 VinylSd 1014 MetalSd 447 HdBoard 406 Wd Sdng 391 Plywood 270 CmentBd 126 Wd Shng 81 BrkFace 47 Stucco 47 AsbShng 38 Brk Cmn 22 ImStucc 15 Stone 6 AsphShn 4 CBlock 3 Other 1 Name: Exterior2nd, dtype: int64 Typ 2717 Min2 70 Min1 65 Mod 35 Maj1 19 Maj2 9 Sev 2 Name: Functional, dtype: int64 954 TA 1127 TA 1399 TA Name: KitchenQual, dtype: object TA 1492 Gd 1151 Ex 205 Fa 70 Name: KitchenQual, dtype: int64 RL 2265 RM 460 FV 139 RH 26 C (all) 25 Name: MSZoning, dtype: int64 WD 2525 New 239 COD 87 ConLD 26 CWD 12 ConLI 9 ConLw 8 Oth 7 Con 5 Name: SaleType, dtype: int64 AllPub 2916 NoSeWa 1 Name: Utilities, dtype: int64
# Personal Preference - Making all floats ints
# Cast with a vectorised astype instead of `df.col = map(int, df.col)`:
# map() is a Python-level loop and only yields a list on Python 2.
# 'int64' is spelled out so the resulting dtype does not depend on the
# platform's default integer width. Like int(), astype truncates toward zero
# and raises if a null is still present.
floatToIntCols = [
    'MasVnrArea',
    'GarageYrBlt',
    'LotFrontage',
    'GarageArea',
    'GarageCars',
    'BsmtUnfSF',
    'BsmtHalfBath',
    'BsmtFullBath',
    'TotalBsmtSF',
    'BsmtFinSF1',
    'BsmtFinSF2',
]
df[floatToIntCols] = df[floatToIntCols].astype('int64')
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 2919 entries, 0 to 1458 Data columns (total 81 columns): 1stFlrSF 2919 non-null int64 2ndFlrSF 2919 non-null int64 3SsnPorch 2919 non-null int64 Alley 2919 non-null object BedroomAbvGr 2919 non-null int64 BldgType 2919 non-null object BsmtCond 2919 non-null object BsmtExposure 2919 non-null object BsmtFinSF1 2919 non-null int64 BsmtFinSF2 2919 non-null int64 BsmtFinType1 2919 non-null object BsmtFinType2 2919 non-null object BsmtFullBath 2919 non-null int64 BsmtHalfBath 2919 non-null int64 BsmtQual 2919 non-null object BsmtUnfSF 2919 non-null int64 CentralAir 2919 non-null object Condition1 2919 non-null object Condition2 2919 non-null object Electrical 2919 non-null object EnclosedPorch 2919 non-null int64 ExterCond 2919 non-null object ExterQual 2919 non-null object Exterior1st 2919 non-null object Exterior2nd 2919 non-null object Fence 2919 non-null object FireplaceQu 2919 non-null object Fireplaces 2919 non-null int64 Foundation 2919 non-null object FullBath 2919 non-null int64 Functional 2919 non-null object GarageArea 2919 non-null int64 GarageCars 2919 non-null int64 GarageCond 2919 non-null object GarageFinish 2919 non-null object GarageQual 2919 non-null object GarageType 2919 non-null object GarageYrBlt 2919 non-null int64 GrLivArea 2919 non-null int64 HalfBath 2919 non-null int64 Heating 2919 non-null object HeatingQC 2919 non-null object HouseStyle 2919 non-null object Id 2919 non-null int64 KitchenAbvGr 2919 non-null int64 KitchenQual 2919 non-null object LandContour 2919 non-null object LandSlope 2919 non-null object LotArea 2919 non-null int64 LotConfig 2919 non-null object LotFrontage 2919 non-null int64 LotShape 2919 non-null object LowQualFinSF 2919 non-null int64 MSSubClass 2919 non-null int64 MSZoning 2919 non-null object MasVnrArea 2919 non-null int64 MasVnrType 2919 non-null object MiscFeature 2919 non-null object MiscVal 2919 non-null int64 MoSold 2919 non-null int64 Neighborhood 2919 non-null 
object OpenPorchSF 2919 non-null int64 OverallCond 2919 non-null int64 OverallQual 2919 non-null int64 PavedDrive 2919 non-null object PoolArea 2919 non-null int64 PoolQC 2919 non-null object RoofMatl 2919 non-null object RoofStyle 2919 non-null object SaleCondition 2919 non-null object SalePrice 1460 non-null float64 SaleType 2919 non-null object ScreenPorch 2919 non-null int64 Street 2919 non-null object TotRmsAbvGrd 2919 non-null int64 TotalBsmtSF 2919 non-null int64 Utilities 2919 non-null object WoodDeckSF 2919 non-null int64 YearBuilt 2919 non-null int64 YearRemodAdd 2919 non-null int64 YrSold 2919 non-null int64 dtypes: float64(1), int64(37), object(43) memory usage: 1.8+ MB
# MSSubClass is stored as an int, so select_dtypes did not pick it up, but it
# is really a category code (similar to an area code). Add it by hand to the
# list of columns that will be one-hot encoded.
categoricalCols = categoricals.columns.tolist() + ['MSSubClass']
categoricalCols
['Alley', 'BldgType', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'BsmtQual', 'CentralAir', 'Condition1', 'Condition2', 'Electrical', 'ExterCond', 'ExterQual', 'Exterior1st', 'Exterior2nd', 'Fence', 'FireplaceQu', 'Foundation', 'Functional', 'GarageCond', 'GarageFinish', 'GarageQual', 'GarageType', 'Heating', 'HeatingQC', 'HouseStyle', 'KitchenQual', 'LandContour', 'LandSlope', 'LotConfig', 'LotShape', 'MSZoning', 'MasVnrType', 'MiscFeature', 'Neighborhood', 'PavedDrive', 'PoolQC', 'RoofMatl', 'RoofStyle', 'SaleCondition', 'SaleType', 'Street', 'Utilities', 'MSSubClass']
# One-hot encode every categorical column. drop_first=True omits the first
# level of each feature so the dummies are not perfectly collinear with the
# regression constant.
df = pd.get_dummies(
    data=df,
    columns=categoricalCols,
    drop_first=True,
)
df.head(15)
1stFlrSF | 2ndFlrSF | 3SsnPorch | BedroomAbvGr | BsmtFinSF1 | BsmtFinSF2 | BsmtFullBath | BsmtHalfBath | BsmtUnfSF | EnclosedPorch | ... | MSSubClass_70 | MSSubClass_75 | MSSubClass_80 | MSSubClass_85 | MSSubClass_90 | MSSubClass_120 | MSSubClass_150 | MSSubClass_160 | MSSubClass_180 | MSSubClass_190 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 856 | 854 | 0 | 3 | 706 | 0 | 1 | 0 | 150 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1262 | 0 | 0 | 3 | 978 | 0 | 0 | 1 | 284 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 920 | 866 | 0 | 3 | 486 | 0 | 1 | 0 | 434 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 961 | 756 | 0 | 3 | 216 | 0 | 1 | 0 | 540 | 272 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 1145 | 1053 | 0 | 4 | 655 | 0 | 1 | 0 | 490 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 | 796 | 566 | 320 | 1 | 732 | 0 | 1 | 0 | 64 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
6 | 1694 | 0 | 0 | 3 | 1369 | 0 | 1 | 0 | 317 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
7 | 1107 | 983 | 0 | 3 | 859 | 32 | 1 | 0 | 216 | 228 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
8 | 1022 | 752 | 0 | 2 | 0 | 0 | 0 | 0 | 952 | 205 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
9 | 1077 | 0 | 0 | 2 | 851 | 0 | 1 | 0 | 140 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
10 | 1040 | 0 | 0 | 3 | 906 | 0 | 1 | 0 | 134 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
11 | 1182 | 1142 | 0 | 4 | 998 | 0 | 1 | 0 | 177 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
12 | 912 | 0 | 0 | 2 | 737 | 0 | 1 | 0 | 175 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
13 | 1494 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 1494 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
14 | 1253 | 0 | 0 | 2 | 733 | 0 | 1 | 0 | 520 | 176 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
15 rows × 275 columns
import statsmodels.api as sm

# Split the combined frame back into its training and final-test parts.
# The training rows were appended first and are exactly the rows that carry
# a SalePrice, so their count gives the split position. The previous
# hard-coded `df[0:1459]` stopped one row short and silently dropped the
# last training observation (the test slice started at 1460).
nTrainRows = int(df['SalePrice'].notnull().sum())
dfTrain = df.iloc[:nTrainRows]
dfFinalTest = df.iloc[nTrainRows:]

# NOTE(review): 'Id' is kept here but dropped from dfXTrain below, so the two
# design matrices do not have the same columns - confirm that the prediction
# step drops 'Id' before calling the model.
dfFinalTest = sm.add_constant(dfFinalTest.drop(['SalePrice'],axis=1)) # Need constant for linear regression models

dfyTrain = dfTrain['SalePrice']
dfXTrain = sm.add_constant(dfTrain.drop(['SalePrice','Id'],axis=1)) # Need constant for linear regression models
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\compat\pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead. from pandas.core import datetools
print 'Skew is:', dfyTrain.skew()
plt.hist(dfyTrain)
plt.show()
Skew is: 1.88176004682
target = np.log(dfyTrain)
print 'Skew is:', target.skew()
plt.hist(target)
plt.show()
Skew is: 0.120694014457
# Hold out 20% of the training rows for validation; random_state pins the
# shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = tts(
    dfXTrain,
    target,
    train_size=0.8,
    random_state=69,
)
tuple(part.shape for part in (X_train, y_train, X_test, y_test))
((1167, 274), (1167L,), (292, 274), (292L,))
# Ordinary least squares on the log price.
# NOTE: the model is fitted on ALL training rows (dfXTrain / target), not on
# the X_train / y_train split, so nothing is held out from this fit.
# dfXTrain already carries the constant column.
olsModel = sm.OLS(endog=target, exog=dfXTrain)
est_price = olsModel.fit()
est_price.summary()
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\statsmodels\base\model.py:1036: RuntimeWarning: invalid value encountered in divide return self.params / self.bse C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\scipy\stats\_distn_infrastructure.py:879: RuntimeWarning: invalid value encountered in greater return (self.a < x) & (x < self.b) C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\scipy\stats\_distn_infrastructure.py:879: RuntimeWarning: invalid value encountered in less return (self.a < x) & (x < self.b) C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\scipy\stats\_distn_infrastructure.py:1818: RuntimeWarning: invalid value encountered in less_equal cond2 = cond0 & (x <= self.a)
Dep. Variable: | SalePrice | R-squared: | 0.945 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.933 |
Method: | Least Squares | F-statistic: | 78.55 |
Date: | Sun, 11 Mar 2018 | Prob (F-statistic): | 0.00 |
Time: | 15:57:56 | Log-Likelihood: | 1388.8 |
No. Observations: | 1459 | AIC: | -2250. |
Df Residuals: | 1195 | BIC: | -854.2 |
Df Model: | 263 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 7.4586 | 4.860 | 1.535 | 0.125 | -2.076 | 16.993 |
1stFlrSF | 6.452e-05 | 2.97e-05 | 2.175 | 0.030 | 6.33e-06 | 0.000 |
2ndFlrSF | 5.946e-05 | 2.51e-05 | 2.366 | 0.018 | 1.01e-05 | 0.000 |
3SsnPorch | 0.0002 | 0.000 | 1.590 | 0.112 | -3.8e-05 | 0.000 |
BedroomAbvGr | 0.0052 | 0.006 | 0.803 | 0.422 | -0.007 | 0.018 |
BsmtFinSF1 | 5.1e-05 | 1.33e-05 | 3.841 | 0.000 | 2.5e-05 | 7.7e-05 |
BsmtFinSF2 | 4.169e-05 | 2.74e-05 | 1.522 | 0.128 | -1.21e-05 | 9.54e-05 |
BsmtFullBath | 0.0230 | 0.009 | 2.528 | 0.012 | 0.005 | 0.041 |
BsmtHalfBath | 0.0039 | 0.014 | 0.280 | 0.780 | -0.023 | 0.031 |
BsmtUnfSF | -6.318e-06 | 1.27e-05 | -0.497 | 0.619 | -3.12e-05 | 1.86e-05 |
EnclosedPorch | 0.0001 | 5.75e-05 | 2.128 | 0.034 | 9.55e-06 | 0.000 |
Fireplaces | 0.0130 | 0.012 | 1.103 | 0.270 | -0.010 | 0.036 |
FullBath | 0.0239 | 0.010 | 2.347 | 0.019 | 0.004 | 0.044 |
GarageArea | 0.0001 | 3.62e-05 | 3.609 | 0.000 | 5.95e-05 | 0.000 |
GarageCars | 0.0193 | 0.010 | 1.847 | 0.065 | -0.001 | 0.040 |
GarageYrBlt | -0.0003 | 0.000 | -1.174 | 0.241 | -0.001 | 0.000 |
GrLivArea | 0.0002 | 2.64e-05 | 6.209 | 0.000 | 0.000 | 0.000 |
HalfBath | 0.0265 | 0.010 | 2.745 | 0.006 | 0.008 | 0.045 |
KitchenAbvGr | -0.0476 | 0.028 | -1.684 | 0.092 | -0.103 | 0.008 |
LotArea | 2.99e-06 | 5e-07 | 5.977 | 0.000 | 2.01e-06 | 3.97e-06 |
LotFrontage | 1.927e-05 | 0.000 | 0.182 | 0.855 | -0.000 | 0.000 |
LowQualFinSF | 3.976e-05 | 6.43e-05 | 0.619 | 0.536 | -8.63e-05 | 0.000 |
MasVnrArea | 1.947e-05 | 2.66e-05 | 0.731 | 0.465 | -3.28e-05 | 7.17e-05 |
MiscVal | -1.689e-05 | 2.91e-05 | -0.581 | 0.561 | -7.39e-05 | 4.01e-05 |
MoSold | -0.0007 | 0.001 | -0.623 | 0.533 | -0.003 | 0.002 |
OpenPorchSF | 4.124e-05 | 5.35e-05 | 0.771 | 0.441 | -6.37e-05 | 0.000 |
OverallCond | 0.0346 | 0.004 | 8.597 | 0.000 | 0.027 | 0.043 |
OverallQual | 0.0383 | 0.005 | 8.135 | 0.000 | 0.029 | 0.048 |
PoolArea | 0.0015 | 0.001 | 1.418 | 0.156 | -0.001 | 0.004 |
ScreenPorch | 0.0003 | 5.73e-05 | 4.756 | 0.000 | 0.000 | 0.000 |
TotRmsAbvGrd | 0.0031 | 0.004 | 0.698 | 0.486 | -0.006 | 0.012 |
TotalBsmtSF | 8.638e-05 | 1.87e-05 | 4.610 | 0.000 | 4.96e-05 | 0.000 |
WoodDeckSF | 9.296e-05 | 2.73e-05 | 3.411 | 0.001 | 3.95e-05 | 0.000 |
YearBuilt | 0.0018 | 0.000 | 4.754 | 0.000 | 0.001 | 0.003 |
YearRemodAdd | 0.0009 | 0.000 | 3.398 | 0.001 | 0.000 | 0.001 |
YrSold | -0.0026 | 0.002 | -1.085 | 0.278 | -0.007 | 0.002 |
Alley_None | -0.0083 | 0.019 | -0.428 | 0.669 | -0.046 | 0.030 |
Alley_Pave | 0.0238 | 0.028 | 0.836 | 0.403 | -0.032 | 0.080 |
BldgType_2fmCon | -0.0604 | 0.125 | -0.483 | 0.629 | -0.306 | 0.185 |
BldgType_Duplex | -0.0101 | 0.017 | -0.596 | 0.551 | -0.043 | 0.023 |
BldgType_Twnhs | -0.0260 | 0.071 | -0.364 | 0.716 | -0.166 | 0.114 |
BldgType_TwnhsE | -0.0025 | 0.068 | -0.037 | 0.970 | -0.136 | 0.131 |
BsmtCond_Gd | 0.0305 | 0.024 | 1.257 | 0.209 | -0.017 | 0.078 |
BsmtCond_None | 0.0491 | 0.056 | 0.878 | 0.380 | -0.061 | 0.159 |
BsmtCond_Po | 0.2671 | 0.138 | 1.933 | 0.053 | -0.004 | 0.538 |
BsmtCond_TA | 0.0306 | 0.020 | 1.562 | 0.119 | -0.008 | 0.069 |
BsmtExposure_Gd | 0.0323 | 0.014 | 2.329 | 0.020 | 0.005 | 0.059 |
BsmtExposure_Mn | -0.0046 | 0.014 | -0.333 | 0.739 | -0.032 | 0.023 |
BsmtExposure_No | -0.0106 | 0.010 | -1.060 | 0.290 | -0.030 | 0.009 |
BsmtExposure_None | -0.0528 | 0.105 | -0.503 | 0.615 | -0.259 | 0.153 |
BsmtFinType1_BLQ | 6.401e-05 | 0.013 | 0.005 | 0.996 | -0.025 | 0.025 |
BsmtFinType1_GLQ | 0.0119 | 0.012 | 1.025 | 0.305 | -0.011 | 0.035 |
BsmtFinType1_LwQ | -0.0285 | 0.017 | -1.654 | 0.098 | -0.062 | 0.005 |
BsmtFinType1_None | 0.0491 | 0.056 | 0.878 | 0.380 | -0.061 | 0.159 |
BsmtFinType1_Rec | -0.0048 | 0.014 | -0.347 | 0.729 | -0.032 | 0.022 |
BsmtFinType1_Unf | -0.0131 | 0.013 | -0.979 | 0.328 | -0.039 | 0.013 |
BsmtFinType2_BLQ | -0.0843 | 0.035 | -2.430 | 0.015 | -0.152 | -0.016 |
BsmtFinType2_GLQ | -0.0176 | 0.043 | -0.409 | 0.682 | -0.102 | 0.067 |
BsmtFinType2_LwQ | -0.0439 | 0.034 | -1.293 | 0.196 | -0.111 | 0.023 |
BsmtFinType2_None | -0.1437 | 0.114 | -1.258 | 0.209 | -0.368 | 0.080 |
BsmtFinType2_Rec | -0.0405 | 0.033 | -1.243 | 0.214 | -0.104 | 0.023 |
BsmtFinType2_Unf | -0.0321 | 0.035 | -0.926 | 0.355 | -0.100 | 0.036 |
BsmtQual_Fa | -0.0135 | 0.030 | -0.458 | 0.647 | -0.071 | 0.044 |
BsmtQual_Gd | -0.0291 | 0.015 | -1.906 | 0.057 | -0.059 | 0.001 |
BsmtQual_None | 0.0491 | 0.056 | 0.878 | 0.380 | -0.061 | 0.159 |
BsmtQual_TA | -0.0281 | 0.019 | -1.473 | 0.141 | -0.065 | 0.009 |
CentralAir_Y | 0.0607 | 0.018 | 3.399 | 0.001 | 0.026 | 0.096 |
Condition1_Feedr | 0.0396 | 0.023 | 1.720 | 0.086 | -0.006 | 0.085 |
Condition1_Norm | 0.0895 | 0.019 | 4.640 | 0.000 | 0.052 | 0.127 |
Condition1_PosA | 0.0534 | 0.046 | 1.172 | 0.242 | -0.036 | 0.143 |
Condition1_PosN | 0.0975 | 0.034 | 2.837 | 0.005 | 0.030 | 0.165 |
Condition1_RRAe | -0.0360 | 0.042 | -0.864 | 0.388 | -0.118 | 0.046 |
Condition1_RRAn | 0.0578 | 0.032 | 1.814 | 0.070 | -0.005 | 0.120 |
Condition1_RRNe | 0.0158 | 0.080 | 0.197 | 0.844 | -0.141 | 0.173 |
Condition1_RRNn | 0.0724 | 0.059 | 1.227 | 0.220 | -0.043 | 0.188 |
Condition2_Feedr | 0.2249 | 0.121 | 1.853 | 0.064 | -0.013 | 0.463 |
Condition2_Norm | 0.1684 | 0.110 | 1.536 | 0.125 | -0.047 | 0.384 |
Condition2_PosA | 0.3833 | 0.179 | 2.143 | 0.032 | 0.032 | 0.734 |
Condition2_PosN | -0.6976 | 0.139 | -5.028 | 0.000 | -0.970 | -0.425 |
Condition2_RRAe | -0.6496 | 0.333 | -1.953 | 0.051 | -1.302 | 0.003 |
Condition2_RRAn | 0.0738 | 0.156 | 0.473 | 0.637 | -0.233 | 0.380 |
Condition2_RRNn | 0.1650 | 0.138 | 1.197 | 0.231 | -0.105 | 0.435 |
Electrical_FuseF | -0.0210 | 0.027 | -0.789 | 0.430 | -0.073 | 0.031 |
Electrical_FuseP | -0.1094 | 0.086 | -1.276 | 0.202 | -0.278 | 0.059 |
Electrical_Mix | -0.2564 | 0.205 | -1.252 | 0.211 | -0.658 | 0.145 |
Electrical_SBrkr | -0.0238 | 0.014 | -1.720 | 0.086 | -0.051 | 0.003 |
ExterCond_Fa | -0.0852 | 0.083 | -1.030 | 0.303 | -0.248 | 0.077 |
ExterCond_Gd | -0.0593 | 0.079 | -0.751 | 0.453 | -0.214 | 0.096 |
ExterCond_Po | 0.0031 | 0.145 | 0.021 | 0.983 | -0.282 | 0.288 |
ExterCond_TA | -0.0392 | 0.079 | -0.498 | 0.618 | -0.194 | 0.115 |
ExterQual_Fa | 0.0192 | 0.056 | 0.344 | 0.731 | -0.090 | 0.129 |
ExterQual_Gd | -0.0020 | 0.022 | -0.093 | 0.926 | -0.045 | 0.041 |
ExterQual_TA | 0.0012 | 0.024 | 0.050 | 0.960 | -0.047 | 0.049 |
Exterior1st_AsphShn | -0.0421 | 0.151 | -0.279 | 0.780 | -0.339 | 0.254 |
Exterior1st_BrkComm | -0.2792 | 0.130 | -2.150 | 0.032 | -0.534 | -0.024 |
Exterior1st_BrkFace | 0.0689 | 0.059 | 1.169 | 0.243 | -0.047 | 0.184 |
Exterior1st_CBlock | -0.0341 | 0.063 | -0.540 | 0.589 | -0.158 | 0.090 |
Exterior1st_CemntBd | -0.1235 | 0.087 | -1.413 | 0.158 | -0.295 | 0.048 |
Exterior1st_HdBoard | -0.0288 | 0.060 | -0.480 | 0.631 | -0.146 | 0.089 |
Exterior1st_ImStucc | -0.0089 | 0.129 | -0.068 | 0.945 | -0.263 | 0.245 |
Exterior1st_MetalSd | 0.0241 | 0.068 | 0.355 | 0.723 | -0.109 | 0.157 |
Exterior1st_Plywood | -0.0196 | 0.059 | -0.332 | 0.740 | -0.135 | 0.096 |
Exterior1st_Stone | 0.0236 | 0.112 | 0.211 | 0.833 | -0.196 | 0.243 |
Exterior1st_Stucco | -0.0047 | 0.065 | -0.073 | 0.942 | -0.132 | 0.123 |
Exterior1st_VinylSd | -0.0175 | 0.062 | -0.284 | 0.776 | -0.139 | 0.104 |
Exterior1st_Wd Sdng | -0.0620 | 0.057 | -1.081 | 0.280 | -0.174 | 0.051 |
Exterior1st_WdShing | -0.0112 | 0.062 | -0.181 | 0.856 | -0.132 | 0.110 |
Exterior2nd_AsphShn | 0.0891 | 0.102 | 0.871 | 0.384 | -0.112 | 0.290 |
Exterior2nd_Brk Cmn | 0.1153 | 0.095 | 1.216 | 0.224 | -0.071 | 0.301 |
Exterior2nd_BrkFace | -0.0089 | 0.061 | -0.146 | 0.884 | -0.129 | 0.111 |
Exterior2nd_CBlock | -0.0341 | 0.063 | -0.540 | 0.589 | -0.158 | 0.090 |
Exterior2nd_CmentBd | 0.1782 | 0.086 | 2.074 | 0.038 | 0.010 | 0.347 |
Exterior2nd_HdBoard | 0.0447 | 0.058 | 0.776 | 0.438 | -0.068 | 0.158 |
Exterior2nd_ImStucc | 0.0550 | 0.067 | 0.827 | 0.408 | -0.075 | 0.186 |
Exterior2nd_MetalSd | 0.0229 | 0.066 | 0.347 | 0.729 | -0.107 | 0.153 |
Exterior2nd_Other | -0.0633 | 0.124 | -0.510 | 0.610 | -0.307 | 0.180 |
Exterior2nd_Plywood | 0.0410 | 0.056 | 0.734 | 0.463 | -0.069 | 0.151 |
Exterior2nd_Stone | -0.0140 | 0.079 | -0.176 | 0.860 | -0.170 | 0.142 |
Exterior2nd_Stucco | 0.0484 | 0.063 | 0.773 | 0.440 | -0.074 | 0.171 |
Exterior2nd_VinylSd | 0.0563 | 0.059 | 0.948 | 0.343 | -0.060 | 0.173 |
Exterior2nd_Wd Sdng | 0.0859 | 0.055 | 1.554 | 0.121 | -0.023 | 0.194 |
Exterior2nd_Wd Shng | 0.0408 | 0.058 | 0.707 | 0.480 | -0.072 | 0.154 |
Fence_GdWo | -0.0292 | 0.023 | -1.290 | 0.197 | -0.073 | 0.015 |
Fence_MnPrv | 0.0016 | 0.018 | 0.086 | 0.931 | -0.034 | 0.038 |
Fence_MnWw | -0.0068 | 0.038 | -0.181 | 0.857 | -0.081 | 0.067 |
Fence_None | 0.0116 | 0.017 | 0.689 | 0.491 | -0.021 | 0.045 |
FireplaceQu_Fa | -0.0075 | 0.032 | -0.237 | 0.813 | -0.069 | 0.054 |
FireplaceQu_Gd | 0.0166 | 0.024 | 0.679 | 0.497 | -0.031 | 0.065 |
FireplaceQu_None | -0.0009 | 0.029 | -0.032 | 0.974 | -0.057 | 0.055 |
FireplaceQu_Po | 0.0293 | 0.036 | 0.806 | 0.420 | -0.042 | 0.101 |
FireplaceQu_TA | 0.0177 | 0.025 | 0.696 | 0.487 | -0.032 | 0.068 |
Foundation_CBlock | 0.0120 | 0.015 | 0.813 | 0.417 | -0.017 | 0.041 |
Foundation_PConc | 0.0312 | 0.016 | 1.966 | 0.050 | 5.77e-05 | 0.062 |
Foundation_Slab | -0.0368 | 0.046 | -0.798 | 0.425 | -0.127 | 0.054 |
Foundation_Stone | 0.0790 | 0.053 | 1.503 | 0.133 | -0.024 | 0.182 |
Foundation_Wood | -0.1140 | 0.068 | -1.684 | 0.092 | -0.247 | 0.019 |
Functional_Maj2 | -0.2645 | 0.066 | -3.996 | 0.000 | -0.394 | -0.135 |
Functional_Min1 | 0.0143 | 0.040 | 0.359 | 0.720 | -0.064 | 0.092 |
Functional_Min2 | -0.0056 | 0.040 | -0.139 | 0.889 | -0.085 | 0.074 |
Functional_Mod | -0.0888 | 0.049 | -1.820 | 0.069 | -0.185 | 0.007 |
Functional_Sev | -0.3126 | 0.136 | -2.292 | 0.022 | -0.580 | -0.045 |
Functional_Typ | 0.0336 | 0.035 | 0.963 | 0.336 | -0.035 | 0.102 |
GarageCond_Fa | 0.2698 | 0.160 | 1.684 | 0.092 | -0.044 | 0.584 |
GarageCond_Gd | 0.2913 | 0.167 | 1.745 | 0.081 | -0.036 | 0.619 |
GarageCond_None | -0.1505 | 0.139 | -1.083 | 0.279 | -0.423 | 0.122 |
GarageCond_Po | 0.3659 | 0.175 | 2.096 | 0.036 | 0.023 | 0.708 |
GarageCond_TA | 0.2925 | 0.159 | 1.841 | 0.066 | -0.019 | 0.604 |
GarageFinish_None | -0.1505 | 0.139 | -1.083 | 0.279 | -0.423 | 0.122 |
GarageFinish_RFn | 0.0011 | 0.009 | 0.120 | 0.905 | -0.017 | 0.019 |
GarageFinish_Unf | -0.0103 | 0.011 | -0.919 | 0.358 | -0.032 | 0.012 |
GarageQual_Fa | -0.3951 | 0.139 | -2.841 | 0.005 | -0.668 | -0.122 |
GarageQual_Gd | -0.3368 | 0.143 | -2.355 | 0.019 | -0.617 | -0.056 |
GarageQual_None | -0.1505 | 0.139 | -1.083 | 0.279 | -0.423 | 0.122 |
GarageQual_Po | -0.3337 | 0.182 | -1.831 | 0.067 | -0.691 | 0.024 |
GarageQual_TA | -0.3463 | 0.138 | -2.514 | 0.012 | -0.617 | -0.076 |
GarageType_Attchd | 0.1280 | 0.051 | 2.534 | 0.011 | 0.029 | 0.227 |
GarageType_Basment | 0.1238 | 0.059 | 2.103 | 0.036 | 0.008 | 0.239 |
GarageType_BuiltIn | 0.1186 | 0.053 | 2.250 | 0.025 | 0.015 | 0.222 |
GarageType_CarPort | 0.1670 | 0.068 | 2.457 | 0.014 | 0.034 | 0.300 |
GarageType_Detchd | 0.1281 | 0.051 | 2.536 | 0.011 | 0.029 | 0.227 |
GarageType_None | -0.1505 | 0.139 | -1.083 | 0.279 | -0.423 | 0.122 |
Heating_GasA | 0.1364 | 0.117 | 1.164 | 0.245 | -0.094 | 0.366 |
Heating_GasW | 0.1939 | 0.121 | 1.606 | 0.109 | -0.043 | 0.431 |
Heating_Grav | -0.0503 | 0.129 | -0.389 | 0.698 | -0.304 | 0.203 |
Heating_OthW | 0.1320 | 0.145 | 0.914 | 0.361 | -0.151 | 0.416 |
Heating_Wall | 0.2089 | 0.136 | 1.534 | 0.125 | -0.058 | 0.476 |
HeatingQC_Fa | -0.0256 | 0.022 | -1.180 | 0.238 | -0.068 | 0.017 |
HeatingQC_Gd | -0.0196 | 0.009 | -2.063 | 0.039 | -0.038 | -0.001 |
HeatingQC_Po | -0.0870 | 0.122 | -0.713 | 0.476 | -0.326 | 0.152 |
HeatingQC_TA | -0.0320 | 0.009 | -3.368 | 0.001 | -0.051 | -0.013 |
HouseStyle_1.5Unf | 0.2237 | 0.113 | 1.972 | 0.049 | 0.001 | 0.446 |
HouseStyle_1Story | -0.0051 | 0.041 | -0.124 | 0.901 | -0.085 | 0.075 |
HouseStyle_2.5Fin | -0.0404 | 0.082 | -0.493 | 0.622 | -0.201 | 0.120 |
HouseStyle_2.5Unf | 0.0900 | 0.077 | 1.173 | 0.241 | -0.061 | 0.241 |
HouseStyle_2Story | 0.0107 | 0.037 | 0.285 | 0.776 | -0.063 | 0.084 |
HouseStyle_SFoyer | -0.0130 | 0.054 | -0.240 | 0.811 | -0.120 | 0.093 |
HouseStyle_SLvl | 0.0468 | 0.063 | 0.737 | 0.461 | -0.078 | 0.171 |
KitchenQual_Fa | -0.0548 | 0.029 | -1.914 | 0.056 | -0.111 | 0.001 |
KitchenQual_Gd | -0.0661 | 0.016 | -4.150 | 0.000 | -0.097 | -0.035 |
KitchenQual_TA | -0.0675 | 0.018 | -3.755 | 0.000 | -0.103 | -0.032 |
LandContour_HLS | 0.0287 | 0.024 | 1.207 | 0.228 | -0.018 | 0.075 |
LandContour_Low | -0.0214 | 0.030 | -0.717 | 0.473 | -0.080 | 0.037 |
LandContour_Lvl | 0.0248 | 0.017 | 1.451 | 0.147 | -0.009 | 0.058 |
LandSlope_Mod | 0.0317 | 0.018 | 1.720 | 0.086 | -0.004 | 0.068 |
LandSlope_Sev | -0.1991 | 0.052 | -3.810 | 0.000 | -0.302 | -0.097 |
LotConfig_CulDSac | 0.0229 | 0.015 | 1.542 | 0.123 | -0.006 | 0.052 |
LotConfig_FR2 | -0.0397 | 0.018 | -2.166 | 0.031 | -0.076 | -0.004 |
LotConfig_FR3 | -0.0913 | 0.058 | -1.584 | 0.114 | -0.204 | 0.022 |
LotConfig_Inside | -0.0194 | 0.008 | -2.411 | 0.016 | -0.035 | -0.004 |
LotShape_IR2 | 0.0235 | 0.019 | 1.219 | 0.223 | -0.014 | 0.061 |
LotShape_IR3 | 0.0281 | 0.040 | 0.696 | 0.486 | -0.051 | 0.107 |
LotShape_Reg | 0.0086 | 0.008 | 1.150 | 0.250 | -0.006 | 0.023 |
MSZoning_FV | 0.4789 | 0.056 | 8.579 | 0.000 | 0.369 | 0.588 |
MSZoning_RH | 0.4375 | 0.055 | 7.926 | 0.000 | 0.329 | 0.546 |
MSZoning_RL | 0.4442 | 0.048 | 9.320 | 0.000 | 0.351 | 0.538 |
MSZoning_RM | 0.4021 | 0.045 | 8.976 | 0.000 | 0.314 | 0.490 |
MasVnrType_BrkFace | 0.0445 | 0.031 | 1.420 | 0.156 | -0.017 | 0.106 |
MasVnrType_None | 0.0346 | 0.032 | 1.095 | 0.274 | -0.027 | 0.097 |
MasVnrType_Stone | 0.0557 | 0.033 | 1.678 | 0.094 | -0.009 | 0.121 |
MiscFeature_None | -0.3066 | 0.461 | -0.665 | 0.506 | -1.211 | 0.598 |
MiscFeature_Othr | -0.2894 | 0.422 | -0.686 | 0.493 | -1.117 | 0.538 |
MiscFeature_Shed | -0.2948 | 0.441 | -0.668 | 0.504 | -1.160 | 0.571 |
MiscFeature_TenC | -0.2998 | 0.455 | -0.659 | 0.510 | -1.193 | 0.593 |
Neighborhood_Blueste | 0.0243 | 0.091 | 0.266 | 0.790 | -0.155 | 0.204 |
Neighborhood_BrDale | -0.0192 | 0.054 | -0.356 | 0.722 | -0.125 | 0.086 |
Neighborhood_BrkSide | 0.0066 | 0.044 | 0.150 | 0.881 | -0.080 | 0.093 |
Neighborhood_ClearCr | 0.0286 | 0.043 | 0.669 | 0.503 | -0.055 | 0.112 |
Neighborhood_CollgCr | -0.0155 | 0.033 | -0.464 | 0.643 | -0.081 | 0.050 |
Neighborhood_Crawfor | 0.1033 | 0.040 | 2.605 | 0.009 | 0.026 | 0.181 |
Neighborhood_Edwards | -0.0814 | 0.037 | -2.194 | 0.028 | -0.154 | -0.009 |
Neighborhood_Gilbert | -0.0076 | 0.035 | -0.215 | 0.830 | -0.077 | 0.062 |
Neighborhood_IDOTRR | -0.0196 | 0.050 | -0.392 | 0.695 | -0.118 | 0.078 |
Neighborhood_MeadowV | -0.1449 | 0.056 | -2.567 | 0.010 | -0.256 | -0.034 |
Neighborhood_Mitchel | -0.0543 | 0.038 | -1.442 | 0.149 | -0.128 | 0.020 |
Neighborhood_NAmes | -0.0405 | 0.036 | -1.120 | 0.263 | -0.111 | 0.030 |
Neighborhood_NPkVill | -9.05e-05 | 0.064 | -0.001 | 0.999 | -0.126 | 0.126 |
Neighborhood_NWAmes | -0.0399 | 0.037 | -1.081 | 0.280 | -0.112 | 0.033 |
Neighborhood_NoRidge | 0.0437 | 0.039 | 1.123 | 0.262 | -0.033 | 0.120 |
Neighborhood_NridgHt | 0.0832 | 0.034 | 2.410 | 0.016 | 0.015 | 0.151 |
Neighborhood_OldTown | -0.0448 | 0.045 | -1.003 | 0.316 | -0.133 | 0.043 |
Neighborhood_SWISU | -0.0088 | 0.045 | -0.196 | 0.845 | -0.096 | 0.079 |
Neighborhood_Sawyer | -0.0277 | 0.037 | -0.740 | 0.459 | -0.101 | 0.046 |
Neighborhood_SawyerW | 0.0017 | 0.036 | 0.046 | 0.963 | -0.069 | 0.072 |
Neighborhood_Somerst | 0.0252 | 0.042 | 0.608 | 0.543 | -0.056 | 0.107 |
Neighborhood_StoneBr | 0.1444 | 0.038 | 3.776 | 0.000 | 0.069 | 0.219 |
Neighborhood_Timber | 0.0083 | 0.037 | 0.222 | 0.825 | -0.065 | 0.081 |
Neighborhood_Veenker | 0.0490 | 0.048 | 1.019 | 0.308 | -0.045 | 0.143 |
PavedDrive_P | -0.0118 | 0.026 | -0.460 | 0.645 | -0.062 | 0.039 |
PavedDrive_Y | 0.0112 | 0.016 | 0.705 | 0.481 | -0.020 | 0.043 |
PoolQC_Fa | -0.1550 | 0.188 | -0.826 | 0.409 | -0.523 | 0.213 |
PoolQC_Gd | 0.0164 | 0.170 | 0.096 | 0.923 | -0.317 | 0.349 |
PoolQC_None | 0.7255 | 0.572 | 1.269 | 0.205 | -0.396 | 1.847 |
RoofMatl_CompShg | 2.4966 | 0.241 | 10.361 | 0.000 | 2.024 | 2.969 |
RoofMatl_Membran | 2.9315 | 0.286 | 10.245 | 0.000 | 2.370 | 3.493 |
RoofMatl_Metal | 2.7836 | 0.285 | 9.771 | 0.000 | 2.225 | 3.343 |
RoofMatl_Roll | 2.4899 | 0.268 | 9.286 | 0.000 | 1.964 | 3.016 |
RoofMatl_Tar&Grv | 2.5244 | 0.259 | 9.755 | 0.000 | 2.017 | 3.032 |
RoofMatl_WdShake | 2.4350 | 0.252 | 9.678 | 0.000 | 1.941 | 2.929 |
RoofMatl_WdShngl | 2.5513 | 0.245 | 10.417 | 0.000 | 2.071 | 3.032 |
RoofStyle_Gable | 0.0139 | 0.085 | 0.164 | 0.869 | -0.152 | 0.180 |
RoofStyle_Gambrel | -0.0133 | 0.093 | -0.143 | 0.886 | -0.195 | 0.169 |
RoofStyle_Hip | 0.0150 | 0.085 | 0.176 | 0.860 | -0.152 | 0.182 |
RoofStyle_Mansard | 0.0661 | 0.099 | 0.671 | 0.503 | -0.127 | 0.259 |
RoofStyle_Shed | 0.5501 | 0.176 | 3.127 | 0.002 | 0.205 | 0.895 |
SaleCondition_AdjLand | 0.1137 | 0.067 | 1.700 | 0.089 | -0.017 | 0.245 |
SaleCondition_Alloca | 0.0372 | 0.042 | 0.896 | 0.371 | -0.044 | 0.119 |
SaleCondition_Family | 0.0140 | 0.028 | 0.502 | 0.615 | -0.041 | 0.069 |
SaleCondition_Normal | 0.0599 | 0.013 | 4.500 | 0.000 | 0.034 | 0.086 |
SaleCondition_Partial | 0.0095 | 0.068 | 0.140 | 0.889 | -0.124 | 0.143 |
SaleType_CWD | 0.0582 | 0.059 | 0.987 | 0.324 | -0.057 | 0.174 |
SaleType_Con | 0.0991 | 0.080 | 1.233 | 0.218 | -0.059 | 0.257 |
SaleType_ConLD | 0.1384 | 0.045 | 3.052 | 0.002 | 0.049 | 0.227 |
SaleType_ConLI | -0.0335 | 0.053 | -0.633 | 0.527 | -0.137 | 0.070 |
SaleType_ConLw | 0.0075 | 0.056 | 0.135 | 0.893 | -0.102 | 0.117 |
SaleType_New | 0.0780 | 0.071 | 1.101 | 0.271 | -0.061 | 0.217 |
SaleType_Oth | 0.0469 | 0.066 | 0.707 | 0.480 | -0.083 | 0.177 |
SaleType_WD | -0.0141 | 0.019 | -0.736 | 0.462 | -0.052 | 0.024 |
Street_Pave | 0.0973 | 0.057 | 1.693 | 0.091 | -0.015 | 0.210 |
Utilities_NoSeWa | -0.2753 | 0.131 | -2.109 | 0.035 | -0.531 | -0.019 |
MSSubClass_30 | -0.0578 | 0.022 | -2.661 | 0.008 | -0.100 | -0.015 |
MSSubClass_40 | -0.0926 | 0.080 | -1.151 | 0.250 | -0.251 | 0.065 |
MSSubClass_45 | -0.2325 | 0.115 | -2.022 | 0.043 | -0.458 | -0.007 |
MSSubClass_50 | 0.0065 | 0.040 | 0.163 | 0.871 | -0.072 | 0.085 |
MSSubClass_60 | -0.0384 | 0.035 | -1.086 | 0.278 | -0.108 | 0.031 |
MSSubClass_70 | 0.0112 | 0.038 | 0.295 | 0.768 | -0.063 | 0.086 |
MSSubClass_75 | -0.0544 | 0.075 | -0.729 | 0.466 | -0.201 | 0.092 |
MSSubClass_80 | -0.0577 | 0.058 | -0.993 | 0.321 | -0.172 | 0.056 |
MSSubClass_85 | -0.0048 | 0.049 | -0.098 | 0.922 | -0.101 | 0.091 |
MSSubClass_90 | -0.0101 | 0.017 | -0.596 | 0.551 | -0.043 | 0.023 |
MSSubClass_120 | -0.0473 | 0.067 | -0.706 | 0.480 | -0.179 | 0.084 |
MSSubClass_150 | 0 | 0 | nan | nan | 0 | 0 |
MSSubClass_160 | -0.1498 | 0.081 | -1.860 | 0.063 | -0.308 | 0.008 |
MSSubClass_180 | -0.0672 | 0.090 | -0.743 | 0.457 | -0.245 | 0.110 |
MSSubClass_190 | 0.0388 | 0.128 | 0.303 | 0.762 | -0.213 | 0.290 |
Omnibus: | 395.732 | Durbin-Watson: | 1.925 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 5634.954 |
Skew: | -0.857 | Prob(JB): | 0.00 |
Kurtosis: | 12.474 | Cond. No. | 1.30e+16 |
y_pred_OLS = est_price.predict(dfXTrain)
rmse_OLS = mean_squared_error(target, y_pred_OLS)**0.5
r2_OLS = r2_score(target, y_pred_OLS)
print 'RMSE:', rmse_OLS
print 'R2 Score:', r2_OLS
RMSE: 0.0934055998265 R2 Score: 0.945317934136
# Linear Regression
linreg = linear_model.LinearRegression()
linreg.fit(X_train, y_train)
y_pred_lin = linreg.predict(X_test)
rmse_lin = mean_squared_error(y_test, y_pred_lin)**0.5
r2_lin = r2_score(y_test, y_pred_lin)
print 'RMSE:', rmse_lin
print 'R2 Score:', r2_lin
RMSE: 0.193812161543 R2 Score: 0.75166313022
plt.scatter(y_pred_lin, y_test, alpha=0.75)
plt.plot([11,13], [11,13], 'r--')
plt.xlabel('Predicted Price (Natural Log)')
plt.ylabel('Actual Price (Natural Log)')
plt.title('Linear Regression Model')
overlay = 'R^2 is: {}\nRMSE is: {}'.format(r2_lin, rmse_lin)
plt.annotate(s=overlay, xy=(12.1, 11.1))
plt.show()
print 'Estimated Intercept Coefficient:', linreg.intercept_
pd.DataFrame(zip(X_train.columns, linreg.coef_), columns=['Features', 'Estimated Coefficients'])
Estimated Intercept Coefficient: 9.28593348031
Features | Estimated Coefficients | |
---|---|---|
0 | const | 8.742466e-12 |
1 | 1stFlrSF | 2.063752e-05 |
2 | 2ndFlrSF | 3.331042e-05 |
3 | 3SsnPorch | 3.413177e-04 |
4 | BedroomAbvGr | 1.312744e-02 |
5 | BsmtFinSF1 | 5.463368e-05 |
6 | BsmtFinSF2 | 3.186460e-05 |
7 | BsmtFullBath | 2.723053e-02 |
8 | BsmtHalfBath | 1.792452e-02 |
9 | BsmtUnfSF | 8.191365e-06 |
10 | EnclosedPorch | 1.518203e-04 |
11 | Fireplaces | 9.869554e-03 |
12 | FullBath | 1.361272e-02 |
13 | GarageArea | 1.389872e-04 |
14 | GarageCars | 2.277805e-02 |
15 | GarageYrBlt | 1.250351e-04 |
16 | GrLivArea | 1.818444e-04 |
17 | HalfBath | 3.129251e-02 |
18 | KitchenAbvGr | -1.949635e-02 |
19 | LotArea | 3.143372e-06 |
20 | LotFrontage | -2.497678e-05 |
21 | LowQualFinSF | 1.278963e-04 |
22 | MasVnrArea | 4.733941e-05 |
23 | MiscVal | 2.343580e-06 |
24 | MoSold | -3.505495e-04 |
25 | OpenPorchSF | 1.093257e-05 |
26 | OverallCond | 3.479756e-02 |
27 | OverallQual | 4.127441e-02 |
28 | PoolArea | 4.940483e-03 |
29 | ScreenPorch | 3.054257e-04 |
... | ... | ... |
244 | SaleCondition_AdjLand | 9.445715e-02 |
245 | SaleCondition_Alloca | -1.197131e-02 |
246 | SaleCondition_Family | 1.600923e-02 |
247 | SaleCondition_Normal | 4.799301e-02 |
248 | SaleCondition_Partial | 9.378706e-03 |
249 | SaleType_CWD | 9.967190e-02 |
250 | SaleType_Con | 3.461150e-02 |
251 | SaleType_ConLD | 8.578715e-02 |
252 | SaleType_ConLI | -7.662338e-02 |
253 | SaleType_ConLw | 1.342122e-02 |
254 | SaleType_New | 5.267556e-02 |
255 | SaleType_Oth | 6.203358e-02 |
256 | SaleType_WD | -2.200300e-02 |
257 | Street_Pave | 2.881701e-02 |
258 | Utilities_NoSeWa | -3.439280e-01 |
259 | MSSubClass_30 | -3.079691e-02 |
260 | MSSubClass_40 | 9.658902e-02 |
261 | MSSubClass_45 | -2.657223e-01 |
262 | MSSubClass_50 | 1.763725e-02 |
263 | MSSubClass_60 | -4.863564e-02 |
264 | MSSubClass_70 | -8.369859e-03 |
265 | MSSubClass_75 | -3.426370e-02 |
266 | MSSubClass_80 | -7.726157e-02 |
267 | MSSubClass_85 | -4.149958e-02 |
268 | MSSubClass_90 | -2.269921e-02 |
269 | MSSubClass_120 | -8.155566e-03 |
270 | MSSubClass_150 | 0.000000e+00 |
271 | MSSubClass_160 | -1.305526e-01 |
272 | MSSubClass_180 | -4.782924e-02 |
273 | MSSubClass_190 | 5.588236e-04 |
274 rows × 2 columns
# Ridge Regression
ridgereg = linear_model.Ridge()
ridgereg.fit(X_train, y_train)
y_pred_ridge = ridgereg.predict(X_test)
rmse_ridge = mean_squared_error(y_test, y_pred_ridge)**0.5
r2_ridge = r2_score(y_test, y_pred_ridge)
print 'RMSE:', rmse_ridge
print 'R2 Score:', r2_ridge
RMSE: 0.125338089334 R2 Score: 0.896140723647
# Predicted-vs-actual scatter for the ridge model. Points lying on the dashed
# red diagonal (drawn from 11 to 13 on both axes) are predicted exactly.
plt.scatter(y_pred_ridge, y_test, alpha=0.75)
plt.plot([11,13], [11,13], 'r--')
plt.xlabel('Predicted Price (Natural Log)')
plt.ylabel('Actual Price (Natural Log)')
plt.title('Ridge Regression Model')
overlay = 'R^2 is: {}\nRMSE is: {}'.format(r2_ridge, rmse_ridge)
# Pass the text positionally: the keyword for this argument was renamed from
# `s` to `text` in newer Matplotlib releases, so `s=` is not portable.
plt.annotate(overlay, xy=(12.1, 11.1))
plt.show()
# Lasso Regression
lassoreg = linear_model.Lasso()
lassoreg.fit(X_train, y_train)
y_pred_lasso = lassoreg.predict(X_test)
rmse_lasso = mean_squared_error(y_test, y_pred_lasso)**0.5
r2_lasso = r2_score(y_test, y_pred_lasso)
#print 'R^2 is:', lassoreg.score(X_test, y_test)
print 'RMSE:', rmse_lasso
print 'R2 Score:', r2_lasso
RMSE: 0.180619478801 R2 Score: 0.784320771285
# Predicted-vs-actual scatter for the lasso model. Points lying on the dashed
# red diagonal (drawn from 11 to 13 on both axes) are predicted exactly.
plt.scatter(y_pred_lasso, y_test, alpha=0.75)
plt.plot([11,13], [11,13], 'r--')
plt.xlabel('Predicted Price (Natural Log)')
plt.ylabel('Actual Price (Natural Log)')
plt.title('Lasso Regression Model')
overlay = 'R^2 is: {}\nRMSE is: {}'.format(r2_lasso, rmse_lasso)
# Pass the text positionally: the keyword for this argument was renamed from
# `s` to `text` in newer Matplotlib releases, so `s=` is not portable.
plt.annotate(overlay, xy=(12.1, 11.1))
plt.show()
# Build one submission table (columns 'Id', 'SalePrice') per fitted model from
# dfFinalTest and write each to its own CSV file.

# Create Submission DataFrame per Algorithm
submission_OLS = pd.DataFrame()
submission_lin = pd.DataFrame()
submission_ridge = pd.DataFrame()
submission_lasso = pd.DataFrame()
# Include 'Id' Column in submission per Algorithm
submission_OLS['Id'] = dfFinalTest.Id
submission_lin['Id'] = dfFinalTest.Id
submission_ridge['Id'] = dfFinalTest.Id
submission_lasso['Id'] = dfFinalTest.Id
# Every model is fed the same inputs (dfFinalTest without its 'Id' column), so
# build that frame once instead of re-dropping the column for each predict call.
final_features = dfFinalTest.drop(['Id'], axis=1)
# Make Predictions without 'Id' Column per Algorithm
predictions_OLS = est_price.predict(final_features)
predictions_lin = linreg.predict(final_features)
predictions_ridge = ridgereg.predict(final_features)
predictions_lasso = lassoreg.predict(final_features)
# Convert prediction back out of natural log per Algorithm
final_predictions_OLS = np.exp(predictions_OLS)
final_predictions_lin = np.exp(predictions_lin)
final_predictions_ridge = np.exp(predictions_ridge)
final_predictions_lasso = np.exp(predictions_lasso)
# Place predictions in submission DataFrame per Algorithm
submission_OLS['SalePrice'] = final_predictions_OLS
submission_lin['SalePrice'] = final_predictions_lin
submission_ridge['SalePrice'] = final_predictions_ridge
submission_lasso['SalePrice'] = final_predictions_lasso
# Convert DataFrame to CSV per Algorithm (index=False keeps the row index out
# of the file, leaving only the 'Id' and 'SalePrice' columns)
submission_OLS.to_csv('submission_OLS.csv', index=False)
submission_lin.to_csv('submission_lin.csv', index=False)
submission_ridge.to_csv('submission_ridge.csv', index=False)
submission_lasso.to_csv('submission_lasso.csv', index=False)