#import all necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cross_validation import train_test_split
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning)
# First look at the raw ratings file: it has no header row, so pandas numbers
# the columns 0..3.
ratings = pd.read_csv('u.data',header=None,sep='\t') # '\t' is the escape for a single tab character: fields are tab-separated
ratings.head()
0 | 1 | 2 | 3 | |
---|---|---|---|---|
0 | 0 | 50 | 5 | 881250949 |
1 | 0 | 172 | 5 | 881250949 |
2 | 0 | 133 | 1 | 881250949 |
3 | 196 | 242 | 3 | 881250949 |
4 | 186 | 302 | 3 | 891717742 |
# Reload the ratings, keeping only the first three columns and giving them
# readable names (the fourth column seen in the raw preview is not used).
r_cols = ['user_id', 'movie_id', 'rating']
ratings = pd.read_csv('u.data', sep='\t', names=r_cols, usecols=range(3))
print(ratings.head())

# Load the movie_id -> title lookup from the first two '|'-separated columns.
# The file is decoded as Latin-1: read with the default codec, titles that
# contain accented letters come out garbled (and fail to decode on Python 3).
m_cols = ['movie_id', 'title']
movies = pd.read_csv('u.item', sep='|', names=m_cols, usecols=range(2),
                     encoding='latin-1')
print(movies.head())
user_id movie_id rating 0 0 50 5 1 0 172 5 2 0 133 1 3 196 242 3 4 186 302 3 movie_id title 0 1 Toy Story (1995) 1 2 GoldenEye (1995) 2 3 Four Rooms (1995) 3 4 Get Shorty (1995) 4 5 Copycat (1995)
# Attach a title to every rating by joining the two frames on the movie_id
# column they share.
df = ratings.merge(movies, on='movie_id')
df.head()
#df['movie_id'].unique()
user_id | movie_id | rating | title | |
---|---|---|---|---|
0 | 0 | 50 | 5 | Star Wars (1977) |
1 | 290 | 50 | 5 | Star Wars (1977) |
2 | 79 | 50 | 4 | Star Wars (1977) |
3 | 2 | 50 | 5 | Star Wars (1977) |
4 | 8 | 50 | 5 | Star Wars (1977) |
# Reshape into a user x movie grid: one row per user_id, one column per title.
# Each cell holds that user's rating for the movie (pivot_table averages if a
# pair occurs more than once) and is NaN where the user has not rated it.
userRatings = pd.pivot_table(
    df,
    values='rating',
    index=['user_id'],
    columns=['title']
)
userRatings.head()
title | 'Til There Was You (1997) | 1-900 (1994) | 101 Dalmatians (1996) | 12 Angry Men (1957) | 187 (1997) | 2 Days in the Valley (1996) | 20,000 Leagues Under the Sea (1954) | 2001: A Space Odyssey (1968) | 3 Ninjas: High Noon At Mega Mountain (1998) | 39 Steps, The (1935) | ... | Yankee Zulu (1994) | Year of the Horse (1997) | You So Crazy (1994) | Young Frankenstein (1974) | Young Guns (1988) | Young Guns II (1990) | Young Poisoner's Handbook, The (1995) | Zeus and Roxanne (1997) | unknown | � k�ldum klaka (Cold Fever) (1994) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
user_id | |||||||||||||||||||||
0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | NaN | NaN | 2.0 | 5.0 | NaN | NaN | 3.0 | 4.0 | NaN | NaN | ... | NaN | NaN | NaN | 5.0 | 3.0 | NaN | NaN | NaN | 4.0 | NaN |
2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | NaN | NaN | NaN | NaN | 2.0 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 1664 columns
# Correlate every movie column with every other one. pandas only uses the
# users who rated both movies of a pair, and with the default settings there
# is no minimum on how many such users are required.
corrMatrix = userRatings.corr()
corrMatrix.head()
title | 'Til There Was You (1997) | 1-900 (1994) | 101 Dalmatians (1996) | 12 Angry Men (1957) | 187 (1997) | 2 Days in the Valley (1996) | 20,000 Leagues Under the Sea (1954) | 2001: A Space Odyssey (1968) | 3 Ninjas: High Noon At Mega Mountain (1998) | 39 Steps, The (1935) | ... | Yankee Zulu (1994) | Year of the Horse (1997) | You So Crazy (1994) | Young Frankenstein (1974) | Young Guns (1988) | Young Guns II (1990) | Young Poisoner's Handbook, The (1995) | Zeus and Roxanne (1997) | unknown | � k�ldum klaka (Cold Fever) (1994) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
title | |||||||||||||||||||||
'Til There Was You (1997) | 1.0 | NaN | -1.000000 | -0.500000 | -0.500000 | 0.522233 | NaN | -0.426401 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1-900 (1994) | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | -0.981981 | NaN | NaN | ... | NaN | NaN | NaN | -0.944911 | NaN | NaN | NaN | NaN | NaN | NaN |
101 Dalmatians (1996) | -1.0 | NaN | 1.000000 | -0.049890 | 0.269191 | 0.048973 | 0.266928 | -0.043407 | NaN | 0.111111 | ... | NaN | -1.000000 | NaN | 0.158840 | 0.119234 | 0.680414 | 0.000000 | 0.707107 | NaN | NaN |
12 Angry Men (1957) | -0.5 | NaN | -0.049890 | 1.000000 | 0.666667 | 0.256625 | 0.274772 | 0.178848 | NaN | 0.457176 | ... | NaN | NaN | NaN | 0.096546 | 0.068944 | -0.361961 | 0.144338 | 1.000000 | 1.0 | NaN |
187 (1997) | -0.5 | NaN | 0.269191 | 0.666667 | 1.000000 | 0.596644 | NaN | -0.554700 | NaN | 1.000000 | ... | NaN | 0.866025 | NaN | 0.455233 | -0.500000 | 0.500000 | 0.475327 | NaN | NaN | NaN |
5 rows × 1664 columns
# Spot-check: correlation computed for just one pair of movie columns.
userRatings[['12 Angry Men (1957)',"'Til There Was You (1997)"]].corr()
title | 12 Angry Men (1957) | 'Til There Was You (1997) |
---|---|---|
title | ||
12 Angry Men (1957) | 1.0 | -0.5 |
'Til There Was You (1997) | -0.5 | 1.0 |
# Recompute the matrix, keeping a correlation only when at least 100 users
# rated both movies; pairs with fewer shared ratings are reported as NaN.
corrMatrix = userRatings.corr(method='pearson',min_periods=100) # Pearson correlation, suited to continuous variables
corrMatrix.head(10)
title | 'Til There Was You (1997) | 1-900 (1994) | 101 Dalmatians (1996) | 12 Angry Men (1957) | 187 (1997) | 2 Days in the Valley (1996) | 20,000 Leagues Under the Sea (1954) | 2001: A Space Odyssey (1968) | 3 Ninjas: High Noon At Mega Mountain (1998) | 39 Steps, The (1935) | ... | Yankee Zulu (1994) | Year of the Horse (1997) | You So Crazy (1994) | Young Frankenstein (1974) | Young Guns (1988) | Young Guns II (1990) | Young Poisoner's Handbook, The (1995) | Zeus and Roxanne (1997) | unknown | � k�ldum klaka (Cold Fever) (1994) |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
title | |||||||||||||||||||||
'Til There Was You (1997) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1-900 (1994) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
101 Dalmatians (1996) | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
12 Angry Men (1957) | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
187 (1997) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 Days in the Valley (1996) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
20,000 Leagues Under the Sea (1954) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2001: A Space Odyssey (1968) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | ... | NaN | NaN | NaN | -0.001307 | NaN | NaN | NaN | NaN | NaN | NaN |
3 Ninjas: High Noon At Mega Mountain (1998) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
39 Steps, The (1935) | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
10 rows × 1664 columns
userRatings.loc[0] # row for user_id 0 (.loc looks up the index label, not the position)
title 'Til There Was You (1997) NaN 1-900 (1994) NaN 101 Dalmatians (1996) NaN 12 Angry Men (1957) NaN 187 (1997) NaN 2 Days in the Valley (1996) NaN 20,000 Leagues Under the Sea (1954) NaN 2001: A Space Odyssey (1968) NaN 3 Ninjas: High Noon At Mega Mountain (1998) NaN 39 Steps, The (1935) NaN 8 1/2 (1963) NaN 8 Heads in a Duffel Bag (1997) NaN 8 Seconds (1994) NaN A Chef in Love (1996) NaN Above the Rim (1994) NaN Absolute Power (1997) NaN Abyss, The (1989) NaN Ace Ventura: Pet Detective (1994) NaN Ace Ventura: When Nature Calls (1995) NaN Across the Sea of Time (1995) NaN Addams Family Values (1993) NaN Addicted to Love (1997) NaN Addiction, The (1995) NaN Adventures of Pinocchio, The (1996) NaN Adventures of Priscilla, Queen of the Desert, The (1994) NaN Adventures of Robin Hood, The (1938) NaN Affair to Remember, An (1957) NaN African Queen, The (1951) NaN Afterglow (1997) NaN Age of Innocence, The (1993) NaN .. Window to Paris (1994) NaN Wings of Courage (1995) NaN Wings of Desire (1987) NaN Wings of the Dove, The (1997) NaN Winnie the Pooh and the Blustery Day (1968) NaN Winter Guest, The (1997) NaN Wishmaster (1997) NaN With Honors (1994) NaN Withnail and I (1987) NaN Witness (1985) NaN Wizard of Oz, The (1939) NaN Wolf (1994) NaN Woman in Question, The (1950) NaN Women, The (1939) NaN Wonderful, Horrible Life of Leni Riefenstahl, The (1993) NaN Wonderland (1997) NaN Wooden Man's Bride, The (Wu Kui) (1994) NaN World of Apu, The (Apur Sansar) (1959) NaN Wrong Trousers, The (1993) NaN Wyatt Earp (1994) NaN Yankee Zulu (1994) NaN Year of the Horse (1997) NaN You So Crazy (1994) NaN Young Frankenstein (1974) NaN Young Guns (1988) NaN Young Guns II (1990) NaN Young Poisoner's Handbook, The (1995) NaN Zeus and Roxanne (1997) NaN unknown NaN � k�ldum klaka (Cold Fever) (1994) NaN Name: 0, Length: 1664, dtype: float64
# Keep only the movies user 0 actually rated by discarding the NaN entries.
myRatings = userRatings.loc[0].dropna()
myRatings
title Empire Strikes Back, The (1980) 5.0 Gone with the Wind (1939) 1.0 Star Wars (1977) 5.0 Name: 0, dtype: float64
# Titles of the movies this user rated.
myRatings.index
Index([u'Empire Strikes Back, The (1980)', u'Gone with the Wind (1939)', u'Star Wars (1977)'], dtype='object', name=u'title')
# The matching rating values, in the same order as the titles.
myRatings.values
array([ 5., 1., 5.])
# Build the pool of candidate movies: for every movie this user rated, take
# the movies correlated with it and weight each correlation by the rating the
# user gave. The per-movie results are gathered in a list and concatenated
# once at the end, because Series.append re-copied the whole series on every
# pass and is no longer available in current pandas releases.
scaledSims = []
for title, rating in zip(myRatings.index, myRatings.values):
    print("Adding sims for " + title + '...')
    # Retrieve similar movies to this one that I rated
    sims = corrMatrix[title].dropna()
    print(sims)
    # Now scale its similarity by how well I rated this movie. The rating is
    # taken alongside its title instead of through an integer position on the
    # title-indexed series.
    scaledSims.append(sims * rating)

# pd.concat refuses an empty list, so fall back to an empty series when the
# user has not rated anything.
if scaledSims:
    simCandidates = pd.concat(scaledSims)
else:
    simCandidates = pd.Series(dtype='float64')

# Glance at our results so far:
print(simCandidates.head(20))
Adding sims for Empire Strikes Back, The (1980)... title 2001: A Space Odyssey (1968) 0.141598 Abyss, The (1989) 0.277867 African Queen, The (1951) 0.231657 Air Force One (1997) 0.165620 Aladdin (1992) 0.311063 Alien (1979) 0.201669 Aliens (1986) 0.292577 Amadeus (1984) 0.149328 American President, The (1995) 0.213057 Annie Hall (1977) -0.002235 Apocalypse Now (1979) 0.084026 Apollo 13 (1995) 0.196901 Babe (1995) 0.109333 Back to the Future (1985) 0.345285 Batman (1989) 0.300169 Batman Forever (1995) 0.112007 Batman Returns (1992) 0.133523 Beauty and the Beast (1991) 0.157386 Ben-Hur (1959) 0.210589 Birdcage, The (1996) 0.092188 Birds, The (1963) 0.067393 Blade Runner (1982) 0.193791 Blues Brothers, The (1980) 0.219766 Boot, Das (1981) 0.164315 Bram Stoker's Dracula (1992) 0.142719 Braveheart (1995) 0.257074 Brazil (1985) -0.089110 Bridge on the River Kwai, The (1957) 0.356743 Broken Arrow (1996) 0.193709 Butch Cassidy and the Sundance Kid (1969) 0.244651 ... Star Trek: Generations (1994) 0.302635 Star Trek: The Motion Picture (1979) 0.180343 Star Trek: The Wrath of Khan (1982) 0.311410 Star Wars (1977) 0.748353 Stargate (1994) 0.100042 Sting, The (1973) 0.274387 Taxi Driver (1976) 0.115306 Terminator 2: Judgment Day (1991) 0.333532 Terminator, The (1984) 0.280766 This Is Spinal Tap (1984) -0.046499 Time to Kill, A (1996) 0.019384 Tin Cup (1996) 0.205358 Titanic (1997) 0.184539 To Kill a Mockingbird (1962) 0.097268 Top Gun (1986) 0.162103 Toy Story (1995) 0.232355 Trainspotting (1996) 0.051306 True Lies (1994) 0.246698 Truth About Cats & Dogs, The (1996) 0.095777 Twelve Monkeys (1995) 0.115975 Twister (1996) -0.010236 Under Siege (1992) 0.139255 Unforgiven (1992) 0.144706 Usual Suspects, The (1995) 0.208092 Vertigo (1958) 0.140744 When Harry Met Sally... 
(1989) 0.154222 While You Were Sleeping (1995) 0.266557 Willy Wonka and the Chocolate Factory (1971) 0.191770 Wizard of Oz, The (1939) 0.287675 Young Frankenstein (1974) 0.185887 Name: Empire Strikes Back, The (1980), Length: 197, dtype: float64 Adding sims for Gone with the Wind (1939)... title 2001: A Space Odyssey (1968) 0.004890 Alien (1979) -0.057730 Aliens (1986) 0.056156 Amadeus (1984) 0.323631 Apollo 13 (1995) 0.030318 Back to the Future (1985) 0.257319 Blade Runner (1982) 0.048333 Blues Brothers, The (1980) 0.124627 Braveheart (1995) 0.121169 Casablanca (1942) 0.291137 Dances with Wolves (1990) 0.140462 Dead Poets Society (1989) 0.139189 E.T. the Extra-Terrestrial (1982) 0.361463 Empire Strikes Back, The (1980) 0.135687 Fargo (1996) 0.124841 Field of Dreams (1989) 0.153802 Fish Called Wanda, A (1988) -0.006385 Forrest Gump (1994) 0.095262 Fugitive, The (1993) -0.017365 Godfather, The (1972) 0.065994 Gone with the Wind (1939) 1.000000 Graduate, The (1967) 0.326215 Groundhog Day (1993) 0.079423 Independence Day (ID4) (1996) 0.180982 Indiana Jones and the Last Crusade (1989) 0.171311 It's a Wonderful Life (1946) 0.305688 Jaws (1975) 0.133886 Jurassic Park (1993) 0.138319 Lion King, The (1994) 0.185623 M*A*S*H (1970) 0.313956 Mary Poppins (1964) 0.244310 Monty Python and the Holy Grail (1974) 0.180938 One Flew Over the Cuckoo's Nest (1975) 0.072976 Princess Bride, The (1987) -0.012742 Psycho (1960) 0.193419 Pulp Fiction (1994) 0.034826 Raiders of the Lost Ark (1981) 0.145818 Raising Arizona (1987) -0.160481 Return of the Jedi (1983) 0.209247 Schindler's List (1993) 0.344765 Silence of the Lambs, The (1991) 0.032078 Sound of Music, The (1965) 0.244235 Star Wars (1977) 0.129207 Sting, The (1973) 0.120216 Terminator 2: Judgment Day (1991) 0.133215 Terminator, The (1984) 0.066005 Toy Story (1995) 0.199741 When Harry Met Sally... 
(1989) 0.197863 Willy Wonka and the Chocolate Factory (1971) 0.053934 Wizard of Oz, The (1939) 0.430219 Name: Gone with the Wind (1939), dtype: float64 Adding sims for Star Wars (1977)... title 12 Angry Men (1957) 0.184289 2001: A Space Odyssey (1968) 0.230884 Absolute Power (1997) 0.085440 Abyss, The (1989) 0.203709 African Queen, The (1951) 0.230540 Air Force One (1997) 0.113164 Aladdin (1992) 0.191621 Alien (1979) 0.248991 Aliens (1986) 0.254444 Amadeus (1984) 0.190280 American President, The (1995) 0.113189 Annie Hall (1977) -0.100120 Apocalypse Now (1979) 0.028576 Apollo 13 (1995) 0.222006 Army of Darkness (1993) 0.103842 Austin Powers: International Man of Mystery (1997) 0.377433 Babe (1995) 0.184512 Back to the Future (1985) 0.274839 Batman (1989) 0.289344 Batman Forever (1995) 0.113667 Batman Returns (1992) 0.059691 Beauty and the Beast (1991) 0.106199 Beavis and Butt-head Do America (1996) 0.132943 Ben-Hur (1959) 0.043553 Big Night (1996) 0.152359 Birdcage, The (1996) 0.060544 Birds, The (1963) 0.083739 Blade Runner (1982) 0.196715 Blues Brothers, The (1980) 0.192560 Bonnie and Clyde (1967) 0.084781 ... Swingers (1996) 0.289310 Taxi Driver (1976) 0.029863 Terminator 2: Judgment Day (1991) 0.207599 Terminator, The (1984) 0.262255 That Thing You Do! (1996) 0.190198 This Is Spinal Tap (1984) -0.080796 Time to Kill, A (1996) -0.042790 Tin Cup (1996) 0.117280 Titanic (1997) 0.081928 To Kill a Mockingbird (1962) 0.129012 Tomorrow Never Dies (1997) 0.255210 Top Gun (1986) 0.174815 Toy Story (1995) 0.180020 Trainspotting (1996) 0.021504 True Lies (1994) 0.198306 Truth About Cats & Dogs, The (1996) 0.145821 Twelve Monkeys (1995) 0.155530 Twister (1996) 0.038232 Under Siege (1992) 0.026704 Unforgiven (1992) 0.055059 Usual Suspects, The (1995) 0.211075 Vertigo (1958) 0.080635 Volcano (1997) 0.045947 What's Eating Gilbert Grape (1993) -0.060495 When Harry Met Sally... 
(1989) 0.119849 While You Were Sleeping (1995) 0.156119 Willy Wonka and the Chocolate Factory (1971) 0.221902 Wizard of Oz, The (1939) 0.266335 Wrong Trousers, The (1993) 0.216204 Young Frankenstein (1974) 0.192589 Name: Star Wars (1977), Length: 268, dtype: float64 2001: A Space Odyssey (1968) 0.707991 Abyss, The (1989) 1.389334 African Queen, The (1951) 1.158286 Air Force One (1997) 0.828101 Aladdin (1992) 1.555313 Alien (1979) 1.008343 Aliens (1986) 1.462883 Amadeus (1984) 0.746641 American President, The (1995) 1.065284 Annie Hall (1977) -0.011175 Apocalypse Now (1979) 0.420130 Apollo 13 (1995) 0.984504 Babe (1995) 0.546663 Back to the Future (1985) 1.726427 Batman (1989) 1.500844 Batman Forever (1995) 0.560036 Batman Returns (1992) 0.667613 Beauty and the Beast (1991) 0.786928 Ben-Hur (1959) 1.052943 Birdcage, The (1996) 0.460942 dtype: float64
# Rank the candidates from highest to lowest weighted score. A title can still
# appear several times at this point, once per rated movie it correlates with.
# print(...) with a single argument behaves the same on Python 2 and 3.
print('sorting...')
simCandidates.sort_values(inplace=True, ascending=False)
print(simCandidates.head(20))
sorting... Empire Strikes Back, The (1980) 5.000000 Star Wars (1977) 5.000000 Empire Strikes Back, The (1980) 3.741763 Star Wars (1977) 3.741763 Return of the Jedi (1983) 3.606146 Return of the Jedi (1983) 3.362779 Raiders of the Lost Ark (1981) 2.693297 Raiders of the Lost Ark (1981) 2.680586 Austin Powers: International Man of Mystery (1997) 1.887164 Sting, The (1973) 1.837692 Bridge on the River Kwai, The (1957) 1.783717 Indiana Jones and the Last Crusade (1989) 1.750535 Cinderella (1950) 1.749598 Back to the Future (1985) 1.726427 Terminator 2: Judgment Day (1991) 1.667662 Frighteners, The (1996) 1.663647 Field of Dreams (1989) 1.642076 Chasing Amy (1997) 1.633938 L.A. Confidential (1997) 1.595323 Dumbo (1941) 1.588281 dtype: float64
# Inspect the candidate titles; the same title may be listed more than once.
simCandidates.index
Index([u'Empire Strikes Back, The (1980)', u'Star Wars (1977)', u'Empire Strikes Back, The (1980)', u'Star Wars (1977)', u'Return of the Jedi (1983)', u'Return of the Jedi (1983)', u'Raiders of the Lost Ark (1981)', u'Raiders of the Lost Ark (1981)', u'Austin Powers: International Man of Mystery (1997)', u'Sting, The (1973)', ... u'Courage Under Fire (1996)', u'What's Eating Gilbert Grape (1993)', u'Murder at 1600 (1997)', u'This Is Spinal Tap (1984)', u'Brazil (1985)', u'Real Genius (1985)', u'Annie Hall (1977)', u'Remains of the Day, The (1993)', u'Piano, The (1993)', u'First Wives Club, The (1996)'], dtype='object', length=515)
# Collapse repeated titles into one entry each by adding their scores
# together, grouping on the series' own index.
simCandidates = simCandidates.groupby(level=0).sum()
simCandidates
12 Angry Men (1957) 0.921447 2001: A Space Odyssey (1968) 1.867302 Absolute Power (1997) 0.427199 Abyss, The (1989) 2.407877 African Queen, The (1951) 2.310987 Air Force One (1997) 1.393921 Aladdin (1992) 2.513417 Alien (1979) 2.195566 Aliens (1986) 2.791258 Amadeus (1984) 2.021675 American President, The (1995) 1.631229 Annie Hall (1977) -0.511775 Apocalypse Now (1979) 0.563009 Apollo 13 (1995) 2.124853 Army of Darkness (1993) 0.519211 Austin Powers: International Man of Mystery (1997) 1.887164 Babe (1995) 1.469222 Back to the Future (1985) 3.357941 Batman (1989) 2.947566 Batman Forever (1995) 1.128369 Batman Returns (1992) 0.966069 Beauty and the Beast (1991) 1.317924 Beavis and Butt-head Do America (1996) 0.664713 Ben-Hur (1959) 1.270708 Big Night (1996) 0.761795 Birdcage, The (1996) 0.763660 Birds, The (1963) 0.755656 Blade Runner (1982) 2.000863 Blues Brothers, The (1980) 2.186257 Bonnie and Clyde (1967) 0.423907 ... Swingers (1996) 1.446551 Taxi Driver (1976) 0.725845 Terminator 2: Judgment Day (1991) 2.838871 Terminator, The (1984) 2.781109 That Thing You Do! (1996) 0.950990 This Is Spinal Tap (1984) -0.636474 Time to Kill, A (1996) -0.117032 Tin Cup (1996) 1.613187 Titanic (1997) 1.332340 To Kill a Mockingbird (1962) 1.131400 Tomorrow Never Dies (1997) 1.276052 Top Gun (1986) 1.684591 Toy Story (1995) 2.261614 Trainspotting (1996) 0.364047 True Lies (1994) 2.225018 Truth About Cats & Dogs, The (1996) 1.207991 Twelve Monkeys (1995) 1.357525 Twister (1996) 0.139979 Under Siege (1992) 0.829797 Unforgiven (1992) 0.998825 Usual Suspects, The (1995) 2.095832 Vertigo (1958) 1.106896 Volcano (1997) 0.229737 What's Eating Gilbert Grape (1993) -0.302473 When Harry Met Sally... (1989) 1.568219 While You Were Sleeping (1995) 2.113377 Willy Wonka and the Chocolate Factory (1971) 2.122292 Wizard of Oz, The (1939) 3.200268 Wrong Trousers, The (1993) 1.081020 Young Frankenstein (1974) 1.892380 Length: 268, dtype: float64
# Order the combined scores from best to worst and show the top ten.
simCandidates = simCandidates.sort_values(ascending=False)
simCandidates.head(10)
Empire Strikes Back, The (1980) 8.877450 Star Wars (1977) 8.870971 Return of the Jedi (1983) 7.178172 Raiders of the Lost Ark (1981) 5.519700 Indiana Jones and the Last Crusade (1989) 3.488028 Bridge on the River Kwai, The (1957) 3.366616 Back to the Future (1985) 3.357941 Sting, The (1973) 3.329843 Cinderella (1950) 3.245412 Field of Dreams (1989) 3.222311 dtype: float64
# Dropping films the user has previously seen.
# errors='ignore' keeps this from raising KeyError when a rated title is not
# among the candidates, e.g. a movie with too few shared ratings to have any
# non-NaN correlation (not even with itself) in corrMatrix.
filteredSims = simCandidates.drop(myRatings.index, errors='ignore')
filteredSims.head(10)
Return of the Jedi (1983) 7.178172 Raiders of the Lost Ark (1981) 5.519700 Indiana Jones and the Last Crusade (1989) 3.488028 Bridge on the River Kwai, The (1957) 3.366616 Back to the Future (1985) 3.357941 Sting, The (1973) 3.329843 Cinderella (1950) 3.245412 Field of Dreams (1989) 3.222311 Wizard of Oz, The (1939) 3.200268 Dumbo (1941) 2.981645 dtype: float64