Recommendation System - Based on User

Day #9 - Recommendation System Based on User

Recommendation Based on User

In [1]:
#import all necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.cross_validation import train_test_split
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
In [2]:
ratings = pd.read_csv('u.data',header=None,sep='\t') #'\t' = separate on the slash and then tab
ratings.head()
Out[2]:
0 1 2 3
0 0 50 5 881250949
1 0 172 5 881250949
2 0 133 1 881250949
3 196 242 3 881250949
4 186 302 3 891717742
In [3]:
r_cols = ['user_id','movie_id','rating']

ratings = pd.read_csv('u.data',sep='\t',names=r_cols,usecols=range(3))
print ratings.head()
m_cols=['movie_id','title']
movies = pd.read_csv('u.item',sep='|',names=m_cols,usecols=range(2))
print movies.head()
   user_id  movie_id  rating
0        0        50       5
1        0       172       5
2        0       133       1
3      196       242       3
4      186       302       3
   movie_id              title
0         1   Toy Story (1995)
1         2   GoldenEye (1995)
2         3  Four Rooms (1995)
3         4  Get Shorty (1995)
4         5     Copycat (1995)
In [4]:
# Merging the dataframes 

df = pd.merge(ratings,movies,on='movie_id')
df.head()
#df['movie_id'].unique()
Out[4]:
user_id movie_id rating title
0 0 50 5 Star Wars (1977)
1 290 50 5 Star Wars (1977)
2 79 50 4 Star Wars (1977)
3 2 50 5 Star Wars (1977)
4 8 50 5 Star Wars (1977)
In [7]:
movieRatings = df.pivot_table(index=['user_id'],columns=['title'],values='rating')
movieRatings.head()
Out[7]:
title 'Til There Was You (1997) 1-900 (1994) 101 Dalmatians (1996) 12 Angry Men (1957) 187 (1997) 2 Days in the Valley (1996) 20,000 Leagues Under the Sea (1954) 2001: A Space Odyssey (1968) 3 Ninjas: High Noon At Mega Mountain (1998) 39 Steps, The (1935) ... Yankee Zulu (1994) Year of the Horse (1997) You So Crazy (1994) Young Frankenstein (1974) Young Guns (1988) Young Guns II (1990) Young Poisoner's Handbook, The (1995) Zeus and Roxanne (1997) unknown � k�ldum klaka (Cold Fever) (1994)
user_id
0 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 NaN NaN 2.0 5.0 NaN NaN 3.0 4.0 NaN NaN ... NaN NaN NaN 5.0 3.0 NaN NaN NaN 4.0 NaN
2 NaN NaN NaN NaN NaN NaN NaN NaN 1.0 NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN 2.0 NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 1664 columns

In [8]:
starWarsRatings = movieRatings['Star Wars (1977)']
starWarsRatings.head(20)
Out[8]:
user_id
0     5.0
1     5.0
2     5.0
3     NaN
4     5.0
5     4.0
6     4.0
7     5.0
8     5.0
9     5.0
10    5.0
11    NaN
12    4.0
13    5.0
14    5.0
15    5.0
16    NaN
17    NaN
18    4.0
19    NaN
Name: Star Wars (1977), dtype: float64
In [10]:
starWarsRatings.value_counts(dropna=False)
Out[10]:
NaN     360
 5.0    326
 4.0    176
 3.0     57
 2.0     16
 1.0      9
Name: Star Wars (1977), dtype: int64
In [13]:
movieRatings[['101 Dalmatians (1996)','Star Wars (1977)']].corr()
Out[13]:
title 101 Dalmatians (1996) Star Wars (1977)
title
101 Dalmatians (1996) 1.000000 0.211132
Star Wars (1977) 0.211132 1.000000
In [14]:
# corrwith calculates the correlation between 1 feature with the rest of the features

movieRatings.corrwith(starWarsRatings)
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\numpy\lib\function_base.py:2995: RuntimeWarning: Degrees of freedom <= 0 for slice
  c = cov(x, y, rowvar)
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\numpy\lib\function_base.py:2929: RuntimeWarning: divide by zero encountered in double_scalars
  c *= 1. / np.float64(fact)
Out[14]:
title
'Til There Was You (1997)                                   0.872872
1-900 (1994)                                               -0.645497
101 Dalmatians (1996)                                       0.211132
12 Angry Men (1957)                                         0.184289
187 (1997)                                                  0.027398
2 Days in the Valley (1996)                                 0.066654
20,000 Leagues Under the Sea (1954)                         0.289768
2001: A Space Odyssey (1968)                                0.230884
3 Ninjas: High Noon At Mega Mountain (1998)                      NaN
39 Steps, The (1935)                                        0.106453
8 1/2 (1963)                                               -0.142977
8 Heads in a Duffel Bag (1997)                             -0.577350
8 Seconds (1994)                                           -0.755929
A Chef in Love (1996)                                       0.868599
Above the Rim (1994)                                       -0.645497
Absolute Power (1997)                                       0.085440
Abyss, The (1989)                                           0.203709
Ace Ventura: Pet Detective (1994)                           0.062689
Ace Ventura: When Nature Calls (1995)                       0.094514
Across the Sea of Time (1995)                              -0.132453
Addams Family Values (1993)                                 0.132264
Addicted to Love (1997)                                     0.028173
Addiction, The (1995)                                       0.507093
Adventures of Pinocchio, The (1996)                         0.111616
Adventures of Priscilla, Queen of the Desert, The (1994)    0.054740
Adventures of Robin Hood, The (1938)                        0.144587
Affair to Remember, An (1957)                               0.225451
African Queen, The (1951)                                   0.230540
Afterglow (1997)                                            0.420084
Age of Innocence, The (1993)                               -0.037176
                                                              ...
Window to Paris (1994)                                           NaN
Wings of Courage (1995)                                          NaN
Wings of Desire (1987)                                     -0.104771
Wings of the Dove, The (1997)                               0.000000
Winnie the Pooh and the Blustery Day (1968)                 0.142924
Winter Guest, The (1997)                                    0.353553
Wishmaster (1997)                                          -0.066932
With Honors (1994)                                         -0.111205
Withnail and I (1987)                                       0.083045
Witness (1985)                                                   NaN
Wizard of Oz, The (1939)                                    0.266335
Wolf (1994)                                                 0.113214
Woman in Question, The (1950)                                    NaN
Women, The (1939)                                          -0.175608
Wonderful, Horrible Life of Leni Riefenstahl, The (1993)    0.707107
Wonderland (1997)                                                NaN
Wooden Man's Bride, The (Wu Kui) (1994)                          NaN
World of Apu, The (Apur Sansar) (1959)                     -0.522233
Wrong Trousers, The (1993)                                  0.216204
Wyatt Earp (1994)                                           0.059560
Yankee Zulu (1994)                                               NaN
Year of the Horse (1997)                                   -1.000000
You So Crazy (1994)                                              NaN
Young Frankenstein (1974)                                   0.192589
Young Guns (1988)                                           0.186377
Young Guns II (1990)                                        0.228615
Young Poisoner's Handbook, The (1995)                      -0.007374
Zeus and Roxanne (1997)                                     0.818182
unknown                                                     0.723123
� k�ldum klaka (Cold Fever) (1994)                               NaN
Length: 1664, dtype: float64
In [15]:
similarMovies = movieRatings.corrwith(starWarsRatings)
#print similarRatings
similarMovies = similarMovies.dropna()
similarMovies
Out[15]:
title
'Til There Was You (1997)                                   0.872872
1-900 (1994)                                               -0.645497
101 Dalmatians (1996)                                       0.211132
12 Angry Men (1957)                                         0.184289
187 (1997)                                                  0.027398
2 Days in the Valley (1996)                                 0.066654
20,000 Leagues Under the Sea (1954)                         0.289768
2001: A Space Odyssey (1968)                                0.230884
39 Steps, The (1935)                                        0.106453
8 1/2 (1963)                                               -0.142977
8 Heads in a Duffel Bag (1997)                             -0.577350
8 Seconds (1994)                                           -0.755929
A Chef in Love (1996)                                       0.868599
Above the Rim (1994)                                       -0.645497
Absolute Power (1997)                                       0.085440
Abyss, The (1989)                                           0.203709
Ace Ventura: Pet Detective (1994)                           0.062689
Ace Ventura: When Nature Calls (1995)                       0.094514
Across the Sea of Time (1995)                              -0.132453
Addams Family Values (1993)                                 0.132264
Addicted to Love (1997)                                     0.028173
Addiction, The (1995)                                       0.507093
Adventures of Pinocchio, The (1996)                         0.111616
Adventures of Priscilla, Queen of the Desert, The (1994)    0.054740
Adventures of Robin Hood, The (1938)                        0.144587
Affair to Remember, An (1957)                               0.225451
African Queen, The (1951)                                   0.230540
Afterglow (1997)                                            0.420084
Age of Innocence, The (1993)                               -0.037176
Air Bud (1997)                                              0.012557
                                                              ...
Whole Wide World, The (1996)                                0.000000
Widows' Peak (1994)                                         0.136743
Wild America (1997)                                         0.040656
Wild Bill (1995)                                           -0.286299
Wild Bunch, The (1969)                                     -0.041299
Wild Reeds (1994)                                          -0.377964
Wild Things (1998)                                         -0.259437
William Shakespeare's Romeo and Juliet (1996)               0.098861
Willy Wonka and the Chocolate Factory (1971)                0.221902
Wings of Desire (1987)                                     -0.104771
Wings of the Dove, The (1997)                               0.000000
Winnie the Pooh and the Blustery Day (1968)                 0.142924
Winter Guest, The (1997)                                    0.353553
Wishmaster (1997)                                          -0.066932
With Honors (1994)                                         -0.111205
Withnail and I (1987)                                       0.083045
Wizard of Oz, The (1939)                                    0.266335
Wolf (1994)                                                 0.113214
Women, The (1939)                                          -0.175608
Wonderful, Horrible Life of Leni Riefenstahl, The (1993)    0.707107
World of Apu, The (Apur Sansar) (1959)                     -0.522233
Wrong Trousers, The (1993)                                  0.216204
Wyatt Earp (1994)                                           0.059560
Year of the Horse (1997)                                   -1.000000
Young Frankenstein (1974)                                   0.192589
Young Guns (1988)                                           0.186377
Young Guns II (1990)                                        0.228615
Young Poisoner's Handbook, The (1995)                      -0.007374
Zeus and Roxanne (1997)                                     0.818182
unknown                                                     0.723123
Length: 1410, dtype: float64
In [16]:
similarMovies.sort_values(ascending=False)
Out[16]:
title
No Escape (1994)                                                                     1.000000
Man of the Year (1995)                                                               1.000000
Hollow Reed (1996)                                                                   1.000000
Commandments (1997)                                                                  1.000000
Cosi (1996)                                                                          1.000000
Stripes (1981)                                                                       1.000000
Golden Earrings (1947)                                                               1.000000
Mondo (1996)                                                                         1.000000
Line King: Al Hirschfeld, The (1996)                                                 1.000000
Outlaw, The (1943)                                                                   1.000000
Hurricane Streets (1998)                                                             1.000000
Scarlet Letter, The (1926)                                                           1.000000
Safe Passage (1994)                                                                  1.000000
Good Man in Africa, A (1994)                                                         1.000000
Full Speed (1996)                                                                    1.000000
Old Lady Who Walked in the Sea, The (Vieille qui marchait dans la mer, La) (1991)    1.000000
Star Wars (1977)                                                                     1.000000
Ed's Next Move (1996)                                                                1.000000
Twisted (1996)                                                                       1.000000
Beans of Egypt, Maine, The (1994)                                                    1.000000
Last Time I Saw Paris, The (1954)                                                    1.000000
Maya Lin: A Strong Clear Vision (1994)                                               1.000000
Designated Mourner, The (1997)                                                       0.970725
Albino Alligator (1996)                                                              0.968496
Angel Baby (1995)                                                                    0.962250
Prisoner of the Mountains (Kavkazsky Plennik) (1996)                                 0.927173
Love in the Afternoon (1957)                                                         0.923381
'Til There Was You (1997)                                                            0.872872
A Chef in Love (1996)                                                                0.868599
Guantanamera (1994)                                                                  0.866025
                                                                                       ...
Pushing Hands (1992)                                                                -1.000000
Lamerica (1994)                                                                     -1.000000
Year of the Horse (1997)                                                            -1.000000
Collectionneuse, La (1967)                                                          -1.000000
Dream Man (1995)                                                                    -1.000000
S.F.W. (1994)                                                                       -1.000000
Nightwatch (1997)                                                                   -1.000000
Squeeze (1996)                                                                      -1.000000
Glass Shield, The (1994)                                                            -1.000000
Slingshot, The (1993)                                                               -1.000000
Lover's Knot (1996)                                                                 -1.000000
Tough and Deadly (1995)                                                             -1.000000
Sliding Doors (1998)                                                                -1.000000
Show, The (1995)                                                                    -1.000000
Nil By Mouth (1997)                                                                 -1.000000
Fall (1997)                                                                         -1.000000
Sudden Manhattan (1996)                                                             -1.000000
Salut cousin! (1996)                                                                -1.000000
Neon Bible, The (1995)                                                              -1.000000
Crossfire (1947)                                                                    -1.000000
Love and Death on Long Island (1997)                                                -1.000000
For Ever Mozart (1996)                                                              -1.000000
Swept from the Sea (1997)                                                           -1.000000
Fille seule, La (A Single Girl) (1995)                                              -1.000000
American Dream (1990)                                                               -1.000000
Theodore Rex (1995)                                                                 -1.000000
I Like It Like That (1994)                                                          -1.000000
Two Deaths (1995)                                                                   -1.000000
Roseanna's Grave (For Roseanna) (1997)                                              -1.000000
Frankie Starlight (1995)                                                            -1.000000
Length: 1410, dtype: float64
In [19]:
movieStats = df.groupby('title').agg({'rating':[np.size,np.mean]})
movieStats.head(10)
Out[19]:
rating
size mean
title
'Til There Was You (1997) 9 2.333333
1-900 (1994) 5 2.600000
101 Dalmatians (1996) 109 2.908257
12 Angry Men (1957) 125 4.344000
187 (1997) 41 3.024390
2 Days in the Valley (1996) 93 3.225806
20,000 Leagues Under the Sea (1954) 72 3.500000
2001: A Space Odyssey (1968) 259 3.969112
3 Ninjas: High Noon At Mega Mountain (1998) 5 1.000000
39 Steps, The (1935) 59 4.050847
In [21]:
popularMovies = movieStats[movieStats['rating']['size']>=100] #Threshold of 100 is arbitrary
popularMovies.sort_values(('rating','mean'),ascending=False)
Out[21]:
rating
size mean
title
Close Shave, A (1995) 112 4.491071
Schindler's List (1993) 298 4.466443
Wrong Trousers, The (1993) 118 4.466102
Casablanca (1942) 243 4.456790
Shawshank Redemption, The (1994) 283 4.445230
Rear Window (1954) 209 4.387560
Usual Suspects, The (1995) 267 4.385768
Star Wars (1977) 584 4.359589
12 Angry Men (1957) 125 4.344000
Citizen Kane (1941) 198 4.292929
To Kill a Mockingbird (1962) 219 4.292237
One Flew Over the Cuckoo's Nest (1975) 264 4.291667
Silence of the Lambs, The (1991) 390 4.289744
North by Northwest (1959) 179 4.284916
Godfather, The (1972) 413 4.283293
Secrets & Lies (1996) 162 4.265432
Good Will Hunting (1997) 198 4.262626
Manchurian Candidate, The (1962) 131 4.259542
Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963) 194 4.252577
Raiders of the Lost Ark (1981) 420 4.252381
Vertigo (1958) 179 4.251397
Titanic (1997) 350 4.245714
Lawrence of Arabia (1962) 173 4.231214
Maltese Falcon, The (1941) 138 4.210145
Empire Strikes Back, The (1980) 368 4.206522
Boot, Das (1981) 201 4.203980
Sling Blade (1996) 136 4.198529
As Good As It Gets (1997) 112 4.196429
Godfather: Part II, The (1974) 209 4.186603
African Queen, The (1951) 152 4.184211
... ... ...
Ace Ventura: Pet Detective (1994) 103 3.048544
Conan the Barbarian (1981) 107 3.046729
Grumpier Old Men (1995) 148 3.040541
Star Trek: The Motion Picture (1979) 117 3.034188
Broken Arrow (1996) 254 3.031496
First Wives Club, The (1996) 160 3.018750
Net, The (1995) 120 3.008333
Evita (1996) 259 2.980695
Natural Born Killers (1994) 128 2.953125
Lost World: Jurassic Park, The (1997) 158 2.943038
Dante's Peak (1997) 240 2.933333
Nutty Professor, The (1996) 163 2.914110
101 Dalmatians (1996) 109 2.908257
Father of the Bride Part II (1995) 128 2.898438
Mars Attacks! (1996) 217 2.847926
Multiplicity (1996) 134 2.835821
Alien 3 (1992) 100 2.830000
Volcano (1997) 219 2.808219
Waterworld (1995) 102 2.803922
Beavis and Butt-head Do America (1996) 156 2.788462
Mimic (1997) 101 2.742574
Down Periscope (1996) 101 2.702970
George of the Jungle (1997) 162 2.685185
Batman Returns (1992) 142 2.683099
Batman Forever (1995) 114 2.666667
Spawn (1997) 143 2.615385
Event Horizon (1997) 127 2.574803
Crash (1996) 128 2.546875
Jungle2Jungle (1997) 132 2.439394
Cable Guy, The (1996) 106 2.339623

338 rows × 2 columns

In [22]:
df = popularMovies.join(pd.DataFrame(similarMovies,columns=['similarity']))
df.head(10)
C:\Users\nwerner\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\reshape\merge.py:551: UserWarning: merging between different levels can give an unintended result (2 levels on the left, 1 on the right)
  warnings.warn(msg, UserWarning)
Out[22]:
(rating, size) (rating, mean) similarity
title
101 Dalmatians (1996) 109 2.908257 0.211132
12 Angry Men (1957) 125 4.344000 0.184289
2001: A Space Odyssey (1968) 259 3.969112 0.230884
Absolute Power (1997) 127 3.370079 0.085440
Abyss, The (1989) 151 3.589404 0.203709
Ace Ventura: Pet Detective (1994) 103 3.048544 0.062689
Adventures of Priscilla, Queen of the Desert, The (1994) 111 3.594595 0.054740
African Queen, The (1951) 152 4.184211 0.230540
Air Force One (1997) 431 3.631090 0.113164
Aladdin (1992) 219 3.812785 0.191621
In [23]:
df.sort_values(['similarity'],ascending=False)[:15]
Out[23]:
(rating, size) (rating, mean) similarity
title
Star Wars (1977) 584 4.359589 1.000000
Empire Strikes Back, The (1980) 368 4.206522 0.748353
Return of the Jedi (1983) 507 4.007890 0.672556
Raiders of the Lost Ark (1981) 420 4.252381 0.536117
Austin Powers: International Man of Mystery (1997) 130 3.246154 0.377433
Sting, The (1973) 241 4.058091 0.367538
Indiana Jones and the Last Crusade (1989) 331 3.930514 0.350107
Pinocchio (1940) 101 3.673267 0.347868
Frighteners, The (1996) 115 3.234783 0.332729
L.A. Confidential (1997) 297 4.161616 0.319065
Wag the Dog (1997) 137 3.510949 0.318645
Dumbo (1941) 123 3.495935 0.317656
Bridge on the River Kwai, The (1957) 165 4.175758 0.316580
Philadelphia Story, The (1940) 104 4.115385 0.314272
Miracle on 34th Street (1994) 101 3.722772 0.310921
In [ ]:

rss facebook twitter github youtube mail spotify lastfm instagram linkedin google google-plus pinterest medium vimeo stackoverflow reddit quora quora