Contents
ECサイト | | 映画 | |
---|---|---|---|
商品を購入した | 1 | 映画を見た | 1.0 – 5.0 |
商品を購入していない | 0 | 映画を見ていない | 0 |
#!/usr/bin/python
"""Read one rating from the ``critics`` data and overwrite another.

``critics`` is indexed as ``critics[person][item]``.  The single-argument
``print(...)`` form is used so the script runs on Python 2 and Python 3.
"""
from database import critics

# Read evaluation
print(critics['Lisa Rose']['Lady in the Water'])

# Set value
critics['Toby']['Snakes on a Plane'] = 5.0
print(critics['Toby'])
#!/usr/bin/python
from math import sqrt


#
# Calculate similarity between person1, person2
# using euclid distance
#
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    Args:
        prefs: nested mapping indexed as ``prefs[person][item]`` whose values
            are numeric ratings.
        person1: first key of ``prefs``.
        person2: second key of ``prefs``.

    Returns:
        ``0`` when the two people have no item in common, otherwise
        ``1.0 / (1.0 + d)`` where ``d`` is the sum of squared rating
        differences over the shared items.  Identical ratings give ``1.0``.
    """
    # Get the list of items evaluated by both people (computed once and
    # reused for the emptiness check and the distance).
    shared = [item for item in prefs[person1] if item in prefs[person2]]

    # Nothing in common
    if not shared:
        return 0

    # Sum of squared differences; no square root is taken, so this is the
    # squared Euclidean distance.
    distance = sum(pow(prefs[person1][item] - prefs[person2][item], 2)
                   for item in shared)
    return 1.0 / (1.0 + distance)


# main
if __name__ == '__main__':
    # The sample data is only needed for this demo, so it is imported here;
    # importing sim_distance from this module does not require it.
    from database import critics

    print(sim_distance(critics, 'Lisa Rose', 'Gene Seymour'))
#!/usr/bin/python
from math import sqrt


#
# Calculate pearson correlation between p1 and p2
#
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation of two people's shared ratings.

    Args:
        prefs: nested mapping indexed as ``prefs[person][item]`` whose values
            are numeric ratings.
        p1: first key of ``prefs``.
        p2: second key of ``prefs``.

    Returns:
        ``0`` when there are no shared items or when either person's shared
        ratings have no spread; otherwise the correlation coefficient
        (covariance term divided by the product of the two spreads).
    """
    # Get item list: only items rated by both people take part.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    n = len(shared)

    # return 0 when items are nothing
    if n == 0:
        return 0

    # Mean rating of each person over the shared items.
    ave1 = sum(prefs[p1][it] for it in shared) / float(n)
    ave2 = sum(prefs[p2][it] for it in shared) / float(n)

    # Spread of each person's ratings around that person's OWN mean.
    # (p2's deviations must be taken from ave2, not ave1.)
    dev1 = sqrt(sum(pow(prefs[p1][it] - ave1, 2) for it in shared))
    dev2 = sqrt(sum(pow(prefs[p2][it] - ave2, 2) for it in shared))

    # calculate covariance
    cov = sum((prefs[p1][it] - ave1) * (prefs[p2][it] - ave2)
              for it in shared)

    # calculate similarity; a zero spread would make the ratio undefined
    denominator = dev1 * dev2
    if denominator == 0:
        return 0
    return cov / denominator


if __name__ == '__main__':
    # The sample data is only needed for this demo, so it is imported here;
    # importing sim_pearson from this module does not require it.
    from database import critics

    print(sim_pearson(critics, 'Lisa Rose', 'Gene Seymour'))
#!/usr/bin/python
from database import critics
from pearson import sim_pearson
from euclid import sim_distance


#
# Calculate weighed mean, and recommend
#
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Rank the items ``person`` has not rated by a similarity-weighted mean.

    Args:
        prefs: nested mapping indexed as ``prefs[person][item]`` whose values
            are numeric ratings.
        person: key of ``prefs`` to build recommendations for.
        similarity: callable invoked as ``similarity(prefs, person, other)``
            that returns a number; defaults to ``sim_pearson``.

    Returns:
        A list of ``(score, item)`` tuples sorted from highest to lowest,
        where ``score`` is the sum of ``rating * similarity`` over the other
        people divided by the sum of those similarities.
    """
    totals = {}    # item -> sum of (rating * similarity)
    simSums = {}   # item -> sum of similarities that contributed

    for other in prefs:
        if other == person:
            continue
        sim = similarity(prefs, person, other)

        # ignore people whose similarity is zero or negative
        if sim <= 0:
            continue

        for item in prefs[other]:
            # Only score items the person has not rated (missing or 0).
            if item not in prefs[person] or prefs[person][item] == 0:
                totals[item] = totals.get(item, 0) + prefs[other][item] * sim
                simSums[item] = simSums.get(item, 0) + sim

    # make normalized list, best score first (simSums[item] > 0 because only
    # strictly positive similarities are accumulated)
    rankings = sorted(
        ((total / simSums[item], item) for item, total in totals.items()),
        reverse=True)
    return rankings


if __name__ == '__main__':
    print(getRecommendations(critics, 'Toby'))
#!/usr/bin/python
from database import critics
from pearson import sim_pearson
from euclid import sim_distance


#
# Transform key and value
#
def transformPrefs(prefs):
    """Swap the two key levels of a nested rating mapping.

    Args:
        prefs: nested mapping indexed as ``prefs[person][item]``.

    Returns:
        A new dict indexed as ``result[item][person]`` holding the same
        values; ``prefs`` itself is not modified.
    """
    result = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            result.setdefault(item, {})[person] = rating
    return result


#
# Make ranking
#
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the ``n`` keys of ``prefs`` most similar to ``person``.

    Args:
        prefs: nested mapping indexed as ``prefs[key][subkey]``.
        person: key of ``prefs`` to compare every other key against.
        n: maximum number of results to return.
        similarity: callable invoked as ``similarity(prefs, person, other)``
            that returns a number; defaults to ``sim_pearson``.

    Returns:
        A list of at most ``n`` ``(score, other)`` tuples, highest score
        first.
    """
    scores = sorted(
        ((similarity(prefs, person, other), other)
         for other in prefs if other != person),
        reverse=True)
    return scores[0:n]


if __name__ == '__main__':
    # Transposing the data makes the items the top-level keys, so the same
    # ranking function compares items instead of people.
    movies = transformPrefs(critics)
    print(topMatches(movies, 'Superman Returns'))
#!/usr/bin/python
# -*- coding: utf-8 -*-
# NOTE: an article sentence was fused onto the front of this script in the
# source and is cut off mid-sentence there.  It is kept verbatim on the next
# line (roughly: "Item-based filtering is, in general, ... on sparse data
# sets"):
# アイテムベースのフィルタリングは、一般的に、疎なデータセットに対して
from database import critics
from itemsim import transformPrefs,topMatches
from euclid import sim_distance


#
# calculate similarity between item and item
#
def calculateSimilarItems(prefs, n=10):
    """Build, for every item, the list of items most similar to it.

    Args:
        prefs: nested mapping indexed as ``prefs[person][item]``.
        n: number of similar items to keep per item.

    Returns:
        A dict mapping each item to the result of
        ``topMatches(itemPrefs, item, n=n, similarity=sim_distance)``, where
        ``itemPrefs`` is ``transformPrefs(prefs)``.
    """
    result = {}
    itemPrefs = transformPrefs(prefs)
    total = len(itemPrefs)
    for count, item in enumerate(itemPrefs, 1):
        # Progress report every 100 items.
        if count % 100 == 0:
            print("%d / %d" % (count, total))
        result[item] = topMatches(itemPrefs, item, n=n,
                                  similarity=sim_distance)
    return result


#
# Recommend on a item bases
#
def getRecommendedItems(prefs, itemMatch, user):
    """Rank the items ``user`` has not rated, using item-to-item similarity.

    Args:
        prefs: nested mapping indexed as ``prefs[person][item]`` whose values
            are numeric ratings.
        itemMatch: mapping of item -> iterable of ``(similarity, item2)``
            pairs, as produced by ``calculateSimilarItems``.
        user: key of ``prefs`` to build recommendations for.

    Returns:
        A list of ``(score, item)`` tuples sorted from highest to lowest,
        where ``score`` is the sum of ``similarity * rating`` divided by the
        sum of similarities.  Candidates whose similarities sum to zero are
        left out, because their weighted mean is undefined.
    """
    userRatings = prefs[user]
    scores = {}     # candidate item -> sum of (similarity * rating)
    totalSim = {}   # candidate item -> sum of similarities

    for item, rating in userRatings.items():
        for similarity, item2 in itemMatch[item]:
            # Items the user already rated are not candidates.
            if item2 in userRatings:
                continue
            scores[item2] = scores.get(item2, 0) + similarity * rating
            totalSim[item2] = totalSim.get(item2, 0) + similarity

    # Normalize; a zero similarity sum (e.g. only zero-similarity
    # neighbours) would otherwise raise ZeroDivisionError.
    rankings = sorted(
        ((score / totalSim[item], item)
         for item, score in scores.items() if totalSim[item] != 0),
        reverse=True)
    return rankings


if __name__ == '__main__':
    itemsim = calculateSimilarItems(critics)
    print(getRecommendedItems(critics, itemsim, 'Toby'))
【楽天ブックスならいつでも送料無料】集合知プログラミング [ トビ-・セガラン ] 価格:3,672円(税込、送料込) |