$$\hat{r}_{ui} = \mu_i + \frac{ \sum\limits_{j \in N^k_u(i)} \text{sim}(i, j) \cdot (r_{uj} - \mu_j)} {\sum\limits_{j \in N^k_u(i)} \text{sim}(i, j)}$$
$$\mathrm{cosine}(x, y) =\frac{x \cdot y}{\|x\| \, \|y\|} = \frac{\sum_{i=1}^{n} x_i y_i} {\sqrt{\sum_{i=1}^{n} x_i^2}\;\sqrt{\sum_{i=1}^{n} y_i^2}}$$
$$\rho_{x,y} = \frac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})} {\sqrt{\sum_{i=1}^{n} (x_i - \bar{x})^2} \sqrt{\sum_{i=1}^{n} (y_i - \bar{y})^2}}$$
| User | Item 0 | Item 1 | Item 2 | Item 3 | Item 4 |
|---|---|---|---|---|---|
| User 0 | 4 | 3 | 2 | 5 | ? |
| User 1 | 5 | 4 | ? | 4 | 3 |
| User 2 | 4 | 5 | 3 | 5 | 4 |
| User 3 | 5 | 3 | 2 | 4 | 5 |
conda install -c conda-forge scikit-surprise
import pandas as pd
from surprise import Dataset, Reader, KNNWithMeans
# Toy user-item ratings in long format: position k of the three lists is one
# (user, item, rating) observation. The "?" cells of the ratings table
# (user 0 / item 4 and user 1 / item 2) are left out, so there are 18 rows
# covering users 0-3 and items 0-4.
ratings = {
    'user': [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
    'item': [0, 1, 2, 3, 0, 1, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4],
    'rating': [4, 3, 2, 5, 5, 4, 4, 3, 4, 5, 3, 5, 4, 5, 3, 2, 4, 5],
}
# Columns stay in (user, item, rating) order, which is the order
# Dataset.load_from_df reads them in.
df = pd.DataFrame(ratings)
# Tell Surprise that ratings live on a 1-5 scale, then wrap the DataFrame.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df, reader)
# Train on every available rating (no hold-out split).
trainset = data.build_full_trainset()
# Mean-centred k-NN with the 2 nearest neighbours and cosine similarity.
# Surprise documents the option as 'user_based' (True = similarities are
# computed between users rather than between items).
knn = KNNWithMeans(k=2, sim_options={'name': 'cosine', 'user_based': True})
knn.fit(trainset)
# Estimate the missing rating of user 0 for item 4 (raw ids from the DataFrame).
result = knn.predict(uid=0, iid=4)
# Notebook-style display of the estimate, the per-user (or per-item) means
# and the similarity matrix learned by fit().
result.est, knn.means, knn.sim