SLIM  1.0
Sparse Linear Methods (SLIM) for top-n recommender systems
 All Data Structures Files Functions Variables Typedefs Macros Pages
slim_predict.c
Go to the documentation of this file.
1 /**************************************************************/
2 /*! \file
3 
4  \brief This file contains all the routines for SLIM testing
5 
6 */
7 /**************************************************************/
8 
9 
10 #include<slim.h>
11 
12 
13 /**************************************************************/
14 /*! \brief SLIM testing
15 
16  \details This routine contains the testing method for SLIM
17 
18  \param[in] ctrl A ctrl structure which contains all the
19  Parameters for SLIM testing
20  \param[in] train The training data, which has been used
21  to learn the model
22  \param[in] test The testing data
23  \param[in] model The model
24  */
25 /**************************************************************/
26 void slim_predict(ctrl_t * ctrl, gk_csr_t * train, gk_csr_t * test, gk_csr_t * model){
27 
28  printf("model->nrows = %d, model->ncols = %d\n", model->nrows, model->ncols);
29  /* sanity check */
30  model->ncols = model->nrows;
31  gk_csr_CreateIndex(model, GK_CSR_COL);
32 
33  double * eval = slim_test(ctrl, model, train, test);
34 
35  /* print the results */
36  for (int j = 0; j < ctrl->nratings; j ++)
37  printf("For rating value %3d HR = %.5f ARHR = %.5f cumulative HR = %.5f ARHR = %.5f\n",
38  j+1, eval[j*4], eval[j*4+1], eval[j*4+2], eval[j*4+3]);
39 
40 
41  /* clean up */
42  gk_free((void **)&eval, LTERM);
43 
44 
45 }
46 
47 
48 
49 /**************************************************************/
50 /*! \brief Top-N recommendations and evaluations
51 
52  \param[in] ctrl A ctrl structure
53  \param[in] model A model
54  \param[in] train The training data from which the model is
55  learned
56  \param[in] test The testing data
57  \return eval A set of evaluations
58  */
59 /**************************************************************/
60 double * slim_test(ctrl_t * ctrl, gk_csr_t * model,
61  gk_csr_t * train, gk_csr_t * test){
62 
63  int nu = test->nrows;
64  int nhits = 0;
65  double arh = 0;
66  int n = 0;
67 
68  ctimer_t * timer = gk_malloc(sizeof(ctimer_t), "malloc timer");
69  start_timer(timer);
70 
71  /* evaluation results for return */
72  double * eval = gk_malloc(sizeof(double)*(ctrl->nratings)*4, "malloc eval");
73  gk_dset(ctrl->nratings*4, 0, eval);
74 
75  /* number of testing instances for each rating value */
76  int * nr = gk_malloc(sizeof(int)*ctrl->nratings, "malloc nr");
77  gk_iset(ctrl->nratings, 0, nr);
78 
79  int ncols = gk_max(train->ncols, model->ncols);
80  int * nc = gk_malloc(sizeof(int)*ncols, "malloc nc");
81  gk_iset(ncols, 0, nc);
82  int * nhc = gk_malloc(sizeof(int)*ncols, "malloc nhc");
83  gk_iset(ncols, 0, nhc);
84 
85  /* auxiliary_space */
86  int * iidx = NULL;
87 
88  /* output file for predictions */
89  FILE * pfile = NULL;
90  if (ctrl->pred_file){
91  pfile = gk_fopen(ctrl->pred_file, "w", "pred file");
92  printf("Output predictions to %s file...\n", ctrl->pred_file);
93  }
94 
95  /* predictions for all the users */
96  for (int u = 0; u < nu; u ++){
97 
98  /* show the process */
99  if (u % 1000 == 0) {
100  if (ctrl->dbglvl == 0){
101  printf("."); fflush(stdout);
102  }
103  }
104 
105  /* no testing instances for this user */
106  if (test->rowptr[u+1] - test->rowptr[u] == 0) {
107  if (ctrl->pred_file)
108  fprintf(pfile, "\n");
109  continue;
110  }
111  n ++;
112 
113 
114  /* top-n recommendation */
115  gk_dkv_t * rcmd = NULL;
116  int nrcmd = 0;
117  nrcmd = suggest_predict(ctrl, model, &iidx, train, u, &rcmd);
118 
119  /* stats for the recommendation */
120  for (int kk = test->rowptr[u]; kk < test->rowptr[u+1]; kk ++){
121 
122  int r = (int)(test->rowval[kk]); /* assume all ratings are integers [1, 2, ..., nratings] */
123  nr[r-1] ++ ;
124 
125  nc[test->rowind[kk]] ++;
126  }
127 
128 
129  /* evaluations */
130  for (int jj = 0; jj < nrcmd; jj ++){
131 
132  /* output the predictions */
133  if (ctrl->pred_file)
134  fprintf(pfile, "%d %.5f ", (int)rcmd[jj].val+1, rcmd[jj].key);
135 
136 
137  for (int kk = test->rowptr[u]; kk < test->rowptr[u+1]; kk ++){
138 
139  int r = (int)(test->rowval[kk]); /* assume all ratings are integers [1, 2, ..., nratings] */
140 
141 
142  /* hit hit */
143  if (rcmd[jj].val == test->rowind[kk]){
144 
145  nhc[test->rowind[kk]] ++;
146 
147  /* overall hit rates */
148  nhits ++; arh += 1.0/(double)(jj + 1) ;
149  /* hit rates on different ratings */
150  eval[(r - 1)*4 + 0] += 1.0; /* hit rate on rating r */
151  eval[(r - 1)*4 + 1] += 1.0/(double)(jj + 1) ; /* arh on rating r */
152  eval[(r - 1)*4 + 2] = eval[(r - 1)*4 + 0];
153  eval[(r - 1)*4 + 3] = eval[(r - 1)*4 + 1];
154 
155  }
156  }
157 
158  }
159 
160  /* finalize the prediction output */
161  if (ctrl->pred_file)
162  fprintf(pfile, "\n");
163 
164 
165  /* clean up */
166  gk_free((void **)&rcmd, LTERM);
167 
168  }
169 
170  /* end timing */
171  printf("\n");
172  end_timer(timer);
173  display_timer(timer, "SLIM prediction");
174 
175 
176  /* all stats */
177  for (int i = 0; i < ctrl->nratings; i ++){
178  if (nr[i] > 0){
179  eval[i*4 + 0] /= (double)nr[i];
180  eval[i*4 + 1] /= (double)nr[i];
181  }
182  }
183  /* cumulative stats */
184  for (int i = ctrl->nratings - 2; i >= 0; i --){
185  nr[i] += nr[i+1]; /* cumulative counts */
186  eval[i*4 + 2] += eval[(i+1)*4 + 2]; /* cumulative hit counts */
187  eval[i*4 + 3] += eval[(i+1)*4 + 3]; /* cumulative rhr counts */
188  }
189  for (int i = 0; i < ctrl->nratings; i ++){
190  if (nr[i] > 0){
191  eval[i*4 + 2] /= (double)nr[i];
192  eval[i*4 + 3] /= (double)nr[i];
193  }
194  }
195 
196 
197  /* finish up */
198  if (ctrl->pred_file)
199  gk_fclose(pfile);
200  gk_free((void **)&nc, LTERM);
201  gk_free((void **)&nhc, LTERM);
202  gk_free((void **)&nr, LTERM);
203  gk_free((void **)&timer, LTERM);
204  gk_free((void **)&iidx, LTERM);
205 
206 
207  return eval;
208 
209 }
210 
211 
212 
213 /**************************************************************/
214 /*! \brief Top-N recommendation for a user
215 
216  \param[in] ctrl A ctrl structure
217  \param[in] model A model
218  \param[in] iidx An auxiliary array for efficient recommendations
219  \param[in] train Training data from which the model is learned
220  \param[in] u The index of the user for which the top-n
221  recommendations are generated
222  \param[out] rcmd The list of recommendations, in which the
223  keys are the recommendation scores and the
224  values are the item indices
225  \return int The actual number of recommendations
226  */
227 /**************************************************************/
228 int suggest_predict(ctrl_t * ctrl, gk_csr_t * model, int ** iidx,
229  gk_csr_t * train, int u, gk_dkv_t ** rcmd){
230 
231 
232  if (model->colptr == NULL)
233  gk_csr_CreateIndex(model, GK_CSR_COL);
234 
235  int ni = train->ncols;
236 
237  if (*iidx == NULL)
238  *iidx = gk_malloc(sizeof(int)*ni, "malloc *iidx");
239 
240  gk_iset(ni, -1, *iidx);
241 
242  int nuitrn = train->rowptr[u+1] - train->rowptr[u];
243  /* special case when no training data, thus no recommendations */
244  if (nuitrn == 0){
245  *rcmd = NULL;
246  return 0;
247  }
248 
249  for (int ii = 0; ii < nuitrn; ii ++)
250  *(*iidx + *(train->rowptr[u] + ii + train->rowind)) -= 1;
251 
252 
253  gk_dkv_t * ccandb = gk_malloc(sizeof(gk_dkv_t)*ni, "malloc ccandb");
254  int nrcmd = 0;
255 
256  /* efficient recommendations */
257  nuitrn = train->rowptr[u+1] - train->rowptr[u];
258  for (int i = 0; i < nuitrn; i ++){
259  int ii = *(train->rowptr[u] + i + train->rowind);
260  for (int j = 0; j < model->colptr[ii+1] - model->colptr[ii]; j ++){
261  int jj = *(model->colptr[ii] + j + model->colind);
262  if ((*iidx)[jj] < -1) continue;
263  if ((*iidx)[jj] == -1){
264  (*iidx)[jj] = nrcmd;
265  ccandb[nrcmd].key = *(model->colptr[ii] + j + model->colval) * 1.0;
266  ccandb[nrcmd].val = jj;
267  nrcmd ++;
268  }else{
269  ccandb[(*iidx)[jj]].key += *(model->colptr[ii] + j + model->colval) * 1.0;
270  }
271  }
272  }
273 
274  /* sorting */
275  gk_dkvsortd(nrcmd, ccandb);
276  int nrcmd2 = gk_min(nrcmd, ctrl->topn);
277  *rcmd = ccandb;
278 
279 
280  return nrcmd2;
281 
282 }
283