Skip to content

Commit 74a0c3e

Browse files
author
Jim Avery
committed
Query time is improved; queries can now filter by score; the system now predicts ratings for items already rated.
1 parent cbbd353 commit 74a0c3e

25 files changed

+603
-28435
lines changed

PostgreSQL/recdb_regression_test.sql

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/* ItemCosCF: query with a prebuilt recommender, then repeat the same query after dropping it. */
2+
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING itemcoscf;
3+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1;
4+
DROP RECOMMENDER MovieRec;
5+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1;
6+
7+
/* ItemPearCF: query with a prebuilt recommender, then repeat the same query after dropping it. */
8+
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING itempearcf;
9+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itempearcf WHERE userid = 1;
10+
DROP RECOMMENDER MovieRec;
11+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itempearcf WHERE userid = 1;
12+
13+
/* UserCosCF: query with a prebuilt recommender, then repeat the same query after dropping it. */
14+
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING usercoscf;
15+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING usercoscf WHERE userid = 1;
16+
DROP RECOMMENDER MovieRec;
17+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING usercoscf WHERE userid = 1;
18+
19+
/* UserPearCF: query with a prebuilt recommender, then repeat the same query after dropping it. */
20+
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING userpearcf;
21+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING userpearcf WHERE userid = 1;
22+
DROP RECOMMENDER MovieRec;
23+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING userpearcf WHERE userid = 1;
24+
25+
/* SVD: query with a prebuilt recommender, then repeat the same query after dropping it. */
26+
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING svd;
27+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING svd WHERE userid = 1;
28+
DROP RECOMMENDER MovieRec;
29+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING svd WHERE userid = 1;
30+
31+
/* Miscellaneous (itemcoscf recommender): IN-list and range filters, joins against ml_items, ORDER BY with LIMIT, and a filter on ratingval. */
32+
CREATE RECOMMENDER MovieRec ON ml_ratings USERS FROM userid ITEMS FROM itemid EVENTS FROM ratingval USING itemcoscf;
33+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid IN (1,2,3,5,9) AND itemid < 7;
34+
SELECT r.itemid,r.ratingval,i.name,i.genre FROM ml_ratings r, ml_items i RECOMMEND r.itemid TO r.userid ON r.ratingval USING itemcoscf WHERE r.userid = 1 AND r.itemid = i.itemid AND i.genre ILIKE '%drama%';
35+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1 ORDER BY ratingval DESC LIMIT 10;
36+
SELECT r.itemid,r.ratingval,i.name,i.genre FROM ml_ratings r, ml_items i RECOMMEND r.itemid TO r.userid ON r.ratingval USING itemcoscf WHERE r.userid = 1 AND r.itemid = i.itemid AND i.genre ILIKE '%action%' ORDER BY ratingval DESC LIMIT 5;
37+
SELECT * FROM ml_ratings RECOMMEND itemid TO userid ON ratingval USING itemcoscf WHERE userid = 1 AND ratingval >= 4.5;
38+
DROP RECOMMENDER MovieRec;

PostgreSQL/src/backend/executor/execRecommend.c

+54-63
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ ExecFilterRecommend(RecScanState *recnode,
305305
ExecScanRecheckMtd recheckMtd)
306306
{
307307
ExprContext *econtext;
308-
List *qual;
308+
List *qual, *userqual;
309309
ProjectionInfo *projInfo;
310310
ExprDoneCond isDone;
311311
TupleTableSlot *resultSlot;
@@ -319,6 +319,7 @@ ExecFilterRecommend(RecScanState *recnode,
319319
* Fetch data from node
320320
*/
321321
qual = node->ps.qual;
322+
userqual = recnode->userqual;
322323
projInfo = node->ps.ps_ProjInfo;
323324
econtext = node->ps.ps_ExprContext;
324325

@@ -373,7 +374,7 @@ ExecFilterRecommend(RecScanState *recnode,
373374
if (recnode->finished) {
374375
recnode->finished = false;
375376
recnode->userNum = 0;
376-
recnode->itemNum = 0;
377+
recnode->fullItemNum = 0;
377378
return NULL;
378379
}
379380

@@ -434,51 +435,36 @@ ExecFilterRecommend(RecScanState *recnode,
434435

435436
/*
436437
* We now have a blank tuple slot that we need to fill with data.
437-
* We have a working user ID, but not a valid item list. We'd like to
438-
* use the filter to determine if this is a good user, but we can't
439-
* do that without an item, in many cases. The solution is to add in
440-
* dummy items, then compare it against the filter. If a given user ID
441-
* doesn't make it past the filter with any item ID, then that user is
442-
* being filtered out, and we'll move on to the next.
438+
* We have a working user ID, but not a valid item list. This is where
439+
* we use our custom user-focused WHERE clause to filter out users that
440+
* do not pass our qualifications.
443441
*/
444442
if (recnode->newUser) {
445-
recnode->fullItemNum = 0;
446-
itemindex = recnode->fullItemNum;
447-
itemID = recnode->fullItemList[itemindex];
448-
449443
slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
450-
slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
444+
slot->tts_values[recnode->itematt] = Int32GetDatum(-1);
451445
slot->tts_values[recnode->eventatt] = Int32GetDatum(-1);
452446

453447
/* We have a preliminary slot - let's test it. */
454-
while (qual && !ExecQual(qual, econtext, false)) {
455-
/* We failed the test. Try the next item. */
456-
recnode->fullItemNum++;
457-
if (recnode->fullItemNum >= recnode->fullTotalItems) {
458-
/* If we've reached the last item, move onto the next user.
459-
* If we've reached the last user, we're done. */
460-
InstrCountFiltered1(node, recnode->fullTotalItems);
461-
recnode->userNum++;
462-
recnode->newUser = true;
448+
while (userqual && !ExecQual(userqual, econtext, false)) {
449+
/* We failed the test. Try the next user.
450+
* If we've reached the last user, we're done. */
451+
InstrCountFiltered1(node, recnode->fullTotalItems);
452+
recnode->userNum++;
453+
recnode->newUser = true;
454+
455+
if (recnode->userNum >= recnode->totalUsers) {
456+
recnode->userNum = 0;
463457
recnode->fullItemNum = 0;
464-
if (recnode->userNum >= recnode->totalUsers) {
465-
recnode->userNum = 0;
466-
recnode->itemNum = 0;
467-
return NULL;
468-
}
469-
userindex = recnode->userNum;
470-
userID = recnode->userList[userindex];
458+
return NULL;
471459
}
472460

473-
itemindex = recnode->fullItemNum;
474-
itemID = recnode->fullItemList[itemindex];
461+
userindex = recnode->userNum;
462+
userID = recnode->userList[userindex];
475463
slot->tts_values[recnode->useratt] = Int32GetDatum(userID);
476-
slot->tts_values[recnode->itematt] = Int32GetDatum(itemID);
477464
}
478465

479466
/* If we get here, then we found a user who will be actually
480-
* returned in the results. One quick reset here. */
481-
recnode->fullItemNum = 0;
467+
* returned in the results. */
482468
}
483469

484470
/* Mark the user ID and index. */
@@ -495,14 +481,26 @@ ExecFilterRecommend(RecScanState *recnode,
495481
}
496482

497483
/* Now replace the item ID, if the user is valid. Otherwise,
498-
* leave the item ID as is, as it doesn't matter what it is. */
499-
if (recnode->validUser)
484+
* leave the item ID as is, as it doesn't matter what it is. We'll
485+
* move on to the next user, as well. */
486+
if (recnode->validUser) {
487+
itemID = recnode->fullItemList[recnode->fullItemNum];
488+
itemindex = recnode->fullItemNum;
489+
} else {
490+
recnode->userNum++;
491+
recnode->newUser = true;
492+
recnode->fullItemNum = 0;
493+
if (recnode->userNum >= recnode->totalUsers)
494+
recnode->finished = true;
495+
continue;
496+
}
497+
/* if (recnode->validUser)
500498
itemID = recnode->itemList[recnode->itemNum];
501499
while (recnode->fullItemList[recnode->fullItemNum] < itemID)
502500
recnode->fullItemNum++;
503501
itemindex = recnode->fullItemNum;
504502
if (recnode->fullItemList[itemindex] > itemID)
505-
elog(ERROR, "critical item mismatch in ExecRecommend");
503+
elog(ERROR, "critical item mismatch in ExecRecommend");*/
506504

507505
/* Plug in the data, marking those columns full. We also need to
508506
* mark the rating column with something temporary. */
@@ -514,21 +512,20 @@ ExecFilterRecommend(RecScanState *recnode,
514512
* If that's the case, we need to calculate it before we do the
515513
* qual filtering. Also, if we're doing a JoinRecommend, we should
516514
* not calculate the RecScore in this node. In the current version
517-
* of RecDB, an OP_NOFILTER shouldn't be allowed. */
518-
if (attributes->opType == OP_NOFILTER)
515+
* of RecDB, special joins don't exist, so that's no problem. */
516+
if (attributes->noFilter)
519517
applyRecScore(recnode, slot, itemID, itemindex);
520518

521519
/* Move onto the next item, for next time. If we're doing a RecJoin,
522520
* though, we'll move onto the next user instead. */
523-
recnode->itemNum++;
524-
if (recnode->itemNum >= recnode->totalItems ||
521+
recnode->fullItemNum++;
522+
if (recnode->fullItemNum >= recnode->fullTotalItems ||
525523
attributes->opType == OP_JOIN ||
526524
attributes->opType == OP_GENERATEJOIN) {
527525
/* If we've reached the last item, move onto the next user.
528526
* If we've reached the last user, we're done. */
529527
recnode->userNum++;
530528
recnode->newUser = true;
531-
recnode->itemNum = 0;
532529
recnode->fullItemNum = 0;
533530
if (recnode->userNum >= recnode->totalUsers)
534531
recnode->finished = true;
@@ -558,7 +555,7 @@ ExecFilterRecommend(RecScanState *recnode,
558555
* Found a satisfactory scan tuple. This is usually when
559556
* we will calculate and apply the RecScore.
560557
*/
561-
if (attributes->opType == OP_FILTER || attributes->opType == OP_GENERATE)
558+
if (!attributes->noFilter)
562559
applyRecScore(recnode, slot, itemID, itemindex);
563560

564561
if (projInfo)
@@ -701,18 +698,16 @@ InitializeRecommender(RecScanState *recstate) {
701698
recstate->totalUsers = getTupleInt(hslot,"count");
702699
recathon_queryEnd(queryDesc,recathoncontext);
703700

704-
/* In the event that there are no user IDs, our ratings table is empty, so
705-
* we can't do anything. */
701+
/* In the event that there are no user IDs, we can't do anything. */
706702
if (recstate->totalUsers <= 0)
707-
elog(ERROR, "no ratings in table %s, cannot predict ratings",
708-
attributes->eventtable);
703+
elog(ERROR, "no users found, cannot predict ratings");
709704

710705
recstate->userList = (int*) palloc(recstate->totalUsers*sizeof(int));
711706
recstate->userNum = 0;
712707

713708
/* Now for the actual query. */
714-
sprintf(querystring,"select distinct %s from %s order by %s;",
715-
attributes->userkey,attributes->eventtable,attributes->userkey);
709+
sprintf(querystring,"select distinct %s from %s;",
710+
attributes->userkey,attributes->eventtable);
716711
queryDesc = recathon_queryStart(querystring,&recathoncontext);
717712
planstate = queryDesc->planstate;
718713

@@ -734,16 +729,14 @@ InitializeRecommender(RecScanState *recstate) {
734729
/* Quick error protection. */
735730
recstate->totalUsers = i;
736731
if (recstate->totalUsers <= 0)
737-
elog(ERROR, "no ratings in table %s, cannot predict ratings",
738-
attributes->eventtable);
732+
elog(ERROR, "no users found, cannot predict ratings");
739733

740734
/* Lastly, initialize the attributes->userID. */
741735
attributes->userID = recstate->userList[0] - 1;
742736
}
743737

744-
/* Next, for annoying and convoluted reasons, we need a full list of all the
745-
* items in the rating table. This will help us circumvent some filter issues
746-
* while remaining as efficient as we can manage. */
738+
/* Next, we need a full list of all the items in the rating table. This will tell
739+
* us what items to generate ratings for. */
747740
if ((attributes->opType != OP_GENERATE && attributes->opType != OP_GENERATEJOIN) ||
748741
attributes->method == userCosCF ||
749742
attributes->method == userPearCF) {
@@ -755,11 +748,9 @@ InitializeRecommender(RecScanState *recstate) {
755748
recstate->fullTotalItems = getTupleInt(hslot,"count");
756749
recathon_queryEnd(queryDesc,recathoncontext);
757750

758-
/* In the event that there are no item IDs, our ratings table is empty, so
759-
* we can't do anything. */
751+
/* In the event that there are no item IDs, we can't do anything. */
760752
if (recstate->fullTotalItems <= 0)
761-
elog(ERROR, "no ratings in table %s, cannot predict ratings",
762-
attributes->eventtable);
753+
elog(ERROR, "no items found, cannot predict ratings");
763754

764755
recstate->fullItemList = (int*) palloc(recstate->fullTotalItems*sizeof(int));
765756
recstate->fullItemNum = 0;
@@ -788,8 +779,7 @@ InitializeRecommender(RecScanState *recstate) {
788779
/* Quick error protection. */
789780
recstate->fullTotalItems = i;
790781
if (recstate->fullTotalItems <= 0)
791-
elog(ERROR, "no ratings in table %s, cannot predict ratings",
792-
attributes->eventtable);
782+
elog(ERROR, "no items found, cannot predict ratings");
793783
}
794784

795785
recstate->finished = false;
@@ -803,7 +793,6 @@ InitializeRecommender(RecScanState *recstate) {
803793
recstate->ratedTable = NULL;
804794
recstate->pendingTable = NULL;
805795
recstate->simTable = NULL;
806-
recstate->itemList = NULL;
807796
recstate->userFeatures = NULL;
808797

809798
/* In case we don't have a pre-built recommender, we need to assemble
@@ -903,6 +892,10 @@ ExecInitRecScan(RecScan *node, EState *estate, int eflags)
903892
* stuff out of Init and into Execute, to make EXPLAIN go faster. */
904893
recstate->initialized = false;
905894

895+
/* Next we need to prep our user WHERE clause. */
896+
recstate->userqual = (List *)
897+
ExecInitExpr((Expr *) attributes->userWhereClause, NULL);
898+
906899
/* Code for a future version of RecDB. */
907900
/* switch(attributes->cellType) {
908901
case CELL_ALPHA:
@@ -992,8 +985,6 @@ ExecEndRecScan(RecScanState *node)
992985
}
993986

994987
/* Now for extra stuff. */
995-
if (node->itemList)
996-
pfree(node->itemList);
997988
if (node->fullItemList)
998989
pfree(node->fullItemList);
999990
if (node->userFeatures)

PostgreSQL/src/backend/executor/nodeRecjoin.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,9 @@ ExecRecJoin(RecJoinState *recjoin)
152152
/* Otherwise, we need to construct our hash table, since
153153
* we need info from the previous operator to do so. */
154154
freeHash(recjoin->itemTable);
155-
recjoin->itemTable = hashCreate(recjoin->recnode->totalItems);
156-
for (i = 0; i < recjoin->recnode->totalItems; i++) {
157-
int currentItem = recjoin->recnode->itemList[i];
155+
recjoin->itemTable = hashCreate(recjoin->recnode->fullTotalItems);
156+
for (i = 0; i < recjoin->recnode->fullTotalItems; i++) {
157+
int currentItem = recjoin->recnode->fullItemList[i];
158158

159159
tempItem = (GenRating*) palloc(sizeof(GenRating));
160160
tempItem->ID = currentItem;

PostgreSQL/src/backend/nodes/copyfuncs.c

+4-6
Original file line numberDiff line numberDiff line change
@@ -1016,6 +1016,7 @@ _copyRangeVar(const RangeVar *from)
10161016
COPY_SCALAR_FIELD(relpersistence);
10171017
COPY_NODE_FIELD(alias);
10181018
COPY_LOCATION_FIELD(location);
1019+
COPY_NODE_FIELD(recommender);
10191020

10201021
return newnode;
10211022
}
@@ -2422,7 +2423,7 @@ _copyQuery(const Query *from)
24222423
COPY_SCALAR_FIELD(hasRecursive);
24232424
COPY_SCALAR_FIELD(hasModifyingCTE);
24242425
COPY_SCALAR_FIELD(hasForUpdate);
2425-
COPY_SCALAR_FIELD(isRecommendStmt);
2426+
COPY_NODE_FIELD(recommendStmt);
24262427
COPY_NODE_FIELD(cteList);
24272428
COPY_NODE_FIELD(rtable);
24282429
COPY_NODE_FIELD(jointree);
@@ -2525,7 +2526,6 @@ _copyRecommendInfo(const RecommendInfo *from)
25252526
COPY_NODE_FIELD(recommender);
25262527
COPY_NODE_FIELD(attributes);
25272528
COPY_SCALAR_FIELD(opType);
2528-
COPY_NODE_FIELD(next);
25292529

25302530
return newnode;
25312531
}
@@ -2548,13 +2548,11 @@ _copyAttributeInfo(const AttributeInfo *from)
25482548
COPY_STRING_FIELD(recModelName);
25492549
COPY_STRING_FIELD(recModelName2);
25502550
COPY_STRING_FIELD(recViewName);
2551-
COPY_SCALAR_FIELD(numAtts);
2552-
COPY_POINTER_FIELD(attNames, from->numAtts*sizeof(char*));
2553-
COPY_POINTER_FIELD(attValues, from->numAtts*sizeof(char*));
2554-
COPY_NODE_FIELD(target_val);
2551+
COPY_NODE_FIELD(userWhereClause);
25552552
COPY_SCALAR_FIELD(IDfound);
25562553
COPY_SCALAR_FIELD(cellType);
25572554
COPY_SCALAR_FIELD(opType);
2555+
COPY_SCALAR_FIELD(noFilter);
25582556

25592557
return newnode;
25602558
}

PostgreSQL/src/backend/nodes/equalfuncs.c

+4-6
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ _equalRangeVar(const RangeVar *a, const RangeVar *b)
107107
COMPARE_SCALAR_FIELD(relpersistence);
108108
COMPARE_NODE_FIELD(alias);
109109
COMPARE_LOCATION_FIELD(location);
110+
COMPARE_NODE_FIELD(recommender);
110111

111112
return true;
112113
}
@@ -909,7 +910,7 @@ _equalQuery(const Query *a, const Query *b)
909910
COMPARE_SCALAR_FIELD(hasRecursive);
910911
COMPARE_SCALAR_FIELD(hasModifyingCTE);
911912
COMPARE_SCALAR_FIELD(hasForUpdate);
912-
COMPARE_SCALAR_FIELD(isRecommendStmt);
913+
COMPARE_NODE_FIELD(recommendStmt);
913914
COMPARE_NODE_FIELD(cteList);
914915
COMPARE_NODE_FIELD(rtable);
915916
COMPARE_NODE_FIELD(jointree);
@@ -1002,7 +1003,6 @@ _equalRecommendInfo(const RecommendInfo *a, const RecommendInfo *b)
10021003
COMPARE_NODE_FIELD(recommender);
10031004
COMPARE_NODE_FIELD(attributes);
10041005
COMPARE_SCALAR_FIELD(opType);
1005-
COMPARE_NODE_FIELD(next);
10061006

10071007
return true;
10081008
}
@@ -1023,13 +1023,11 @@ _equalAttributeInfo(const AttributeInfo *a, const AttributeInfo *b)
10231023
COMPARE_STRING_FIELD(recModelName);
10241024
COMPARE_STRING_FIELD(recModelName2);
10251025
COMPARE_STRING_FIELD(recViewName);
1026-
COMPARE_SCALAR_FIELD(numAtts);
1027-
COMPARE_POINTER_FIELD(attNames, a->numAtts*sizeof(char*));
1028-
COMPARE_POINTER_FIELD(attValues, a->numAtts*sizeof(char*));
1029-
COMPARE_NODE_FIELD(target_val);
1026+
COMPARE_NODE_FIELD(userWhereClause);
10301027
COMPARE_SCALAR_FIELD(IDfound);
10311028
COMPARE_SCALAR_FIELD(cellType);
10321029
COMPARE_SCALAR_FIELD(opType);
1030+
COMPARE_SCALAR_FIELD(noFilter);
10331031

10341032
return true;
10351033
}

0 commit comments

Comments
 (0)