# Note: GraphLab Create requires signing up for an academic license and runs on Python 2 only.
import pandas as pd
import graphlab as gl
# Note: Print the first rows to check that the dataset has been imported correctly.
# Load the master dataset. The path is a raw string so the backslash is kept
# literally instead of relying on '\d' being an unrecognised escape sequence.
df = pd.read_csv(r'dataset\dataset02_master.csv', sep=',')
print(df.head(5))  # sanity check: show the first five rows
# Note 2: Filter the dataset to keep only the relevant rows and columns.
# Note 3: Convert the pandas DataFrame to an SFrame, which GraphLab requires.
# Keep only the rows where SegmentNo is 2, and only the two columns used
# downstream as the recommender's user id and item id.
df_seg2_growable = df[df['SegmentNo'] == 2][['Card_ID', 'pdt_type']]
# GraphLab works on SFrames, so convert the pandas DataFrame.
df_seg2_growable_SFrame = gl.SFrame(df_seg2_growable)
# Raw string keeps the backslash literal rather than depending on '\d' being
# an unrecognised escape sequence.
df_seg2_growable_SFrame.save(r'dataset\dataset_recommender_2.csv', format='csv')
# Note: The train/test split uses item_test_proportion = 0.5, i.e. 50% of each sampled test user's items are held out for testing.
# Split the segment-2 SFrame into train and test sets by user; the fixed
# random_seed keeps the split reproducible between runs.
train_s2, test_s2 = gl.recommender.util.random_split_by_user(
    df_seg2_growable_SFrame,
    user_id='Card_ID',
    item_id='pdt_type',
    item_test_proportion=0.5,
    random_seed=2017
)
# Step 4: Create Model 1 - item similarity with Pearson similarity score
# (trained on the training split only).
train_s2_model_pearson = gl.recommender.item_similarity_recommender.create(
    train_s2,
    user_id='Card_ID',
    item_id='pdt_type',
    similarity_type='pearson'
)
# Step 4: Create Model 2 - item similarity with Jaccard similarity score
# (trained on the training split only).
train_s2_model_jaccard = gl.recommender.item_similarity_recommender.create(
    train_s2,
    user_id='Card_ID',
    item_id='pdt_type',
    similarity_type='jaccard'
)
# Step 5: Create Model 3 - ranking factorization recommender, trained on the
# training split with the 'ials' solver and a fixed seed.
train_s2_model_factorization = gl.recommender.ranking_factorization_recommender.create(
    train_s2,
    user_id='Card_ID',
    item_id='pdt_type',
    random_seed=2017,
    solver='ials'
)
# Evaluate the three candidate models on the held-out test split.
# m1 = Pearson item similarity, m2 = Jaccard item similarity,
# m3 = ranking factorization.
x2 = gl.recommender.util.compare_models(
    test_s2,
    [
        train_s2_model_pearson,
        train_s2_model_jaccard,
        train_s2_model_factorization,
    ],
    model_names=["m1", "m2", "m3"]
)
# Final model: Jaccard item similarity, refit on the full segment-2 SFrame
# (not just the training split).
train_s2_model_final = gl.recommender.item_similarity_recommender.create(
    df_seg2_growable_SFrame,
    user_id='Card_ID',
    item_id='pdt_type',
    similarity_type='jaccard'
)
## Output final dataset for visualization
# recommend() is called with no arguments, so GraphLab's defaults apply.
recs_final = train_s2_model_final.recommend()
# Raw string keeps the backslash literal rather than depending on '\d' being
# an unrecognised escape sequence.
recs_final.save(r'dataset\dataset_final_recs2.csv', format='csv')