|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
import pickle |
|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
import kagglehub |
|
|
from surprise import SVDpp, Dataset, Reader |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
import joblib |
|
|
|
|
|
|
|
|
# Kaggle model coordinates. The owner comes from the environment so the
# script can pull the model from whichever account published it.
user = os.getenv("KAGGLE_USERNAME")
if not user:
    # os.getenv returns None when the variable is missing; without this
    # check the handle below would be built with the literal text "None"
    # as its owner and the download would be attempted against that.
    raise RuntimeError(
        "KAGGLE_USERNAME environment variable is not set; "
        "it is required to build the Kaggle model handle."
    )

model_slug = 'book-recommender-svd'
variation_slug = 'v2'
framework = "keras"

# Handle layout used here: <owner>/<model>/<framework>/<variation>
model_handle = f"{user}/{model_slug}/{framework}/{variation_slug}"

# Fetch the model files; the returned value is used below as the directory
# that contains the pickled artifacts.
model_dir = kagglehub.model_download(model_handle)
|
|
|
|
|
|
|
|
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    # NOTE(review): unpickling can execute arbitrary code. These files come
    # from the downloaded model directory, so that source must be trusted.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Artifacts read from the downloaded model directory.
svdpp = _load_pickle(f"{model_dir}/svd_model.pkl")
content_features = _load_pickle(f"{model_dir}/content_features.pkl")
mappings = _load_pickle(f"{model_dir}/mappings.pkl")
|
|
|
|
|
|
|
|
# Unpack the objects bundled in the mappings pickle.
ratings_df = mappings['ratings_df']

# The rest of the script reads this frame as `features_df`, so it must be
# bound under that name; `feature_df` is kept as a backward-compatible alias.
features_df = mappings['feature_df']
feature_df = features_df

user_encoder = mappings['user_encoder']
item_encoder = mappings['item_encoder']
|
|
|
|
|
|
|
|
# book_id -> positional index of that book within features_df (row order).
_book_ids = features_df['book_id']
item_id_to_idx = dict(zip(_book_ids, range(len(_book_ids))))

# Mean of all ratings; used by the prediction code as a fallback score.
global_mean_rating = ratings_df['rating'].mean()

# user_id -> list of book_ids that the user rated 4 or higher.
_liked_mask = ratings_df['rating'] >= 4
_liked_by_user = ratings_df[_liked_mask].groupby('user_id')['book_id']
user_liked_books = _liked_by_user.apply(list).to_dict()
|
|
|
|
|
|
|
|
def _content_score(user_id, book_id):
    """Return the mean cosine similarity between *book_id* and the user's liked books.

    Falls back to ``global_mean_rating`` when the book has no entry in
    ``item_id_to_idx`` or when none of the user's liked books has one.
    """
    idx_target = item_id_to_idx.get(book_id)
    if idx_target is None:  # index 0 is valid, so test against None explicitly
        return global_mean_rating

    liked_indices = [
        item_id_to_idx[liked_id]
        for liked_id in user_liked_books.get(user_id, [])
        if liked_id in item_id_to_idx
    ]
    if not liked_indices:
        return global_mean_rating

    target_vec = content_features[idx_target]
    # A dense feature matrix yields a 1-D row here, which cosine_similarity
    # rejects; turn it into a single-row 2-D array. Objects that are already
    # 2-D (or expose no ``ndim``) are passed through untouched.
    if getattr(target_vec, "ndim", 2) == 1:
        target_vec = target_vec.reshape(1, -1)

    sims = cosine_similarity(target_vec, content_features[liked_indices])
    return sims.mean()


def hybrid_predict(user_id, book_id, alpha=0.7):
    """Blend the collaborative-filtering and content-based scores for one pair.

    Args:
        user_id: Raw user identifier, as stored in ``ratings_df``.
        book_id: Raw book identifier, as stored in ``features_df``.
        alpha: Weight of the collaborative score; the content score is
            weighted by ``1 - alpha``.

    Returns:
        ``alpha * cf_score + (1 - alpha) * content_score``. Either component
        is replaced by ``global_mean_rating`` when it cannot be computed.
    """
    # Both parts are deliberately best-effort, but only ordinary errors are
    # absorbed: a bare ``except`` would also swallow KeyboardInterrupt and
    # SystemExit.
    try:
        cf_score = svdpp.predict(user_id, book_id).est
    except Exception:
        cf_score = global_mean_rating

    # NOTE(review): the computed content score is a cosine similarity, while
    # cf_score and the fallback value are ratings -- confirm that blending
    # the two scales is intended.
    try:
        content_score = _content_score(user_id, book_id)
    except Exception:
        content_score = global_mean_rating

    return alpha * cf_score + (1 - alpha) * content_score
|
|
|
|
|
|
|
|
|
|
|
def _resolve_user_id(user_id):
    """Return *user_id* in the form found in ``ratings_df``, or None if unknown.

    The value is tried as given first. A string is additionally tried with
    surrounding whitespace removed and, when it parses, as an integer, so
    that text typed into the UI can match a numeric ``user_id`` column.
    """
    candidates = [user_id]
    if isinstance(user_id, str):
        text = user_id.strip()
        if text != user_id:
            candidates.append(text)
        try:
            candidates.append(int(text))
        except ValueError:
            pass  # not numeric text; only the string forms are tried

    known_ids = ratings_df['user_id']
    for candidate in candidates:
        if (known_ids == candidate).any():
            return candidate
    return None


def recommend_books(user_id, top_n):
    """Return the *top_n* highest-scoring books the user has not rated yet.

    Args:
        user_id: User identifier; a string is also matched after stripping
            whitespace and as an integer.
        top_n: Maximum number of rows to return. Converted with ``int`` so a
            float value can be used for slicing; ``None`` means no limit.

    Returns:
        A DataFrame with columns "Book Title" and "Predicted Rating", sorted
        by descending score, or a message string when the user is unknown or
        has no unseen books.
    """
    # NOTE(review): the two message strings below are returned to a
    # gr.Dataframe output; confirm that the component displays a plain string
    # as intended (raising gr.Error may be the better fit).
    resolved_id = _resolve_user_id(user_id)
    if resolved_id is None:
        return f"User {user_id} not found."

    rated_books = set(
        ratings_df.loc[ratings_df['user_id'] == resolved_id, 'book_id']
    )
    unseen_books = set(features_df['book_id']) - rated_books

    if not unseen_books:
        return "No unseen books left to recommend."

    # Build the book_id -> title map once instead of scanning the whole frame
    # for every candidate. drop_duplicates keeps the first row per book_id,
    # matching the previous ``.values[0]`` lookup.
    title_by_id = (
        features_df.drop_duplicates('book_id')
        .set_index('book_id')['title']
        .to_dict()
    )

    recommendations = [
        (title_by_id[book_id], round(hybrid_predict(resolved_id, book_id), 3))
        for book_id in unseen_books
    ]
    recommendations.sort(key=lambda pair: pair[1], reverse=True)

    limit = None if top_n is None else int(top_n)
    return pd.DataFrame(
        recommendations[:limit],
        columns=["Book Title", "Predicted Rating"],
    )
|
|
|
|
|
|
|
|
|
|
|
# Gradio front end: a user-ID box and a result-count slider feed
# recommend_books, whose result is shown in a two-column table.
with gr.Blocks() as demo:
    # The heading previously began with a stray "π", which looks like the
    # leftover lead byte of a mis-decoded emoji; presumably a books emoji
    # was intended -- confirm against the original source.
    gr.Markdown("## 📚 Hybrid Book Recommendation System")
    user_id_input = gr.Textbox(label="Enter User ID", placeholder="e.g. 123")
    top_n_input = gr.Slider(5, 20, value=10, step=1, label="Number of Recommendations")
    recommend_button = gr.Button("Get Recommendations")
    output_table = gr.Dataframe(headers=["Book Title", "Predicted Rating"], datatype=["str", "number"])

    # Clicking the button calls recommend_books(user_id, top_n) and sends
    # its return value to the table.
    recommend_button.click(
        recommend_books,
        inputs=[user_id_input, top_n_input],
        outputs=output_table
    )

demo.launch()
|
|
|