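# app.py — copied from a Hugging Face file view.
# Uploader: khalednabawi11 · commit c8fdefc ("Update app.py", verified) · 6.11 kB
# (page controls in the original capture: raw / history blame)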
# import gradio as gr
# import kagglehub
# import pickle
# import numpy as np
# import os
# from surprise import SVDpp
# from sklearn.metrics.pairwise import cosine_similarity
# # --- Download model from KaggleHub ---
# def download_kagglehub_model():
# user = os.getenv("KAGGLE_USERNAME")
# MODEL_SLUG = 'book-recommender-svd'
# VARIATION_SLUG = 'v1'
# framework = "keras"
# model_handle = f"{user}/{MODEL_SLUG}/{framework}/{VARIATION_SLUG}"
# print("📥 Downloading model from KaggleHub:", model_handle)
# model_path = kagglehub.model_download(model_handle)
# print("✅ Model downloaded to:", model_path)
# return model_path
# # --- Load Models ---
# def load_models(model_dir):
# with open(f"{model_dir}/svd_model.pkl", "rb") as f:
# svdpp_model = pickle.load(f)
# with open(f"{model_dir}/content_features.pkl", "rb") as f:
# content_features = pickle.load(f)
# with open(f"{model_dir}/book_metadata.pkl", "rb") as f:
# mappings = pickle.load(f)
# return svdpp_model, content_features, mappings
# # --- Hybrid Prediction ---
# def hybrid_predict(user_id, book_id, alpha=0.7):
# try:
# # uid = user_encoder.transform([user_id])[0]
# # iid = item_encoder.transform([book_id])[0]
# uid = user_id
# iid = book_id
# except:
# return "❌ Unknown user_id or book_id"
# svd_pred = svdpp_model.predict(uid, iid).est
# user_liked = np.where(svdpp_model.trainset.ur[uid])[0]
# if len(user_liked) == 0:
# content_score = 0
# else:
# similarities = cosine_similarity(content_features[iid], content_features[user_liked])
# content_score = np.mean(similarities)
# hybrid_score = alpha * svd_pred + (1 - alpha) * content_score * 5
# return round(hybrid_score, 2)
# # --- Gradio Interface ---
# def recommend(user_id, book_id, alpha=0.7):
# return f"⭐ Predicted Rating: {hybrid_predict(user_id, book_id, alpha)}"
# # Download and load model
# model_dir = download_kagglehub_model()
# svdpp_model, content_features, mappings = load_models(model_dir)
# # user_encoder = mappings["user_encoder"]
# # item_encoder = mappings["item_encoder"]
# # Start Gradio app
# demo = gr.Interface(
# fn=recommend,
# inputs=[
# gr.Textbox(label="User ID"),
# gr.Textbox(label="Book ID"),
# gr.Slider(0, 1, value=0.7, step=0.1, label="Hybrid Weight (alpha)")
# ],
# outputs="text",
# title="📚 Hybrid Book Recommender",
# description="Enter a user_id and book_id to get a predicted rating using a Hybrid SVD++ and Content-based model."
# )
# demo.launch()
import os
import pickle
import gradio as gr
import pandas as pd
import kagglehub
from surprise import SVDpp, Dataset, Reader
from sklearn.metrics.pairwise import cosine_similarity
import joblib
# 🔥 Load Hybrid Model from KaggleHub
# The KaggleHub model handle has the form <user>/<model>/<framework>/<variation>.
user = os.getenv("KAGGLE_USERNAME")
if not user:
    # Without a username the handle would start with "None/" and the download
    # below would fail with an error that does not point at the real cause.
    raise RuntimeError(
        "KAGGLE_USERNAME environment variable must be set to download the model."
    )
model_slug = 'book-recommender-svd'
variation_slug = 'v2'
framework = "keras"
model_handle = f"{user}/{model_slug}/{framework}/{variation_slug}"
model_dir = kagglehub.model_download(model_handle)

# NOTE(review): pickle.load runs arbitrary code embedded in the file. These
# artifacts come from a remote download, so only load models you trust.
with open(os.path.join(model_dir, "svd_model.pkl"), "rb") as f:
    svdpp = pickle.load(f)
with open(os.path.join(model_dir, "content_features.pkl"), "rb") as f:
    content_features = pickle.load(f)
with open(os.path.join(model_dir, "mappings.pkl"), "rb") as f:
    mappings = pickle.load(f)

ratings_df = mappings['ratings_df']
# The rest of this module refers to the feature frame as `features_df`; the
# pickle key is 'feature_df'. Binding only `feature_df` made every later use
# of `features_df` a NameError, so bind both names to the same frame.
features_df = mappings['feature_df']
feature_df = features_df
user_encoder = mappings['user_encoder']
item_encoder = mappings['item_encoder']

# Map each book_id to its position in features_df (used to index
# content_features — presumably row-aligned with features_df; verify against
# the training notebook).
item_id_to_idx = {bid: idx for idx, bid in enumerate(features_df['book_id'])}
# Fallback score used when a collaborative or content score cannot be computed.
global_mean_rating = ratings_df['rating'].mean()

# User liked books mapping (for content-based filtering): user_id -> list of
# book_ids that the user rated 4 or higher.
user_liked_books = (
    ratings_df[ratings_df['rating'] >= 4]
    .groupby('user_id')['book_id']
    .apply(list)
    .to_dict()
)
# 🔥 Hybrid Predict Function
def hybrid_predict(user_id, book_id, alpha=0.7, rating_scale=5.0):
    """Blend the SVD++ estimate with a content-based score for one user/book pair.

    Args:
        user_id: User identifier, passed to ``svdpp.predict`` and used as the
            key into ``user_liked_books``.
        book_id: Book identifier, passed to ``svdpp.predict`` and looked up in
            ``item_id_to_idx``.
        alpha: Weight of the collaborative score; ``1 - alpha`` is the weight
            of the content score.
        rating_scale: Factor applied to the mean cosine similarity so it is
            expressed in rating units before blending. The default of 5
            mirrors the ``content_score * 5`` used by the earlier
            (commented-out) revision of this file.

    Returns:
        ``alpha * cf_score + (1 - alpha) * content_score``. Either component
        falls back to ``global_mean_rating`` when it cannot be computed.
    """
    try:
        cf_score = svdpp.predict(user_id, book_id).est
    except Exception:
        # Best effort: any failure in the collaborative model degrades to the
        # global mean rather than aborting the whole recommendation run.
        cf_score = global_mean_rating

    # Default for every "cannot compute" case below: unknown book, user with
    # no liked books, liked books missing from the feature index, or an error.
    content_score = global_mean_rating
    try:
        idx_target = item_id_to_idx.get(book_id)
        liked_indices = [
            item_id_to_idx[b]
            for b in user_liked_books.get(user_id, [])
            if b in item_id_to_idx
        ]
        if idx_target is not None and liked_indices:
            # Index with a list so the target stays 2-D for dense arrays as
            # well as sparse matrices; a 1-D row makes cosine_similarity raise.
            # Assumes content_features is row-indexable (NumPy array or SciPy
            # sparse matrix) — TODO confirm.
            sims = cosine_similarity(
                content_features[[idx_target]], content_features[liked_indices]
            )
            # The raw mean is a similarity, not a rating. Scale it so it is
            # comparable with cf_score and with the global-mean fallback.
            content_score = sims.mean() * rating_scale
    except Exception:
        content_score = global_mean_rating

    return alpha * cf_score + (1 - alpha) * content_score
# 🔥 Recommend Top-N Books
def recommend_books(user_id, top_n):
    """Return the ``top_n`` highest-scoring books the user has not rated yet.

    Args:
        user_id: User identifier. A value that is not found as-is is retried
            as an integer, because the Gradio text box delivers strings.
        top_n: Number of rows to return; coerced to ``int`` because UI sliders
            may deliver floats, which cannot be used as a slice bound.

    Returns:
        A DataFrame with columns "Book Title" and "Predicted Rating", sorted
        by predicted rating in descending order. If the user is unknown or has
        no unseen books, a message string is returned instead.
    """
    # NOTE(review): the two string returns below are wired to a gr.Dataframe
    # output in the UI; confirm that component renders a plain string the way
    # you expect, or surface these messages through a separate component.
    known_user_ids = set(ratings_df['user_id'].unique())
    if user_id not in known_user_ids:
        try:
            numeric_id = int(str(user_id).strip())
        except (TypeError, ValueError):
            numeric_id = None
        if numeric_id is None or numeric_id not in known_user_ids:
            return f"User {user_id} not found."
        user_id = numeric_id

    top_n = int(top_n)

    rated_books = set(ratings_df[ratings_df['user_id'] == user_id]['book_id'])
    unseen_books = set(features_df['book_id']) - rated_books
    if not unseen_books:
        return "No unseen books left to recommend."

    # Build the id -> title lookup once instead of scanning features_df for
    # every candidate. drop_duplicates keeps the first row per book_id, which
    # matches the original `.values[0]` choice.
    title_by_book = (
        features_df.drop_duplicates(subset='book_id')
        .set_index('book_id')['title']
        .to_dict()
    )

    recommendations = [
        (title_by_book[book_id], round(hybrid_predict(user_id, book_id), 3))
        for book_id in unseen_books
    ]
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return pd.DataFrame(
        recommendations[:top_n], columns=["Book Title", "Predicted Rating"]
    )
# 🌟 Gradio UI
# Build the UI: a user-id text box and a result-count slider feed
# recommend_books when the button is clicked; its result goes to the table.
with gr.Blocks() as demo:
    # Heading emoji restored; the literal was mis-encoded as "πŸ“š".
    gr.Markdown("## 📚 Hybrid Book Recommendation System")
    user_id_input = gr.Textbox(label="Enter User ID", placeholder="e.g. 123")
    top_n_input = gr.Slider(5, 20, value=10, step=1, label="Number of Recommendations")
    recommend_button = gr.Button("Get Recommendations")
    output_table = gr.Dataframe(
        headers=["Book Title", "Predicted Rating"],
        datatype=["str", "number"],
    )
    recommend_button.click(
        recommend_books,
        inputs=[user_id_input, top_n_input],
        outputs=output_table,
    )

# Start the Gradio server as soon as this module is executed.
demo.launch()