Spaces:

Ankerkraut
/

chatbot-test

Sleeping

App Files Files Community

Ankerkraut committed on Mar 19

Commit

c5c778a

1 Parent(s): da45a72

setup

Browse files

Files changed (1) hide show

app.py +99 -109

app.py CHANGED Viewed

@@ -12,116 +12,106 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def init():
-    product_strings = []
-    with open('../Data/products.json', 'r', encoding='utf-8') as f:
-        for product in json.load(f)['products']:
-            product_json = json.dumps(product, indent=4, ensure_ascii=False)
-            tags_ohne = [tag for tag in product['tags'] if "Eigenschaften_ohne" in tag]
-            tags_ohne = [tag.split(" ")[1] for tag in tags_ohne]
-            tags_zutaten = [tag for tag in product['tags'] if "Zutaten_" in tag]
-            tags_zutaten = [tag.split("_")[1] for tag in tags_zutaten]
-            tags_geeignet = [tag for tag in product['tags'] if "Geeignet zum_" in tag]
-            tags_geeignet = [tag.split("_")[1] for tag in tags_geeignet]
-            tags_landestypisch = [tag for tag in product['tags'] if "Landestypisch für_" in tag]
-            tags_landestypisch = [tag.split("_")[1] for tag in tags_landestypisch]
-            tags_geschmack = [tag for tag in product['tags'] if "Geschmack_" in tag]
-            tags_geschmack = [tag.split("_")[1] for tag in tags_geschmack]
-            tags_passtzu = [tag for tag in product['tags'] if "Passt zu_" in tag]
-            tags_passtzu = [tag.split("_")[1] for tag in tags_passtzu]
-            tags_merkmale = [tag for tag in product['tags'] if "Merkmale_" in tag]
-            tags_merkmale = [tag.split("_")[1] for tag in tags_merkmale]
-            tags_sonstige = [tag for tag in product['tags'] if not any(sub in tag for sub in ["Eigenschaften_ohne", "Zutaten_", "Geeignet zum_", "Landestypisch für_", "Geschmack_", "Passt zu_", "Merkmale_"])]
-            tags_ohne_str = ",".join(tags_ohne) if tags_ohne else "nicht bekannt"
-            tags_zutaten = ",".join(tags_zutaten) if tags_zutaten else "nicht bekannt"
-            tags_geeignet = ",".join(tags_geeignet) if tags_geeignet else "nicht bekannt"
-            tags_landestypisch = ",".join(tags_landestypisch) if tags_landestypisch else "nicht bekannt"
-            tags_geschmack = ",".join(tags_geschmack) if tags_geschmack else "nicht bekannt"
-            tags_passtzu = ",".join(tags_passtzu) if tags_passtzu else "nicht bekannt"
-            tags_merkmale = ",".join(tags_merkmale) if tags_merkmale else "nicht bekannt"
-            tags_sonstige = ",".join(tags_sonstige) if tags_sonstige else "nicht bekannt"
-            product_string = f"""{product["title"]}; Beschreibung: {product["description"]}; Eigenschaften: Ohne: {tags_ohne_str}; Zutaten: {tags_zutaten}; Geeignet zum: {tags_geeignet}; Landestypisch für: {tags_landestypisch}; Geschmack: {tags_geschmack}; Passt zu: {tags_passtzu}; Merkmale: {tags_merkmale}; Sonstige: {tags_sonstige}; Erstellt am: {product["createdAt"]}"""
-            product_strings.append(product_string)
-    product_strings
-    blogs = []
-    recipe_strings = []
-    with open('../Data/blogs_and_recipes.json', 'r', encoding='utf-8') as f:
-        data = json.load(f)
-        for blog in data['blogs']:
-            if 'Rezepte' in blog['title']:
-                for recipe in blog['articles']:
-                    new_recipe = ""
-                    recipe["body"] = bs4.BeautifulSoup(recipe["body"], 'html.parser')
-                    for metafield in recipe['metafields']:
-                        if metafield['namespace'] == 'recipekit':
-                            metafield['value'] = bs4.BeautifulSoup(metafield['value'], 'html.parser')
-                            value_json = json.loads(metafield['value'].text.replace(",,", ",").replace(",]", "]").replace(",}", "}").strip(","))
-                            title = value_json['recipe_title']
-                            description = value_json['recipe_description']
-                            ingredients = value_json['recipe_ingredients']
-                            category = value_json.get('recipe_category', 'Unbekannt')
-                            cuisine = value_json.get('recipe_cuisine', 'Unbekannt')
-                            ingredients = [ingredient['ingredient'] for ingredient in value_json['recipe_ingredients'] if 'ingredient' in ingredient]
-                            directions = [direction['direction'] for direction in value_json['recipe_directions']]
-                            serving_size = value_json['serving_size']
-                            prep_time = value_json['prep_time']
-                            cook_time = value_json['cook_time']
-                            rating = value_json.get('recipe_rating', 'Keine Bewertung')
-                            new_recipe = f"Titel:{title},\n{description}\nZutaten:{','.join(ingredients)},\nAnweisungen:{' '.join(directions)},\nKategorie:{category},\nKüche:{cuisine},\nPortionen:{serving_size},\nVorbereitungszeit:{prep_time},\nKochzeit:{cook_time},\nBewertung:{rating}"
-                    recipe_strings.append(new_recipe)
-    recipe_strings
-    client = QdrantClient(":memory:") #QdrantClient("localhost:6333")
-    client.set_model("sentence-transformers/all-MiniLM-L6-v2")
-    client.set_sparse_model("prithivida/Splade_PP_en_v1")
-    client.delete_collection(collection_name="products")
-    client.create_collection(
-        collection_name="products",
-        vectors_config=client.get_fastembed_vector_params(),
-        sparse_vectors_config=client.get_fastembed_sparse_vector_params(),
-    )
-    client.delete_collection(collection_name="recipes")
-    client.create_collection(
-        collection_name="recipes",
-        vectors_config=client.get_fastembed_vector_params(),
-        sparse_vectors_config=client.get_fastembed_sparse_vector_params(),
-    )
-    client.add(collection_name="products",
-           documents=product_strings)
-    client.add(collection_name="recipes",
-           documents=recipe_strings)
-    model_name = "LeoLM/leo-hessianai-13b-chat"
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,  # Use 4-bit quantization
-        bnb_4bit_compute_dtype=torch.float16,  # Reduce memory usage
-        bnb_4bit_use_double_quant=True,
-        llm_int8_enable_fp32_cpu_offload=True
-    )
-    ankerbot_model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=bnb_config,
-        device_map="cuda:0",
-        torch_dtype=torch.float16,
-        use_cache=True,
-        offload_folder="../offload"
-    )
-    ankerbot_tokenizer = AutoTokenizer.from_pretrained(model_name,
-        torch_dtype=torch.float16,
-        truncation=True,
-        padding=True, )
-    prompt_format = "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
-    generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, trust_remote_code=True) # True for flash-attn2 else False
-    generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, trust_remote_code=True) # True for flash-attn2 else False
 def generate_response(query, context, prompts, max_tokens, temperature, top_p):
     system_message_support = f"""<|im_start|>system

 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+product_strings = []
+with open('../Data/products.json', 'r', encoding='utf-8') as f:
+    for product in json.load(f)['products']:
+        product_json = json.dumps(product, indent=4, ensure_ascii=False)
+        tags_ohne = [tag for tag in product['tags'] if "Eigenschaften_ohne" in tag]
+        tags_ohne = [tag.split(" ")[1] for tag in tags_ohne]
+        tags_zutaten = [tag for tag in product['tags'] if "Zutaten_" in tag]
+        tags_zutaten = [tag.split("_")[1] for tag in tags_zutaten]
+        tags_geeignet = [tag for tag in product['tags'] if "Geeignet zum_" in tag]
+        tags_geeignet = [tag.split("_")[1] for tag in tags_geeignet]
+        tags_landestypisch = [tag for tag in product['tags'] if "Landestypisch für_" in tag]
+        tags_landestypisch = [tag.split("_")[1] for tag in tags_landestypisch]
+        tags_geschmack = [tag for tag in product['tags'] if "Geschmack_" in tag]
+        tags_geschmack = [tag.split("_")[1] for tag in tags_geschmack]
+        tags_passtzu = [tag for tag in product['tags'] if "Passt zu_" in tag]
+        tags_passtzu = [tag.split("_")[1] for tag in tags_passtzu]
+        tags_merkmale = [tag for tag in product['tags'] if "Merkmale_" in tag]
+        tags_merkmale = [tag.split("_")[1] for tag in tags_merkmale]
+        tags_sonstige = [tag for tag in product['tags'] if not any(sub in tag for sub in ["Eigenschaften_ohne", "Zutaten_", "Geeignet zum_", "Landestypisch für_", "Geschmack_", "Passt zu_", "Merkmale_"])]
+        tags_ohne_str = ",".join(tags_ohne) if tags_ohne else "nicht bekannt"
+        tags_zutaten = ",".join(tags_zutaten) if tags_zutaten else "nicht bekannt"
+        tags_geeignet = ",".join(tags_geeignet) if tags_geeignet else "nicht bekannt"
+        tags_landestypisch = ",".join(tags_landestypisch) if tags_landestypisch else "nicht bekannt"
+        tags_geschmack = ",".join(tags_geschmack) if tags_geschmack else "nicht bekannt"
+        tags_passtzu = ",".join(tags_passtzu) if tags_passtzu else "nicht bekannt"
+        tags_merkmale = ",".join(tags_merkmale) if tags_merkmale else "nicht bekannt"
+        tags_sonstige = ",".join(tags_sonstige) if tags_sonstige else "nicht bekannt"
+        product_string = f"""{product["title"]}; Beschreibung: {product["description"]}; Eigenschaften: Ohne: {tags_ohne_str}; Zutaten: {tags_zutaten}; Geeignet zum: {tags_geeignet}; Landestypisch für: {tags_landestypisch}; Geschmack: {tags_geschmack}; Passt zu: {tags_passtzu}; Merkmale: {tags_merkmale}; Sonstige: {tags_sonstige}; Erstellt am: {product["createdAt"]}"""
+        product_strings.append(product_string)
+product_strings
+blogs = []
+recipe_strings = []
+with open('../Data/blogs_and_recipes.json', 'r', encoding='utf-8') as f:
+    data = json.load(f)
+    for blog in data['blogs']:
+        if 'Rezepte' in blog['title']:
+            for recipe in blog['articles']:
+                new_recipe = ""
+                recipe["body"] = bs4.BeautifulSoup(recipe["body"], 'html.parser')
+                for metafield in recipe['metafields']:
+                    if metafield['namespace'] == 'recipekit':
+                        metafield['value'] = bs4.BeautifulSoup(metafield['value'], 'html.parser')
+                        value_json = json.loads(metafield['value'].text.replace(",,", ",").replace(",]", "]").replace(",}", "}").strip(","))
+                        title = value_json['recipe_title']
+                        description = value_json['recipe_description']
+                        ingredients = value_json['recipe_ingredients']
+                        category = value_json.get('recipe_category', 'Unbekannt')
+                        cuisine = value_json.get('recipe_cuisine', 'Unbekannt')
+                        ingredients = [ingredient['ingredient'] for ingredient in value_json['recipe_ingredients'] if 'ingredient' in ingredient]
+                        directions = [direction['direction'] for direction in value_json['recipe_directions']]
+                        serving_size = value_json['serving_size']
+                        prep_time = value_json['prep_time']
+                        cook_time = value_json['cook_time']
+                        rating = value_json.get('recipe_rating', 'Keine Bewertung')
+                        new_recipe = f"Titel:{title},\n{description}\nZutaten:{','.join(ingredients)},\nAnweisungen:{' '.join(directions)},\nKategorie:{category},\nKüche:{cuisine},\nPortionen:{serving_size},\nVorbereitungszeit:{prep_time},\nKochzeit:{cook_time},\nBewertung:{rating}"
+                recipe_strings.append(new_recipe)
+recipe_strings
+client = QdrantClient(":memory:") #QdrantClient("localhost:6333")
+client.set_model("sentence-transformers/all-MiniLM-L6-v2")
+client.set_sparse_model("prithivida/Splade_PP_en_v1")
+client.delete_collection(collection_name="products")
+client.create_collection(
+    collection_name="products",
+    vectors_config=client.get_fastembed_vector_params(),
+    sparse_vectors_config=client.get_fastembed_sparse_vector_params(),
+)
+client.delete_collection(collection_name="recipes")
+client.create_collection(
+    collection_name="recipes",
+    vectors_config=client.get_fastembed_vector_params(),
+    sparse_vectors_config=client.get_fastembed_sparse_vector_params(),
+)
+client.add(collection_name="products",
+        documents=product_strings)
+client.add(collection_name="recipes",
+        documents=recipe_strings)
+model_name = "LeoLM/leo-hessianai-13b-chat"
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,  # Use 4-bit quantization
+    bnb_4bit_compute_dtype=torch.float16,  # Reduce memory usage
+    bnb_4bit_use_double_quant=True,
+    llm_int8_enable_fp32_cpu_offload=True
+)
+ankerbot_model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    quantization_config=bnb_config,
+    device_map="cuda:0",
+    torch_dtype=torch.float16,
+    use_cache=True,
+    offload_folder="../offload"
+)
+ankerbot_tokenizer = AutoTokenizer.from_pretrained(model_name,
+    torch_dtype=torch.float16,
+    truncation=True,
+    padding=True, )
+prompt_format = "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, trust_remote_code=True) # True for flash-attn2 else False
+generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, trust_remote_code=True) # True for flash-attn2 else False
 def generate_response(query, context, prompts, max_tokens, temperature, top_p):
     system_message_support = f"""<|im_start|>system