Spaces:

alonsosilva
/

song-finder-graph

Running

App Files Files Community

alonsosilva committed on Sep 24, 2024

Commit

a3cec77

1 Parent(s): 07b634d

Add app

Browse files

Files changed (19) hide show

Dockerfile +25 -0
app.py +234 -0
requirements.txt +7 -0
test_kuzudb/.lock +0 -0
test_kuzudb/.shadow +0 -0
test_kuzudb/.wal +0 -0
test_kuzudb/catalog.kz +0 -0
test_kuzudb/data.kz +0 -0
test_kuzudb/metadata.kz +0 -0
test_kuzudb/n-0.hindex +0 -0
test_kuzudb/n-0.hindex.ovf +0 -0
test_kuzudb/n-1.hindex +0 -0
test_kuzudb/n-1.hindex.ovf +0 -0
test_kuzudb/n-2.hindex +0 -0
test_lancedb/Songs.lance/_transactions/0-792a2d61-740e-4405-ac65-9bfd39f8045e.txn +1 -0
test_lancedb/Songs.lance/_transactions/1-11446c4d-5b76-4653-b0b5-0e515412fdcb.txn +0 -0
test_lancedb/Songs.lance/_versions/1.manifest +0 -0
test_lancedb/Songs.lance/_versions/2.manifest +0 -0
test_lancedb/Songs.lance/data/535b0fe7-3004-4639-a5c4-303489599295.lance +0 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,25 @@

+FROM python:3.11
+# Set up a new user named "user" with user ID 1000 for permission
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Upgrade pip
+RUN pip install --no-cache-dir --upgrade pip
+COPY --chown=user requirements.txt requirements.txt
+# Install requirements
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user app.py app.py
+COPY --chown=user test_lancedb/ test_lancedb/
+COPY --chown=user test_kuzudb/ test_kuzudb/
+ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,234 @@

+# from dotenv import find_dotenv, load_dotenv
+# _ = load_dotenv(find_dotenv())
+import solara
+import polars as pl
+# Load the song dataset from a remote CSV. This runs at import time, so the
+# app needs network access on start-up.
+df = pl.read_csv(
+    "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
+)
+import string
+# Normalize the capitalization of album titles with string.capwords.
+df = df.with_columns(
+    pl.Series("Album", [string.capwords(album) for album in df["Album"]])
+)
+# Same normalization for song titles.
+df = df.with_columns(pl.Series("Song", [string.capwords(song) for song in df["Song"]]))
+# Replace missing lyrics with the literal string "None"
+# (presumably so the "text" concatenation below never produces a null).
+df = df.with_columns(pl.col("Lyrics").fill_null("None"))
+# Build a "text" column of the form "# <Album>: <Song>", a blank line, then
+# the lyrics. This column is what gets embedded and stored further down.
+df = df.with_columns(
+    text=pl.lit("# ")
+    + pl.col("Album")
+    + pl.lit(": ")
+    + pl.col("Song")
+    + pl.lit("\n\n")
+    + pl.col("Lyrics")
+)
+import shutil
+import lancedb
+# Remove any previous on-disk vector store so the table is rebuilt on every
+# start; errors (e.g. the directory not existing) are ignored.
+shutil.rmtree("test_lancedb", ignore_errors=True)
+db = lancedb.connect("test_lancedb")
+from lancedb.embeddings import get_registry
+# Sentence-transformers embedding function backed by the
+# "TaylorAI/gte-tiny" model, configured to run on CPU.
+embeddings = (
+    get_registry()
+    .get("sentence-transformers")
+    .create(name="TaylorAI/gte-tiny", device="cpu")
+)
+from lancedb.pydantic import LanceModel, Vector
+# Table schema: the four string columns plus "text" (declared as the
+# embedding source field) and "vector" (declared as the embedding vector
+# field, sized from embeddings.ndims()).
+class Songs(LanceModel):
+    Song: str
+    Lyrics: str
+    Album: str
+    Artist: str
+    text: str = embeddings.SourceField()
+    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
+# Create the "Songs" table and load the whole dataframe into it.
+table = db.create_table("Songs", schema=Songs)
+table.add(data=df)
+import os
+from typing import Optional
+from langchain_community.chat_models import ChatOpenAI
+class ChatOpenRouter(ChatOpenAI):
+    openai_api_base: str
+    openai_api_key: str
+    model_name: str
+    def __init__(
+        self,
+        model_name: str,
+        openai_api_key: Optional[str] = None,
+        openai_api_base: str = "https://openrouter.ai/api/v1",
+        **kwargs,
+    ):
+        openai_api_key = os.getenv("OPENROUTER_API_KEY")
+        super().__init__(
+            openai_api_base=openai_api_base,
+            openai_api_key=openai_api_key,
+            model_name=model_name,
+            **kwargs,
+        )
+llm_openrouter = ChatOpenRouter(model_name="meta-llama/llama-3.1-405b-instruct", temperature=0.1)
+def get_relevant_texts(query, table=table):
+    results = (
+        table.search(query)
+             .limit(5)
+             .to_polars()
+    )
+    return " ".join([results["text"][i] + "\n\n---\n\n" for i in range(5)])
+def generate_prompt(query, table=table):
+    return (
+        "Answer the question based only on the following context:\n\n"
+        + get_relevant_texts(query, table)
+        + "\n\nQuestion: "
+        + query
+    )
+def generate_response(query, table=table):
+    prompt = generate_prompt(query, table)
+    response = llm_openrouter.invoke(input=prompt)
+    return response.content
+import kuzu
+shutil.rmtree("test_kuzudb", ignore_errors=True)
+db = kuzu.Database("test_kuzudb")
+conn = kuzu.Connection(db)
+# Create schema
+conn.execute("CREATE NODE TABLE ARTIST(name STRING, PRIMARY KEY (name))")
+conn.execute("CREATE NODE TABLE ALBUM(name STRING, PRIMARY KEY (name))")
+conn.execute("CREATE NODE TABLE SONG(ID SERIAL, name STRING, lyrics STRING, PRIMARY KEY(ID))")
+conn.execute("CREATE REL TABLE IN_ALBUM(FROM SONG TO ALBUM)")
+conn.execute("CREATE REL TABLE FROM_ARTIST(FROM ALBUM TO ARTIST)");
+# Insert nodes
+for artist in df["Artist"].unique():
+    conn.execute(f"CREATE (artist:ARTIST {{name: '{artist}'}})")
+for album in df["Album"].unique():
+    conn.execute(f"""CREATE (album:ALBUM {{name: "{album}"}})""")
+for song, lyrics in df.select(["Song", "text"]).unique().rows():
+    replaced_lyrics = lyrics.replace('"', "'")
+    conn.execute(
+        f"""CREATE (song:SONG {{name: "{song}", lyrics: "{replaced_lyrics}"}})"""
+    )
+# Insert edges
+for song, album, lyrics in df.select(["Song", "Album", "text"]).rows():
+    replaced_lyrics = lyrics.replace('"', "'")
+    conn.execute(
+        f"""
+        MATCH (song:SONG), (album:ALBUM)
+        WHERE song.name = "{song}" AND song.lyrics = "{replaced_lyrics}" AND album.name = "{album}"
+        CREATE (song)-[:IN_ALBUM]->(album)
+        """
+    )
+for album, artist in df.select(["Album", "Artist"]).unique().rows():
+  conn.execute(
+    f"""
+    MATCH (album:ALBUM), (artist:ARTIST) WHERE album.name = "{album}" AND artist.name = "{artist}"
+    CREATE (album)-[:FROM_ARTIST]->(artist)
+    """
+  )
+response = conn.execute(
+    """
+    MATCH (a:ALBUM {name: 'The Black Album'})<-[:IN_ALBUM]-(s:SONG) RETURN s.name
+    """
+  )
+df_response = response.get_as_pl()
+from langchain_community.graphs import KuzuGraph
+graph = KuzuGraph(db)
+def generate_kuzu_prompt(user_query):
+    """Return the prompt asking the LLM to translate *user_query* into Kùzu Cypher.
+
+    The prompt is the concatenation of fixed instructions, the schema text
+    read from the module-level ``graph.get_schema``, one worked example, and
+    finally ``user_query`` itself (which must be a string).
+    """
+    return """Task: Generate Kùzu Cypher statement to query a graph database.
+Instructions:
+Generate the Kùzu dialect of Cypher with the following rules in mind:
+1. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`.
+2. Do not include triple backticks ``` in your response. Return only Cypher.
+3. Do not return any notes or comments in your response.
+Use only the provided relationship types and properties in the schema.
+Do not use any other relationship types or properties that are not provided.
+Schema:\n""" + graph.get_schema + """\nExample:
+The question is:\n"Which songs does the load album have?"
+MATCH (a:ALBUM {name: 'Load'})<-[:IN_ALBUM]-(s:SONG) RETURN s.name
+Note: Do not include any explanations or apologies in your responses.
+Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
+Do not include any text except the generated Cypher statement.
+The question is:\n""" + user_query
+def generate_final_prompt(query,cypher_query,col_name,_values):
+    """Return the prompt asking the LLM to phrase query results as an answer.
+
+    Args:
+        query: The user's original question (inserted after "Question:").
+        cypher_query: The Cypher statement that was run (inserted after "Query:").
+        col_name: Label shown in front of the values in the "Information" section.
+        _values: The values to present; formatted into the prompt via f-string
+            interpolation.
+    """
+    return f"""You are an assistant that helps to form nice and human understandable answers.
+The information part contains the provided information that you must use to construct an answer.
+The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.
+Make the answer sound as a response to the question. Do not mention that you based the result on the given information.
+Here is an example:
+Question: Which managers own Neo4j stocks?
+Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]
+Helpful Answer: CTL LLC, JANE STREET GROUP LLC owns Neo4j stocks.
+Follow this example when generating answers.
+If the provided information is empty, say that you don't know the answer.
+Query:\n{cypher_query}
+Information:
+[{col_name}: {_values}]
+Question: {query}
+Helpful Answer:
+"""
+def generate_kg_response(query):
+    prompt = generate_kuzu_prompt(query)
+    cypher_query_response = llm_openrouter.invoke(input=prompt)
+    cypher_query = cypher_query_response.content
+    response = conn.execute(
+        f"""
+        {cypher_query}
+        """
+    )
+    df = response.get_as_pl()
+    col_name = df.columns[0]
+    _values = df[col_name].to_list()
+    final_prompt = generate_final_prompt(query,cypher_query,col_name,_values)
+    final_response = llm_openrouter.invoke(input=final_prompt)
+    final_response = final_response.content
+    return final_response, cypher_query
+query = solara.reactive("How many songs does the black album have?")
+@solara.component
+def Page():
+    with solara.Column(margin=10):
+        solara.Markdown("# Metallica Song Finder graph-only")
+        solara.InputText("Enter some query:", query, continuous_update=False)
+        if query.value != "":
+            response, cypher_query = generate_kg_response(query.value)
+            solara.Markdown("## Answer:")
+            solara.Markdown(response)
+            solara.Markdown("## Cypher query:")
+            solara.Markdown(cypher_query)

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+solara==1.39.0
+polars==1.7.1
+lancedb==0.13.0
+sentence-transformers==3.1.1
+langchain-community==0.3.0
+openai==1.47.1
+kuzu==0.6.0

test_kuzudb/.lock ADDED Viewed

File without changes

test_kuzudb/.shadow ADDED Viewed

File without changes

test_kuzudb/.wal ADDED Viewed

Binary file (191 kB). View file

test_kuzudb/catalog.kz ADDED Viewed

Binary file (76 Bytes). View file

test_kuzudb/data.kz ADDED Viewed

File without changes

test_kuzudb/metadata.kz ADDED Viewed

File without changes

test_kuzudb/n-0.hindex ADDED Viewed

File without changes

test_kuzudb/n-0.hindex.ovf ADDED Viewed

File without changes

test_kuzudb/n-1.hindex ADDED Viewed

File without changes

test_kuzudb/n-1.hindex.ovf ADDED Viewed

File without changes

test_kuzudb/n-2.hindex ADDED Viewed

File without changes

test_lancedb/Songs.lance/_transactions/0-792a2d61-740e-4405-ac65-9bfd39f8045e.txn ADDED Viewed

	@@ -0,0 +1 @@


1	+ $792a2d61-740e-4405-ac65-9bfd39f8045e��Song ��string8Lyrics ��string8Album ��string8Artist ��string8text ��string82vector ��fixed_size_list:float:3848

test_lancedb/Songs.lance/_transactions/1-11446c4d-5b76-4653-b0b5-0e515412fdcb.txn ADDED Viewed

Binary file (104 Bytes). View file

test_lancedb/Songs.lance/_versions/1.manifest ADDED Viewed

Binary file (616 Bytes). View file

test_lancedb/Songs.lance/_versions/2.manifest ADDED Viewed

Binary file (676 Bytes). View file

test_lancedb/Songs.lance/data/535b0fe7-3004-4639-a5c4-303489599295.lance ADDED Viewed

Binary file (521 kB). View file