Spaces:
Running
Running
Commit
·
a3cec77
1
Parent(s):
07b634d
Add app
Browse files- Dockerfile +25 -0
- app.py +234 -0
- requirements.txt +7 -0
- test_kuzudb/.lock +0 -0
- test_kuzudb/.shadow +0 -0
- test_kuzudb/.wal +0 -0
- test_kuzudb/catalog.kz +0 -0
- test_kuzudb/data.kz +0 -0
- test_kuzudb/metadata.kz +0 -0
- test_kuzudb/n-0.hindex +0 -0
- test_kuzudb/n-0.hindex.ovf +0 -0
- test_kuzudb/n-1.hindex +0 -0
- test_kuzudb/n-1.hindex.ovf +0 -0
- test_kuzudb/n-2.hindex +0 -0
- test_lancedb/Songs.lance/_transactions/0-792a2d61-740e-4405-ac65-9bfd39f8045e.txn +1 -0
- test_lancedb/Songs.lance/_transactions/1-11446c4d-5b76-4653-b0b5-0e515412fdcb.txn +0 -0
- test_lancedb/Songs.lance/_versions/1.manifest +0 -0
- test_lancedb/Songs.lance/_versions/2.manifest +0 -0
- test_lancedb/Songs.lance/data/535b0fe7-3004-4639-a5c4-303489599295.lance +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11
|
| 2 |
+
|
| 3 |
+
# Set up a new user named "user" with user ID 1000 for permission
|
| 4 |
+
RUN useradd -m -u 1000 user
|
| 5 |
+
# Switch to the "user" user
|
| 6 |
+
USER user
|
| 7 |
+
# Set home to the user's home directory
|
| 8 |
+
ENV HOME=/home/user \
|
| 9 |
+
PATH=/home/user/.local/bin:$PATH
|
| 10 |
+
|
| 11 |
+
# Upgrade pip
|
| 12 |
+
RUN pip install --no-cache-dir --upgrade pip
|
| 13 |
+
|
| 14 |
+
COPY --chown=user requirements.txt requirements.txt
|
| 15 |
+
|
| 16 |
+
# Install requirements
|
| 17 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 18 |
+
|
| 19 |
+
COPY --chown=user app.py app.py
|
| 20 |
+
|
| 21 |
+
COPY --chown=user test_lancedb/ test_lancedb/
|
| 22 |
+
|
| 23 |
+
COPY --chown=user test_kuzudb/ test_kuzudb/
|
| 24 |
+
|
| 25 |
+
ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
|
app.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# from dotenv import find_dotenv, load_dotenv
|
| 2 |
+
# _ = load_dotenv(find_dotenv())
|
| 3 |
+
|
| 4 |
+
import solara
|
| 5 |
+
|
| 6 |
+
import polars as pl
|
| 7 |
+
|
| 8 |
+
# Fetch the song catalogue (one row per song) as a polars DataFrame.
_SONGS_CSV_URL = (
    "https://drive.google.com/uc?export=download&id=1uD3h7xYxr9EoZ0Ggoh99JtQXa3AxtxyU"
)
df = pl.read_csv(_SONGS_CSV_URL)

import string

# Normalise the casing of album and song titles, and replace missing lyrics
# with the literal string "None" so later string concatenation never sees null.
_album_titles = pl.Series("Album", [string.capwords(title) for title in df["Album"]])
_song_titles = pl.Series("Song", [string.capwords(title) for title in df["Song"]])
df = (
    df.with_columns(_album_titles)
    .with_columns(_song_titles)
    .with_columns(pl.col("Lyrics").fill_null("None"))
)

# One document per row: a "# <Album>: <Song>" heading, a blank line, then the lyrics.
_heading = pl.lit("# ") + pl.col("Album") + pl.lit(": ") + pl.col("Song")
df = df.with_columns(text=_heading + pl.lit("\n\n") + pl.col("Lyrics"))
|
| 28 |
+
|
| 29 |
+
import shutil
import lancedb

# Try to clear any existing on-disk LanceDB store before connecting, so the
# table below is created fresh each time this module is loaded.
# NOTE(review): the Dockerfile copies a prebuilt test_lancedb/ directory into
# the image and this line attempts to delete it at startup — confirm that the
# rebuild is intentional.
shutil.rmtree("test_lancedb", ignore_errors=True)
db = lancedb.connect("test_lancedb")

from lancedb.embeddings import get_registry

# Embedding function referenced by the Songs schema: the "TaylorAI/gte-tiny"
# model obtained through the "sentence-transformers" registry entry, on CPU.
embeddings = (
    get_registry()
    .get("sentence-transformers")
    .create(name="TaylorAI/gte-tiny", device="cpu")
)
|
| 42 |
+
|
| 43 |
+
from lancedb.pydantic import LanceModel, Vector
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class Songs(LanceModel):
    """Row schema for the "Songs" LanceDB table.

    ``text`` is declared as the embedding function's source field and
    ``vector`` as its vector field, sized by ``embeddings.ndims()``.
    """

    # Plain string columns; the names match columns of the module-level DataFrame.
    Song: str
    Lyrics: str
    Album: str
    Artist: str
    # Source text for the embedding function.
    text: str = embeddings.SourceField()
    # Embedding vector; its length comes from the embedding function itself.
    vector: Vector(embeddings.ndims()) = embeddings.VectorField()
|
| 53 |
+
|
| 54 |
+
# Create the "Songs" table from the schema above, then load every DataFrame row.
# presumably LanceDB fills `vector` from `text` during add() via the schema's
# embedding function — TODO confirm against the LanceDB docs.
table = db.create_table("Songs", schema=Songs)
table.add(data=df)
|
| 56 |
+
|
| 57 |
+
import os
|
| 58 |
+
from typing import Optional
|
| 59 |
+
|
| 60 |
+
from langchain_community.chat_models import ChatOpenAI
|
| 61 |
+
|
| 62 |
+
class ChatOpenRouter(ChatOpenAI):
    """``ChatOpenAI`` client pre-configured for the OpenRouter endpoint.

    The constructor forwards everything to ``ChatOpenAI`` and only supplies
    OpenRouter-specific defaults for the base URL and the API key.
    """

    openai_api_base: str
    openai_api_key: str
    model_name: str

    def __init__(
        self,
        model_name: str,
        openai_api_key: Optional[str] = None,
        openai_api_base: str = "https://openrouter.ai/api/v1",
        **kwargs,
    ):
        """Create the client.

        Args:
            model_name: Model identifier passed through to ``ChatOpenAI``.
            openai_api_key: API key to use. When omitted, the value of the
                ``OPENROUTER_API_KEY`` environment variable is used.
            openai_api_base: Base URL of the API; defaults to OpenRouter.
            **kwargs: Forwarded unchanged to ``ChatOpenAI.__init__``.
        """
        # Only fall back to the environment when no key was passed; previously
        # an explicitly supplied key was silently overwritten by the env lookup.
        if openai_api_key is None:
            openai_api_key = os.getenv("OPENROUTER_API_KEY")
        super().__init__(
            openai_api_base=openai_api_base,
            openai_api_key=openai_api_key,
            model_name=model_name,
            **kwargs,
        )
|
| 81 |
+
|
| 82 |
+
# Shared chat model used by generate_response and generate_kg_response.
llm_openrouter = ChatOpenRouter(model_name="meta-llama/llama-3.1-405b-instruct", temperature=0.1)
|
| 83 |
+
|
| 84 |
+
def get_relevant_texts(query, table=table, limit=5):
    """Return the ``text`` of the best matches for *query* as one context string.

    Args:
        query: Search input handed to ``table.search``.
        table: Table to search; defaults to the module-level ``table``.
        limit: Maximum number of matches to request (5, the previously
            hard-coded value, by default).

    Returns:
        Every returned ``text`` value followed by a ``---`` separator
        paragraph, the pieces joined with a single space. An empty string
        when the search returns no rows.
    """
    results = table.search(query).limit(limit).to_polars()
    # Walk the rows that actually came back instead of indexing 0..4, which
    # raised whenever the search produced fewer than five matches.
    return " ".join(text + "\n\n---\n\n" for text in results["text"])
|
| 91 |
+
|
| 92 |
+
def generate_prompt(query, table=table):
    """Assemble the retrieval prompt: instruction, retrieved context, question.

    Args:
        query: The user's question; also used as the search input.
        table: Table handed on to ``get_relevant_texts``.

    Returns:
        The full prompt string.
    """
    context = get_relevant_texts(query, table)
    pieces = (
        "Answer the question based only on the following context:\n\n",
        context,
        "\n\nQuestion: ",
        query,
    )
    return "".join(pieces)
|
| 99 |
+
|
| 100 |
+
def generate_response(query, table=table):
    """Answer *query* with the chat model, grounded on texts retrieved from *table*.

    Returns:
        The ``content`` attribute of the model's reply.
    """
    llm_reply = llm_openrouter.invoke(input=generate_prompt(query, table))
    return llm_reply.content
|
| 104 |
+
|
| 105 |
+
import kuzu
|
| 106 |
+
|
| 107 |
+
# Rebuild the Kùzu graph database on every start.
# Note: from here on `db` refers to the Kùzu database, no longer to the
# LanceDB connection created earlier in this module.
shutil.rmtree("test_kuzudb", ignore_errors=True)
db = kuzu.Database("test_kuzudb")
conn = kuzu.Connection(db)

# Create schema
conn.execute("CREATE NODE TABLE ARTIST(name STRING, PRIMARY KEY (name))")
conn.execute("CREATE NODE TABLE ALBUM(name STRING, PRIMARY KEY (name))")
conn.execute("CREATE NODE TABLE SONG(ID SERIAL, name STRING, lyrics STRING, PRIMARY KEY(ID))")
conn.execute("CREATE REL TABLE IN_ALBUM(FROM SONG TO ALBUM)")
conn.execute("CREATE REL TABLE FROM_ARTIST(FROM ALBUM TO ARTIST)")

# All values below are passed as query parameters rather than interpolated
# into the Cypher text. A quote or backslash inside a name or lyric can then
# neither break the statement nor alter its meaning, and the lyrics are stored
# verbatim (the former '"' -> "'" rewrite was only an escaping workaround).

# Insert nodes
for artist in df["Artist"].unique():
    conn.execute("CREATE (artist:ARTIST {name: $name})", {"name": artist})

for album in df["Album"].unique():
    conn.execute("CREATE (album:ALBUM {name: $name})", {"name": album})

# The SONG.lyrics property holds the combined `text` column (heading + lyrics).
for song, lyrics in df.select(["Song", "text"]).unique().rows():
    conn.execute(
        "CREATE (song:SONG {name: $name, lyrics: $lyrics})",
        {"name": song, "lyrics": lyrics},
    )

# Insert edges
# Songs are matched on name AND lyrics because SONG has a serial key and the
# same title may occur more than once.
for song, album, lyrics in df.select(["Song", "Album", "text"]).rows():
    conn.execute(
        """
        MATCH (song:SONG), (album:ALBUM)
        WHERE song.name = $song AND song.lyrics = $lyrics AND album.name = $album
        CREATE (song)-[:IN_ALBUM]->(album)
        """,
        {"song": song, "lyrics": lyrics, "album": album},
    )

for album, artist in df.select(["Album", "Artist"]).unique().rows():
    conn.execute(
        """
        MATCH (album:ALBUM), (artist:ARTIST)
        WHERE album.name = $album AND artist.name = $artist
        CREATE (album)-[:FROM_ARTIST]->(artist)
        """,
        {"album": album, "artist": artist},
    )
|
| 148 |
+
|
| 149 |
+
# Sample query: names of the songs linked to the album 'The Black Album'.
# NOTE(review): neither `response` nor `df_response` is read again in the
# visible source — confirm whether this is leftover exploration code.
response = conn.execute(
    """
MATCH (a:ALBUM {name: 'The Black Album'})<-[:IN_ALBUM]-(s:SONG) RETURN s.name
"""
)

df_response = response.get_as_pl()

from langchain_community.graphs import KuzuGraph

# LangChain wrapper around the Kùzu database; generate_kuzu_prompt reads its
# `get_schema` text to describe the graph to the LLM.
graph = KuzuGraph(db)
|
| 160 |
+
|
| 161 |
+
def generate_kuzu_prompt(user_query):
    """Build the prompt that asks the LLM to translate a question into Kùzu Cypher.

    The prompt consists of fixed instructions, the graph schema text read from
    the module-level ``graph``, one worked example, and finally *user_query*.

    Args:
        user_query: The natural-language question to translate.

    Returns:
        The complete prompt string.
    """
    instructions = """Task: Generate Kùzu Cypher statement to query a graph database.

Instructions:
Generate the Kùzu dialect of Cypher with the following rules in mind:
1. Do not omit the relationship pattern. Always use `()-[]->()` instead of `()->()`.
2. Do not include triple backticks ``` in your response. Return only Cypher.
3. Do not return any notes or comments in your response.


Use only the provided relationship types and properties in the schema.
Do not use any other relationship types or properties that are not provided.

Schema:\n"""
    example_and_rules = """\nExample:
The question is:\n"Which songs does the load album have?"
MATCH (a:ALBUM {name: 'Load'})<-[:IN_ALBUM]-(s:SONG) RETURN s.name

Note: Do not include any explanations or apologies in your responses.
Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
Do not include any text except the generated Cypher statement.

The question is:\n"""
    return "".join((instructions, graph.get_schema, example_and_rules, user_query))
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def generate_final_prompt(query, cypher_query, col_name, _values):
    """Build the prompt that turns raw query results into a readable answer.

    Args:
        query: The user's original question.
        cypher_query: The Cypher statement that produced the results.
        col_name: Name of the result column being reported.
        _values: Values of that column; rendered with their default formatting.

    Returns:
        The prompt text, ending with an open "Helpful Answer:" line.
    """
    prompt_lines = [
        "You are an assistant that helps to form nice and human understandable answers.",
        "The information part contains the provided information that you must use to construct an answer.",
        "The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it.",
        "Make the answer sound as a response to the question. Do not mention that you based the result on the given information.",
        "Here is an example:",
        "",
        "Question: Which managers own Neo4j stocks?",
        "Context:[manager:CTL LLC, manager:JANE STREET GROUP LLC]",
        "Helpful Answer: CTL LLC, JANE STREET GROUP LLC owns Neo4j stocks.",
        "",
        "Follow this example when generating answers.",
        "If the provided information is empty, say that you don't know the answer.",
        "Query:",
        f"{cypher_query}",
        "Information:",
        f"[{col_name}: {_values}]",
        "",
        f"Question: {query}",
        "Helpful Answer:",
        "",  # keeps the trailing newline after "Helpful Answer:"
    ]
    return "\n".join(prompt_lines)
|
| 205 |
+
|
| 206 |
+
def generate_kg_response(query):
    """Answer *query* from the knowledge graph using LLM-generated Cypher.

    Steps: ask the chat model for a Cypher statement, run it on the module
    level Kùzu connection, then ask the model to phrase the first result
    column as an answer.

    Args:
        query: The user's natural-language question.

    Returns:
        A ``(final_response, cypher_query)`` tuple: the phrased answer and the
        Cypher statement that was executed.
    """
    prompt = generate_kuzu_prompt(query)
    cypher_query = llm_openrouter.invoke(input=prompt).content

    # NOTE(review): the statement is model-generated and executed unchecked on
    # the live connection; consider restricting it to read-only queries.
    result = conn.execute(f"\n{cypher_query}\n")
    # Named result_frame so the module-level `df` is not shadowed.
    result_frame = result.get_as_pl()

    if result_frame.columns:
        # Only the first returned column is reported to the model.
        col_name = result_frame.columns[0]
        _values = result_frame[col_name].to_list()
    else:
        # A statement without returned columns used to raise IndexError here;
        # report empty information so the final prompt's "don't know" rule applies.
        col_name, _values = "result", []

    final_prompt = generate_final_prompt(query, cypher_query, col_name, _values)
    final_response = llm_openrouter.invoke(input=final_prompt).content
    return final_response, cypher_query
|
| 222 |
+
|
| 223 |
+
# Reactive holder for the question typed by the user, with an initial example.
query = solara.reactive("How many songs does the black album have?")


@solara.component
def Page():
    """Render the title, the query input and, for a non-empty query, the answer.

    For a non-empty query the knowledge-graph pipeline is run and both the
    answer and the generated Cypher statement are shown as Markdown.
    """
    with solara.Column(margin=10):
        solara.Markdown("# Metallica Song Finder graph-only")
        solara.InputText("Enter some query:", query, continuous_update=False)

        user_text = query.value
        if user_text == "":
            # Nothing to answer yet; show only the title and the input box.
            return

        answer, generated_cypher = generate_kg_response(user_text)
        solara.Markdown("## Answer:")
        solara.Markdown(answer)
        solara.Markdown("## Cypher query:")
        solara.Markdown(generated_cypher)
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
solara==1.39.0
|
| 2 |
+
polars==1.7.1
|
| 3 |
+
lancedb==0.13.0
|
| 4 |
+
sentence-transformers==3.1.1
|
| 5 |
+
langchain-community==0.3.0
|
| 6 |
+
openai==1.47.1
|
| 7 |
+
kuzu==0.6.0
|
test_kuzudb/.lock
ADDED
|
File without changes
|
test_kuzudb/.shadow
ADDED
|
File without changes
|
test_kuzudb/.wal
ADDED
|
Binary file (191 kB). View file
|
|
|
test_kuzudb/catalog.kz
ADDED
|
Binary file (76 Bytes). View file
|
|
|
test_kuzudb/data.kz
ADDED
|
File without changes
|
test_kuzudb/metadata.kz
ADDED
|
File without changes
|
test_kuzudb/n-0.hindex
ADDED
|
File without changes
|
test_kuzudb/n-0.hindex.ovf
ADDED
|
File without changes
|
test_kuzudb/n-1.hindex
ADDED
|
File without changes
|
test_kuzudb/n-1.hindex.ovf
ADDED
|
File without changes
|
test_kuzudb/n-2.hindex
ADDED
|
File without changes
|
test_lancedb/Songs.lance/_transactions/0-792a2d61-740e-4405-ac65-9bfd39f8045e.txn
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
$792a2d61-740e-4405-ac65-9bfd39f8045e��Song ���������*string8Lyrics ���������*string8Album ���������*string8Artist ���������*string8text ���������*string82vector ���������*fixed_size_list:float:3848
|
test_lancedb/Songs.lance/_transactions/1-11446c4d-5b76-4653-b0b5-0e515412fdcb.txn
ADDED
|
Binary file (104 Bytes). View file
|
|
|
test_lancedb/Songs.lance/_versions/1.manifest
ADDED
|
Binary file (616 Bytes). View file
|
|
|
test_lancedb/Songs.lance/_versions/2.manifest
ADDED
|
Binary file (676 Bytes). View file
|
|
|
test_lancedb/Songs.lance/data/535b0fe7-3004-4639-a5c4-303489599295.lance
ADDED
|
Binary file (521 kB). View file
|
|
|