"""Graph-RAG query script: retrieve context from an embedding-annotated
knowledge graph and answer questions with an OpenAI chat model.

Expects a `graph.gml` file whose nodes carry 'embedding', 'text', and
'source' attributes, and an OPENAI_API_KEY in the environment / .env.
"""

import os

import networkx as nx
import numpy as np
from dotenv import load_dotenv
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity

# Initialize OpenAI client (OPENAI_API_KEY comes from .env or the environment).
load_dotenv(override=True)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Load graph from GML. Node attributes used below: 'embedding', 'text', 'source'.
G = nx.read_gml("graph.gml")
enodes = list(G.nodes)
# Stack all node embeddings once so each query is a single matrix similarity op.
embeddings = np.array([G.nodes[n]['embedding'] for n in enodes])


def query_graph(question, top_k=5):
    """Answer *question* using the top_k most similar graph nodes as context.

    Args:
        question: Natural-language question to answer.
        top_k: Number of highest-similarity nodes to retrieve as context.

    Returns:
        (answer, sources) tuple: the model's answer string, and a sorted,
        de-duplicated list of the retrieved nodes' 'source' attributes.
    """
    # Embed the question. NOTE(review): assumes node embeddings were produced
    # with the same model ("text-embedding-3-large") — verify upstream.
    emb_resp = client.embeddings.create(
        model="text-embedding-3-large",
        input=question,
    )
    q_vec = emb_resp.data[0].embedding

    # Rank every node by cosine similarity to the question; keep top_k indices.
    sims = cosine_similarity([q_vec], embeddings)[0]
    idxs = sims.argsort()[::-1][:top_k]

    # Gather context passages and their sources. sorted() makes the source
    # list deterministic — iterating a bare set gives arbitrary order.
    context = [G.nodes[enodes[i]]['text'] for i in idxs]
    sources = sorted({G.nodes[enodes[i]]['source'] for i in idxs})

    # Generate answer grounded in the retrieved context.
    prompt = (
        "Use the following context to answer the question:\n\n"
        + "\n\n---\n\n".join(context)
        + f"\n\nQuestion: {question}\nAnswer:")
    chat_resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant for XR safety training."},
            {"role": "user", "content": prompt},
        ],
    )
    answer = chat_resp.choices[0].message.content
    return answer, sources


if __name__ == "__main__":
    # Smoke-test queries — guarded so importing this module for query_graph()
    # does not fire off API calls.
    test_questions = [
        "What are general machine guarding requirements?",
        "Explain the key steps in lockout/tagout procedures.",
    ]
    for q in test_questions:
        ans, srcs = query_graph(q)
        print(f"Q: {q}\nA: {ans}\nSources: {srcs}\n")