Spaces:

Bellok
/

warbler-cda

Running on Zero

jmeyer1980 committed 14 days ago

Commit

2133289

1 Parent(s): 9380cea

Fix syntax errors in app.py preventing Gradio app startup on HF Spaces

- Fix unterminated f-string literals that were split across multiple lines
- Consolidate f-strings onto single lines to resolve SyntaxError
- This restores the interactive UI for the Warbler CDA demo

Files changed (13) hide show

app.py +31 -53
convert_to_jsonl.py +14 -12
test_embedding_integration.py +16 -13
warbler_cda/api/cli.py +48 -73
warbler_cda/api/service.py +49 -56
warbler_cda/conflict_detector.py +39 -64
warbler_cda/evaporation.py +39 -76
warbler_cda/pack_loader.py +25 -36
warbler_cda/retrieval_api.py +66 -120
warbler_cda/stat7_entity.py +11 -28
warbler_cda/stat7_experiments.py +22 -30
warbler_cda/stat7_rag_bridge.py +19 -33
warbler_cda/utils/load_warbler_packs.py +12 -24

app.py CHANGED Viewed

@@ -66,12 +66,9 @@ class PackManager:
         self.cache_dir = Path.home() / ".warbler_cda" / "cache"
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.metadata_file = self.cache_dir / "pack_metadata.json"
-        self.skip_cache = os.getenv(
-            "WARBLER_SKIP_PACK_CACHE", "").lower() == "true"
-        self.sample_only = os.getenv(
-            "WARBLER_SAMPLE_ONLY", "").lower() == "true"
-        self.ingest_packs = os.getenv(
-            "WARBLER_INGEST_PACKS", "true").lower() == "true"
     def _load_metadata(self) -> Optional[Dict]:
         if not self.metadata_file.exists():
@@ -146,8 +143,7 @@ api = None
 if WARBLER_AVAILABLE:
     try:
         embedding_provider = EmbeddingProviderFactory.get_default_provider()
-        semantic_anchors = SemanticAnchorGraph(
-            embedding_provider=embedding_provider)
         stat7_bridge = STAT7RAGBridge()
         api = RetrievalAPI(
@@ -176,31 +172,24 @@ if WARBLER_AVAILABLE:
             pack_loader = PackLoader()
             pack_docs = pack_loader.discover_documents()
-            if pack_docs and pack_manager.should_ingest_packs(
-                    api, len(pack_docs)):
-                print(
-                    f"[INFO] Ingesting {len(pack_docs)} documents from Warbler packs...")
                 for doc in pack_docs:
-                    success = api.add_document(
-                        doc["id"], doc["content"], doc["metadata"])
                     if not success:
                         print(f"[WARN] Failed to add document {doc['id']}")
                 packs_loaded = len(pack_docs)
                 pack_manager.mark_packs_ingested(1, packs_loaded)
-                print(
-                    f"[OK] Loaded {packs_loaded} documents from Warbler packs")
             elif pack_docs:
                 packs_loaded = len(pack_docs)
-                print(
-                    f"[INFO] Using cached pack data ({packs_loaded} documents)")
             else:
-                print(
-                    "[INFO] No Warbler packs found. Using sample documents instead.")
                 for doc in SAMPLE_DOCS:
-                    api.add_document(
-                        doc["id"], doc["content"], doc["metadata"])
                 packs_loaded = len(SAMPLE_DOCS)
                 print(f"[OK] Loaded {packs_loaded} sample documents")
@@ -252,14 +241,14 @@ def query_warbler(
             weight_stat7=weight_stat7,
         )
-        print(
-            f"DEBUG: Query created - ID: {query.query_id}, Text: {query_text}")
         # Execute query
         assembly = api.retrieve_context(query)
         print(
-            f"DEBUG: Retrieved {len(assembly.results)} results, Assembly ID: {assembly.assembly_id}")
         elapsed_ms = (time.time() - start_time) * 1000
@@ -323,11 +312,7 @@ def query_warbler(
         return f"Error: {str(e)}", json.dumps({"error": str(e)}, indent=2)
-def add_document(
-        doc_id: str,
-        content: str,
-        realm_type: str,
-        realm_label: str) -> str:
     """Add a new document to the system"""
     if not WARBLER_AVAILABLE or not api:
         return "Warbler CDA not available."
@@ -377,8 +362,7 @@ def get_system_stats() -> str:
             metrics['retrieval_metrics']['hybrid_queries']}\n\n"
         stats += "## Quality Distribution\n\n"
-        for quality, count in metrics["retrieval_metrics"]["quality_distribution"].items(
-        ):
             stats += f"- {quality.capitalize()}: {count}\n"
         return stats
@@ -413,19 +397,17 @@ with gr.Blocks(title="Warbler CDA - RAG System Demo", theme=gr.themes.Soft()) as
                 with gr.Row():
                     max_results = gr.Slider(
-                        minimum=1, maximum=10, value=5, step=1, label="Max Results")
-                    use_hybrid = gr.Checkbox(
-                        label="Enable STAT7 Hybrid Scoring", value=True)
                 with gr.Row():
                     weight_semantic = gr.Slider(
-                        minimum=0.0, maximum=1.0, value=0.6, step=0.1, label="Semantic Weight")
                     weight_stat7 = gr.Slider(
-                        minimum=0.0,
-                        maximum=1.0,
-                        value=0.4,
-                        step=0.1,
-                        label="STAT7 Weight")
                 query_btn = gr.Button("Search", variant="primary")
@@ -449,38 +431,34 @@ with gr.Blocks(title="Warbler CDA - RAG System Demo", theme=gr.themes.Soft()) as
         query_btn.click(
             fn=query_warbler,
-            inputs=[query_input, max_results, use_hybrid,
-                    weight_semantic, weight_stat7],
             outputs=[results_output, metrics_output],
         )
     with gr.Tab("Add Document"):
         with gr.Row():
             with gr.Column():
-                doc_id_input = gr.Textbox(
-                    label="Document ID", placeholder="unique_doc_id")
                 content_input = gr.Textbox(
-                    label="Content",
-                    placeholder="Enter document content...",
-                    lines=5)
                 with gr.Row():
                     realm_type_input = gr.Dropdown(
-                        choices=["wisdom", "technical",
-                                 "narrative", "pattern", "data"],
                         value="wisdom",
                         label="Realm Type",
                     )
                     realm_label_input = gr.Textbox(
-                        label="Realm Label", placeholder="e.g., philosophy, documentation")
                 add_btn = gr.Button("Add Document", variant="primary")
                 add_output = gr.Textbox(label="Status", lines=3)
                 add_btn.click(
                     fn=add_document,
-                    inputs=[doc_id_input, content_input,
-                            realm_type_input, realm_label_input],
                     outputs=add_output,
                 )

         self.cache_dir = Path.home() / ".warbler_cda" / "cache"
         self.cache_dir.mkdir(parents=True, exist_ok=True)
         self.metadata_file = self.cache_dir / "pack_metadata.json"
+        self.skip_cache = os.getenv("WARBLER_SKIP_PACK_CACHE", "").lower() == "true"
+        self.sample_only = os.getenv("WARBLER_SAMPLE_ONLY", "").lower() == "true"
+        self.ingest_packs = os.getenv("WARBLER_INGEST_PACKS", "true").lower() == "true"
     def _load_metadata(self) -> Optional[Dict]:
         if not self.metadata_file.exists():
 if WARBLER_AVAILABLE:
     try:
         embedding_provider = EmbeddingProviderFactory.get_default_provider()
+        semantic_anchors = SemanticAnchorGraph(embedding_provider=embedding_provider)
         stat7_bridge = STAT7RAGBridge()
         api = RetrievalAPI(
             pack_loader = PackLoader()
             pack_docs = pack_loader.discover_documents()
+            if pack_docs and pack_manager.should_ingest_packs(api, len(pack_docs)):
+                print(f"[INFO] Ingesting {len(pack_docs)} documents from Warbler packs...")
                 for doc in pack_docs:
+                    success = api.add_document(doc["id"], doc["content"], doc["metadata"])
                     if not success:
                         print(f"[WARN] Failed to add document {doc['id']}")
                 packs_loaded = len(pack_docs)
                 pack_manager.mark_packs_ingested(1, packs_loaded)
+                print(f"[OK] Loaded {packs_loaded} documents from Warbler packs")
             elif pack_docs:
                 packs_loaded = len(pack_docs)
+                print(f"[INFO] Using cached pack data ({packs_loaded} documents)")
             else:
+                print("[INFO] No Warbler packs found. Using sample documents instead.")
                 for doc in SAMPLE_DOCS:
+                    api.add_document(doc["id"], doc["content"], doc["metadata"])
                 packs_loaded = len(SAMPLE_DOCS)
                 print(f"[OK] Loaded {packs_loaded} sample documents")
             weight_stat7=weight_stat7,
         )
+        print(f"DEBUG: Query created - ID: {query.query_id}, Text: {query_text}")
         # Execute query
         assembly = api.retrieve_context(query)
         print(
+            f"DEBUG: Retrieved {len(assembly.results)} results, Assembly ID: {assembly.assembly_id}"
+        )
         elapsed_ms = (time.time() - start_time) * 1000
         return f"Error: {str(e)}", json.dumps({"error": str(e)}, indent=2)
+def add_document(doc_id: str, content: str, realm_type: str, realm_label: str) -> str:
     """Add a new document to the system"""
     if not WARBLER_AVAILABLE or not api:
         return "Warbler CDA not available."
             metrics['retrieval_metrics']['hybrid_queries']}\n\n"
         stats += "## Quality Distribution\n\n"
+        for quality, count in metrics["retrieval_metrics"]["quality_distribution"].items():
             stats += f"- {quality.capitalize()}: {count}\n"
         return stats
                 with gr.Row():
                     max_results = gr.Slider(
+                        minimum=1, maximum=10, value=5, step=1, label="Max Results"
+                    )
+                    use_hybrid = gr.Checkbox(label="Enable STAT7 Hybrid Scoring", value=True)
                 with gr.Row():
                     weight_semantic = gr.Slider(
+                        minimum=0.0, maximum=1.0, value=0.6, step=0.1, label="Semantic Weight"
+                    )
                     weight_stat7 = gr.Slider(
+                        minimum=0.0, maximum=1.0, value=0.4, step=0.1, label="STAT7 Weight"
+                    )
                 query_btn = gr.Button("Search", variant="primary")
         query_btn.click(
             fn=query_warbler,
+            inputs=[query_input, max_results, use_hybrid, weight_semantic, weight_stat7],
             outputs=[results_output, metrics_output],
         )
     with gr.Tab("Add Document"):
         with gr.Row():
             with gr.Column():
+                doc_id_input = gr.Textbox(label="Document ID", placeholder="unique_doc_id")
                 content_input = gr.Textbox(
+                    label="Content", placeholder="Enter document content...", lines=5
+                )
                 with gr.Row():
                     realm_type_input = gr.Dropdown(
+                        choices=["wisdom", "technical", "narrative", "pattern", "data"],
                         value="wisdom",
                         label="Realm Type",
                     )
                     realm_label_input = gr.Textbox(
+                        label="Realm Label", placeholder="e.g., philosophy, documentation"
+                    )
                 add_btn = gr.Button("Add Document", variant="primary")
                 add_output = gr.Textbox(label="Status", lines=3)
                 add_btn.click(
                     fn=add_document,
+                    inputs=[doc_id_input, content_input, realm_type_input, realm_label_input],
                     outputs=add_output,
                 )

convert_to_jsonl.py CHANGED Viewed

@@ -1,31 +1,33 @@
 import json
 import os
 def convert_templates_to_jsonl(pack_dir):
     """Convert templates.json to pack_name.jsonl for a given pack directory."""
     pack_name = os.path.basename(pack_dir)
-    templates_path = os.path.join(pack_dir, 'pack', 'templates.json')
-    jsonl_path = os.path.join(pack_dir, f'{pack_name}.jsonl')
     if not os.path.exists(templates_path):
         print(f"No templates.json found in {pack_dir}")
         return
-    with open(templates_path, 'r') as f:
         templates = json.load(f)
-    with open(jsonl_path, 'w') as f:
         for template in templates:
             json.dump(template, f)
-            f.write('\n')
     print(f"Converted {templates_path} to {jsonl_path}")
 # Convert the three default packs
 packs_to_convert = [
-    'packs/warbler-pack-core',
-    'packs/warbler-pack-faction-politics',
-    'packs/warbler-pack-wisdom-scrolls'
 ]
 for pack in packs_to_convert:

 import json
 import os
 def convert_templates_to_jsonl(pack_dir):
     """Convert templates.json to pack_name.jsonl for a given pack directory."""
     pack_name = os.path.basename(pack_dir)
+    templates_path = os.path.join(pack_dir, "pack", "templates.json")
+    jsonl_path = os.path.join(pack_dir, f"{pack_name}.jsonl")
     if not os.path.exists(templates_path):
         print(f"No templates.json found in {pack_dir}")
         return
+    with open(templates_path, "r") as f:
         templates = json.load(f)
+    with open(jsonl_path, "w") as f:
         for template in templates:
             json.dump(template, f)
+            f.write("\n")
     print(f"Converted {templates_path} to {jsonl_path}")
 # Convert the three default packs
 packs_to_convert = [
+    "packs/warbler-pack-core",
+    "packs/warbler-pack-faction-politics",
+    "packs/warbler-pack-wisdom-scrolls",
 ]
 for pack in packs_to_convert:

test_embedding_integration.py CHANGED Viewed

@@ -26,7 +26,9 @@ def test_embedding_provider_factory():
 def test_sentence_transformer_provider():
     """Test SentenceTransformer provider directly"""
     try:
-        from warbler_cda.embeddings.sentence_transformer_provider import SentenceTransformerEmbeddingProvider
         provider = SentenceTransformerEmbeddingProvider()
@@ -39,7 +41,7 @@ def test_sentence_transformer_provider():
         print(f"  - Embedding dimension: {len(embedding)}")
         print(f"  - Embedding shape: {provider.get_dimension()}")
-        if hasattr(provider, 'compute_stat7_from_embedding'):
             stat7_coords = provider.compute_stat7_from_embedding(embedding)
             print(f"  - STAT7 coordinates computed: {list(stat7_coords.keys())}")
@@ -47,6 +49,7 @@ def test_sentence_transformer_provider():
     except Exception as e:
         print(f"✗ Failed to create SentenceTransformer provider: {e}")
         import traceback
         traceback.print_exc()
         return False
@@ -59,20 +62,19 @@ def test_retrieval_api_with_embeddings():
         embedding_provider = EmbeddingProviderFactory.get_default_provider()
         api = RetrievalAPI(
-            embedding_provider=embedding_provider,
-            config={"enable_stat7_hybrid": True}
         )
         doc1 = {
             "id": "doc1",
             "content": "The quick brown fox jumps over the lazy dog.",
-            "metadata": {"type": "test"}
         }
         doc2 = {
-            "id": "doc2",
             "content": "Semantic embeddings enable efficient document retrieval.",
-            "metadata": {"type": "test"}
         }
         api.add_document(doc1["id"], doc1["content"], doc1["metadata"])
@@ -86,7 +88,7 @@ def test_retrieval_api_with_embeddings():
             mode=RetrievalMode.SEMANTIC_SIMILARITY,
             semantic_query="fast animal jumps",
             max_results=5,
-            confidence_threshold=0.3
         )
         assembly = api.retrieve_context(query)
@@ -98,6 +100,7 @@ def test_retrieval_api_with_embeddings():
     except Exception as e:
         print(f"✗ Failed RetrievalAPI test: {e}")
         import traceback
         traceback.print_exc()
         return False
@@ -105,7 +108,9 @@ def test_retrieval_api_with_embeddings():
 def test_embedding_cache():
     """Test embedding cache functionality"""
     try:
-        from warbler_cda.embeddings.sentence_transformer_provider import SentenceTransformerEmbeddingProvider
         provider = SentenceTransformerEmbeddingProvider()
@@ -141,12 +146,10 @@ def main():
     results.append(("Factory", test_embedding_provider_factory()))
     print("\n2. Testing SentenceTransformer Provider...")
-    results.append(("SentenceTransformer",
-                    test_sentence_transformer_provider()))
     print("\n3. Testing RetrievalAPI Integration...")
-    results.append(("RetrievalAPI",
-                    test_retrieval_api_with_embeddings()))
     print("\n4. Testing Embedding Cache...")
     results.append(("Cache", test_embedding_cache()))

 def test_sentence_transformer_provider():
     """Test SentenceTransformer provider directly"""
     try:
+        from warbler_cda.embeddings.sentence_transformer_provider import (
+            SentenceTransformerEmbeddingProvider,
+        )
         provider = SentenceTransformerEmbeddingProvider()
         print(f"  - Embedding dimension: {len(embedding)}")
         print(f"  - Embedding shape: {provider.get_dimension()}")
+        if hasattr(provider, "compute_stat7_from_embedding"):
             stat7_coords = provider.compute_stat7_from_embedding(embedding)
             print(f"  - STAT7 coordinates computed: {list(stat7_coords.keys())}")
     except Exception as e:
         print(f"✗ Failed to create SentenceTransformer provider: {e}")
         import traceback
         traceback.print_exc()
         return False
         embedding_provider = EmbeddingProviderFactory.get_default_provider()
         api = RetrievalAPI(
+            embedding_provider=embedding_provider, config={"enable_stat7_hybrid": True}
         )
         doc1 = {
             "id": "doc1",
             "content": "The quick brown fox jumps over the lazy dog.",
+            "metadata": {"type": "test"},
         }
         doc2 = {
+            "id": "doc2",
             "content": "Semantic embeddings enable efficient document retrieval.",
+            "metadata": {"type": "test"},
         }
         api.add_document(doc1["id"], doc1["content"], doc1["metadata"])
             mode=RetrievalMode.SEMANTIC_SIMILARITY,
             semantic_query="fast animal jumps",
             max_results=5,
+            confidence_threshold=0.3,
         )
         assembly = api.retrieve_context(query)
     except Exception as e:
         print(f"✗ Failed RetrievalAPI test: {e}")
         import traceback
         traceback.print_exc()
         return False
 def test_embedding_cache():
     """Test embedding cache functionality"""
     try:
+        from warbler_cda.embeddings.sentence_transformer_provider import (
+            SentenceTransformerEmbeddingProvider,
+        )
         provider = SentenceTransformerEmbeddingProvider()
     results.append(("Factory", test_embedding_provider_factory()))
     print("\n2. Testing SentenceTransformer Provider...")
+    results.append(("SentenceTransformer", test_sentence_transformer_provider()))
     print("\n3. Testing RetrievalAPI Integration...")
+    results.append(("RetrievalAPI", test_retrieval_api_with_embeddings()))
     print("\n4. Testing Embedding Cache...")
     results.append(("Cache", test_embedding_cache()))

warbler_cda/api/cli.py CHANGED Viewed

@@ -38,25 +38,20 @@ class APIClient:
     def single_query(self, query_data: Dict[str, Any]) -> Dict[str, Any]:
         """Execute single query"""
-        response = self.session.post(
-            f"{self.base_url}/query", json=query_data, timeout=30)
         response.raise_for_status()
         return response.json()
-    def bulk_query(self,
-                   queries: List[Dict[str,
-                                      Any]],
-                   concurrency: int = 5,
-                   include_narrative: bool = False) -> Dict[str,
-                                                            Any]:
         """Execute bulk concurrent queries"""
         payload = {
             "queries": queries,
             "concurrency_level": concurrency,
             "include_narrative_analysis": include_narrative,
         }
-        response = self.session.post(
-            f"{self.base_url}/bulk_query", json=payload, timeout=120)
         response.raise_for_status()
         return response.json()
@@ -68,16 +63,13 @@ class APIClient:
     def reset_metrics(self) -> Dict[str, Any]:
         """Reset service metrics"""
-        response = self.session.post(
-            f"{self.base_url}/metrics/reset", timeout=5)
         response.raise_for_status()
         return response.json()
 @click.group()
-@click.option("--api-url",
-              default="http://localhost:8000",
-              help="API service URL")
 @click.pass_context
 def cli(ctx, api_url):
     """EXP-09 CLI - STAT7 Retrieval API Command Line Interface"""
@@ -106,12 +98,14 @@ def health(ctx):
             f"  Concurrent Queries: {
                 health_data.get(
                     'concurrent_queries',
-                    0)}")
         click.echo(
             f"  Max Concurrent Observed: {
                 health_data.get(
                     'max_concurrent_observed',
-                    0)}")
         click.echo(f"  Hybrid Queries: {health_data.get('hybrid_queries', 0)}")
         click.echo(f"  Errors: {health_data.get('errors', 0)}")
     else:
@@ -127,10 +121,8 @@ def health(ctx):
 @click.option("--hybrid", is_flag=True, help="Enable STAT7 hybrid scoring")
 @click.option("--max-results", default=10, help="Maximum results to return")
 @click.option("--confidence", default=0.6, help="Confidence threshold")
-@click.option("--weight-semantic", default=0.6,
-              help="Semantic weight in hybrid mode")
-@click.option("--weight-stat7", default=0.4,
-              help="STAT7 weight in hybrid mode")
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.pass_context
 def query(
@@ -173,30 +165,26 @@ def query(
             click.echo(f"Query: {result.get('query_id')}")
             click.echo("=" * 60)
             click.echo(f"Results: {result.get('result_count')}")
-            click.echo(
-                f"Execution Time: {result.get('execution_time_ms'):.1f}ms")
             if result.get("semantic_similarity"):
                 click.echo(
                     f"Semantic Similarity: {
-                        result.get('semantic_similarity'):.3f}")
             if result.get("stat7_resonance"):
-                click.echo(
-                    f"STAT7 Resonance: {result.get('stat7_resonance'):.3f}")
             # Show narrative analysis
             if result.get("narrative_analysis"):
                 narr = result["narrative_analysis"]
                 click.echo(f"\nNarrative Analysis:")
-                click.echo(
-                    f"  Coherence Score: {narr.get('coherence_score', 0):.3f}")
-                click.echo(
-                    f"  Narrative Threads: {narr.get('narrative_threads', 0)}")
                 click.echo(f"  Analysis: {narr.get('analysis')}")
             # Show results
-            click.echo(
-                f"\nTop Results ({min(3, len(result.get('results', [])))}):")
             for i, res in enumerate(result.get("results", [])[:3], 1):
                 click.echo(
                     f"  {i}. Score: {
@@ -206,7 +194,8 @@ def query(
                         res.get(
                             'content',
                             'N/A')[
-                            :50]}...")
             click.echo()
@@ -217,10 +206,8 @@ def query(
 @cli.command()
 @click.option("--num-queries", default=5, help="Number of concurrent queries")
 @click.option("--concurrency", default=5, help="Concurrency level")
-@click.option("--semantic", multiple=True,
-              help="Semantic queries (can specify multiple)")
-@click.option("--hybrid", is_flag=True,
-              help="Enable STAT7 hybrid for all queries")
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.pass_context
 def bulk(ctx, num_queries, concurrency, semantic, hybrid, json_output):
@@ -256,11 +243,11 @@ def bulk(ctx, num_queries, concurrency, semantic, hybrid, json_output):
     try:
         click.echo(
             f"\nExecuting {
-                len(query_data)} concurrent queries (concurrency={concurrency})...")
         start_time = time.time()
-        result = client.bulk_query(
-            query_data, concurrency=concurrency, include_narrative=True)
         elapsed = time.time() - start_time
@@ -276,21 +263,21 @@ def bulk(ctx, num_queries, concurrency, semantic, hybrid, json_output):
             click.echo(f"Failed: {result.get('failed')} ✗")
             click.echo(
                 f"Total Execution Time: {
-                    result.get('execution_time_ms'):.1f}ms")
-            click.echo(
-                f"Avg Query Time: {result.get('avg_query_time_ms'):.1f}ms")
             # Narrative analysis for entire batch
             if result.get("batch_narrative_analysis"):
                 narr = result["batch_narrative_analysis"]
                 click.echo(f"\nBatch Narrative Analysis:")
-                click.echo(
-                    f"  Coherence Score: {narr.get('coherence_score', 0):.3f}")
                 click.echo(
                     f"  Total Narrative Threads: {
                         narr.get(
                             'narrative_threads',
-                            0)}")
                 click.echo(f"  Total Results: {narr.get('result_count', 0)}")
                 click.echo(f"  Analysis: {narr.get('analysis')}")
@@ -301,7 +288,8 @@ def bulk(ctx, num_queries, concurrency, semantic, hybrid, json_output):
                     f"  {
                         res.get('query_id')}: {
                         res.get('result_count')} results in {
-                        res.get('execution_time_ms'):.1f}ms")
             click.echo()
@@ -329,7 +317,8 @@ def metrics(ctx, json_output):
             click.echo(f"Total Queries: {metrics_data.get('total_queries')}")
             click.echo(
                 f"Concurrent Queries: {
-                    metrics_data.get('concurrent_queries')}")
             click.echo(f"Max Concurrent: {metrics_data.get('max_concurrent')}")
             click.echo(f"Hybrid Queries: {metrics_data.get('hybrid_queries')}")
             click.echo(f"Errors: {metrics_data.get('errors')}")
@@ -354,19 +343,12 @@ def reset_metrics(ctx):
 @cli.command()
-@click.option("--num-scenarios", default=3,
-              help="Number of concurrent test scenarios")
-@click.option("--queries-per-scenario", default=10,
-              help="Queries per scenario")
 @click.option("--use-hybrid", is_flag=True, help="Use STAT7 hybrid scoring")
 @click.option("--output-file", help="Save results to file")
 @click.pass_context
-def stress_test(
-        ctx,
-        num_scenarios,
-        queries_per_scenario,
-        use_hybrid,
-        output_file):
     """Run EXP-10 narrative preservation stress test"""
     client = ctx.obj["client"]
@@ -415,8 +397,7 @@ def stress_test(
             )
         try:
-            result = client.bulk_query(
-                queries, concurrency=10, include_narrative=True)
             scenario_result = {
                 "scenario": scenario,
@@ -424,35 +405,29 @@ def stress_test(
                 "successful": result.get("successful"),
                 "failed": result.get("failed"),
                 "avg_query_time_ms": result.get("avg_query_time_ms"),
-                "batch_coherence": result.get(
-                    "batch_narrative_analysis",
-                    {}).get(
-                    "coherence_score",
-                    0),
             }
             results_summary["scenarios"].append(scenario_result)
             results_summary["total_coherence_score"] += scenario_result["batch_coherence"]
-            click.echo(
-                f"  ✓ {result.get('successful')}/{len(queries)} queries successful")
-            click.echo(
-                f"  Coherence: {scenario_result['batch_coherence']:.3f}")
         except Exception as e:
             click.secho(f"  ✗ Scenario failed: {str(e)}", fg="red")
     # Summary
-    avg_coherence = results_summary["total_coherence_score"] / \
-        max(1, num_scenarios)
     results_summary["average_coherence"] = avg_coherence
     results_summary["end_time"] = datetime.now().isoformat()
     click.echo("\n" + "=" * 60)
     click.echo("Stress Test Summary")
     click.echo("=" * 60)
-    click.echo(
-        f"Scenarios Completed: {len(results_summary['scenarios'])}/{num_scenarios}")
     click.echo(f"Average Coherence Score: {avg_coherence:.3f}")
     click.echo(f"Result: {'PASS ✓' if avg_coherence > 0.7 else 'FAIL ✗'}")
     click.echo()

     def single_query(self, query_data: Dict[str, Any]) -> Dict[str, Any]:
         """Execute single query"""
+        response = self.session.post(f"{self.base_url}/query", json=query_data, timeout=30)
         response.raise_for_status()
         return response.json()
+    def bulk_query(
+        self, queries: List[Dict[str, Any]], concurrency: int = 5, include_narrative: bool = False
+    ) -> Dict[str, Any]:
         """Execute bulk concurrent queries"""
         payload = {
             "queries": queries,
             "concurrency_level": concurrency,
             "include_narrative_analysis": include_narrative,
         }
+        response = self.session.post(f"{self.base_url}/bulk_query", json=payload, timeout=120)
         response.raise_for_status()
         return response.json()
     def reset_metrics(self) -> Dict[str, Any]:
         """Reset service metrics"""
+        response = self.session.post(f"{self.base_url}/metrics/reset", timeout=5)
         response.raise_for_status()
         return response.json()
 @click.group()
+@click.option("--api-url", default="http://localhost:8000", help="API service URL")
 @click.pass_context
 def cli(ctx, api_url):
     """EXP-09 CLI - STAT7 Retrieval API Command Line Interface"""
             f"  Concurrent Queries: {
                 health_data.get(
                     'concurrent_queries',
+                    0)}"
+        )
         click.echo(
             f"  Max Concurrent Observed: {
                 health_data.get(
                     'max_concurrent_observed',
+                    0)}"
+        )
         click.echo(f"  Hybrid Queries: {health_data.get('hybrid_queries', 0)}")
         click.echo(f"  Errors: {health_data.get('errors', 0)}")
     else:
 @click.option("--hybrid", is_flag=True, help="Enable STAT7 hybrid scoring")
 @click.option("--max-results", default=10, help="Maximum results to return")
 @click.option("--confidence", default=0.6, help="Confidence threshold")
+@click.option("--weight-semantic", default=0.6, help="Semantic weight in hybrid mode")
+@click.option("--weight-stat7", default=0.4, help="STAT7 weight in hybrid mode")
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.pass_context
 def query(
             click.echo(f"Query: {result.get('query_id')}")
             click.echo("=" * 60)
             click.echo(f"Results: {result.get('result_count')}")
+            click.echo(f"Execution Time: {result.get('execution_time_ms'):.1f}ms")
             if result.get("semantic_similarity"):
                 click.echo(
                     f"Semantic Similarity: {
+                        result.get('semantic_similarity'):.3f}"
+                )
             if result.get("stat7_resonance"):
+                click.echo(f"STAT7 Resonance: {result.get('stat7_resonance'):.3f}")
             # Show narrative analysis
             if result.get("narrative_analysis"):
                 narr = result["narrative_analysis"]
                 click.echo(f"\nNarrative Analysis:")
+                click.echo(f"  Coherence Score: {narr.get('coherence_score', 0):.3f}")
+                click.echo(f"  Narrative Threads: {narr.get('narrative_threads', 0)}")
                 click.echo(f"  Analysis: {narr.get('analysis')}")
             # Show results
+            click.echo(f"\nTop Results ({min(3, len(result.get('results', [])))}):")
             for i, res in enumerate(result.get("results", [])[:3], 1):
                 click.echo(
                     f"  {i}. Score: {
                         res.get(
                             'content',
                             'N/A')[
+                            :50]}..."
+                )
             click.echo()
 @cli.command()
 @click.option("--num-queries", default=5, help="Number of concurrent queries")
 @click.option("--concurrency", default=5, help="Concurrency level")
+@click.option("--semantic", multiple=True, help="Semantic queries (can specify multiple)")
+@click.option("--hybrid", is_flag=True, help="Enable STAT7 hybrid for all queries")
 @click.option("--json-output", is_flag=True, help="Output as JSON")
 @click.pass_context
 def bulk(ctx, num_queries, concurrency, semantic, hybrid, json_output):
     try:
         click.echo(
             f"\nExecuting {
+                len(query_data)} concurrent queries (concurrency={concurrency})..."
+        )
         start_time = time.time()
+        result = client.bulk_query(query_data, concurrency=concurrency, include_narrative=True)
         elapsed = time.time() - start_time
             click.echo(f"Failed: {result.get('failed')} ✗")
             click.echo(
                 f"Total Execution Time: {
+                    result.get('execution_time_ms'):.1f}ms"
+            )
+            click.echo(f"Avg Query Time: {result.get('avg_query_time_ms'):.1f}ms")
             # Narrative analysis for entire batch
             if result.get("batch_narrative_analysis"):
                 narr = result["batch_narrative_analysis"]
                 click.echo(f"\nBatch Narrative Analysis:")
+                click.echo(f"  Coherence Score: {narr.get('coherence_score', 0):.3f}")
                 click.echo(
                     f"  Total Narrative Threads: {
                         narr.get(
                             'narrative_threads',
+                            0)}"
+                )
                 click.echo(f"  Total Results: {narr.get('result_count', 0)}")
                 click.echo(f"  Analysis: {narr.get('analysis')}")
                     f"  {
                         res.get('query_id')}: {
                         res.get('result_count')} results in {
+                        res.get('execution_time_ms'):.1f}ms"
+                )
             click.echo()
             click.echo(f"Total Queries: {metrics_data.get('total_queries')}")
             click.echo(
                 f"Concurrent Queries: {
+                    metrics_data.get('concurrent_queries')}"
+            )
             click.echo(f"Max Concurrent: {metrics_data.get('max_concurrent')}")
             click.echo(f"Hybrid Queries: {metrics_data.get('hybrid_queries')}")
             click.echo(f"Errors: {metrics_data.get('errors')}")
 @cli.command()
+@click.option("--num-scenarios", default=3, help="Number of concurrent test scenarios")
+@click.option("--queries-per-scenario", default=10, help="Queries per scenario")
 @click.option("--use-hybrid", is_flag=True, help="Use STAT7 hybrid scoring")
 @click.option("--output-file", help="Save results to file")
 @click.pass_context
+def stress_test(ctx, num_scenarios, queries_per_scenario, use_hybrid, output_file):
     """Run EXP-10 narrative preservation stress test"""
     client = ctx.obj["client"]
             )
         try:
+            result = client.bulk_query(queries, concurrency=10, include_narrative=True)
             scenario_result = {
                 "scenario": scenario,
                 "successful": result.get("successful"),
                 "failed": result.get("failed"),
                 "avg_query_time_ms": result.get("avg_query_time_ms"),
+                "batch_coherence": result.get("batch_narrative_analysis", {}).get(
+                    "coherence_score", 0
+                ),
             }
             results_summary["scenarios"].append(scenario_result)
             results_summary["total_coherence_score"] += scenario_result["batch_coherence"]
+            click.echo(f"  ✓ {result.get('successful')}/{len(queries)} queries successful")
+            click.echo(f"  Coherence: {scenario_result['batch_coherence']:.3f}")
         except Exception as e:
             click.secho(f"  ✗ Scenario failed: {str(e)}", fg="red")
     # Summary
+    avg_coherence = results_summary["total_coherence_score"] / max(1, num_scenarios)
     results_summary["average_coherence"] = avg_coherence
     results_summary["end_time"] = datetime.now().isoformat()
     click.echo("\n" + "=" * 60)
     click.echo("Stress Test Summary")
     click.echo("=" * 60)
+    click.echo(f"Scenarios Completed: {len(results_summary['scenarios'])}/{num_scenarios}")
     click.echo(f"Average Coherence Score: {avg_coherence:.3f}")
     click.echo(f"Result: {'PASS ✓' if avg_coherence > 0.7 else 'FAIL ✗'}")
     click.echo()

warbler_cda/api/service.py CHANGED Viewed

@@ -47,8 +47,7 @@ class STAT7Address(BaseModel):
     """STAT7 coordinate specification"""
     realm: Dict[str, Any] = Field(
-        default_factory=lambda: {
-            "type": "retrieval_query", "label": "api_query"}
     )
     lineage: int = 0
     adjacency: str = "semantic_proximity"
@@ -128,8 +127,7 @@ def _init_api():
     return _api_instance
-def _analyze_narrative_coherence(
-        results: List[Dict[str, Any]]) -> Dict[str, Any]:
     """
     Analyze narrative coherence across results.
     Used to validate that meaning/story threads survive concurrent access.
@@ -179,12 +177,9 @@ def _analyze_narrative_coherence(
         relevance_scores.append(result.get("relevance_score", 0.0))
     # Calculate coherence components
-    avg_semantic = sum(semantic_scores) / \
-        len(semantic_scores) if semantic_scores else 0.0
-    avg_stat7 = sum(stat7_resonances) / \
-        len(stat7_resonances) if stat7_resonances else 0.0
-    avg_relevance = sum(relevance_scores) / \
-        len(relevance_scores) if relevance_scores else 0.0
     # 1. RESULT QUALITY (50% weight): Average relevance of all results
     # This is the primary signal - if results aren't relevant, nothing else
@@ -197,8 +192,7 @@ def _analyze_narrative_coherence(
     semantic_variance = sum((s - avg_semantic) ** 2 for s in semantic_scores) / max(
         1, len(semantic_scores)
     )
-    semantic_coherence = 1.0 / \
-        (1.0 + semantic_variance) if semantic_variance < 1.0 else 0.0
     # 3. STAT7 ENTANGLEMENT (10% weight): Are results connected in STAT7 space?
     stat7_coherence = avg_stat7
@@ -231,14 +225,18 @@ def _analyze_narrative_coherence(
     if len(results) > 50:  # Only log for bulk operations
         logger.info(
             f"Coherence analysis for {
-                len(results)} results: " f"quality={
                 quality_score:.3f}, semantic_coh={
                 semantic_coherence:.3f} (var={
-                    semantic_variance:.4f}), " f"stat7={
                         stat7_coherence:.3f}, focus={
-                            focus_coherence:.3f}, " f"threads={
                                 len(narrative_threads)}, final={
-                                    coherence_score:.3f}")
     return {
         "coherence_score": coherence_score,
@@ -306,7 +304,8 @@ async def _stress_test_result(
         if query.stat7_hybrid and query.semantic_query:
             logger.info(
                 f"Bob Test 1: Pure semantic retrieval for query {
-                    query.query_id}")
             semantic_query = RetrievalQuery(
                 query_id=f"{query.query_id}_bob_semantic",
                 mode=RetrievalMode.SEMANTIC_SIMILARITY,
@@ -316,8 +315,7 @@ async def _stress_test_result(
             )
             semantic_assembly = api.retrieve_context(semantic_query)
             semantic_ids = set(r.content_id for r in semantic_assembly.results)
-            semantic_overlap = len(
-                original_ids & semantic_ids) / max(1, len(original_ids))
             log["tests_run"].append(
                 {
@@ -329,8 +327,7 @@ async def _stress_test_result(
         # Test 2: Pure STAT7 retrieval (if hybrid was used)
         if query.stat7_hybrid and query.stat7_address:
-            logger.info(
-                f"Bob Test 2: Pure STAT7 retrieval for query {query.query_id}")
             stat7_query = RetrievalQuery(
                 query_id=f"{query.query_id}_bob_stat7",
                 mode=RetrievalMode.STAT7_ADDRESS,
@@ -340,8 +337,7 @@ async def _stress_test_result(
             )
             stat7_assembly = api.retrieve_context(stat7_query)
             stat7_ids = set(r.content_id for r in stat7_assembly.results)
-            stat7_overlap = len(original_ids & stat7_ids) / \
-                max(1, len(original_ids))
             log["tests_run"].append(
                 {
@@ -356,21 +352,20 @@ async def _stress_test_result(
         if query.confidence_threshold < 0.8:
             logger.info(
                 f"Bob Test 3: Higher confidence threshold for query {
-                    query.query_id}")
             high_conf_query = RetrievalQuery(
                 query_id=f"{query.query_id}_bob_high_conf",
                 mode=query.mode,
                 semantic_query=query.semantic_query,
                 anchor_ids=query.anchor_ids,
                 max_results=query.max_results,
-                confidence_threshold=min(
-                    0.85, query.confidence_threshold + 0.2),
                 stat7_hybrid=query.stat7_hybrid,
                 stat7_address=query.stat7_address,
             )
             high_conf_assembly = api.retrieve_context(high_conf_query)
-            high_conf_ids = set(
-                r.content_id for r in high_conf_assembly.results)
             high_conf_overlap = len(original_ids & high_conf_ids) / max(
                 1, min(len(original_ids), len(high_conf_ids))
             )
@@ -390,8 +385,7 @@ async def _stress_test_result(
             avg_consistency = sum(consistency_scores) / len(consistency_scores)
             log["consistency_score"] = avg_consistency
-            if avg_consistency >= (
-                    1.0 - BobSkepticConfig.STRESS_TEST_DIVERGENCE_THRESHOLD):
                 log["verdict"] = "CONSISTENT"
                 is_consistent = True
             else:
@@ -405,8 +399,7 @@ async def _stress_test_result(
         log["stress_test_completed"] = datetime.now().isoformat()
     except Exception as e:
-        logger.error(
-            f"Error during Bob's stress test for {query.query_id}: {str(e)}")
         log["error"] = str(e)
         log["verdict"] = "ERROR_DURING_TEST"
         is_consistent = False  # Err on side of caution
@@ -454,19 +447,24 @@ async def _bob_skeptic_filter(
             # Results are verified despite low entanglement
             logger.info(
                 f"✅ BOB VERIFIED: Query {
-                    query.query_id} is consistent across stress tests. " f"High coherence is genuine, not an artifact. (consistency={
                     verification_log.get(
                         'consistency_score',
-                        0.0):.3f})")
             return "VERIFIED", verification_log
         else:
             # Results diverge under stress testing = quarantine
             logger.warning(
                 f"🚨 BOB QUARANTINE: Query {
-                    query.query_id} FAILED stress tests. " f"High coherence appears to be artifact or dataset bias. (consistency={
                     verification_log.get(
                         'consistency_score',
-                        0.0):.3f}) " f"Escalating to Faculty for review.")
             return "QUARANTINED", verification_log
     # Results are normal - no investigation needed
@@ -484,8 +482,7 @@ async def startup_event():
 async def health_check():
     """Health check endpoint"""
     api = _init_api()
-    uptime = (datetime.now() -
-              datetime.fromisoformat(_metrics["start_time"])).total_seconds()
     return HealthResponse(
         status="healthy",
@@ -504,8 +501,7 @@ async def single_query(request: QueryRequest):
     api = _init_api()
     _metrics["total_queries"] += 1
     _metrics["concurrent_queries"] += 1
-    _metrics["max_concurrent"] = max(
-        _metrics["max_concurrent"], _metrics["concurrent_queries"])
     if request.stat7_hybrid:
         _metrics["hybrid_queries"] += 1
@@ -514,8 +510,9 @@ async def single_query(request: QueryRequest):
         start_time = time.time()
         # Convert request to RetrievalQuery
-        mode = RetrievalMode[request.mode.upper().replace(
-            "_", "").replace("SIMILARITY", "_SIMILARITY")]
         stat7_addr = None
         if request.stat7_address:
@@ -571,11 +568,9 @@ async def single_query(request: QueryRequest):
             result_count=len(results_data),
             results=results_data,
             semantic_similarity=(
-                results_data[0].get(
-                    "semantic_similarity") if results_data else None
             ),
-            stat7_resonance=results_data[0].get(
-                "stat7_resonance") if results_data else None,
             execution_time_ms=execution_time,
             timestamp=datetime.now().isoformat(),
             narrative_analysis=narrative_analysis,
@@ -600,7 +595,8 @@ async def bulk_concurrent_queries(request: BulkQueryRequest):
         f"Executing {
             len(
                 request.queries)} queries with concurrency level {
-            request.concurrency_level}")
     results = []
     semaphore = asyncio.Semaphore(request.concurrency_level)
@@ -615,8 +611,7 @@ async def bulk_concurrent_queries(request: BulkQueryRequest):
         batch_results = await asyncio.gather(*tasks, return_exceptions=True)
         # Separate successful results from errors
-        successful_results = [
-            r for r in batch_results if not isinstance(r, Exception)]
         errors = [
             {"query_id": request.queries[i].query_id, "error": str(r)}
@@ -629,8 +624,7 @@ async def bulk_concurrent_queries(request: BulkQueryRequest):
         for result in successful_results:
             all_results_flat.extend(result.results)
-        batch_narrative_analysis = _analyze_narrative_coherence(
-            all_results_flat)
         return {
             "batch_id": f"batch_{int(time.time() * 1000)}",
@@ -675,21 +669,20 @@ async def ingest_documents(request: Dict[str, Any]):
                 continue
             # Use the new add_document method
-            success = api.add_document(
-                doc_id=content_id, content=content, metadata=metadata)
             if success:
                 ingested += 1
                 logger.info(f"✓ Ingested: {content_id}")
             else:
-                failed.append(
-                    {"doc_id": content_id, "error": "Document already exists"})
                 logger.warning(f"Document already exists: {content_id}")
         logger.info(
             f"Ingested {ingested}/{
                 len(documents)} documents (context store now has {
-                api.get_context_store_size()} total)")
         response = {
             "status": "success",

     """STAT7 coordinate specification"""
     realm: Dict[str, Any] = Field(
+        default_factory=lambda: {"type": "retrieval_query", "label": "api_query"}
     )
     lineage: int = 0
     adjacency: str = "semantic_proximity"
     return _api_instance
+def _analyze_narrative_coherence(results: List[Dict[str, Any]]) -> Dict[str, Any]:
     """
     Analyze narrative coherence across results.
     Used to validate that meaning/story threads survive concurrent access.
         relevance_scores.append(result.get("relevance_score", 0.0))
     # Calculate coherence components
+    avg_semantic = sum(semantic_scores) / len(semantic_scores) if semantic_scores else 0.0
+    avg_stat7 = sum(stat7_resonances) / len(stat7_resonances) if stat7_resonances else 0.0
+    avg_relevance = sum(relevance_scores) / len(relevance_scores) if relevance_scores else 0.0
     # 1. RESULT QUALITY (50% weight): Average relevance of all results
     # This is the primary signal - if results aren't relevant, nothing else
     semantic_variance = sum((s - avg_semantic) ** 2 for s in semantic_scores) / max(
         1, len(semantic_scores)
     )
+    semantic_coherence = 1.0 / (1.0 + semantic_variance) if semantic_variance < 1.0 else 0.0
     # 3. STAT7 ENTANGLEMENT (10% weight): Are results connected in STAT7 space?
     stat7_coherence = avg_stat7
     if len(results) > 50:  # Only log for bulk operations
         logger.info(
             f"Coherence analysis for {
+                len(results)} results: "
+            f"quality={
                 quality_score:.3f}, semantic_coh={
                 semantic_coherence:.3f} (var={
+                    semantic_variance:.4f}), "
+            f"stat7={
                         stat7_coherence:.3f}, focus={
+                            focus_coherence:.3f}, "
+            f"threads={
                                 len(narrative_threads)}, final={
+                                    coherence_score:.3f}"
+        )
     return {
         "coherence_score": coherence_score,
         if query.stat7_hybrid and query.semantic_query:
             logger.info(
                 f"Bob Test 1: Pure semantic retrieval for query {
+                    query.query_id}"
+            )
             semantic_query = RetrievalQuery(
                 query_id=f"{query.query_id}_bob_semantic",
                 mode=RetrievalMode.SEMANTIC_SIMILARITY,
             )
             semantic_assembly = api.retrieve_context(semantic_query)
             semantic_ids = set(r.content_id for r in semantic_assembly.results)
+            semantic_overlap = len(original_ids & semantic_ids) / max(1, len(original_ids))
             log["tests_run"].append(
                 {
         # Test 2: Pure STAT7 retrieval (if hybrid was used)
         if query.stat7_hybrid and query.stat7_address:
+            logger.info(f"Bob Test 2: Pure STAT7 retrieval for query {query.query_id}")
             stat7_query = RetrievalQuery(
                 query_id=f"{query.query_id}_bob_stat7",
                 mode=RetrievalMode.STAT7_ADDRESS,
             )
             stat7_assembly = api.retrieve_context(stat7_query)
             stat7_ids = set(r.content_id for r in stat7_assembly.results)
+            stat7_overlap = len(original_ids & stat7_ids) / max(1, len(original_ids))
             log["tests_run"].append(
                 {
         if query.confidence_threshold < 0.8:
             logger.info(
                 f"Bob Test 3: Higher confidence threshold for query {
+                    query.query_id}"
+            )
             high_conf_query = RetrievalQuery(
                 query_id=f"{query.query_id}_bob_high_conf",
                 mode=query.mode,
                 semantic_query=query.semantic_query,
                 anchor_ids=query.anchor_ids,
                 max_results=query.max_results,
+                confidence_threshold=min(0.85, query.confidence_threshold + 0.2),
                 stat7_hybrid=query.stat7_hybrid,
                 stat7_address=query.stat7_address,
             )
             high_conf_assembly = api.retrieve_context(high_conf_query)
+            high_conf_ids = set(r.content_id for r in high_conf_assembly.results)
             high_conf_overlap = len(original_ids & high_conf_ids) / max(
                 1, min(len(original_ids), len(high_conf_ids))
             )
             avg_consistency = sum(consistency_scores) / len(consistency_scores)
             log["consistency_score"] = avg_consistency
+            if avg_consistency >= (1.0 - BobSkepticConfig.STRESS_TEST_DIVERGENCE_THRESHOLD):
                 log["verdict"] = "CONSISTENT"
                 is_consistent = True
             else:
         log["stress_test_completed"] = datetime.now().isoformat()
     except Exception as e:
+        logger.error(f"Error during Bob's stress test for {query.query_id}: {str(e)}")
         log["error"] = str(e)
         log["verdict"] = "ERROR_DURING_TEST"
         is_consistent = False  # Err on side of caution
             # Results are verified despite low entanglement
             logger.info(
                 f"✅ BOB VERIFIED: Query {
+                    query.query_id} is consistent across stress tests. "
+                f"High coherence is genuine, not an artifact. (consistency={
                     verification_log.get(
                         'consistency_score',
+                        0.0):.3f})"
+            )
             return "VERIFIED", verification_log
         else:
             # Results diverge under stress testing = quarantine
             logger.warning(
                 f"🚨 BOB QUARANTINE: Query {
+                    query.query_id} FAILED stress tests. "
+                f"High coherence appears to be artifact or dataset bias. (consistency={
                     verification_log.get(
                         'consistency_score',
+                        0.0):.3f}) "
+                f"Escalating to Faculty for review."
+            )
             return "QUARANTINED", verification_log
     # Results are normal - no investigation needed
 async def health_check():
     """Health check endpoint"""
     api = _init_api()
+    uptime = (datetime.now() - datetime.fromisoformat(_metrics["start_time"])).total_seconds()
     return HealthResponse(
         status="healthy",
     api = _init_api()
     _metrics["total_queries"] += 1
     _metrics["concurrent_queries"] += 1
+    _metrics["max_concurrent"] = max(_metrics["max_concurrent"], _metrics["concurrent_queries"])
     if request.stat7_hybrid:
         _metrics["hybrid_queries"] += 1
         start_time = time.time()
         # Convert request to RetrievalQuery
+        mode = RetrievalMode[
+            request.mode.upper().replace("_", "").replace("SIMILARITY", "_SIMILARITY")
+        ]
         stat7_addr = None
         if request.stat7_address:
             result_count=len(results_data),
             results=results_data,
             semantic_similarity=(
+                results_data[0].get("semantic_similarity") if results_data else None
             ),
+            stat7_resonance=results_data[0].get("stat7_resonance") if results_data else None,
             execution_time_ms=execution_time,
             timestamp=datetime.now().isoformat(),
             narrative_analysis=narrative_analysis,
         f"Executing {
             len(
                 request.queries)} queries with concurrency level {
+            request.concurrency_level}"
+    )
     results = []
     semaphore = asyncio.Semaphore(request.concurrency_level)
         batch_results = await asyncio.gather(*tasks, return_exceptions=True)
         # Separate successful results from errors
+        successful_results = [r for r in batch_results if not isinstance(r, Exception)]
         errors = [
             {"query_id": request.queries[i].query_id, "error": str(r)}
         for result in successful_results:
             all_results_flat.extend(result.results)
+        batch_narrative_analysis = _analyze_narrative_coherence(all_results_flat)
         return {
             "batch_id": f"batch_{int(time.time() * 1000)}",
                 continue
             # Use the new add_document method
+            success = api.add_document(doc_id=content_id, content=content, metadata=metadata)
             if success:
                 ingested += 1
                 logger.info(f"✓ Ingested: {content_id}")
             else:
+                failed.append({"doc_id": content_id, "error": "Document already exists"})
                 logger.warning(f"Document already exists: {content_id}")
         logger.info(
             f"Ingested {ingested}/{
                 len(documents)} documents (context store now has {
+                api.get_context_store_size()} total)"
+        )
         response = {
             "status": "success",

warbler_cda/conflict_detector.py CHANGED Viewed

@@ -65,22 +65,15 @@ class ConflictDetector:
     - Confidence scoring and evidence collection
     """
-    def __init__(self,
-                 config: Optional[Dict[str,
-                                       Any]] = None,
-                 embedding_provider=None):
         self.config = config or {}
         self.embedding_provider = embedding_provider
         # Configuration parameters
-        self.opposition_threshold = self.config.get(
-            "opposition_threshold", 0.7)
-        self.semantic_similarity_threshold = self.config.get(
-            "semantic_similarity_threshold", 0.8)
-        self.min_confidence_score = self.config.get(
-            "min_confidence_score", 0.6)
-        self.max_statement_age_hours = self.config.get(
-            "max_statement_age_hours", 24)
         # Storage
         self.statement_fingerprints: Dict[str, StatementFingerprint] = {}
@@ -152,8 +145,7 @@ class ConflictDetector:
             "average_confidence": 0.0,
         }
-    def process_statements(
-            self, statements: List[Dict[str, Any]]) -> Dict[str, Any]:
         """
         Process new statements and detect conflicts with existing statements.
@@ -169,10 +161,7 @@ class ConflictDetector:
             "new_conflicts": [],
             "fingerprints_created": 0,
             "total_active_statements": 0,
-            "conflict_summary": {
-                "high_confidence": 0,
-                "medium_confidence": 0,
-                "low_confidence": 0},
         }
         # Process each statement
@@ -184,8 +173,7 @@ class ConflictDetector:
                 continue
             # Create fingerprint for new statement
-            fingerprint = self._create_statement_fingerprint(
-                statement_id, content, statement)
             self.statement_fingerprints[statement_id] = fingerprint
             processing_report["fingerprints_created"] += 1
@@ -203,7 +191,8 @@ class ConflictDetector:
                             "conflict_type": conflict.conflict_type.value,
                             "confidence_score": conflict.confidence_score,
                             "opposition_indicators": conflict.opposition_indicators,
-                        })
                     # Categorize by confidence
                     if conflict.confidence_score >= 0.8:
@@ -219,8 +208,7 @@ class ConflictDetector:
         # Update metrics
         elapsed_ms = (time.time() - start_time) * 1000
         self.metrics["statements_processed"] += len(statements)
-        self.metrics["conflicts_detected"] += len(
-            processing_report["new_conflicts"])
         self.metrics["processing_time_ms"] += elapsed_ms
         if self.detected_conflicts:
@@ -229,10 +217,8 @@ class ConflictDetector:
             ) / len(self.detected_conflicts)
         processing_report["elapsed_ms"] = elapsed_ms
-        processing_report["total_active_statements"] = len(
-            self.statement_fingerprints)
-        processing_report["total_conflicts_detected"] = len(
-            self.detected_conflicts)
         return processing_report
@@ -263,8 +249,7 @@ class ConflictDetector:
         for conflict in conflicts_involving_statement:
             conflict_type = conflict.conflict_type.value
-            conflict_types[conflict_type] = conflict_types.get(
-                conflict_type, 0) + 1
             max_confidence = max(max_confidence, conflict.confidence_score)
             # Add opposing statement
@@ -324,8 +309,7 @@ class ConflictDetector:
         for conflict in self.detected_conflicts:
             # Count by type
             conflict_type = conflict.conflict_type.value
-            conflict_types[conflict_type] = conflict_types.get(
-                conflict_type, 0) + 1
             # Count by confidence
             if conflict.confidence_score >= 0.8:
@@ -351,8 +335,7 @@ class ConflictDetector:
             status = "healthy"
         health_score = self._calculate_health_score()
-        recommendations = self._generate_system_recommendations(
-            status, conflict_types)
         return {
             "total_conflicts": len(self.detected_conflicts),
@@ -402,16 +385,19 @@ class ConflictDetector:
         # Detect negation indicators
         content_lower = content.lower()
         negation_indicators = [
-            pattern for pattern in self.negation_patterns if pattern in content_lower]
         # Calculate assertion strength
         assertion_indicators = [
-            pattern for pattern in self.assertion_patterns if pattern in content_lower]
         assertion_strength = min(len(assertion_indicators) * 0.2, 1.0)
         # Extract temporal markers
         temporal_markers = [
-            pattern for pattern in self.temporal_patterns if pattern in content_lower]
         # Extract domain tags (simple keyword-based)
         domain_tags = set()
@@ -467,8 +453,7 @@ class ConflictDetector:
                         context_overlap = len(
                             new_fingerprint.domain_tags & existing_fingerprint.domain_tags
                         ) / max(
-                            len(new_fingerprint.domain_tags |
-                                existing_fingerprint.domain_tags), 1
                         )
                         # Collect opposition evidence
@@ -477,14 +462,12 @@ class ConflictDetector:
                             new_fingerprint.negation_indicators
                             and not existing_fingerprint.negation_indicators
                         ):
-                            opposition_indicators.extend(
-                                new_fingerprint.negation_indicators)
                         elif (
                             existing_fingerprint.negation_indicators
                             and not new_fingerprint.negation_indicators
                         ):
-                            opposition_indicators.extend(
-                                existing_fingerprint.negation_indicators)
                         # Determine conflict type
                         conflict_type = self._determine_conflict_type(
@@ -493,7 +476,8 @@ class ConflictDetector:
                         # Calculate confidence score
                         confidence = self._calculate_confidence_score(
-                            similarity, opposition_score, context_overlap, opposition_indicators)
                         if confidence >= self.min_confidence_score:
                             conflict = ConflictEvidence(
@@ -530,13 +514,9 @@ class ConflictDetector:
         # Temporal conflicts
         if fp1.temporal_markers and fp2.temporal_markers:
             # Simple temporal conflict detection
-            if any(
-                marker in [
-                    "before",
-                    "earlier"] for marker in fp1.temporal_markers) and any(
-                marker in [
-                    "after",
-                    "later"] for marker in fp2.temporal_markers):
                 score += 0.3
         return min(score, 1.0)
@@ -570,8 +550,7 @@ class ConflictDetector:
         indicators: List[str],
     ) -> float:
         """Calculate confidence score for a conflict detection."""
-        base_score = (similarity * 0.4) + \
-            (opposition_score * 0.4) + (context_overlap * 0.2)
         # Boost confidence if we have clear opposition indicators
         indicator_boost = min(len(indicators) * 0.1, 0.2)
@@ -601,11 +580,10 @@ class ConflictDetector:
     def _generate_conflict_id(self, conflict: ConflictEvidence) -> str:
         """Generate unique ID for a conflict."""
-        content = (
-            f"{
                 conflict.statement_a_id}_{
                 conflict.statement_b_id}_{
-                conflict.conflict_type.value}")
         return hashlib.md5(content.encode()).hexdigest()[:12]
     def _generate_conflict_recommendation(
@@ -629,12 +607,10 @@ class ConflictDetector:
         recommendations = []
         if status == "critical":
-            recommendations.append(
-                "Immediate review required - multiple high-confidence conflicts")
             recommendations.append("Consider statement validation workflow")
         elif status == "warning":
-            recommendations.append(
-                "Monitor conflicts closely - elevated conflict level")
             recommendations.append("Review recent statements for accuracy")
         # Type-specific recommendations
@@ -644,12 +620,10 @@ class ConflictDetector:
             )
         if conflict_types.get("temporal_conflict", 0) > 2:
-            recommendations.append(
-                "Temporal conflicts detected - verify timeline consistency")
         if not recommendations:
-            recommendations.append(
-                "System operating normally - continue monitoring")
         return recommendations
@@ -659,7 +633,8 @@ class ConflictDetector:
             return 1.0
         high_confidence_conflicts = sum(
-            1 for conflict in self.detected_conflicts if conflict.confidence_score > 0.8)
         total_statements = len(self.statement_fingerprints)
         if total_statements == 0:

     - Confidence scoring and evidence collection
     """
+    def __init__(self, config: Optional[Dict[str, Any]] = None, embedding_provider=None):
         self.config = config or {}
         self.embedding_provider = embedding_provider
         # Configuration parameters
+        self.opposition_threshold = self.config.get("opposition_threshold", 0.7)
+        self.semantic_similarity_threshold = self.config.get("semantic_similarity_threshold", 0.8)
+        self.min_confidence_score = self.config.get("min_confidence_score", 0.6)
+        self.max_statement_age_hours = self.config.get("max_statement_age_hours", 24)
         # Storage
         self.statement_fingerprints: Dict[str, StatementFingerprint] = {}
             "average_confidence": 0.0,
         }
+    def process_statements(self, statements: List[Dict[str, Any]]) -> Dict[str, Any]:
         """
         Process new statements and detect conflicts with existing statements.
             "new_conflicts": [],
             "fingerprints_created": 0,
             "total_active_statements": 0,
+            "conflict_summary": {"high_confidence": 0, "medium_confidence": 0, "low_confidence": 0},
         }
         # Process each statement
                 continue
             # Create fingerprint for new statement
+            fingerprint = self._create_statement_fingerprint(statement_id, content, statement)
             self.statement_fingerprints[statement_id] = fingerprint
             processing_report["fingerprints_created"] += 1
                             "conflict_type": conflict.conflict_type.value,
                             "confidence_score": conflict.confidence_score,
                             "opposition_indicators": conflict.opposition_indicators,
+                        }
+                    )
                     # Categorize by confidence
                     if conflict.confidence_score >= 0.8:
         # Update metrics
         elapsed_ms = (time.time() - start_time) * 1000
         self.metrics["statements_processed"] += len(statements)
+        self.metrics["conflicts_detected"] += len(processing_report["new_conflicts"])
         self.metrics["processing_time_ms"] += elapsed_ms
         if self.detected_conflicts:
             ) / len(self.detected_conflicts)
         processing_report["elapsed_ms"] = elapsed_ms
+        processing_report["total_active_statements"] = len(self.statement_fingerprints)
+        processing_report["total_conflicts_detected"] = len(self.detected_conflicts)
         return processing_report
         for conflict in conflicts_involving_statement:
             conflict_type = conflict.conflict_type.value
+            conflict_types[conflict_type] = conflict_types.get(conflict_type, 0) + 1
             max_confidence = max(max_confidence, conflict.confidence_score)
             # Add opposing statement
         for conflict in self.detected_conflicts:
             # Count by type
             conflict_type = conflict.conflict_type.value
+            conflict_types[conflict_type] = conflict_types.get(conflict_type, 0) + 1
             # Count by confidence
             if conflict.confidence_score >= 0.8:
             status = "healthy"
         health_score = self._calculate_health_score()
+        recommendations = self._generate_system_recommendations(status, conflict_types)
         return {
             "total_conflicts": len(self.detected_conflicts),
         # Detect negation indicators
         content_lower = content.lower()
         negation_indicators = [
+            pattern for pattern in self.negation_patterns if pattern in content_lower
+        ]
         # Calculate assertion strength
         assertion_indicators = [
+            pattern for pattern in self.assertion_patterns if pattern in content_lower
+        ]
         assertion_strength = min(len(assertion_indicators) * 0.2, 1.0)
         # Extract temporal markers
         temporal_markers = [
+            pattern for pattern in self.temporal_patterns if pattern in content_lower
+        ]
         # Extract domain tags (simple keyword-based)
         domain_tags = set()
                         context_overlap = len(
                             new_fingerprint.domain_tags & existing_fingerprint.domain_tags
                         ) / max(
+                            len(new_fingerprint.domain_tags | existing_fingerprint.domain_tags), 1
                         )
                         # Collect opposition evidence
                             new_fingerprint.negation_indicators
                             and not existing_fingerprint.negation_indicators
                         ):
+                            opposition_indicators.extend(new_fingerprint.negation_indicators)
                         elif (
                             existing_fingerprint.negation_indicators
                             and not new_fingerprint.negation_indicators
                         ):
+                            opposition_indicators.extend(existing_fingerprint.negation_indicators)
                         # Determine conflict type
                         conflict_type = self._determine_conflict_type(
                         # Calculate confidence score
                         confidence = self._calculate_confidence_score(
+                            similarity, opposition_score, context_overlap, opposition_indicators
+                        )
                         if confidence >= self.min_confidence_score:
                             conflict = ConflictEvidence(
         # Temporal conflicts
         if fp1.temporal_markers and fp2.temporal_markers:
             # Simple temporal conflict detection
+            if any(marker in ["before", "earlier"] for marker in fp1.temporal_markers) and any(
+                marker in ["after", "later"] for marker in fp2.temporal_markers
+            ):
                 score += 0.3
         return min(score, 1.0)
         indicators: List[str],
     ) -> float:
         """Calculate confidence score for a conflict detection."""
+        base_score = (similarity * 0.4) + (opposition_score * 0.4) + (context_overlap * 0.2)
         # Boost confidence if we have clear opposition indicators
         indicator_boost = min(len(indicators) * 0.1, 0.2)
     def _generate_conflict_id(self, conflict: ConflictEvidence) -> str:
         """Generate unique ID for a conflict."""
+        content = f"{
                 conflict.statement_a_id}_{
                 conflict.statement_b_id}_{
+                conflict.conflict_type.value}"
         return hashlib.md5(content.encode()).hexdigest()[:12]
     def _generate_conflict_recommendation(
         recommendations = []
         if status == "critical":
+            recommendations.append("Immediate review required - multiple high-confidence conflicts")
             recommendations.append("Consider statement validation workflow")
         elif status == "warning":
+            recommendations.append("Monitor conflicts closely - elevated conflict level")
             recommendations.append("Review recent statements for accuracy")
         # Type-specific recommendations
             )
         if conflict_types.get("temporal_conflict", 0) > 2:
+            recommendations.append("Temporal conflicts detected - verify timeline consistency")
         if not recommendations:
+            recommendations.append("System operating normally - continue monitoring")
         return recommendations
             return 1.0
         high_confidence_conflicts = sum(
+            1 for conflict in self.detected_conflicts if conflict.confidence_score > 0.8
+        )
         total_statements = len(self.statement_fingerprints)
         if total_statements == 0:

warbler_cda/evaporation.py CHANGED Viewed

@@ -9,8 +9,7 @@ from collections import Counter
 class EvaporationEngine:
     """Evaporation: converts molten glyphs into mist lines (proto-thoughts) with advanced style bias."""
-    def __init__(self, magma_store, cloud_store,
-                 config: Optional[Dict[str, Any]] = None):
         self.magma_store = magma_store
         self.cloud_store = cloud_store
         self.config = config or {}
@@ -23,15 +22,12 @@ class EvaporationEngine:
         # Language generation parameters
         self.creativity_level = self.config.get("creativity_level", 0.7)
         self.compression_ratio = self.config.get("compression_ratio", 0.6)
-        self.mythic_amplification = self.config.get(
-            "mythic_amplification", 1.2)
         # Advanced distillation parameters
-        self.semantic_density_threshold = self.config.get(
-            "semantic_density_threshold", 0.5)
         self.affect_sensitivity = self.config.get("affect_sensitivity", 0.8)
-        self.temporal_decay_factor = self.config.get(
-            "temporal_decay_factor", 0.1)
     def evaporate(
         self, limit: int = 5, style_override: Optional[str] = None
@@ -43,8 +39,7 @@ class EvaporationEngine:
         mist_lines = []
         for i, glyph in enumerate(molten[:limit]):
             # Apply style variation for diversity
-            current_style = style_override or self._determine_style_for_glyph(
-                glyph, i)
             # Advanced mist distillation
             mist = self._advanced_distill_mist(glyph, current_style)
@@ -62,11 +57,9 @@ class EvaporationEngine:
         return mist_lines
-    def _select_optimal_glyphs(
-            self, target_count: int) -> List[Dict[str, Any]]:
         """Select optimal glyphs for evaporation based on multiple criteria."""
-        molten = self.magma_store.select_hot(
-            target_count * 3)  # Get more candidates
         # Score glyphs based on multiple factors
         scored_glyphs = []
@@ -80,8 +73,7 @@ class EvaporationEngine:
             # Affect diversity factor
             affect = glyph.get("affect", {})
-            affect_score = sum(abs(v)
-                               for v in affect.values()) / max(len(affect), 1)
             score += affect_score * 0.3
             # Temporal freshness factor
@@ -100,8 +92,7 @@ class EvaporationEngine:
         scored_glyphs.sort(key=lambda x: x[1], reverse=True)
         return [glyph for glyph, _ in scored_glyphs[:target_count]]
-    def _determine_style_for_glyph(
-            self, glyph: Dict[str, Any], index: int) -> str:
         """Determine optimal style for a specific glyph."""
         affect = glyph.get("affect", {})
         heat = glyph.get("heat", 0.0)
@@ -128,27 +119,22 @@ class EvaporationEngine:
         return base_style
-    def _advanced_distill_mist(
-            self, glyph: Dict[str, Any], style: str) -> Dict[str, Any]:
         """Advanced mist distillation with style bias."""
         summary = glyph.get("compressed_summary", "")
         affect = glyph.get("affect", {})
         heat = glyph.get("heat", 0.0)
         # Get style profile
-        style_profile = self.style_profiles.get(
-            style, self.style_profiles["balanced"])
         # Generate proto-thought with style bias
-        proto_thought = self._generate_styled_proto_thought(
-            summary, affect, style_profile)
         # Calculate advanced metrics
-        evaporation_temp = self._calculate_evaporation_temperature(
-            heat, affect)
         technical_clarity = self._calculate_technical_clarity(summary, style)
-        mythic_weight = self._calculate_mythic_weight(
-            affect, style) * self.mythic_amplification
         # Create enhanced mist line
         mist_line = {
@@ -212,15 +198,13 @@ class EvaporationEngine:
         return concepts[:5]  # Limit to top 5 concepts
-    def _apply_poetic_style(
-            self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply poetic style to proto-thought."""
         if not concepts:
             return "[Poetic] Ethereal mist of untold stories..."
         # Poetic connectors and imagery
-        poetic_connectors = ["whispers", "dreams",
-                             "echoes", "shadows", "light", "flow"]
         poetic_imagery = [
             "through ancient corridors",
             "across starlit paths",
@@ -238,8 +222,7 @@ class EvaporationEngine:
                 concepts[0]} and {
                 concepts[1]} {connector} {imagery}."
-    def _apply_technical_style(
-            self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply technical style to proto-thought."""
         if not concepts:
             return "[Technical] System processing: null input detected."
@@ -251,8 +234,7 @@ class EvaporationEngine:
             "Architecture review:",
             "Implementation note:",
         ]
-        tech_connectors = ["enables", "facilitates",
-                           "optimizes", "integrates", "synchronizes"]
         prefix = random.choice(tech_prefixes)
         connector = random.choice(tech_connectors)
@@ -264,8 +246,7 @@ class EvaporationEngine:
                 concepts[0]} {connector} {
                 concepts[1]} subsystem."
-    def _apply_narrative_style(
-            self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply narrative style to proto-thought."""
         if not concepts:
             return "[Narrative] Once upon a time, in the realm of forgotten ideas..."
@@ -277,8 +258,7 @@ class EvaporationEngine:
             "Beyond the horizon,",
             "Within the tapestry of",
         ]
-        narrative_actions = ["emerges", "dances",
-                             "whispers", "journeys", "transforms"]
         opener = random.choice(narrative_openers)
         action = random.choice(narrative_actions)
@@ -290,8 +270,7 @@ class EvaporationEngine:
                 concepts[0]} and {
                 concepts[1]} {action} together."
-    def _apply_mythic_style(
-            self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply mythic style to proto-thought."""
         if not concepts:
             return "[Mythic] From the primordial void, legends are born..."
@@ -320,8 +299,7 @@ class EvaporationEngine:
                 concepts[0]} and {
                 concepts[1]} within {entity}."
-    def _apply_balanced_style(
-            self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply balanced style to proto-thought."""
         if not concepts:
             return "[Balanced] Contemplation on the nature of existence..."
@@ -330,13 +308,11 @@ class EvaporationEngine:
             return f"[Balanced] Reflection on {
                 concepts[0]} reveals deeper meaning."
         else:
-            return (
-                f"[Balanced] The interplay between {
                     concepts[0]} and {
-                    concepts[1]} creates harmony.")
-    def _apply_affect_coloring(
-            self, proto_thought: str, affect: Dict[str, Any]) -> str:
         """Apply affect-based coloring to proto-thought."""
         if not affect:
             return proto_thought
@@ -367,15 +343,13 @@ class EvaporationEngine:
         if len(words) > 15:
             # Keep first, middle, and last parts
             keep_first = words[:5]
-            keep_middle = words[len(words) // 2 - 2: len(words) // 2 + 2]
             keep_last = words[-3:]
-            compressed = keep_first + ["..."] + \
-                keep_middle + ["..."] + keep_last
             return " ".join(compressed)
         return proto_thought
-    def _calculate_advanced_humidity(
-            self, mist_lines: List[Dict[str, Any]]) -> float:
         """Calculate sophisticated humidity based on multiple factors."""
         if not mist_lines:
             return 0.0
@@ -390,10 +364,8 @@ class EvaporationEngine:
         mythic_contribution = avg_mythic * 0.3
         # Technical clarity contribution (inverse relationship)
-        avg_clarity = sum(m.get("technical_clarity", 0.5)
-                          for m in mist_lines) / len(mist_lines)
-        clarity_contribution = (1.0 - avg_clarity) * \
-            0.1  # Less clear = more humid
         # Style diversity contribution
         styles = [m.get("style", "balanced") for m in mist_lines]
@@ -426,10 +398,8 @@ class EvaporationEngine:
             return
         # Analyze mist characteristics
-        avg_mythic = sum(m.get("mythic_weight", 0.0)
-                         for m in mist_lines) / len(mist_lines)
-        avg_clarity = sum(m.get("technical_clarity", 0.5)
-                          for m in mist_lines) / len(mist_lines)
         styles = [m.get("style", "balanced") for m in mist_lines]
         style_diversity = len(set(styles)) / len(styles) if styles else 0
@@ -471,14 +441,12 @@ class EvaporationEngine:
         return min(1.0, density * length_factor)
-    def _calculate_evaporation_temperature(
-            self, heat: float, affect: Dict[str, Any]) -> float:
         """Calculate evaporation temperature based on heat and affect."""
         base_temp = heat * 0.8  # Primary factor from heat
         # Affect modulation
-        affect_intensity = sum(abs(v)
-                               for v in affect.values()) / max(len(affect), 1)
         affect_modulation = affect_intensity * 0.2
         # Combine and bound
@@ -508,17 +476,14 @@ class EvaporationEngine:
         if summary:
             # Normalize by expected length
             complexity = len(summary.split()) / 20.0
-            complexity_adjustment = max(-0.2,
-                                        min(0.2, (0.5 - complexity) * 0.4))
             clarity += complexity_adjustment
         return min(1.0, max(0.1, clarity))
-    def _calculate_mythic_weight(
-            self, affect: Dict[str, Any], style: str) -> float:
         """Calculate mythic weight based on affect and style."""
-        base_weight = affect.get("awe", 0.0) * 0.6 + \
-            affect.get("wonder", 0.0) * 0.4
         # Style multipliers
         style_multipliers = {
@@ -536,8 +501,7 @@ class EvaporationEngine:
         multiplier = style_multipliers.get(style, 1.0)
         return min(1.0, base_weight * multiplier)
-    def _create_affect_signature(
-            self, affect: Dict[str, Any]) -> Dict[str, float]:
         """Create normalized affect signature."""
         if not affect:
             return {}
@@ -574,8 +538,7 @@ class EvaporationEngine:
         proto_concepts = set(self._extract_key_concepts(proto_thought))
         if original_concepts:
-            preservation = len(original_concepts &
-                               proto_concepts) / len(original_concepts)
             quality_score += preservation * 0.2
         # Affect alignment

 class EvaporationEngine:
     """Evaporation: converts molten glyphs into mist lines (proto-thoughts) with advanced style bias."""
+    def __init__(self, magma_store, cloud_store, config: Optional[Dict[str, Any]] = None):
         self.magma_store = magma_store
         self.cloud_store = cloud_store
         self.config = config or {}
         # Language generation parameters
         self.creativity_level = self.config.get("creativity_level", 0.7)
         self.compression_ratio = self.config.get("compression_ratio", 0.6)
+        self.mythic_amplification = self.config.get("mythic_amplification", 1.2)
         # Advanced distillation parameters
+        self.semantic_density_threshold = self.config.get("semantic_density_threshold", 0.5)
         self.affect_sensitivity = self.config.get("affect_sensitivity", 0.8)
+        self.temporal_decay_factor = self.config.get("temporal_decay_factor", 0.1)
     def evaporate(
         self, limit: int = 5, style_override: Optional[str] = None
         mist_lines = []
         for i, glyph in enumerate(molten[:limit]):
             # Apply style variation for diversity
+            current_style = style_override or self._determine_style_for_glyph(glyph, i)
             # Advanced mist distillation
             mist = self._advanced_distill_mist(glyph, current_style)
         return mist_lines
+    def _select_optimal_glyphs(self, target_count: int) -> List[Dict[str, Any]]:
         """Select optimal glyphs for evaporation based on multiple criteria."""
+        molten = self.magma_store.select_hot(target_count * 3)  # Get more candidates
         # Score glyphs based on multiple factors
         scored_glyphs = []
             # Affect diversity factor
             affect = glyph.get("affect", {})
+            affect_score = sum(abs(v) for v in affect.values()) / max(len(affect), 1)
             score += affect_score * 0.3
             # Temporal freshness factor
         scored_glyphs.sort(key=lambda x: x[1], reverse=True)
         return [glyph for glyph, _ in scored_glyphs[:target_count]]
+    def _determine_style_for_glyph(self, glyph: Dict[str, Any], index: int) -> str:
         """Determine optimal style for a specific glyph."""
         affect = glyph.get("affect", {})
         heat = glyph.get("heat", 0.0)
         return base_style
+    def _advanced_distill_mist(self, glyph: Dict[str, Any], style: str) -> Dict[str, Any]:
         """Advanced mist distillation with style bias."""
         summary = glyph.get("compressed_summary", "")
         affect = glyph.get("affect", {})
         heat = glyph.get("heat", 0.0)
         # Get style profile
+        style_profile = self.style_profiles.get(style, self.style_profiles["balanced"])
         # Generate proto-thought with style bias
+        proto_thought = self._generate_styled_proto_thought(summary, affect, style_profile)
         # Calculate advanced metrics
+        evaporation_temp = self._calculate_evaporation_temperature(heat, affect)
         technical_clarity = self._calculate_technical_clarity(summary, style)
+        mythic_weight = self._calculate_mythic_weight(affect, style) * self.mythic_amplification
         # Create enhanced mist line
         mist_line = {
         return concepts[:5]  # Limit to top 5 concepts
+    def _apply_poetic_style(self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply poetic style to proto-thought."""
         if not concepts:
             return "[Poetic] Ethereal mist of untold stories..."
         # Poetic connectors and imagery
+        poetic_connectors = ["whispers", "dreams", "echoes", "shadows", "light", "flow"]
         poetic_imagery = [
             "through ancient corridors",
             "across starlit paths",
                 concepts[0]} and {
                 concepts[1]} {connector} {imagery}."
+    def _apply_technical_style(self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply technical style to proto-thought."""
         if not concepts:
             return "[Technical] System processing: null input detected."
             "Architecture review:",
             "Implementation note:",
         ]
+        tech_connectors = ["enables", "facilitates", "optimizes", "integrates", "synchronizes"]
         prefix = random.choice(tech_prefixes)
         connector = random.choice(tech_connectors)
                 concepts[0]} {connector} {
                 concepts[1]} subsystem."
+    def _apply_narrative_style(self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply narrative style to proto-thought."""
         if not concepts:
             return "[Narrative] Once upon a time, in the realm of forgotten ideas..."
             "Beyond the horizon,",
             "Within the tapestry of",
         ]
+        narrative_actions = ["emerges", "dances", "whispers", "journeys", "transforms"]
         opener = random.choice(narrative_openers)
         action = random.choice(narrative_actions)
                 concepts[0]} and {
                 concepts[1]} {action} together."
+    def _apply_mythic_style(self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply mythic style to proto-thought."""
         if not concepts:
             return "[Mythic] From the primordial void, legends are born..."
                 concepts[0]} and {
                 concepts[1]} within {entity}."
+    def _apply_balanced_style(self, concepts: List[str], affect: Dict[str, Any]) -> str:
         """Apply balanced style to proto-thought."""
         if not concepts:
             return "[Balanced] Contemplation on the nature of existence..."
             return f"[Balanced] Reflection on {
                 concepts[0]} reveals deeper meaning."
         else:
+            return f"[Balanced] The interplay between {
                     concepts[0]} and {
+                    concepts[1]} creates harmony."
+    def _apply_affect_coloring(self, proto_thought: str, affect: Dict[str, Any]) -> str:
         """Apply affect-based coloring to proto-thought."""
         if not affect:
             return proto_thought
         if len(words) > 15:
             # Keep first, middle, and last parts
             keep_first = words[:5]
+            keep_middle = words[len(words) // 2 - 2 : len(words) // 2 + 2]
             keep_last = words[-3:]
+            compressed = keep_first + ["..."] + keep_middle + ["..."] + keep_last
             return " ".join(compressed)
         return proto_thought
+    def _calculate_advanced_humidity(self, mist_lines: List[Dict[str, Any]]) -> float:
         """Calculate sophisticated humidity based on multiple factors."""
         if not mist_lines:
             return 0.0
         mythic_contribution = avg_mythic * 0.3
         # Technical clarity contribution (inverse relationship)
+        avg_clarity = sum(m.get("technical_clarity", 0.5) for m in mist_lines) / len(mist_lines)
+        clarity_contribution = (1.0 - avg_clarity) * 0.1  # Less clear = more humid
         # Style diversity contribution
         styles = [m.get("style", "balanced") for m in mist_lines]
             return
         # Analyze mist characteristics
+        avg_mythic = sum(m.get("mythic_weight", 0.0) for m in mist_lines) / len(mist_lines)
+        avg_clarity = sum(m.get("technical_clarity", 0.5) for m in mist_lines) / len(mist_lines)
         styles = [m.get("style", "balanced") for m in mist_lines]
         style_diversity = len(set(styles)) / len(styles) if styles else 0
         return min(1.0, density * length_factor)
+    def _calculate_evaporation_temperature(self, heat: float, affect: Dict[str, Any]) -> float:
         """Calculate evaporation temperature based on heat and affect."""
         base_temp = heat * 0.8  # Primary factor from heat
         # Affect modulation
+        affect_intensity = sum(abs(v) for v in affect.values()) / max(len(affect), 1)
         affect_modulation = affect_intensity * 0.2
         # Combine and bound
         if summary:
             # Normalize by expected length
             complexity = len(summary.split()) / 20.0
+            complexity_adjustment = max(-0.2, min(0.2, (0.5 - complexity) * 0.4))
             clarity += complexity_adjustment
         return min(1.0, max(0.1, clarity))
+    def _calculate_mythic_weight(self, affect: Dict[str, Any], style: str) -> float:
         """Calculate mythic weight based on affect and style."""
+        base_weight = affect.get("awe", 0.0) * 0.6 + affect.get("wonder", 0.0) * 0.4
         # Style multipliers
         style_multipliers = {
         multiplier = style_multipliers.get(style, 1.0)
         return min(1.0, base_weight * multiplier)
+    def _create_affect_signature(self, affect: Dict[str, Any]) -> Dict[str, float]:
         """Create normalized affect signature."""
         if not affect:
             return {}
         proto_concepts = set(self._extract_key_concepts(proto_thought))
         if original_concepts:
+            preservation = len(original_concepts & proto_concepts) / len(original_concepts)
             quality_score += preservation * 0.2
         # Affect alignment

warbler_cda/pack_loader.py CHANGED Viewed

@@ -37,14 +37,12 @@ class PackLoader:
             pack_docs = self._load_pack(pack_dir, pack_name)
             documents.extend(pack_docs)
-            logger.info(
-                f"✓ Loaded {len(pack_docs)} documents from {pack_name}")
         self.documents = documents
         return documents
-    def _load_pack(self, pack_dir: Path,
-                   pack_name: str) -> List[Dict[str, Any]]:
         """Load documents from a specific pack"""
         documents = []
@@ -61,11 +59,7 @@ class PackLoader:
         return documents
-    def _is_valid_warbler_pack(
-            self,
-            pack_dir: Path,
-            pack_name: str,
-            jsonl_file: Path) -> bool:
         """Validate that a directory is a valid Warbler pack
         A valid Warbler pack must have:
@@ -105,8 +99,7 @@ class PackLoader:
             # For chunked packs, look for chunk files
             chunk_files = list(pack_dir.glob(f"{pack_name}-chunk-*.jsonl"))
             if chunk_files:
-                logger.debug(
-                    f"Found {len(chunk_files)} chunk files for {pack_name}")
                 return True
             else:
                 logger.warning(f"Chunked pack {pack_name} has no chunk files")
@@ -116,12 +109,10 @@ class PackLoader:
             if jsonl_file.exists():
                 return True
             else:
-                logger.warning(
-                    f"Single-file pack {pack_name} missing JSONL file: {jsonl_file}")
                 return False
-    def _load_jsonl_pack(self, pack_dir: Path,
-                         pack_name: str) -> List[Dict[str, Any]]:
         """Load JSONL-based pack (supports both single-file and chunked packs)"""
         documents = []
@@ -135,11 +126,9 @@ class PackLoader:
                 with open(package_json, "r", encoding="utf-8") as f:
                     metadata = json.load(f)
                     is_chunked = metadata.get("chunked", False)
-                    chunk_pattern = metadata.get(
-                        "chunk_pattern", f"{pack_name}-chunk-*.jsonl")
             except (json.JSONDecodeError, IOError) as e:
-                logger.warning(
-                    f"Could not read package.json for {pack_name}: {e}")
         if is_chunked:
             # Load chunked pack
@@ -152,12 +141,10 @@ class PackLoader:
             chunk_files = sorted(pack_dir.glob(f"{pack_name}-chunk-*.jsonl"))
             if not chunk_files:
-                logger.warning(
-                    f"No chunk files found for chunked pack {pack_name}")
                 return documents
-            logger.info(
-                f"Found {len(chunk_files)} chunk files for {pack_name}")
             # Load each chunk file in order
             for chunk_file in chunk_files:
@@ -168,7 +155,8 @@ class PackLoader:
             logger.info(
                 f"Loaded {
                     len(documents)} total documents from {
-                    len(chunk_files)} chunks")
         else:
             # Load single-file pack (backward compatibility)
             jsonl_file = pack_dir / f"{pack_name}.jsonl"
@@ -180,8 +168,7 @@ class PackLoader:
         return documents
-    def _load_jsonl_file(self, jsonl_file: Path,
-                         pack_name: str) -> List[Dict[str, Any]]:
         """Load a single JSONL file with robust error handling"""
         documents = []
         error_count = 0
@@ -196,8 +183,11 @@ class PackLoader:
                     try:
                         entry = json.loads(line)
                         doc = self._format_document(
-                            entry, pack_name, f"{
-                                jsonl_file.stem}_line_{line_num}")
                         documents.append(doc)
                     except json.JSONDecodeError as e:
                         error_count += 1
@@ -205,7 +195,8 @@ class PackLoader:
                         if error_count <= max_errors_to_log:
                             logger.warning(
                                 f"Error parsing line {line_num} in {
-                                    jsonl_file.name}: {e}")
                         # Continue processing other lines instead of failing
                         continue
@@ -213,14 +204,14 @@ class PackLoader:
                 logger.info(
                     f"Loaded {
                         len(documents)} documents from {
-                        jsonl_file.name} ({error_count} lines skipped due to errors)")
         except Exception as e:
             logger.error(f"Error loading JSONL file {jsonl_file}: {e}")
         return documents
-    def _load_structured_pack(self, pack_dir: Path,
-                              pack_name: str) -> List[Dict[str, Any]]:
         """Load structured pack with templates"""
         documents = []
@@ -233,8 +224,7 @@ class PackLoader:
             with open(templates_file, "r", encoding="utf-8") as f:
                 data = json.load(f)
-            templates = data if isinstance(
-                data, list) else data.get("templates", [])
             for template in templates:
                 doc = {
@@ -260,8 +250,7 @@ class PackLoader:
         self, entry: Dict[str, Any], pack_name: str, doc_id: str
     ) -> Dict[str, Any]:
         """Format a pack entry into a document"""
-        content = entry.get("content") or entry.get(
-            "text") or json.dumps(entry)
         return {
             "id": f"{pack_name}/{doc_id}",

             pack_docs = self._load_pack(pack_dir, pack_name)
             documents.extend(pack_docs)
+            logger.info(f"✓ Loaded {len(pack_docs)} documents from {pack_name}")
         self.documents = documents
         return documents
+    def _load_pack(self, pack_dir: Path, pack_name: str) -> List[Dict[str, Any]]:
         """Load documents from a specific pack"""
         documents = []
         return documents
+    def _is_valid_warbler_pack(self, pack_dir: Path, pack_name: str, jsonl_file: Path) -> bool:
         """Validate that a directory is a valid Warbler pack
         A valid Warbler pack must have:
             # For chunked packs, look for chunk files
             chunk_files = list(pack_dir.glob(f"{pack_name}-chunk-*.jsonl"))
             if chunk_files:
+                logger.debug(f"Found {len(chunk_files)} chunk files for {pack_name}")
                 return True
             else:
                 logger.warning(f"Chunked pack {pack_name} has no chunk files")
             if jsonl_file.exists():
                 return True
             else:
+                logger.warning(f"Single-file pack {pack_name} missing JSONL file: {jsonl_file}")
                 return False
+    def _load_jsonl_pack(self, pack_dir: Path, pack_name: str) -> List[Dict[str, Any]]:
         """Load JSONL-based pack (supports both single-file and chunked packs)"""
         documents = []
                 with open(package_json, "r", encoding="utf-8") as f:
                     metadata = json.load(f)
                     is_chunked = metadata.get("chunked", False)
+                    chunk_pattern = metadata.get("chunk_pattern", f"{pack_name}-chunk-*.jsonl")
             except (json.JSONDecodeError, IOError) as e:
+                logger.warning(f"Could not read package.json for {pack_name}: {e}")
         if is_chunked:
             # Load chunked pack
             chunk_files = sorted(pack_dir.glob(f"{pack_name}-chunk-*.jsonl"))
             if not chunk_files:
+                logger.warning(f"No chunk files found for chunked pack {pack_name}")
                 return documents
+            logger.info(f"Found {len(chunk_files)} chunk files for {pack_name}")
             # Load each chunk file in order
             for chunk_file in chunk_files:
             logger.info(
                 f"Loaded {
                     len(documents)} total documents from {
+                    len(chunk_files)} chunks"
+            )
         else:
             # Load single-file pack (backward compatibility)
             jsonl_file = pack_dir / f"{pack_name}.jsonl"
         return documents
+    def _load_jsonl_file(self, jsonl_file: Path, pack_name: str) -> List[Dict[str, Any]]:
         """Load a single JSONL file with robust error handling"""
         documents = []
         error_count = 0
                     try:
                         entry = json.loads(line)
                         doc = self._format_document(
+                            entry,
+                            pack_name,
+                            f"{
+                                jsonl_file.stem}_line_{line_num}",
+                        )
                         documents.append(doc)
                     except json.JSONDecodeError as e:
                         error_count += 1
                         if error_count <= max_errors_to_log:
                             logger.warning(
                                 f"Error parsing line {line_num} in {
+                                    jsonl_file.name}: {e}"
+                            )
                         # Continue processing other lines instead of failing
                         continue
                 logger.info(
                     f"Loaded {
                         len(documents)} documents from {
+                        jsonl_file.name} ({error_count} lines skipped due to errors)"
+                )
         except Exception as e:
             logger.error(f"Error loading JSONL file {jsonl_file}: {e}")
         return documents
+    def _load_structured_pack(self, pack_dir: Path, pack_name: str) -> List[Dict[str, Any]]:
         """Load structured pack with templates"""
         documents = []
             with open(templates_file, "r", encoding="utf-8") as f:
                 data = json.load(f)
+            templates = data if isinstance(data, list) else data.get("templates", [])
             for template in templates:
                 doc = {
         self, entry: Dict[str, Any], pack_name: str, doc_id: str
     ) -> Dict[str, Any]:
         """Format a pack entry into a document"""
+        content = entry.get("content") or entry.get("text") or json.dumps(entry)
         return {
             "id": f"{pack_name}/{doc_id}",

warbler_cda/retrieval_api.py CHANGED Viewed

@@ -31,8 +31,7 @@ class RetrievalQuery:
     mode: RetrievalMode
     anchor_ids: Optional[List[str]] = None
     semantic_query: Optional[str] = None
-    temporal_range: Optional[Tuple[float, float]
-                             ] = None  # (start_time, end_time)
     max_results: int = 10
     confidence_threshold: float = 0.6
     exclude_conflicts: bool = True
@@ -129,17 +128,13 @@ class RetrievalAPI:
         self.quality_threshold = self.config.get("quality_threshold", 0.6)
         # STAT7 hybrid scoring configuration
-        self.enable_stat7_hybrid = self.config.get(
-            "enable_stat7_hybrid", False)
-        self.default_weight_semantic = self.config.get(
-            "default_weight_semantic", 0.6)
-        self.default_weight_stat7 = self.config.get(
-            "default_weight_stat7", 0.4)
         # Retrieval cache (for performance)
         self.query_cache: Dict[str, ContextAssembly] = {}
-        self.cache_ttl_seconds = self.config.get(
-            "cache_ttl_seconds", 300)  # 5 minutes
         # Document STAT7 assignments cache (for rapid re-retrieval)
         self.document_stat7_cache: Dict[str, Dict[str, Any]] = {}
@@ -158,8 +153,7 @@ class RetrievalAPI:
             "quality_distribution": {"high": 0, "medium": 0, "low": 0},
         }
-    def retrieve_context(
-            self, query: Union[RetrievalQuery, Dict[str, Any]]) -> ContextAssembly:
         """
         Main retrieval method - assemble context based on query.
@@ -243,10 +237,7 @@ class RetrievalAPI:
         assembly = self.retrieve_context(query)
         return assembly.results
-    def get_anchor_context(
-            self,
-            anchor_id: str,
-            context_radius: int = 3) -> ContextAssembly:
         """
         Get context around a specific anchor.
@@ -266,10 +257,7 @@ class RetrievalAPI:
         return self.retrieve_context(query)
-    def trace_provenance(
-            self,
-            content_id: str,
-            max_depth: int = 5) -> ContextAssembly:
         """
         Trace provenance chain for a piece of content.
@@ -327,10 +315,12 @@ class RetrievalAPI:
         if embedding:
             doc_entry["embedding"] = embedding
-        if (stat7_coordinates is None and embedding and hasattr(
-                self.embedding_provider, "compute_stat7_from_embedding")):
-            stat7_coordinates = self.embedding_provider.compute_stat7_from_embedding(
-                embedding)
         if stat7_coordinates:
             doc_entry["stat7_coordinates"] = stat7_coordinates
@@ -367,22 +357,17 @@ class RetrievalAPI:
             anchor_ids=query_dict.get("anchor_ids"),
             semantic_query=query_dict.get("semantic_query"),
             temporal_range=query_dict.get("temporal_range"),
-            max_results=query_dict.get(
-                "max_results", self.default_max_results),
             confidence_threshold=query_dict.get("confidence_threshold", 0.6),
             exclude_conflicts=query_dict.get("exclude_conflicts", True),
             include_provenance=query_dict.get("include_provenance", True),
-            stat7_hybrid=query_dict.get(
-                "stat7_hybrid", self.enable_stat7_hybrid),
             stat7_address=query_dict.get("stat7_address"),
-            weight_semantic=query_dict.get(
-                "weight_semantic", self.default_weight_semantic),
-            weight_stat7=query_dict.get(
-                "weight_stat7", self.default_weight_stat7),
         )
-    def _retrieve_semantic_similarity(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content based on semantic similarity."""
         results = []
@@ -403,15 +388,13 @@ class RetrievalAPI:
                 self.semantic_anchors}",
             file=sys.stderr,
         )
-        print(
-            f"DEBUG: context_store size={len(self._context_store)}", file=sys.stderr)
         # If embedding provider available, use it
         if self.embedding_provider:
             # Get query embedding
             try:
-                query_embedding = self.embedding_provider.embed_text(
-                    query.semantic_query)
             except Exception:
                 return results
@@ -431,8 +414,8 @@ class RetrievalAPI:
                             content=anchor.concept_text,
                             relevance_score=similarity,
                             temporal_distance=self._calculate_temporal_distance(
-                                anchor.provenance.first_seen,
-                                query.query_timestamp),
                             anchor_connections=[anchor_id],
                             provenance_depth=1,
                             conflict_flags=[],
@@ -461,8 +444,8 @@ class RetrievalAPI:
                             content=micro.compressed_text,
                             relevance_score=similarity,
                             temporal_distance=self._calculate_temporal_distance(
-                                micro.creation_timestamp,
-                                query.query_timestamp),
                             anchor_connections=[],
                             provenance_depth=2,
                             conflict_flags=[],
@@ -481,9 +464,7 @@ class RetrievalAPI:
         return results
-    def _search_context_store(
-            self,
-            query: RetrievalQuery) -> List[RetrievalResult]:
         """
         Search context store using embeddings (semantic) or keyword fallback.
         Prefers embedding-based semantic search when available.
@@ -494,16 +475,14 @@ class RetrievalAPI:
             return results
         try:
-            if self.embedding_provider and hasattr(
-                    self.embedding_provider, "semantic_search"):
                 return self._search_context_store_semantic(query)
         except Exception as e:
             pass
         return self._search_context_store_keyword(query)
-    def _search_context_store_semantic(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Search context store using semantic embeddings."""
         results = []
@@ -540,10 +519,8 @@ class RetrievalAPI:
                     hybrid_score = sim_score
                     if query.stat7_hybrid:
                         hybrid_score = (
-                            query.weight_semantic *
-                            sim_score +
-                            query.weight_stat7 *
-                            stat7_resonance)
                     result = RetrievalResult(
                         result_id=f"ctx_{doc_id}",
@@ -565,8 +542,7 @@ class RetrievalAPI:
         return results
-    def _search_context_store_keyword(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Fallback keyword-based search of context store."""
         results = []
@@ -613,21 +589,16 @@ class RetrievalAPI:
             return 0.5
         try:
-            lineage_dist = abs(doc_stat7.get("lineage", 0.5) -
-                               query_stat7.get("lineage", 0.5))
             adjacency_dist = abs(
-                doc_stat7.get("adjacency", 0.5) -
-                query_stat7.get("adjacency", 0.5)
             )
             luminosity_dist = abs(
-                doc_stat7.get("luminosity", 0.7) -
-                query_stat7.get("luminosity", 0.7)
             )
-            polarity_dist = abs(doc_stat7.get(
-                "polarity", 0.5) - query_stat7.get("polarity", 0.5))
             dimensionality_dist = abs(
-                doc_stat7.get("dimensionality", 0.5) -
-                query_stat7.get("dimensionality", 0.5)
             )
             avg_distance = (
@@ -643,8 +614,7 @@ class RetrievalAPI:
         except Exception:
             return 0.5
-    def _retrieve_temporal_sequence(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content based on temporal sequence."""
         results = []
@@ -664,15 +634,15 @@ class RetrievalAPI:
             for anchor_id, anchor in self.semantic_anchors.anchors.items():
                 if temporal_range[0] <= anchor.provenance.first_seen <= temporal_range[1]:
                     temporal_items.append(
-                        ("anchor", anchor_id, anchor.provenance.first_seen, anchor))
         # Add micro-summaries
         if self.summarization_ladder:
             for micro in self.summarization_ladder.micro_summaries:
                 if temporal_range[0] <= micro.creation_timestamp <= temporal_range[1]:
                     temporal_items.append(
-                        ("micro_summary", micro.summary_id,
-                         micro.creation_timestamp, micro)
                     )
         # Sort by timestamp
@@ -711,15 +681,13 @@ class RetrievalAPI:
                     anchor_connections=[],
                     provenance_depth=2,
                     conflict_flags=[],
-                    metadata={"timestamp": timestamp,
-                              "window_size": micro.window_size},
                 )
                 results.append(result)
         return results
-    def _retrieve_anchor_neighborhood(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content in the neighborhood of specific anchors."""
         results = []
@@ -756,15 +724,13 @@ class RetrievalAPI:
                             anchor_connections=[anchor_id, other_id],
                             provenance_depth=1,
                             conflict_flags=[],
-                            metadata={"neighbor_of": anchor_id,
-                                      "similarity": similarity},
                         )
                         results.append(result)
         return results
-    def _retrieve_provenance_chain(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content following provenance relationships."""
         results = []
@@ -795,8 +761,7 @@ class RetrievalAPI:
                     results.append(result)
                     # Add related content from update history
-                    for i, update in enumerate(
-                            anchor.provenance.update_history):
                         if i >= query.max_results - 1:
                             break
@@ -806,20 +771,17 @@ class RetrievalAPI:
                             content_id=f"{anchor_id}_update_{i}",
                             content=f"Update: {update.get('context', {}).get('mist_id', 'unknown')}",
                             relevance_score=0.8 - (i * 0.1),
-                            temporal_distance=abs(
-                                update["timestamp"] - query.query_timestamp),
                             anchor_connections=[anchor_id],
                             provenance_depth=i + 1,
                             conflict_flags=[],
-                            metadata={
-                                "update_context": update.get("context", {})},
                         )
                         results.append(result)
         return results
-    def _retrieve_conflict_aware(
-            self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content while avoiding conflicts."""
         # First get base results
         base_results = self._retrieve_semantic_similarity(query)
@@ -835,14 +797,14 @@ class RetrievalAPI:
             # Check for conflicts involving this content
             if hasattr(self.conflict_detector, "get_conflict_analysis"):
-                conflict_analysis = self.conflict_detector.get_conflict_analysis(
-                    result.content_id)
                 if conflict_analysis.get("conflicts_found", 0) > 0:
                     conflicts = [
                         f"conflict_confidence_{
                             conflict_analysis.get(
                                 'max_confidence',
-                                0):.2f}"]
             # Include result but flag conflicts
             result.conflict_flags = conflicts
@@ -851,9 +813,7 @@ class RetrievalAPI:
         return filtered_results
-    def _retrieve_composite(
-            self,
-            query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve using multiple modes and combine results."""
         all_results = []
@@ -896,15 +856,13 @@ class RetrievalAPI:
             self.metrics["hybrid_queries"] += 1
         # Filter by confidence threshold
-        filtered = [r for r in results if r.relevance_score >=
-                    query.confidence_threshold]
         # Apply temporal decay
         current_time = query.query_timestamp
         for result in filtered:
             age_hours = result.temporal_distance / 3600
-            decay_factor = max(
-                0.1, 1.0 - (age_hours / self.temporal_decay_hours))
             result.relevance_score *= decay_factor
         # Sort by relevance score
@@ -937,8 +895,7 @@ class RetrievalAPI:
         # Temporal span
         timestamps = [r.temporal_distance for r in results]
         temporal_span_hours = (
-            (max(timestamps) - min(timestamps)) /
-            3600 if len(timestamps) > 1 else 0
         )
         # Anchor coverage
@@ -968,17 +925,11 @@ class RetrievalAPI:
             retrieval_timestamp=time.time(),
         )
-    def _calculate_temporal_distance(
-            self,
-            timestamp: float,
-            reference_time: float) -> float:
         """Calculate temporal distance between two timestamps."""
         return abs(timestamp - reference_time)
-    def _calculate_temporal_relevance(
-            self,
-            timestamp: float,
-            reference_time: float) -> float:
         """Calculate relevance based on temporal proximity."""
         distance_seconds = abs(timestamp - reference_time)
         distance_hours = distance_seconds / 3600
@@ -1060,14 +1011,12 @@ class RetrievalAPI:
     def _update_metrics(self, assembly: ContextAssembly, elapsed_ms: float):
         """Update performance metrics."""
         self.metrics["average_results_per_query"] = (
-            self.metrics["average_results_per_query"] *
-            (self.metrics["total_queries"] - 1)
             + len(assembly.results)
         ) / self.metrics["total_queries"]
         self.metrics["average_retrieval_time_ms"] = (
-            self.metrics["average_retrieval_time_ms"] *
-            (self.metrics["total_queries"] - 1)
             + elapsed_ms
         ) / self.metrics["total_queries"]
@@ -1081,8 +1030,7 @@ class RetrievalAPI:
     def _calculate_cache_hit_rate(self) -> float:
         """Calculate cache hit rate."""
-        total_requests = self.metrics["cache_hits"] + \
-            self.metrics["cache_misses"]
         if total_requests == 0:
             return 0.0
         return self.metrics["cache_hits"] / total_requests
@@ -1228,8 +1176,8 @@ class RetrievalAPI:
         try:
             q_stat7_dict = query.stat7_address
             query_realm = Realm(
-                type=q_stat7_dict["realm"]["type"],
-                label=q_stat7_dict["realm"]["label"])
             query_stat7 = STAT7Address(
                 realm=query_realm,
                 lineage=q_stat7_dict["lineage"],
@@ -1259,8 +1207,8 @@ class RetrievalAPI:
             try:
                 doc_realm = Realm(
-                    type=doc_stat7_dict["realm"]["type"],
-                    label=doc_stat7_dict["realm"]["label"])
                 doc_stat7 = STAT7Address(
                     realm=doc_realm,
                     lineage=doc_stat7_dict["lineage"],
@@ -1275,8 +1223,7 @@ class RetrievalAPI:
                 continue
             # Compute STAT7 resonance score
-            stat7_res = self.stat7_bridge.stat7_resonance(
-                query_stat7, doc_stat7)
             result.stat7_resonance = stat7_res
             # Compute semantic similarity (if available)
@@ -1284,8 +1231,7 @@ class RetrievalAPI:
             result.semantic_similarity = semantic_sim
             # Combine into hybrid score
-            hybrid = (query.weight_semantic * semantic_sim) + \
-                (query.weight_stat7 * stat7_res)
             result.relevance_score = max(0.0, min(hybrid, 1.0))
         return results

     mode: RetrievalMode
     anchor_ids: Optional[List[str]] = None
     semantic_query: Optional[str] = None
+    temporal_range: Optional[Tuple[float, float]] = None  # (start_time, end_time)
     max_results: int = 10
     confidence_threshold: float = 0.6
     exclude_conflicts: bool = True
         self.quality_threshold = self.config.get("quality_threshold", 0.6)
         # STAT7 hybrid scoring configuration
+        self.enable_stat7_hybrid = self.config.get("enable_stat7_hybrid", False)
+        self.default_weight_semantic = self.config.get("default_weight_semantic", 0.6)
+        self.default_weight_stat7 = self.config.get("default_weight_stat7", 0.4)
         # Retrieval cache (for performance)
         self.query_cache: Dict[str, ContextAssembly] = {}
+        self.cache_ttl_seconds = self.config.get("cache_ttl_seconds", 300)  # 5 minutes
         # Document STAT7 assignments cache (for rapid re-retrieval)
         self.document_stat7_cache: Dict[str, Dict[str, Any]] = {}
             "quality_distribution": {"high": 0, "medium": 0, "low": 0},
         }
+    def retrieve_context(self, query: Union[RetrievalQuery, Dict[str, Any]]) -> ContextAssembly:
         """
         Main retrieval method - assemble context based on query.
         assembly = self.retrieve_context(query)
         return assembly.results
+    def get_anchor_context(self, anchor_id: str, context_radius: int = 3) -> ContextAssembly:
         """
         Get context around a specific anchor.
         return self.retrieve_context(query)
+    def trace_provenance(self, content_id: str, max_depth: int = 5) -> ContextAssembly:
         """
         Trace provenance chain for a piece of content.
         if embedding:
             doc_entry["embedding"] = embedding
+        if (
+            stat7_coordinates is None
+            and embedding
+            and hasattr(self.embedding_provider, "compute_stat7_from_embedding")
+        ):
+            stat7_coordinates = self.embedding_provider.compute_stat7_from_embedding(embedding)
         if stat7_coordinates:
             doc_entry["stat7_coordinates"] = stat7_coordinates
             anchor_ids=query_dict.get("anchor_ids"),
             semantic_query=query_dict.get("semantic_query"),
             temporal_range=query_dict.get("temporal_range"),
+            max_results=query_dict.get("max_results", self.default_max_results),
             confidence_threshold=query_dict.get("confidence_threshold", 0.6),
             exclude_conflicts=query_dict.get("exclude_conflicts", True),
             include_provenance=query_dict.get("include_provenance", True),
+            stat7_hybrid=query_dict.get("stat7_hybrid", self.enable_stat7_hybrid),
             stat7_address=query_dict.get("stat7_address"),
+            weight_semantic=query_dict.get("weight_semantic", self.default_weight_semantic),
+            weight_stat7=query_dict.get("weight_stat7", self.default_weight_stat7),
         )
+    def _retrieve_semantic_similarity(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content based on semantic similarity."""
         results = []
                 self.semantic_anchors}",
             file=sys.stderr,
         )
+        print(f"DEBUG: context_store size={len(self._context_store)}", file=sys.stderr)
         # If embedding provider available, use it
         if self.embedding_provider:
             # Get query embedding
             try:
+                query_embedding = self.embedding_provider.embed_text(query.semantic_query)
             except Exception:
                 return results
                             content=anchor.concept_text,
                             relevance_score=similarity,
                             temporal_distance=self._calculate_temporal_distance(
+                                anchor.provenance.first_seen, query.query_timestamp
+                            ),
                             anchor_connections=[anchor_id],
                             provenance_depth=1,
                             conflict_flags=[],
                             content=micro.compressed_text,
                             relevance_score=similarity,
                             temporal_distance=self._calculate_temporal_distance(
+                                micro.creation_timestamp, query.query_timestamp
+                            ),
                             anchor_connections=[],
                             provenance_depth=2,
                             conflict_flags=[],
         return results
+    def _search_context_store(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """
         Search context store using embeddings (semantic) or keyword fallback.
         Prefers embedding-based semantic search when available.
             return results
         try:
+            if self.embedding_provider and hasattr(self.embedding_provider, "semantic_search"):
                 return self._search_context_store_semantic(query)
         except Exception as e:
             pass
         return self._search_context_store_keyword(query)
+    def _search_context_store_semantic(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Search context store using semantic embeddings."""
         results = []
                     hybrid_score = sim_score
                     if query.stat7_hybrid:
                         hybrid_score = (
+                            query.weight_semantic * sim_score + query.weight_stat7 * stat7_resonance
+                        )
                     result = RetrievalResult(
                         result_id=f"ctx_{doc_id}",
         return results
+    def _search_context_store_keyword(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Fallback keyword-based search of context store."""
         results = []
             return 0.5
         try:
+            lineage_dist = abs(doc_stat7.get("lineage", 0.5) - query_stat7.get("lineage", 0.5))
             adjacency_dist = abs(
+                doc_stat7.get("adjacency", 0.5) - query_stat7.get("adjacency", 0.5)
             )
             luminosity_dist = abs(
+                doc_stat7.get("luminosity", 0.7) - query_stat7.get("luminosity", 0.7)
             )
+            polarity_dist = abs(doc_stat7.get("polarity", 0.5) - query_stat7.get("polarity", 0.5))
             dimensionality_dist = abs(
+                doc_stat7.get("dimensionality", 0.5) - query_stat7.get("dimensionality", 0.5)
             )
             avg_distance = (
         except Exception:
             return 0.5
+    def _retrieve_temporal_sequence(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content based on temporal sequence."""
         results = []
             for anchor_id, anchor in self.semantic_anchors.anchors.items():
                 if temporal_range[0] <= anchor.provenance.first_seen <= temporal_range[1]:
                     temporal_items.append(
+                        ("anchor", anchor_id, anchor.provenance.first_seen, anchor)
+                    )
         # Add micro-summaries
         if self.summarization_ladder:
             for micro in self.summarization_ladder.micro_summaries:
                 if temporal_range[0] <= micro.creation_timestamp <= temporal_range[1]:
                     temporal_items.append(
+                        ("micro_summary", micro.summary_id, micro.creation_timestamp, micro)
                     )
         # Sort by timestamp
                     anchor_connections=[],
                     provenance_depth=2,
                     conflict_flags=[],
+                    metadata={"timestamp": timestamp, "window_size": micro.window_size},
                 )
                 results.append(result)
         return results
+    def _retrieve_anchor_neighborhood(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content in the neighborhood of specific anchors."""
         results = []
                             anchor_connections=[anchor_id, other_id],
                             provenance_depth=1,
                             conflict_flags=[],
+                            metadata={"neighbor_of": anchor_id, "similarity": similarity},
                         )
                         results.append(result)
         return results
+    def _retrieve_provenance_chain(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content following provenance relationships."""
         results = []
                     results.append(result)
                     # Add related content from update history
+                    for i, update in enumerate(anchor.provenance.update_history):
                         if i >= query.max_results - 1:
                             break
                             content_id=f"{anchor_id}_update_{i}",
                             content=f"Update: {update.get('context', {}).get('mist_id', 'unknown')}",
                             relevance_score=0.8 - (i * 0.1),
+                            temporal_distance=abs(update["timestamp"] - query.query_timestamp),
                             anchor_connections=[anchor_id],
                             provenance_depth=i + 1,
                             conflict_flags=[],
+                            metadata={"update_context": update.get("context", {})},
                         )
                         results.append(result)
         return results
+    def _retrieve_conflict_aware(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve content while avoiding conflicts."""
         # First get base results
         base_results = self._retrieve_semantic_similarity(query)
             # Check for conflicts involving this content
             if hasattr(self.conflict_detector, "get_conflict_analysis"):
+                conflict_analysis = self.conflict_detector.get_conflict_analysis(result.content_id)
                 if conflict_analysis.get("conflicts_found", 0) > 0:
                     conflicts = [
                         f"conflict_confidence_{
                             conflict_analysis.get(
                                 'max_confidence',
+                                0):.2f}"
+                    ]
             # Include result but flag conflicts
             result.conflict_flags = conflicts
         return filtered_results
+    def _retrieve_composite(self, query: RetrievalQuery) -> List[RetrievalResult]:
         """Retrieve using multiple modes and combine results."""
         all_results = []
             self.metrics["hybrid_queries"] += 1
         # Filter by confidence threshold
+        filtered = [r for r in results if r.relevance_score >= query.confidence_threshold]
         # Apply temporal decay
         current_time = query.query_timestamp
         for result in filtered:
             age_hours = result.temporal_distance / 3600
+            decay_factor = max(0.1, 1.0 - (age_hours / self.temporal_decay_hours))
             result.relevance_score *= decay_factor
         # Sort by relevance score
         # Temporal span
         timestamps = [r.temporal_distance for r in results]
         temporal_span_hours = (
+            (max(timestamps) - min(timestamps)) / 3600 if len(timestamps) > 1 else 0
         )
         # Anchor coverage
             retrieval_timestamp=time.time(),
         )
+    def _calculate_temporal_distance(self, timestamp: float, reference_time: float) -> float:
         """Calculate temporal distance between two timestamps."""
         return abs(timestamp - reference_time)
+    def _calculate_temporal_relevance(self, timestamp: float, reference_time: float) -> float:
         """Calculate relevance based on temporal proximity."""
         distance_seconds = abs(timestamp - reference_time)
         distance_hours = distance_seconds / 3600
     def _update_metrics(self, assembly: ContextAssembly, elapsed_ms: float):
         """Update performance metrics."""
         self.metrics["average_results_per_query"] = (
+            self.metrics["average_results_per_query"] * (self.metrics["total_queries"] - 1)
             + len(assembly.results)
         ) / self.metrics["total_queries"]
         self.metrics["average_retrieval_time_ms"] = (
+            self.metrics["average_retrieval_time_ms"] * (self.metrics["total_queries"] - 1)
             + elapsed_ms
         ) / self.metrics["total_queries"]
     def _calculate_cache_hit_rate(self) -> float:
         """Return the share of cache lookups that were hits.

         Reads ``cache_hits`` and ``cache_misses`` from ``self.metrics`` and
         yields 0.0 when no lookups have been recorded, avoiding a division
         by zero.
         """
         hits = self.metrics["cache_hits"]
         lookups = hits + self.metrics["cache_misses"]
         return hits / lookups if lookups else 0.0
         try:
             q_stat7_dict = query.stat7_address
             query_realm = Realm(
+                type=q_stat7_dict["realm"]["type"], label=q_stat7_dict["realm"]["label"]
+            )
             query_stat7 = STAT7Address(
                 realm=query_realm,
                 lineage=q_stat7_dict["lineage"],
             try:
                 doc_realm = Realm(
+                    type=doc_stat7_dict["realm"]["type"], label=doc_stat7_dict["realm"]["label"]
+                )
                 doc_stat7 = STAT7Address(
                     realm=doc_realm,
                     lineage=doc_stat7_dict["lineage"],
                 continue
             # Compute STAT7 resonance score
+            stat7_res = self.stat7_bridge.stat7_resonance(query_stat7, doc_stat7)
             result.stat7_resonance = stat7_res
             # Compute semantic similarity (if available)
             result.semantic_similarity = semantic_sim
             # Combine into hybrid score
+            hybrid = (query.weight_semantic * semantic_sim) + (query.weight_stat7 * stat7_res)
             result.relevance_score = max(0.0, min(hybrid, 1.0))
         return results

warbler_cda/stat7_entity.py CHANGED Viewed

@@ -251,8 +251,7 @@ class STAT7Entity(ABC):
     # Event Tracking
     # ========================================================================
-    def _record_event(self, event_type: str, description: str,
-                      metadata: Dict[str, Any] = None):
         """Record a lifecycle event"""
         event = LifecycleEvent(
             timestamp=datetime.utcnow(),
@@ -263,12 +262,9 @@ class STAT7Entity(ABC):
         self.lifecycle_events.append(event)
         self.last_activity = event.timestamp
-    def get_event_history(
-            self,
-            limit: Optional[int] = None) -> List[LifecycleEvent]:
         """Get lifecycle events, optionally limited to most recent"""
-        events = sorted(self.lifecycle_events,
-                        key=lambda e: e.timestamp, reverse=True)
         return events[:limit] if limit else events
     # ========================================================================
@@ -284,8 +280,7 @@ class STAT7Entity(ABC):
             self.entangled_entities.append(other_entity_id)
             self.entanglement_strength.append(strength)
             self._record_event(
-                "entanglement_added", f"Entangled with {other_entity_id}", {
-                    "strength": strength}
             )
     def remove_entanglement(self, other_entity_id: str):
@@ -294,17 +289,13 @@ class STAT7Entity(ABC):
             idx = self.entangled_entities.index(other_entity_id)
             self.entangled_entities.pop(idx)
             self.entanglement_strength.pop(idx)
-            self._record_event("entanglement_removed",
-                               f"Untangled from {other_entity_id}")
     def get_entanglements(self) -> List[Tuple[str, float]]:
         """Get all entangled entities with strength"""
         return list(zip(self.entangled_entities, self.entanglement_strength))
-    def update_entanglement_strength(
-            self,
-            other_entity_id: str,
-            new_strength: float):
         """Update entanglement strength with another entity"""
         if other_entity_id in self.entangled_entities:
             idx = self.entangled_entities.index(other_entity_id)
@@ -365,8 +356,7 @@ class STAT7Entity(ABC):
                 {"trait_type": "Realm", "value": self.stat7.realm.value},
                 {"trait_type": "Lineage", "value": self.stat7.lineage},
                 {"trait_type": "Horizon", "value": self.stat7.horizon.value},
-                {"trait_type": "Luminosity",
-                    "value": int(self.stat7.luminosity)},
                 {"trait_type": "Polarity", "value": self.stat7.polarity.value},
                 {"trait_type": "Dimensionality", "value": self.stat7.dimensionality},
                 {"trait_type": "STAT7 Address", "value": self.stat7.address},
@@ -374,11 +364,7 @@ class STAT7Entity(ABC):
             "properties": card_data.get("properties", {}),
         }
-    def record_mint(
-            self,
-            contract_address: str,
-            token_id: int,
-            ipfs_hash: str):
         """Record successful NFT minting"""
         self.nft_minted = True
         self.nft_contract = contract_address
@@ -387,8 +373,7 @@ class STAT7Entity(ABC):
         self._record_event(
             "nft_minted",
             f"Minted as ERC-721 token #{token_id}",
-            {"contract": contract_address,
-                "token_id": token_id, "ipfs_hash": ipfs_hash},
         )
     # ========================================================================
@@ -501,8 +486,7 @@ class STAT7Entity(ABC):
                 "type": "entity_profile",
                 **card_data,
                 "owner": self.owner_id,
-                "lifecycle_events": [
-                    e.to_dict() for e in self.lifecycle_events],
                 "entanglements": self.get_entanglements(),
                 "luca_trace": self.get_luca_trace(),
             }
@@ -515,8 +499,7 @@ class STAT7Entity(ABC):
                 "stat7_dimensions": self.stat7.to_dict(),
                 "realm_details": self._get_realm_details(),
                 "entanglement_network": self.get_entanglements(),
-                "event_chronology": [
-                    e.to_dict() for e in self.lifecycle_events],
             }
     def _get_realm_details(self) -> Dict[str, Any]:

     # Event Tracking
     # ========================================================================
+    def _record_event(self, event_type: str, description: str, metadata: Dict[str, Any] = None):
         """Record a lifecycle event"""
         event = LifecycleEvent(
             timestamp=datetime.utcnow(),
         self.lifecycle_events.append(event)
         self.last_activity = event.timestamp
+    def get_event_history(self, limit: Optional[int] = None) -> List[LifecycleEvent]:
         """Get lifecycle events, optionally limited to most recent"""
+        events = sorted(self.lifecycle_events, key=lambda e: e.timestamp, reverse=True)
         return events[:limit] if limit else events
     # ========================================================================
             self.entangled_entities.append(other_entity_id)
             self.entanglement_strength.append(strength)
             self._record_event(
+                "entanglement_added", f"Entangled with {other_entity_id}", {"strength": strength}
             )
     def remove_entanglement(self, other_entity_id: str):
             idx = self.entangled_entities.index(other_entity_id)
             self.entangled_entities.pop(idx)
             self.entanglement_strength.pop(idx)
+            self._record_event("entanglement_removed", f"Untangled from {other_entity_id}")
     def get_entanglements(self) -> List[Tuple[str, float]]:
         """Return ``(entity_id, strength)`` pairs for every entangled entity.

         Pairs are built positionally from ``self.entangled_entities`` and
         ``self.entanglement_strength``; if the lists differ in length the
         result stops at the shorter one.
         """
         pairs = zip(self.entangled_entities, self.entanglement_strength)
         return list(pairs)
+    def update_entanglement_strength(self, other_entity_id: str, new_strength: float):
         """Update entanglement strength with another entity"""
         if other_entity_id in self.entangled_entities:
             idx = self.entangled_entities.index(other_entity_id)
                 {"trait_type": "Realm", "value": self.stat7.realm.value},
                 {"trait_type": "Lineage", "value": self.stat7.lineage},
                 {"trait_type": "Horizon", "value": self.stat7.horizon.value},
+                {"trait_type": "Luminosity", "value": int(self.stat7.luminosity)},
                 {"trait_type": "Polarity", "value": self.stat7.polarity.value},
                 {"trait_type": "Dimensionality", "value": self.stat7.dimensionality},
                 {"trait_type": "STAT7 Address", "value": self.stat7.address},
             "properties": card_data.get("properties", {}),
         }
+    def record_mint(self, contract_address: str, token_id: int, ipfs_hash: str):
         """Record successful NFT minting"""
         self.nft_minted = True
         self.nft_contract = contract_address
         self._record_event(
             "nft_minted",
             f"Minted as ERC-721 token #{token_id}",
+            {"contract": contract_address, "token_id": token_id, "ipfs_hash": ipfs_hash},
         )
     # ========================================================================
                 "type": "entity_profile",
                 **card_data,
                 "owner": self.owner_id,
+                "lifecycle_events": [e.to_dict() for e in self.lifecycle_events],
                 "entanglements": self.get_entanglements(),
                 "luca_trace": self.get_luca_trace(),
             }
                 "stat7_dimensions": self.stat7.to_dict(),
                 "realm_details": self._get_realm_details(),
                 "entanglement_network": self.get_entanglements(),
+                "event_chronology": [e.to_dict() for e in self.lifecycle_events],
             }
     def _get_realm_details(self) -> Dict[str, Any]:

warbler_cda/stat7_experiments.py CHANGED Viewed

@@ -63,8 +63,7 @@ def normalize_float(value: float, decimal_places: int = 8) -> str:
     # Use Decimal for precise rounding
     d = Decimal(str(value))
-    quantized = d.quantize(Decimal(10) ** -decimal_places,
-                           rounding=ROUND_HALF_EVEN)
     # Convert to string and strip trailing zeros (but keep at least one
     # decimal)
@@ -138,8 +137,7 @@ def canonical_serialize(data: Dict[str, Any]) -> str:
     sorted_data = sort_json_keys(data)
     # Serialize with no whitespace, ensure_ascii=False to preserve Unicode
-    canonical = json.dumps(sorted_data, separators=(
-        ",", ":"), ensure_ascii=True, sort_keys=False)
     return canonical
@@ -238,8 +236,7 @@ class BitChain:
     def get_stat7_uri(self) -> str:
         """Generate STAT7 URI address format."""
         coords = self.coordinates
-        adjacency_hash = compute_address_hash(
-            {"adjacency": sorted(coords.adjacency)})[:8]
         uri = f"stat7://{coords.realm}/{coords.lineage}/{adjacency_hash}/{coords.horizon}"
         uri += f"?r={normalize_float(coords.resonance)}"
@@ -255,8 +252,7 @@ class BitChain:
 REALMS = ["data", "narrative", "system", "faculty", "event", "pattern", "void"]
 HORIZONS = ["genesis", "emergence", "peak", "decay", "crystallization"]
-ENTITY_TYPES = ["concept", "artifact", "agent",
-                "lineage", "adjacency", "horizon", "fragment"]
 def generate_random_bitchain(seed: Optional[int] = None) -> BitChain:
@@ -342,11 +338,8 @@ class EXP01_AddressUniqueness:
         for iteration in range(self.iterations):
             # Generate random bit-chains
             bitchains = [
-                generate_random_bitchain(
-                    seed=iteration *
-                    1000 +
-                    i) for i in range(
-                    self.sample_size)]
             # Compute addresses
             addresses = set()
@@ -387,15 +380,14 @@ class EXP01_AddressUniqueness:
             if collision_pairs:
                 for addr, ids in collision_pairs.items():
-                    print(
-                        f"  ⚠️  Collision on {addr[:16]}... : {len(ids)} entries")
         print()
         print(
             f"OVERALL RESULT: {
-                '✅ ALL PASS' if all_success else '❌ SOME FAILED'}")
-        print(
-            f"Success rate: {sum(1 for r in self.results if r.success)}/{self.iterations}")
         return self.results, all_success
@@ -474,8 +466,7 @@ class EXP02_RetrievalEfficiency:
             print(f"Testing scale: {scale:,} bit-chains")
             # Generate bit-chains
-            bitchains = [generate_random_bitchain(
-                seed=i) for i in range(scale)]
             # Index by address for O(1) retrieval simulation
             address_to_bc = {bc.compute_address(): bc for bc in bitchains}
@@ -532,7 +523,8 @@ class EXP02_RetrievalEfficiency:
         print(
             f"OVERALL RESULT: {
-                '✅ ALL PASS' if all_success else '❌ SOME FAILED'}")
         return self.results, all_success
@@ -606,8 +598,7 @@ class EXP03_DimensionNecessity:
         # Baseline: all 7 dimensions
         print("Baseline: All 7 dimensions")
-        bitchains = [generate_random_bitchain(
-            seed=i) for i in range(self.sample_size)]
         addresses = set()
         collisions = 0
@@ -631,7 +622,8 @@ class EXP03_DimensionNecessity:
         status = "✅ PASS" if result.acceptable else "❌ FAIL"
         print(
             f"  {status} | Collisions: {collisions} | Rate: {
-                baseline_collision_rate * 100:.4f}%")
         print()
         # Ablation: remove each dimension
@@ -661,8 +653,7 @@ class EXP03_DimensionNecessity:
             acceptable = collision_rate < 0.001  # Should be unacceptable without each dim
             result = EXP03_Result(
-                dimensions_used=[
-                    d for d in self.STAT7_DIMENSIONS if d != removed_dim],
                 sample_size=self.sample_size,
                 collisions=collisions,
                 collision_rate=collision_rate,
@@ -676,7 +667,8 @@ class EXP03_DimensionNecessity:
             status = "✅ NECESSARY" if necessity else "⚠️  OPTIONAL"
             print(
                 f"  {status} | Collisions: {collisions} | Rate: {
-                    collision_rate * 100:.4f}%")
         print()
         print(
@@ -720,8 +712,7 @@ def run_all_experiments(
     results = {}
     # EXP-01
-    exp01 = EXP01_AddressUniqueness(
-        sample_size=exp01_samples, iterations=exp01_iterations)
     _, exp01_success = exp01.run()
     results["EXP-01"] = {
         "success": exp01_success,
@@ -760,7 +751,8 @@ def run_all_experiments(
     print(
         f"\nOverall Phase 1 Status: {
             '✅ READY FOR PHASE 2' if all(
-                r['success'] for r in results.values()) else '❌ NEEDS WORK'}")
     return results

     # Use Decimal for precise rounding
     d = Decimal(str(value))
+    quantized = d.quantize(Decimal(10) ** -decimal_places, rounding=ROUND_HALF_EVEN)
     # Convert to string and strip trailing zeros (but keep at least one
     # decimal)
     sorted_data = sort_json_keys(data)
     # Serialize with no whitespace; ensure_ascii=True escapes non-ASCII characters as \uXXXX
+    canonical = json.dumps(sorted_data, separators=(",", ":"), ensure_ascii=True, sort_keys=False)
     return canonical
     def get_stat7_uri(self) -> str:
         """Generate STAT7 URI address format."""
         coords = self.coordinates
+        adjacency_hash = compute_address_hash({"adjacency": sorted(coords.adjacency)})[:8]
         uri = f"stat7://{coords.realm}/{coords.lineage}/{adjacency_hash}/{coords.horizon}"
         uri += f"?r={normalize_float(coords.resonance)}"
 REALMS = ["data", "narrative", "system", "faculty", "event", "pattern", "void"]
 HORIZONS = ["genesis", "emergence", "peak", "decay", "crystallization"]
+ENTITY_TYPES = ["concept", "artifact", "agent", "lineage", "adjacency", "horizon", "fragment"]
 def generate_random_bitchain(seed: Optional[int] = None) -> BitChain:
         for iteration in range(self.iterations):
             # Generate random bit-chains
             bitchains = [
+                generate_random_bitchain(seed=iteration * 1000 + i) for i in range(self.sample_size)
+            ]
             # Compute addresses
             addresses = set()
             if collision_pairs:
                 for addr, ids in collision_pairs.items():
+                    print(f"  ⚠️  Collision on {addr[:16]}... : {len(ids)} entries")
         print()
         print(
             f"OVERALL RESULT: {
+                '✅ ALL PASS' if all_success else '❌ SOME FAILED'}"
+        )
+        print(f"Success rate: {sum(1 for r in self.results if r.success)}/{self.iterations}")
         return self.results, all_success
             print(f"Testing scale: {scale:,} bit-chains")
             # Generate bit-chains
+            bitchains = [generate_random_bitchain(seed=i) for i in range(scale)]
             # Index by address for O(1) retrieval simulation
             address_to_bc = {bc.compute_address(): bc for bc in bitchains}
         print(
             f"OVERALL RESULT: {
+                '✅ ALL PASS' if all_success else '❌ SOME FAILED'}"
+        )
         return self.results, all_success
         # Baseline: all 7 dimensions
         print("Baseline: All 7 dimensions")
+        bitchains = [generate_random_bitchain(seed=i) for i in range(self.sample_size)]
         addresses = set()
         collisions = 0
         status = "✅ PASS" if result.acceptable else "❌ FAIL"
         print(
             f"  {status} | Collisions: {collisions} | Rate: {
+                baseline_collision_rate * 100:.4f}%"
+        )
         print()
         # Ablation: remove each dimension
             acceptable = collision_rate < 0.001  # Should be unacceptable without each dim
             result = EXP03_Result(
+                dimensions_used=[d for d in self.STAT7_DIMENSIONS if d != removed_dim],
                 sample_size=self.sample_size,
                 collisions=collisions,
                 collision_rate=collision_rate,
             status = "✅ NECESSARY" if necessity else "⚠️  OPTIONAL"
             print(
                 f"  {status} | Collisions: {collisions} | Rate: {
+                    collision_rate * 100:.4f}%"
+            )
         print()
         print(
     results = {}
     # EXP-01
+    exp01 = EXP01_AddressUniqueness(sample_size=exp01_samples, iterations=exp01_iterations)
     _, exp01_success = exp01.run()
     results["EXP-01"] = {
         "success": exp01_success,
     print(
         f"\nOverall Phase 1 Status: {
             '✅ READY FOR PHASE 2' if all(
+                r['success'] for r in results.values()) else '❌ NEEDS WORK'}"
+    )
     return results

warbler_cda/stat7_rag_bridge.py CHANGED Viewed

@@ -55,11 +55,17 @@ class STAT7Address:
     def __post_init__(self):
         """Validate STAT7 constraints."""
-        assert 0.0 <= self.adjacency <= 1.0, f"adjacency must be [0,1], got {
             self.adjacency}"
-        assert 0.0 <= self.luminosity <= 1.0, f"luminosity must be [0,1], got {
             self.luminosity}"
-        assert 0.0 <= self.polarity <= 1.0, f"polarity must be [0,1], got {
             self.polarity}"
         assert self.lineage >= 0, f"lineage must be >= 0, got {self.lineage}"
         assert (
@@ -91,8 +97,7 @@ class RAGDocument:
     def __post_init__(self):
         """Validate document structure."""
-        assert len(
-            self.embedding) > 0, f"embedding must not be empty for {self.id}"
 # ============================================================================
@@ -116,9 +121,7 @@ def cosine_similarity(a: List[float], b: List[float]) -> float:
     return dot / denom
-def stat7_resonance(
-        query_stat7: STAT7Address,
-        doc_stat7: STAT7Address) -> float:
     """
     Compute STAT7 resonance between query and document addresses.
@@ -232,8 +235,7 @@ def retrieve(
     """
     scores = []
     for doc in documents:
-        score = hybrid_score(query_embedding, doc,
-                             query_stat7, weight_semantic, weight_stat7)
         scores.append((doc.id, score))
     # Sort by score descending, return top-k
@@ -389,14 +391,11 @@ def compare_retrieval_results(
     overlap = len(semantic_ids & hybrid_ids)
     overlap_pct = (overlap / k * 100) if k > 0 else 0.0
-    semantic_avg = sum(
-        score for _, score in semantic_results[:k]) / k if k > 0 else 0.0
-    hybrid_avg = sum(
-        score for _, score in hybrid_results[:k]) / k if k > 0 else 0.0
     # Measure ranking distance: how far did top-k items move?
-    semantic_rank = {doc_id: idx for idx,
-                     (doc_id, _) in enumerate(semantic_results[:k])}
     reranking_distances = []
     for idx, (doc_id, _) in enumerate(hybrid_results[:k]):
         if doc_id in semantic_rank:
@@ -404,8 +403,7 @@ def compare_retrieval_results(
             reranking_distances.append(distance)
     avg_reranking_distance = (
-        sum(reranking_distances) /
-        len(reranking_distances) if reranking_distances else 0.0
     )
     return {
@@ -434,8 +432,7 @@ class STAT7RAGBridge:
     dependency injection.
     """
-    def stat7_resonance(self, query_stat7: STAT7Address,
-                        doc_stat7: STAT7Address) -> float:
         """
         Compute STAT7 resonance between query and document addresses.
@@ -467,12 +464,7 @@ class STAT7RAGBridge:
         Returns: [0.0, 1.0] hybrid score
         """
-        return hybrid_score(
-            query_embedding,
-            doc,
-            query_stat7,
-            weight_semantic,
-            weight_stat7)
     def retrieve(
         self,
@@ -496,10 +488,4 @@ class STAT7RAGBridge:
         Returns: List of (doc_id, hybrid_score) tuples, sorted by score (descending)
         """
-        return retrieve(
-            documents,
-            query_embedding,
-            query_stat7,
-            k,
-            weight_semantic,
-            weight_stat7)

     def __post_init__(self):
         """Validate STAT7 constraints."""
+        assert (
+            0.0 <= self.adjacency <= 1.0
+        ), f"adjacency must be [0,1], got {
             self.adjacency}"
+        assert (
+            0.0 <= self.luminosity <= 1.0
+        ), f"luminosity must be [0,1], got {
             self.luminosity}"
+        assert (
+            0.0 <= self.polarity <= 1.0
+        ), f"polarity must be [0,1], got {
             self.polarity}"
         assert self.lineage >= 0, f"lineage must be >= 0, got {self.lineage}"
         assert (
     def __post_init__(self):
         """Validate document structure."""
+        assert len(self.embedding) > 0, f"embedding must not be empty for {self.id}"
 # ============================================================================
     return dot / denom
+def stat7_resonance(query_stat7: STAT7Address, doc_stat7: STAT7Address) -> float:
     """
     Compute STAT7 resonance between query and document addresses.
     """
     scores = []
     for doc in documents:
+        score = hybrid_score(query_embedding, doc, query_stat7, weight_semantic, weight_stat7)
         scores.append((doc.id, score))
     # Sort by score descending, return top-k
     overlap = len(semantic_ids & hybrid_ids)
     overlap_pct = (overlap / k * 100) if k > 0 else 0.0
+    semantic_avg = sum(score for _, score in semantic_results[:k]) / k if k > 0 else 0.0
+    hybrid_avg = sum(score for _, score in hybrid_results[:k]) / k if k > 0 else 0.0
     # Measure ranking distance: how far did top-k items move?
+    semantic_rank = {doc_id: idx for idx, (doc_id, _) in enumerate(semantic_results[:k])}
     reranking_distances = []
     for idx, (doc_id, _) in enumerate(hybrid_results[:k]):
         if doc_id in semantic_rank:
             reranking_distances.append(distance)
     avg_reranking_distance = (
+        sum(reranking_distances) / len(reranking_distances) if reranking_distances else 0.0
     )
     return {
     dependency injection.
     """
+    def stat7_resonance(self, query_stat7: STAT7Address, doc_stat7: STAT7Address) -> float:
         """
         Compute STAT7 resonance between query and document addresses.
         Returns: [0.0, 1.0] hybrid score
         """
+        return hybrid_score(query_embedding, doc, query_stat7, weight_semantic, weight_stat7)
     def retrieve(
         self,
         Returns: List of (doc_id, hybrid_score) tuples, sorted by score (descending)
         """
+        return retrieve(documents, query_embedding, query_stat7, k, weight_semantic, weight_stat7)

warbler_cda/utils/load_warbler_packs.py CHANGED Viewed

@@ -52,26 +52,19 @@ class WarblerPackLoader:
             return []
         # Look for JSON, YAML, markdown, and JSONL files
-        for pattern in [
-            "**/*.json",
-            "**/*.yaml",
-            "**/*.yml",
-            "**/*.md",
-                "**/*.jsonl"]:
             for file_path in pack_path.glob(pattern):
                 try:
                     doc = self._parse_document(file_path, pack_name)
                     if doc:
                         documents.append(doc)
-                        logger.info(
-                            f"Discovered: {file_path.relative_to(PACKS_DIR)}")
                 except Exception as e:
                     logger.error(f"Error parsing {file_path}: {e}")
         return documents
-    def _parse_document(self, file_path: Path,
-                        pack_name: str) -> Dict[str, Any]:
         """Parse a document file"""
         try:
             if file_path.suffix in [".json"]:
@@ -86,8 +79,7 @@ class WarblerPackLoader:
                 # We'll read the first few lines and combine them
                 with open(file_path, "r", encoding="utf-8") as f:
                     lines = f.readlines()[:5]  # First 5 lines
-                    content = "\n".join(line.strip()
-                                        for line in lines if line.strip())
             elif file_path.suffix in [".yaml", ".yml"]:
                 import yaml
@@ -141,8 +133,7 @@ class WarblerPackLoader:
                 logger.info(f"[OK] Loaded: {doc['content_id']}")
                 return True
             else:
-                logger.warning(
-                    f"API returned {response.status_code}: {response.text[:200]}")
                 return False
         except requests.exceptions.ConnectionError:
             logger.error("Cannot connect to API. Is the service running?")
@@ -174,7 +165,8 @@ class WarblerPackLoader:
         click.secho(
             f"[OK] Load Complete: {
                 self.loaded_count} docs ingested",
-            fg="green")
         if self.error_count > 0:
             click.secho(f"[ERROR] Errors: {self.error_count}", fg="yellow")
         click.echo("=" * 60 + "\n")
@@ -189,9 +181,7 @@ def cli():
 @cli.command()
-@click.option("--api-url",
-              default="http://localhost:8000",
-              help="API service URL")
 def load(api_url):
     """Load all Warbler packs into the API"""
     loader = WarblerPackLoader(api_url)
@@ -202,8 +192,7 @@ def load(api_url):
         if response.status_code == 200:
             click.secho("[OK] API service is running", fg="green")
         else:
-            click.secho(
-                "[ERROR] API service not responding correctly", fg="red")
             return
     except Exception as e:
         click.secho(f"[ERROR] Cannot reach API at {api_url}: {e}", fg="red")
@@ -225,9 +214,7 @@ def load(api_url):
 @cli.command()
-@click.option("--api-url",
-              default="http://localhost:8000",
-              help="API service URL")
 def discover(api_url):
     """Discover documents in Warbler packs (no loading)"""
     loader = WarblerPackLoader(api_url)
@@ -251,7 +238,8 @@ def discover(api_url):
                     f"    Realm: {
                         doc['metadata'].get(
                             'realm_type',
-                            'unknown')}")
     click.echo(f"\n[STATS] Total discovered: {total} documents\n")

             return []
         # Look for JSON, YAML, markdown, and JSONL files
+        for pattern in ["**/*.json", "**/*.yaml", "**/*.yml", "**/*.md", "**/*.jsonl"]:
             for file_path in pack_path.glob(pattern):
                 try:
                     doc = self._parse_document(file_path, pack_name)
                     if doc:
                         documents.append(doc)
+                        logger.info(f"Discovered: {file_path.relative_to(PACKS_DIR)}")
                 except Exception as e:
                     logger.error(f"Error parsing {file_path}: {e}")
         return documents
+    def _parse_document(self, file_path: Path, pack_name: str) -> Dict[str, Any]:
         """Parse a document file"""
         try:
             if file_path.suffix in [".json"]:
                 # We'll read the first few lines and combine them
                 with open(file_path, "r", encoding="utf-8") as f:
                     lines = f.readlines()[:5]  # First 5 lines
+                    content = "\n".join(line.strip() for line in lines if line.strip())
             elif file_path.suffix in [".yaml", ".yml"]:
                 import yaml
                 logger.info(f"[OK] Loaded: {doc['content_id']}")
                 return True
             else:
+                logger.warning(f"API returned {response.status_code}: {response.text[:200]}")
                 return False
         except requests.exceptions.ConnectionError:
             logger.error("Cannot connect to API. Is the service running?")
         click.secho(
             f"[OK] Load Complete: {
                 self.loaded_count} docs ingested",
+            fg="green",
+        )
         if self.error_count > 0:
             click.secho(f"[ERROR] Errors: {self.error_count}", fg="yellow")
         click.echo("=" * 60 + "\n")
 @cli.command()
+@click.option("--api-url", default="http://localhost:8000", help="API service URL")
 def load(api_url):
     """Load all Warbler packs into the API"""
     loader = WarblerPackLoader(api_url)
         if response.status_code == 200:
             click.secho("[OK] API service is running", fg="green")
         else:
+            click.secho("[ERROR] API service not responding correctly", fg="red")
             return
     except Exception as e:
         click.secho(f"[ERROR] Cannot reach API at {api_url}: {e}", fg="red")
 @cli.command()
+@click.option("--api-url", default="http://localhost:8000", help="API service URL")
 def discover(api_url):
     """Discover documents in Warbler packs (no loading)"""
     loader = WarblerPackLoader(api_url)
                     f"    Realm: {
                         doc['metadata'].get(
                             'realm_type',
+                            'unknown')}"
+                )
     click.echo(f"\n[STATS] Total discovered: {total} documents\n")