"""
Format References Tool

Smart reference formatting for any journal's requirements
"""

import asyncio
import json
import logging
import os
import re

import requests

from tools.base import Tool
from core.utils.serper_rate_limited import rate_limited_serper_search

logger = logging.getLogger(__name__)


class FormatReferencesTool(Tool):
    """
    Smart reference formatting tool that can format references according to
    any journal's specific requirements.

    Asks for the target journal, looks up formatting instructions, and applies
    them to user-provided references.
    """

    # Guideline text is capped at this many characters.
    _MAX_GUIDELINE_CHARS = 1500

    # A search hit must mention the journal and at least three of these words
    # to be accepted as formatting guidance.
    _FORMATTING_KEYWORDS = (
        "reference", "citation", "format", "style", "bibliography",
        "author", "title", "journal", "volume", "page", "doi",
        "vancouver", "ama", "chicago", "harvard", "numbered",
    )

    # Journals for which a numbered medical style is assumed by default.
    _MEDICAL_JOURNALS = (
        "clinical infectious diseases", "journal of infectious diseases",
        "the lancet", "new england journal of medicine", "jama",
        "nature medicine", "bmj", "plos",
    )

    # "N. " list marker at the start of the text or right after whitespace.
    _LIST_MARKER = re.compile(r'(?:^|(?<=\s))(\d+)\.\s+')

    # Identifiers that contain periods and would confuse sentence splitting.
    _IDENTIFIER = re.compile(r'(?:doi:\s*|pmid:\s*|https?://)\S+', re.IGNORECASE)

    def __init__(self):
        super().__init__()

    def execute(self, references_text, target_journal=None, max_length=2000):
        """
        Format references according to target journal requirements.

        Args:
            references_text (str): Raw references to format
            target_journal (str, optional): Target journal name
            max_length (int): Maximum response length (best effort, measured
                on the JSON-serialized response)

        Returns:
            dict: Always contains "status", "message", "formatted_references"
            and "formatting_guidelines". "status" is one of
            "journal_required", "references_required", "success" or "error".
            On success the dict also carries "journal" and "reference_count".
        """
        try:
            # Step 1: If no journal specified, ask for it
            if not target_journal or not target_journal.strip():
                return {
                    "status": "journal_required",
                    "message": (
                        "Please specify the target journal for reference formatting. "
                        "For example: 'Clinical Infectious Diseases', 'The Lancet', "
                        "'Nature Medicine', etc."
                    ),
                    "formatted_references": "",
                    "formatting_guidelines": "",
                }

            # Step 2: Clean and validate input references
            if not references_text or not references_text.strip():
                return {
                    "status": "references_required",
                    "message": "Please provide the references you'd like to format.",
                    "formatted_references": "",
                    "formatting_guidelines": "",
                }

            # Step 3: Search for journal-specific formatting guidelines
            formatting_guidelines = self._get_journal_formatting_guidelines(target_journal)

            # Step 4: Parse input references
            parsed_references = self._parse_references(references_text)

            # Step 5: Apply journal-specific formatting
            formatted_references = self._apply_journal_formatting(
                parsed_references, target_journal, formatting_guidelines
            )

            # Step 6: Prepare response
            response = {
                "status": "success",
                "journal": target_journal,
                "formatted_references": formatted_references,
                "formatting_guidelines": formatting_guidelines,
                "reference_count": len(parsed_references),
                "message": (
                    f"Successfully formatted {len(parsed_references)} "
                    f"references for {target_journal}"
                ),
            }

            # Trim the formatted references if the serialized response is too
            # long. Best effort: when fewer than ~100 characters would remain
            # for the references, the response is returned untrimmed.
            if len(json.dumps(response)) > max_length:
                overhead = len(json.dumps({**response, "formatted_references": ""}))
                available_space = max_length - overhead
                if available_space > 100:
                    response["formatted_references"] = (
                        response["formatted_references"][:available_space - 50]
                        + "...[truncated]"
                    )

            return response

        except Exception as e:
            # Tool boundary: report the failure to the caller instead of raising.
            return {
                "status": "error",
                "message": f"Error formatting references: {str(e)}",
                "formatted_references": "",
                "formatting_guidelines": "",
            }

    def _get_journal_formatting_guidelines(self, journal_name):
        """
        Search for journal-specific reference formatting guidelines.

        Tries an online search first and falls back to the built-in guideline
        table when the search yields nothing.

        Returns:
            str: Guideline text (at most 1500 characters), or an
            "Error retrieving guidelines: ..." string if lookup itself fails.
        """
        try:
            guidelines = self._search_guidelines_online(journal_name)
            if not guidelines:
                guidelines = self._get_fallback_guidelines(journal_name)
            return guidelines[:self._MAX_GUIDELINE_CHARS]  # Limit length
        except Exception as e:
            return f"Error retrieving guidelines: {str(e)}"

    def _search_guidelines_online(self, journal_name):
        """
        Query the rate-limited Serper search for formatting guidance.

        Returns:
            str: Title and snippet of the first relevant hit followed by a
            blank line, or "" when no API key is configured, no usable event
            loop is available, or nothing relevant is found.
        """
        api_key = os.getenv("SERPER_API_KEY")
        if not api_key:
            logger.debug("SERPER_API_KEY not set; skipping online guideline search")
            return ""

        # Create event loop if not exists (for sync context)
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
        if loop.is_running():
            # run_until_complete() cannot be used on a running loop; skip the
            # search once instead of failing on every query.
            logger.warning("Event loop already running; skipping online guideline search")
            return ""

        search_queries = [
            f"{journal_name} reference format guidelines",
            f"{journal_name} citation style requirements",
            f"{journal_name} author guidelines references",
            f"how to format references for {journal_name}",
        ]

        for query in search_queries:
            try:
                logger.debug("Searching for: %s", query)
                response_data = loop.run_until_complete(
                    rate_limited_serper_search(query, api_key, num_results=3)
                )
                results = []
                if isinstance(response_data, dict):
                    results = response_data.get("organic") or []
                logger.debug("Found %d results", len(results))

                for result in results:
                    title = result.get("title", "")
                    snippet = result.get("snippet", "")
                    content = f"{title} {snippet}"
                    if self._contains_formatting_info(content, journal_name):
                        logger.debug("Found relevant guidelines")
                        return content + "\n\n"
            except Exception as e:
                # A failed query must not abort the lookup; try the next one.
                logger.warning("Search error: %s", e)

        return ""

    def _contains_formatting_info(self, content, journal_name):
        """Check if content contains relevant formatting information.

        Must contain the journal name and at least 3 formatting keywords
        (case-insensitive substring matches).
        """
        content_lower = content.lower()
        if journal_name.lower() not in content_lower:
            return False
        hits = sum(1 for keyword in self._FORMATTING_KEYWORDS if keyword in content_lower)
        return hits >= 3

    def _parse_references(self, references_text):
        """
        Parse input references into structured format.

        Returns:
            list[dict]: One dict per reference with the extracted components
            plus "original" (the raw entry text) and "number" (1-based
            position as a string).
        """
        references = []
        entries = self._split_reference_entries(references_text)
        for number, ref_text in enumerate(entries, 1):
            parsed_ref = self._extract_reference_components(ref_text)
            parsed_ref["original"] = ref_text
            parsed_ref["number"] = str(number)
            references.append(parsed_ref)
        return references

    def _split_reference_entries(self, references_text):
        """Split raw text into individual reference strings.

        Entries are separated by line breaks. A leading "N." list marker is
        removed, and a single line holding several numbered entries
        ("1. ... 2. ...") is split at the markers.
        """
        entries = []
        for line in references_text.strip().splitlines():
            line = line.strip()
            if line:
                entries.extend(self._split_numbered_line(line))
        return entries

    def _split_numbered_line(self, line):
        """Split one line at consecutive "N." list markers.

        Only markers that continue the numbering started at the beginning of
        the line count as separators, so digits followed by a period inside a
        reference (years, page ranges such as "123-130. ") are left intact.
        """
        first = self._LIST_MARKER.match(line)
        if not first:
            return [line]

        expected = int(first.group(1)) + 1
        start = first.end()
        entries = []
        for marker in self._LIST_MARKER.finditer(line, start):
            if int(marker.group(1)) == expected:
                entries.append(line[start:marker.start()].strip())
                start = marker.end()
                expected += 1
        entries.append(line[start:].strip())
        return [entry for entry in entries if entry]

    def _extract_reference_components(self, ref_text):
        """
        Extract components from a single reference.

        Extraction is heuristic and tuned to "Authors. Title. Journal.
        Year;Volume(Issue):Pages. doi:..." style input; components that cannot
        be recognised are left as empty strings.
        """
        components = {
            "authors": "",
            "title": "",
            "journal": "",
            "year": "",
            "volume": "",
            "issue": "",
            "pages": "",
            "doi": "",
            "pmid": "",
            "url": "",
        }

        # Extract DOI ("doi:10.x/y" or a doi.org link), without the sentence period
        doi_match = re.search(r'(?:doi:\s*|doi\.org/)([^\s,;]+)', ref_text, re.IGNORECASE)
        if doi_match:
            components["doi"] = doi_match.group(1).rstrip('.')

        # Extract PMID
        pmid_match = re.search(r'pmid:\s*(\d+)', ref_text, re.IGNORECASE)
        if pmid_match:
            components["pmid"] = pmid_match.group(1)

        # Extract year (4 digits)
        year_match = re.search(r'\b(19|20)\d{2}\b', ref_text)
        if year_match:
            components["year"] = year_match.group(0)

        # Extract volume and pages pattern like "2023;45(3):123-130"
        vol_pages_match = re.search(r'(\d+)\((\d+)\):(\d+[-–]\d+)', ref_text)
        if vol_pages_match:
            components["volume"] = vol_pages_match.group(1)
            components["issue"] = vol_pages_match.group(2)
            components["pages"] = vol_pages_match.group(3)
        else:
            # Same locator without an issue number, e.g. "2023;45:123-130"
            no_issue_match = re.search(r';\s*(\d+)\s*:\s*(\d+(?:[-–]\d+)?)', ref_text)
            if no_issue_match:
                components["volume"] = no_issue_match.group(1)
                components["pages"] = no_issue_match.group(2)

        # Extract URL, without the sentence period
        url_match = re.search(r'https?://[^\s,;]+', ref_text)
        if url_match:
            components["url"] = url_match.group(0).rstrip('.')

        # Simple author extraction (everything before first period if present).
        # Identifiers are removed first so their periods are not mistaken for
        # sentence boundaries.
        text = self._IDENTIFIER.sub(' ', ref_text).strip()
        if '.' in text:
            potential_authors, remainder = text.split('.', 1)
            if len(potential_authors) < 100:  # Reasonable author length
                components["authors"] = potential_authors.strip()
                title, journal = self._extract_title_and_journal(remainder)
                components["title"] = title
                components["journal"] = journal

        return components

    def _extract_title_and_journal(self, remainder):
        """Guess title and journal from the text that follows the authors.

        The first sentence is taken as the title and the text of the second
        sentence up to the publication year as the journal. Returns a
        ``(title, journal)`` tuple; either part may be "".
        """
        segments = [seg.strip().rstrip('.').strip() for seg in re.split(r'\.\s+', remainder)]
        segments = [seg for seg in segments if seg]
        if not segments:
            return "", ""

        title = segments[0]
        if re.fullmatch(r'[\d\s;:()\-–,]+', title):
            # Only digits and punctuation: this is a locator, not a title.
            return "", ""

        journal = ""
        if len(segments) > 1:
            candidate = re.split(r'\b(?:19|20)\d{2}\b', segments[1], maxsplit=1)[0]
            candidate = candidate.strip(" ,;:(")
            if candidate[:1].isalpha():
                journal = candidate
        return title, journal

    def _apply_journal_formatting(self, references, journal_name, guidelines):
        """
        Apply journal-specific formatting to references.

        Returns:
            str: Numbered references separated by blank lines. A reference for
            which neither title nor journal could be extracted is emitted in
            its original wording so that no information is lost.
        """
        # Determine formatting style based on journal and guidelines
        style = self._determine_formatting_style(journal_name, guidelines)
        formatters = {
            "vancouver": self._format_vancouver_style,
            "ama": self._format_ama_style,
            "chicago": self._format_chicago_style,
        }
        format_ref = formatters.get(style, self._format_generic_style)

        formatted_refs = []
        for ref in references:
            original = ref.get("original", "")
            if original and not (ref["title"] or ref["journal"]):
                formatted_ref = original.strip()
            else:
                formatted_ref = format_ref(ref)
            formatted_refs.append(f"{ref['number']}. {formatted_ref}")

        return "\n\n".join(formatted_refs)

    def _determine_formatting_style(self, journal_name, guidelines):
        """
        Determine the appropriate formatting style for the journal.

        Returns:
            str: "vancouver", "ama", "chicago" or "generic".
        """
        journal_lower = journal_name.lower()
        guidelines_lower = guidelines.lower()
        # Whole-word match so that words merely containing "ama" do not count.
        mentions_ama = re.search(r'\bama\b', guidelines_lower) is not None

        # Medical journals often use Vancouver or AMA
        if any(journal in journal_lower for journal in self._MEDICAL_JOURNALS):
            if "vancouver" in guidelines_lower:
                return "vancouver"
            if mentions_ama or "jama" in journal_lower:
                return "ama"
            return "vancouver"  # Default for medical journals

        # Check guidelines for specific style mentions
        if "vancouver" in guidelines_lower:
            return "vancouver"
        if mentions_ama:
            return "ama"
        if "chicago" in guidelines_lower:
            return "chicago"

        return "generic"

    @staticmethod
    def _format_locator(ref):
        """Build the "Year;Volume(Issue):Pages" part from whatever is present."""
        locator = ref["year"]
        if ref["volume"]:
            locator += f";{ref['volume']}" if locator else ref["volume"]
        if ref["issue"]:
            locator += f"({ref['issue']})"
        if ref["pages"]:
            locator += f":{ref['pages']}" if locator else ref["pages"]
        return locator

    def _format_numbered_citation(self, ref, authors):
        """Assemble "Authors. Title. Journal. Year;Volume(Issue):Pages. doi:x".

        Missing components are skipped; the locator is emitted even when the
        journal name is unknown.
        """
        parts = []
        if authors:
            parts.append(authors)
        if ref["title"]:
            parts.append(ref["title"].strip().rstrip('.') + ".")
        if ref["journal"]:
            parts.append(ref["journal"].strip().rstrip('.') + ".")
        locator = self._format_locator(ref)
        if locator:
            parts.append(locator + ".")
        if ref["doi"]:
            parts.append(f"doi:{ref['doi']}")
        return " ".join(parts)

    def _format_vancouver_style(self, ref):
        """Format reference in Vancouver style"""
        authors = self._format_authors_vancouver(ref["authors"]) if ref["authors"] else ""
        return self._format_numbered_citation(ref, authors)

    def _format_ama_style(self, ref):
        """Format reference in AMA style"""
        authors = self._format_authors_ama(ref["authors"]) if ref["authors"] else ""
        return self._format_numbered_citation(ref, authors)

    def _format_chicago_style(self, ref):
        """Format reference in Chicago style:
        Authors. "Title" Journal Volume, no. Issue (Year): Pages.
        """
        parts = []

        if ref["authors"]:
            parts.append(self._format_authors_chicago(ref["authors"]))

        if ref["title"]:
            parts.append(f'"{ref["title"].strip().rstrip(".")}"')

        detail = ref["journal"]
        if ref["volume"]:
            detail += f" {ref['volume']}"
        if ref["issue"]:
            detail += f", no. {ref['issue']}"
        if ref["year"]:
            detail += f" ({ref['year']})"
        if ref["pages"]:
            detail += f": {ref['pages']}"
        # Drop separators left dangling when leading components are missing.
        detail = detail.strip(" ,:")
        if detail:
            parts.append(detail + ".")

        return " ".join(parts)

    def _format_generic_style(self, ref):
        """Format reference in generic academic style"""
        return self._format_vancouver_style(ref)  # Default to Vancouver

    def _format_authors_vancouver(self, authors_str):
        """Format authors in Vancouver style (author list ending in one period)"""
        # Simple formatting - could be enhanced
        return authors_str.strip().rstrip('.') + "."

    def _format_authors_ama(self, authors_str):
        """Format authors in AMA style"""
        return self._format_authors_vancouver(authors_str)

    def _format_authors_chicago(self, authors_str):
        """Format authors in Chicago style"""
        return authors_str.strip().rstrip('.') + "."

    def _get_fallback_guidelines(self, journal_name):
        """Provide fallback formatting guidelines when internet search fails"""
        journal_lower = journal_name.lower()

        # Known guidelines for major medical journals
        known_guidelines = {
            "clinical infectious diseases": "Uses numbered Vancouver style: Author(s). Title. Journal Name. Year;Volume(Issue):Pages. doi:xxx",
            "the lancet": "Uses numbered references in Vancouver style with specific formatting requirements",
            "nature medicine": "Uses numbered references with Nature style formatting",
            # AMA is a numbered style; the former "author-year" wording
            # contradicted that and was dropped.
            "jama": "Uses AMA style numbered references",
            "new england journal of medicine": "Uses numbered Vancouver style references",
            "plos one": "Uses numbered references with specific PLOS formatting requirements",
            "infection control": "Uses Vancouver style for infection control journals",
            "antimicrobial": "Uses medical journal Vancouver style formatting",
        }

        # Check for exact or partial matches
        for known_journal, guideline in known_guidelines.items():
            if known_journal in journal_lower:
                return f"Formatting guidelines for {journal_name}: {guideline}"

        # Generic medical journal guidelines
        return (
            f"Standard medical journal formatting for {journal_name}: "
            "Uses numbered Vancouver style references with Author(s). Title. "
            "Journal Name. Year;Volume(Issue):Pages. doi:xxx format."
        )
# Tool metadata
# Describes the tool by name, description, parameter schema and category.
# Both parameters are strings; the names match the `references_text` and
# `target_journal` arguments of `FormatReferencesTool.execute`.
TOOL_METADATA = {
    "name": "format_references",
    "description": "Smart reference formatting tool that formats citations according to any journal's specific requirements",
    "parameters": {
        "references_text": {
            "type": "string",
            "description": "The references to format (can be in any format)",
        },
        "target_journal": {
            "type": "string",
            "description": "The target journal name (e.g., 'Clinical Infectious Diseases', 'Nature Medicine')",
        },
    },
    "category": "research",
}