""" Response cleaner module. Removes markdown formatting and special characters from AI responses. """ import re def clean_response(text: str) -> str: """ Clean AI response from markdown formatting and special characters. Args: text: Raw AI response with possible markdown Returns: Clean text suitable for TTS """ if not text: return "" # Remove citation references like [1], [2], [citation], etc. text = re.sub(r'\[\d+\]', '', text) text = re.sub(r'\[citation\s*needed\]', '', text, flags=re.IGNORECASE) text = re.sub(r'\[source\]', '', text, flags=re.IGNORECASE) # Remove markdown bold **text** and __text__ text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) text = re.sub(r'__(.+?)__', r'\1', text) # Remove markdown italic *text* and _text_ text = re.sub(r'\*(.+?)\*', r'\1', text) text = re.sub(r'(? text text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text) # Remove markdown images ![alt](url) text = re.sub(r'!\[([^\]]*)\]\([^)]+\)', '', text) # Remove inline code `code` text = re.sub(r'`([^`]+)`', r'\1', text) # Remove code blocks ```code``` text = re.sub(r'```[\s\S]*?```', '', text) # Remove markdown list markers (-, *, +, numbered) text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE) text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE) # Remove blockquotes text = re.sub(r'^\s*>\s*', '', text, flags=re.MULTILINE) # Remove horizontal rules text = re.sub(r'^[-*_]{3,}\s*$', '', text, flags=re.MULTILINE) # Remove HTML tags if any text = re.sub(r'<[^>]+>', '', text) # Remove extra whitespace text = re.sub(r'\n{3,}', '\n\n', text) text = re.sub(r' +', ' ', text) # Clean up and return text = text.strip() return text