import ast
import json
import re
import unicodedata

import pandas as pd
from flask import current_app
from gradio_client import Client


class ContentService:
    """Service for AI content generation using Hugging Face models."""

    # Returned when the model produces no usable content.
    _PLACEHOLDER = "Generated content will appear here..."

    # Marker placed between the feed URL and the user ID in the single string
    # sent to the "/ajouter_rss" endpoint. It must stay byte-identical;
    # presumably the remote app splits on it -- verify before changing.
    _RSS_ID_SEPARATOR = "__thi_irrh'èçs_my_id__! "

    def __init__(self, hugging_key=None):
        """Create the Gradio client used for all model calls.

        Args:
            hugging_key: Hugging Face token. When falsy, the ``HUGGING_KEY``
                entry of the current Flask app config is used instead.
        """
        self.hugging_key = hugging_key or current_app.config.get('HUGGING_KEY')
        self.client = Client(
            "Zelyanoth/Linkedin_poster_dev",
            hf_token=self.hugging_key,
        )

    @staticmethod
    def _utf8_safe(text: str) -> str:
        """Return *text* unchanged if it encodes as UTF-8, else a lossy copy.

        Encoding a ``str`` as UTF-8 only fails for code points such as lone
        surrogates; those are replaced with ``?`` while every other character,
        including all whitespace, is kept.
        """
        try:
            text.encode('utf-8')
        except UnicodeEncodeError:
            # errors='replace' cannot raise, so no further fallback is needed.
            return text.encode('utf-8', errors='replace').decode('utf-8')
        return text

    def validate_unicode_content(self, content):
        """Validate Unicode content while preserving formatting and spaces.

        Args:
            content: Value to validate.

        Returns:
            ``content`` unchanged when it is empty, not a string, or already
            encodable as UTF-8; otherwise a copy with the unencodable
            characters replaced.
        """
        if not content or not isinstance(content, str):
            return content
        return self._utf8_safe(content)

    def preserve_formatting(self, content):
        """Preserve spaces, line breaks, and paragraph formatting.

        No whitespace is ever touched; only characters that cannot be encoded
        as UTF-8 are replaced.

        Args:
            content: Value to process.

        Returns:
            ``content`` unchanged when it is empty, not a string, or already
            encodable as UTF-8; otherwise a copy with the unencodable
            characters replaced.
        """
        # Non-string values are passed through (instead of failing on a
        # missing ``.encode``), consistent with validate_unicode_content.
        if not content or not isinstance(content, str):
            return content
        return self._utf8_safe(content)

    def sanitize_content_for_api(self, content):
        """Sanitize content for API calls while preserving text and layout.

        Runs :meth:`preserve_formatting` and :meth:`validate_unicode_content`,
        strips NUL characters, and normalises ``\\r\\n`` / ``\\r`` line endings
        to ``\\n``. Spaces, tabs and paragraph breaks are kept.

        Args:
            content: Value to sanitize.

        Returns:
            The sanitized string, or ``content`` unchanged when it is empty or
            not a string.
        """
        if not content or not isinstance(content, str):
            return content

        text = self.validate_unicode_content(self.preserve_formatting(content))
        # NUL characters can cause issues in API calls.
        text = text.replace('\x00', '')
        # Order matters: collapse CRLF first so it does not become two "\n".
        return text.replace('\r\n', '\n').replace('\r', '\n')

    @staticmethod
    def _parse_model_result(result):
        """Decode the raw value returned by the model into a Python object.

        Non-string results are treated as already decoded (tuples become
        lists). Strings are tried as JSON, then as a Python literal, and are
        finally wrapped in a one-element list as plain text.
        """
        if not isinstance(result, str):
            # json.loads would raise TypeError on a list/tuple/dict, which
            # previously aborted generation for already-decoded results.
            return list(result) if isinstance(result, tuple) else result

        try:
            return json.loads(result)
        except json.JSONDecodeError:
            pass

        try:
            # literal_eval only evaluates literals, never arbitrary code.
            return ast.literal_eval(result)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return [result]

    def generate_post_content(self, user_id: str) -> str:
        """Generate post content using AI.

        Calls the ``/poster_linkedin`` endpoint with ``code=user_id``, takes
        the first element when the decoded result is a list (otherwise the
        whole result), and sanitizes it.

        Args:
            user_id (str): User ID for personalization.

        Returns:
            str: Generated post content, or a placeholder message when the
            model returned nothing usable (``None`` or an empty list).

        Raises:
            Exception: If the model call or the post-processing fails. The
                original error is attached as ``__cause__``.
        """
        try:
            result = self.client.predict(
                code=user_id,
                api_name="/poster_linkedin",
            )
            parsed = self._parse_model_result(result)

            if isinstance(parsed, list):
                generated = parsed[0] if parsed else None
            else:
                generated = parsed

            if generated is None:
                generated = self._PLACEHOLDER
            elif not isinstance(generated, str):
                # e.g. a number or dict as first element; the sanitizer only
                # works on text.
                generated = str(generated)

            # sanitize_content_for_api already applies preserve_formatting.
            return self.sanitize_content_for_api(generated)
        except Exception as exc:
            error_message = str(exc)
            # .exception() logs at ERROR level and records the traceback.
            current_app.logger.exception(
                "Content generation failed: %s", error_message
            )
            raise Exception(
                f"Content generation failed: {error_message}"
            ) from exc

    def add_rss_source(self, rss_link: str, user_id: str) -> str:
        """Add an RSS source for content generation.

        Sends the feed URL and the user ID to the ``/ajouter_rss`` endpoint
        as one string joined by a fixed marker.

        Args:
            rss_link (str): RSS feed URL.
            user_id (str): User ID.

        Returns:
            str: Result message from the endpoint, sanitized. A non-string
            result is returned unchanged.

        Raises:
            Exception: If the call fails. The original error is attached as
                ``__cause__``.
        """
        try:
            rss_input = f"{rss_link}{self._RSS_ID_SEPARATOR}{user_id}"
            result = self.client.predict(
                rss_link=self.sanitize_content_for_api(rss_input),
                api_name="/ajouter_rss",
            )
            # sanitize_content_for_api already applies preserve_formatting.
            return self.sanitize_content_for_api(result)
        except Exception as exc:
            raise Exception(f"Failed to add RSS source: {str(exc)}") from exc