# Lin/backend/services/content_service.py
# Hugging Face file-view metadata: Zelyanoth · fff · 25f22bf · 5.88 kB
import re
import json
import unicodedata
from flask import current_app
from gradio_client import Client
import pandas as pd
class ContentService:
    """Service for AI content generation using Hugging Face models.

    Wraps a ``gradio_client.Client`` bound to the
    ``Zelyanoth/Linkedin_poster_dev`` Space and exposes two remote calls
    (post generation and RSS-source registration) plus the text helpers used
    to clean strings before and after those calls.
    """

    # Fallback text returned when the model yields no usable content.
    _PLACEHOLDER_CONTENT = "Generated content will appear here..."

    # Marker placed between the feed URL and the user ID in the single string
    # sent to "/ajouter_rss". It is runtime data and must stay byte-for-byte
    # identical (presumably the remote Space splits on it -- verify there).
    _RSS_ID_SEPARATOR = "__thi_irrh'èçs_my_id__! "

    def __init__(self, hugging_key=None):
        """Create the service and its Gradio client.

        Args:
            hugging_key: Hugging Face token. When falsy, the ``HUGGING_KEY``
                entry of the current Flask app config is used instead (which
                requires an active application context).
        """
        # Use provided key or fall back to app config
        self.hugging_key = hugging_key or current_app.config.get('HUGGING_KEY')
        # Initialize the Gradio client for content generation
        self.client = Client("Zelyanoth/Linkedin_poster_dev", hf_token=self.hugging_key)

    @staticmethod
    def _ensure_utf8(content):
        """Return ``content`` with anything UTF-8 cannot encode replaced by '?'.

        Falsy values and non-strings are returned unchanged. Whitespace is
        never touched.
        """
        if not content or not isinstance(content, str):
            return content
        try:
            content.encode('utf-8')
        except UnicodeEncodeError:
            # errors='replace' cannot raise for a str, so no further fallback
            # is needed here.
            return content.encode('utf-8', errors='replace').decode('utf-8')
        return content

    def validate_unicode_content(self, content):
        """Validate Unicode content while preserving original formatting and spaces.

        Args:
            content: Value to validate.

        Returns:
            ``content`` unchanged when it is falsy, not a ``str``, or already
            encodable as UTF-8; otherwise a copy in which the unencodable
            characters are replaced by ``'?'``.
        """
        return self._ensure_utf8(content)

    def preserve_formatting(self, content):
        """Preserve spaces, line breaks, and paragraph formatting.

        No whitespace is modified; the only possible change is the
        replacement of characters that cannot be encoded as UTF-8.

        Args:
            content: Value to check.

        Returns:
            The same result as ``validate_unicode_content(content)``.
        """
        return self._ensure_utf8(content)

    def sanitize_content_for_api(self, content):
        """Sanitize content for API calls while preserving text and formatting.

        Args:
            content: Text to sanitize. Falsy values and non-strings are
                returned unchanged.

        Returns:
            The text made UTF-8 encodable, with NUL characters removed and
            ``\\r\\n`` / ``\\r`` line endings normalised to ``\\n``. Spaces,
            tabs and blank lines are kept as they are.
        """
        if not content or not isinstance(content, str):
            return content
        cleaned = self._ensure_utf8(content)
        # Only remove null bytes that might cause issues in API calls
        cleaned = cleaned.replace('\x00', '')
        # Order matters: collapse CRLF first so it does not become two newlines.
        return cleaned.replace('\r\n', '\n').replace('\r', '\n')

    @staticmethod
    def _parse_model_result(result):
        """Decode the raw value returned by the model into a Python object.

        Text is tried as JSON, then as a Python literal, and is finally
        wrapped in a one-element list when neither parser accepts it.
        Values that are not text are returned as they are, except that a
        tuple is converted to a list so its first element is used.
        """
        import ast

        if isinstance(result, tuple):
            return list(result)
        if not isinstance(result, (str, bytes, bytearray)):
            # Already a Python object: json.loads would raise TypeError here.
            return result
        try:
            return json.loads(result)
        except ValueError:
            # JSONDecodeError (a ValueError subclass) or undecodable bytes.
            pass
        if not isinstance(result, str):
            result = bytes(result).decode('utf-8', errors='replace')
        try:
            # literal_eval only accepts literals, so it is safe on model output.
            return ast.literal_eval(result)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Neither JSON nor a Python literal: treat it as plain text.
            return [result]

    def generate_post_content(self, user_id: str) -> str:
        """
        Generate post content using AI.

        Calls the ``/poster_linkedin`` endpoint with ``code=user_id`` and
        returns the first element of the decoded result (or the whole result
        when it is not a list), sanitized for further API use.

        Args:
            user_id (str): User ID for personalization

        Returns:
            str: Generated post content, or a placeholder sentence when the
            model returned nothing usable (``None``, an empty list, or a list
            whose first element is ``None``).

        Raises:
            Exception: If the remote call or the post-processing fails; the
            original error is attached as ``__cause__``.
        """
        try:
            # Call the Hugging Face model to generate content
            raw_result = self.client.predict(
                code=user_id,
                api_name="/poster_linkedin"
            )
            parsed = self._parse_model_result(raw_result)

            if isinstance(parsed, list):
                candidate = parsed[0] if parsed else None
            else:
                candidate = parsed

            if candidate is None:
                text = self._PLACEHOLDER_CONTENT
            elif isinstance(candidate, str):
                text = candidate
            else:
                # Numbers, dicts, ... are rendered as text so the declared
                # ``str`` return type holds and sanitising cannot crash.
                text = str(candidate)

            # sanitize_content_for_api already keeps whitespace intact, so a
            # second formatting pass is not needed.
            return self.sanitize_content_for_api(text)
        except Exception as e:
            error_message = str(e)
            current_app.logger.error(f"Content generation failed: {error_message}")
            raise Exception(f"Content generation failed: {error_message}") from e

    def add_rss_source(self, rss_link: str, user_id: str) -> str:
        """
        Add an RSS source for content generation.

        The feed URL and the user ID are joined into one string (URL, fixed
        separator, user ID), sanitized, and sent as ``rss_link`` to the
        ``/ajouter_rss`` endpoint.

        Args:
            rss_link (str): RSS feed URL
            user_id (str): User ID

        Returns:
            str: Result message from the endpoint, sanitized. A falsy or
            non-string result is returned unchanged.

        Raises:
            Exception: If the remote call fails; the original error is
            attached as ``__cause__``.
        """
        try:
            rss_input = f"{rss_link}{self._RSS_ID_SEPARATOR}{user_id}"
            # Call the Hugging Face model to add RSS source
            result = self.client.predict(
                rss_link=self.sanitize_content_for_api(rss_input),
                api_name="/ajouter_rss"
            )
            return self.sanitize_content_for_api(result)
        except Exception as e:
            raise Exception(f"Failed to add RSS source: {str(e)}") from e