"""Streamlit app that merges an .srt file with German translations.

Each subtitle cue is rewritten as two lines (original text, then the German
line at the same position) and user-chosen phrases are wrapped in bold tags.
"""

import io
import re

import pysrt
import streamlit as st


def bold_chunks_in_text(text, chunks):
    """Wrap every occurrence of the given phrases in ``<b>...</b>`` tags.

    Matching is case-insensitive and literal (phrases are regex-escaped);
    the matched text keeps its original casing.

    Args:
        text: A single subtitle line (may contain newlines).
        chunks: Iterable of phrases to highlight. Empty phrases are ignored.

    Returns:
        ``text`` with each matched phrase surrounded by ``<b>`` / ``</b>``.
        Returned unchanged when there is nothing to highlight.
    """
    # De-duplicate while keeping the caller's order, then try longer phrases
    # first so "wie geht es dir" wins over a shorter overlapping "wie geht".
    phrases = sorted(
        dict.fromkeys(chunk for chunk in chunks if chunk),
        key=len,
        reverse=True,
    )
    if not phrases:
        return text

    # One combined pattern and a single pass: a later phrase can never match
    # inside the tags (or the text) inserted for an earlier phrase.
    pattern = re.compile(
        "|".join(re.escape(phrase) for phrase in phrases),
        re.IGNORECASE,
    )
    return pattern.sub(lambda match: f"<b>{match.group(0)}</b>", text)


def process_subtitles(srt_content, german_lines, bold_chunks):
    """Build the bilingual subtitle file.

    Args:
        srt_content: Raw bytes of the uploaded ``.srt`` file.
        german_lines: German translations, one per subtitle entry, in order.
        bold_chunks: Phrases to highlight in both languages.

    Returns:
        The formatted subtitles as a string, or ``None`` when the upload
        cannot be decoded or the number of German lines does not match the
        number of subtitle entries (an error is shown via ``st.error``).
    """
    try:
        # "utf-8-sig" drops a leading byte-order mark, which would otherwise
        # end up glued to the first subtitle's index.
        srt_text = srt_content.decode("utf-8-sig")
    except UnicodeDecodeError:
        st.error("Could not decode the subtitle file. Please upload a UTF-8 encoded .srt file.")
        return None

    subs = pysrt.from_string(srt_text)

    if len(subs) != len(german_lines):
        st.error(f"Line mismatch: {len(subs)} subtitle entries vs {len(german_lines)} German lines.")
        return None

    for sub, german_line in zip(subs, german_lines):
        original_bold = bold_chunks_in_text(sub.text.strip(), bold_chunks)
        german_bold = bold_chunks_in_text(german_line.strip(), bold_chunks)
        # Original on the first line, German translation directly below it.
        sub.text = f"{original_bold}\n{german_bold}"

    output = io.StringIO()
    subs.write_into(output)
    return output.getvalue()


# ---------- Streamlit UI ----------
st.set_page_config(page_title="Subtitle Formatter", layout="centered")
st.title("🎬 Bilingual Subtitle Formatter for Language Learning")

st.markdown("Upload a `.srt` file, paste German translations, and highlight key phrases.")

uploaded_srt = st.file_uploader("📄 Upload Original Subtitle (.srt)", type=["srt"])
german_input = st.text_area("🇩🇪 German Translations (1 per subtitle line)", height=200)
chunks_input = st.text_input(
    "🔍 Highlight Phrases (comma-separated)",
    placeholder="comment ça va, wie geht es dir",
)

if st.button("✨ Format Subtitles"):
    if not uploaded_srt or not german_input:
        st.warning("Please upload a subtitle file and enter German translations.")
    else:
        german_lines = [line.strip() for line in german_input.strip().splitlines()]
        # Drop empty entries such as the one produced by a trailing comma.
        bold_chunks = [chunk.strip() for chunk in chunks_input.split(",") if chunk.strip()]

        result_srt = process_subtitles(uploaded_srt.read(), german_lines, bold_chunks)

        # None signals that an error has already been reported to the user.
        if result_srt is not None:
            st.success("✅ Subtitle processing complete!")
            st.download_button(
                "📥 Download Formatted Subtitle",
                result_srt,
                file_name="formatted_bilingual.srt",
                mime="text/plain",
            )