# Baskar2005's picture
# Create app.py
# a84d337 verified
import streamlit as st
import pysrt
import io
import re
# Bold specified language chunks in a given subtitle line
def bold_chunks_in_text(text, chunks):
    """Wrap every occurrence of the given phrases in ``<b>...</b>`` tags.

    Matching is case-insensitive and literal (phrases are regex-escaped);
    the matched text keeps its original casing. All phrases are applied in
    a single pass, so a phrase can never match inside the ``<b>`` markup
    inserted for another phrase, and overlapping phrases do not produce
    nested tags.

    Args:
        text: The subtitle line to annotate.
        chunks: Iterable of phrases to highlight. Empty phrases are ignored.

    Returns:
        ``text`` with each matched phrase wrapped in ``<b>`` tags.
    """
    # An empty pattern matches at every position and would flood the line
    # with "<b></b>", so drop empty phrases up front.
    phrases = [phrase for phrase in chunks if phrase]
    if not phrases:
        return text

    # Regex alternation takes the first alternative that matches at a given
    # position; listing longer phrases first makes "wie geht es" win over
    # "geht" when both could start a match.
    phrases.sort(key=len, reverse=True)
    pattern = re.compile(
        "|".join(re.escape(phrase) for phrase in phrases),
        re.IGNORECASE,
    )
    return pattern.sub(lambda match: f"<b>{match.group(0)}</b>", text)
# Main processing function
def process_subtitles(srt_content, german_lines, bold_chunks):
    """Build a bilingual SRT document with key phrases in bold.

    Each subtitle entry's text is replaced by two lines: the original text
    and the matching German translation, both passed through
    ``bold_chunks_in_text``.

    Args:
        srt_content: Raw bytes of the uploaded ``.srt`` file, expected to be
            UTF-8 (with or without a byte-order mark).
        german_lines: One German translation per subtitle entry, in order.
        bold_chunks: Phrases to wrap in ``<b>`` tags in both languages.

    Returns:
        The rewritten SRT document as a string, or ``None`` after reporting
        the problem through ``st.error`` when the bytes are not valid UTF-8
        or the number of translations differs from the number of entries.
    """
    try:
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, so
        # the BOM character does not stay attached to the first line.
        srt_text = srt_content.decode("utf-8-sig")
    except UnicodeDecodeError:
        st.error("Could not decode the subtitle file as UTF-8. Please re-save it as UTF-8 and try again.")
        return None

    subs = pysrt.from_string(srt_text)
    if len(subs) != len(german_lines):
        st.error(f"Line mismatch: {len(subs)} subtitle entries vs {len(german_lines)} German lines.")
        return None

    # Lengths were checked above, so zip pairs every entry with its line.
    for sub, german_line in zip(subs, german_lines):
        original_bold = bold_chunks_in_text(sub.text.strip(), bold_chunks)
        german_bold = bold_chunks_in_text(german_line.strip(), bold_chunks)
        sub.text = f"{original_bold}\n{german_bold}"

    output = io.StringIO()
    subs.write_into(output)
    return output.getvalue()
# ---------- Streamlit UI ----------
# Page layout: an uploader for the original .srt file, a text area for the
# German translations (one per subtitle entry) and a comma-separated list of
# phrases to highlight. Pressing the button runs process_subtitles and, on
# success, offers the result for download.
st.set_page_config(page_title="Subtitle Formatter", layout="centered")
st.title("🎬 Bilingual Subtitle Formatter for Language Learning")
st.markdown("Upload a `.srt` file, paste German translations, and highlight key phrases.")

uploaded_srt = st.file_uploader("📄 Upload Original Subtitle (.srt)", type=["srt"])
german_input = st.text_area("🇩🇪 German Translations (1 per subtitle line)", height=200)
chunks_input = st.text_input(
    "🔍 Highlight Phrases (comma-separated)",
    placeholder="comment ça va, wie geht es dir",
)

if st.button("✨ Format Subtitles"):
    # Whitespace-only input counts as missing translations.
    if not uploaded_srt or not german_input.strip():
        st.warning("Please upload a subtitle file and enter German translations.")
    else:
        german_lines = [line.strip() for line in german_input.strip().splitlines()]
        bold_chunks = [chunk.strip() for chunk in chunks_input.split(",") if chunk.strip()]
        # getvalue() returns the whole upload regardless of the stream
        # position, unlike read(), which depends on where the cursor is.
        result_srt = process_subtitles(uploaded_srt.getvalue(), german_lines, bold_chunks)
        if result_srt:
            st.success("✅ Subtitle processing complete!")
            st.download_button(
                "📥 Download Formatted Subtitle",
                result_srt,
                "formatted_bilingual.srt",
                "text/plain",
            )