Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import streamlit as st
|
3 |
+
import pysrt
|
4 |
+
import io
|
5 |
+
import re
|
6 |
+
|
7 |
+
# Bold specified language chunks in a given subtitle line
|
8 |
+
def bold_chunks_in_text(text, chunks):
    """Wrap every occurrence of the given phrases in ``<b>...</b>`` tags.

    Matching is case-insensitive and literal (phrases are regex-escaped);
    the matched text keeps its original casing.

    Args:
        text: The subtitle line to process.
        chunks: Iterable of phrases to highlight. Empty phrases are ignored.

    Returns:
        ``text`` with each matched phrase wrapped in ``<b>`` tags.
    """
    # Longest phrases first so that, when one phrase contains another, the
    # longer one wins instead of being split by the shorter match.
    phrases = sorted((phrase for phrase in chunks if phrase), key=len, reverse=True)
    if not phrases:
        return text

    # One combined pattern, applied in a single pass: substituting phrase by
    # phrase would let later phrases match inside the "<b>"/"</b>" markup
    # inserted for earlier ones and corrupt the tags.
    pattern = re.compile("|".join(re.escape(phrase) for phrase in phrases), re.IGNORECASE)
    return pattern.sub(lambda m: f"<b>{m.group(0)}</b>", text)
|
14 |
+
|
15 |
+
# Main processing function
|
16 |
+
def process_subtitles(srt_content, german_lines, bold_chunks):
    """Append a German line to every subtitle entry and bold the key phrases.

    Args:
        srt_content: Raw bytes of the uploaded ``.srt`` file, expected to be
            UTF-8 (with or without a byte-order mark).
        german_lines: German translations, one per subtitle entry, in order.
        bold_chunks: Phrases to highlight in both the original and the
            German line via ``bold_chunks_in_text``.

    Returns:
        The rewritten SRT document as a string, or ``None`` if the file could
        not be decoded or the number of subtitle entries does not match the
        number of German lines (the problem is reported with ``st.error``).
    """
    try:
        # "utf-8-sig" reads plain UTF-8 too, but drops a leading BOM so it is
        # not handed to the parser as part of the first line.
        srt_text = srt_content.decode("utf-8-sig")
    except UnicodeDecodeError:
        st.error("Could not decode the subtitle file as UTF-8. Please re-save it with UTF-8 encoding.")
        return None

    subs = pysrt.from_string(srt_text)

    if len(subs) != len(german_lines):
        st.error(f"Line mismatch: {len(subs)} subtitle entries vs {len(german_lines)} German lines.")
        return None

    # Lengths are equal at this point, so zip pairs every entry with its translation.
    for sub, german_line in zip(subs, german_lines):
        original_bold = bold_chunks_in_text(sub.text.strip(), bold_chunks)
        german_bold = bold_chunks_in_text(german_line.strip(), bold_chunks)

        # Original text on the first line, German translation underneath.
        sub.text = f"{original_bold}\n{german_bold}"

    output = io.StringIO()
    subs.write_into(output)
    return output.getvalue()
|
35 |
+
|
36 |
+
# ---------- Streamlit UI ----------
|
37 |
+
st.set_page_config(page_title="Subtitle Formatter", layout="centered")
st.title("🎬 Bilingual Subtitle Formatter for Language Learning")

st.markdown("Upload a `.srt` file, paste German translations, and highlight key phrases.")

# Inputs: the original subtitle file, one German line per subtitle entry,
# and an optional comma-separated list of phrases to bold.
# NOTE(review): the emoji in the uploader and highlight labels were lost to
# mis-encoding; the two used here are best guesses. Confirm against the original.
uploaded_srt = st.file_uploader("📁 Upload Original Subtitle (.srt)", type=["srt"])
german_input = st.text_area("🇩🇪 German Translations (1 per subtitle line)", height=200)
chunks_input = st.text_input("🔍 Highlight Phrases (comma-separated)", placeholder="comment ça va, wie geht es dir")

if st.button("✨ Format Subtitles"):
    if not uploaded_srt or not german_input:
        st.warning("Please upload a subtitle file and enter German translations.")
    else:
        german_lines = [line.strip() for line in german_input.strip().splitlines()]
        bold_chunks = [chunk.strip() for chunk in chunks_input.split(",") if chunk.strip()]

        # getvalue() returns the whole upload regardless of the stream
        # position, so clicking the button again does not see an exhausted
        # stream the way a second read() would.
        result_srt = process_subtitles(uploaded_srt.getvalue(), german_lines, bold_chunks)

        # process_subtitles returns None after reporting an error itself.
        if result_srt:
            st.success("✅ Subtitle processing complete!")
            st.download_button("📥 Download Formatted Subtitle", result_srt, "formatted_bilingual.srt", "text/plain")
|