gskdsrikrishna committed on
Commit
903d539
·
verified ·
1 Parent(s): 3116ca3

Update wikipedia.py

Browse files
Files changed (1) hide show
  1. wikipedia.py +119 -137
wikipedia.py CHANGED
@@ -1,137 +1,119 @@
1
- import streamlit as st
2
- import wikipediaapi
3
- import datetime
4
- from reportlab.lib.pagesizes import letter
5
- from reportlab.pdfgen import canvas
6
- import tempfile
7
- from gtts import gTTS
8
- import speech_recognition as sr
9
-
10
- # Wikipedia summary function with character limit and summary levels
11
- def get_wikipedia_summary(query, lang_code, char_limit, summary_level):
12
- user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
13
- wiki = wikipediaapi.Wikipedia(language=lang_code, extract_format=wikipediaapi.ExtractFormat.WIKI, user_agent=user_agent)
14
- page = wiki.page(query)
15
- if not page.exists():
16
- return "Page not found."
17
- if summary_level == "Brief":
18
- return page.summary[:char_limit]
19
- elif summary_level == "Detailed":
20
- return page.summary # Full summary
21
- elif summary_level == "Bullet Points":
22
- points = page.summary.split('. ')
23
- return '\n'.join(f"- {p.strip()}" for p in points if p)[:char_limit]
24
-
25
- # Save chat history as PDF with a user-defined filename
26
- def save_chat_history_as_pdf(chat_history, file_name):
27
- timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
28
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
29
- pdf = canvas.Canvas(tmp_file.name, pagesize=letter)
30
- pdf.setTitle(file_name)
31
- pdf.drawString(30, 750, f"{file_name} - Saved on {timestamp}")
32
- y_position = 720
33
- for query, response in chat_history:
34
- pdf.drawString(30, y_position, f"User: {query}")
35
- y_position -= 20
36
- pdf.drawString(30, y_position, f"Bot: {response}")
37
- y_position -= 40
38
- if y_position < 40:
39
- pdf.showPage()
40
- y_position = 750
41
- pdf.save()
42
- return tmp_file.name
43
-
44
- # Text-to-speech using gTTS
45
- def text_to_speech(text, filename, lang="en"):
46
- tts = gTTS(text=text, lang=lang)
47
- tts.save(filename)
48
- return filename
49
-
50
- # Voice search function
51
- def voice_search(lang_code):
52
- recognizer = sr.Recognizer()
53
- with sr.Microphone() as source:
54
- st.write("Listening...")
55
- audio = recognizer.listen(source)
56
- try:
57
- # Recognize the speech based on the specified language
58
- query = recognizer.recognize_google(audio, language=lang_code)
59
- st.success(f"You said: {query}")
60
- return query
61
- except sr.UnknownValueError:
62
- st.error("Sorry, I could not understand the audio.")
63
- return None
64
- except sr.RequestError as e:
65
- st.error(f"Could not request results from Google Speech Recognition service; {e}")
66
- return None
67
-
68
- # Initialize the Streamlit app
69
- def main():
70
- st.set_page_config(page_title="Wikipedia Summary & Text-to-Speech", layout="wide")
71
-
72
- # Sidebar options
73
- st.sidebar.title("Options")
74
- lang_map = {
75
- "English": "en",
76
- "Spanish": "es",
77
- "Chinese": "zh",
78
- "Hindi": "hi",
79
- "Telugu": "te"
80
- }
81
- selected_lang = st.sidebar.selectbox("Wikipedia Language", list(lang_map.keys()), key="language_selector")
82
- summary_levels = ["Brief", "Detailed", "Bullet Points"]
83
- summary_level = st.sidebar.selectbox("Summarization Level", summary_levels)
84
- char_limit = st.sidebar.slider("Character Limit", min_value=100, max_value=2000, value=500, step=100)
85
-
86
- # Chat history and favorites in session state
87
- if "chat_history" not in st.session_state:
88
- st.session_state.chat_history = []
89
- if "favorites" not in st.session_state:
90
- st.session_state.favorites = []
91
-
92
- st.title("Wikipedia Summary & Text-to-Speech")
93
-
94
- # Text input for manual search
95
- query = st.text_input("Enter a topic to search on Wikipedia:")
96
-
97
- # Button for voice search
98
- if st.button("Voice Search"):
99
- lang_code = lang_map[selected_lang] # Get the language code for the selected language
100
- voice_query = voice_search(lang_code) # Pass the language code to the voice search
101
- if voice_query:
102
- query = voice_query # Use the voice query if recognized
103
-
104
- # Display summary based on query and language selection
105
- if query:
106
- lang_code = lang_map[selected_lang]
107
- summary = get_wikipedia_summary(query, lang_code, char_limit, summary_level)
108
- st.markdown(f"### Summary for: {query}")
109
- st.write(summary)
110
- st.session_state.chat_history.append((query, summary))
111
-
112
- # Save to favorites
113
- if st.button("Add to Favorites"):
114
- st.session_state.favorites.append((query, summary))
115
- st.success("Added to favorites!")
116
-
117
- # Text-to-speech
118
- tts_filename = f"{query}_speech.mp3"
119
- if st.button("Play Text-to-Speech"):
120
- text_to_speech(summary, tts_filename, lang=lang_code)
121
- st.audio(tts_filename, format="audio/mp3")
122
-
123
- # Save chat history as PDF
124
- file_name = st.sidebar.text_input("File Name to Save Chat", value="chat_history")
125
- if st.sidebar.button("Save Chat as PDF"):
126
- pdf_path = save_chat_history_as_pdf(st.session_state.chat_history, file_name)
127
- with open(pdf_path, "rb") as pdf_file:
128
- st.sidebar.download_button("Download PDF", pdf_file, file_name=f"{file_name}.pdf", mime="application/pdf")
129
-
130
- # Display favorites
131
- st.sidebar.write("### Favorites")
132
- for i, (fav_query, fav_summary) in enumerate(st.session_state.favorites, 1):
133
- st.sidebar.write(f"**{i}. {fav_query}**")
134
- st.sidebar.write(fav_summary[:100] + "...")
135
-
136
- if __name__ == "__main__":
137
- main()
 
1
+ import streamlit as st
2
+ import wikipediaapi
3
+ import datetime
4
+ from reportlab.lib.pagesizes import letter
5
+ from reportlab.pdfgen import canvas
6
+ import tempfile
7
+ from gtts import gTTS
8
+ import speech_recognition as sr
9
+
10
def get_wikipedia_summary(query, lang_code, char_limit, summary_level):
    """Fetch a Wikipedia page summary and format it for display.

    Args:
        query: Title of the Wikipedia page to look up.
        lang_code: Wikipedia language edition code (e.g. "en").
        char_limit: Maximum number of characters returned for the "Brief"
            and "Bullet Points" levels; "Detailed" is not truncated.
        summary_level: One of "Brief", "Detailed" or "Bullet Points".

    Returns:
        The formatted summary text, or "Page not found." when the query is
        blank or the page does not exist.

    Raises:
        ValueError: If ``summary_level`` is not a supported level.
    """
    supported_levels = ("Brief", "Detailed", "Bullet Points")
    if summary_level not in supported_levels:
        # Fail loudly instead of implicitly returning None, which callers
        # would otherwise try to render and slice as text.
        raise ValueError(
            f"Unsupported summary level {summary_level!r}; "
            f"expected one of {supported_levels}"
        )

    # A blank title can never resolve to a page, so skip the lookup entirely.
    if not query or not query.strip():
        return "Page not found."

    # NOTE(review): a generic browser-style User-Agent may be rejected by
    # Wikimedia; consider a descriptive "app/version (contact)" value.
    user_agent = "Mozilla/5.0"
    wiki = wikipediaapi.Wikipedia(
        language=lang_code,
        extract_format=wikipediaapi.ExtractFormat.WIKI,
        user_agent=user_agent,
    )
    page = wiki.page(query)
    if not page.exists():
        return "Page not found."

    summary = page.summary
    if summary_level == "Detailed":
        return summary
    if summary_level == "Brief":
        return summary[:char_limit]

    # "Bullet Points": one bullet per sentence-like fragment, skipping
    # fragments that are empty once surrounding whitespace is removed.
    fragments = (part.strip() for part in summary.split('. '))
    bullets = '\n'.join(f"- {fragment}" for fragment in fragments if fragment)
    return bullets[:char_limit]
23
+
24
def _draw_wrapped_text(pdf, text, y_position):
    """Draw ``text`` at the left margin, wrapped to the page width.

    A new page is started whenever the next line would fall below the bottom
    margin. Returns the y coordinate for the line following the drawn text.
    """
    # Local import keeps this block self-contained; reportlab is already a
    # dependency of this file.
    from reportlab.lib.utils import simpleSplit

    font_name, font_size = "Helvetica", 12
    left_margin, top_y, bottom_y, leading = 30, 750, 40, 15
    max_width = letter[0] - 2 * left_margin

    # Guard against an empty split result so blank text still takes one line.
    lines = simpleSplit(str(text), font_name, font_size, max_width) or [""]
    for line in lines:
        if y_position < bottom_y:
            pdf.showPage()
            # Set the font explicitly on every new page.
            pdf.setFont(font_name, font_size)
            y_position = top_y
        pdf.drawString(left_margin, y_position, line)
        y_position -= leading
    return y_position


def save_chat_history_as_pdf(chat_history, file_name):
    """Write the chat history to a temporary PDF and return its path.

    Long or multi-line entries are wrapped to the page width and continue
    onto new pages instead of running off the page.

    Args:
        chat_history: Iterable of ``(query, response)`` pairs.
        file_name: Title used for the PDF metadata and the header line.

    Returns:
        Path of the generated PDF. The file is not deleted automatically;
        the caller is responsible for removing it.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Reserve a unique path, then close the handle before reportlab opens
    # the same path for writing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        pdf_path = tmp_file.name

    pdf = canvas.Canvas(pdf_path, pagesize=letter)
    pdf.setTitle(file_name)
    # NOTE(review): Helvetica is a Latin font; Hindi/Telugu/Chinese text
    # presumably needs a registered Unicode font to render - confirm.
    pdf.setFont("Helvetica", 12)

    # Spacing below keeps the single-line layout: header at 750, first
    # entry at 720, 20pt between "User" and "Bot", 40pt between entries.
    y_position = _draw_wrapped_text(pdf, f"{file_name} - Saved on {timestamp}", 750)
    y_position -= 15
    for query, response in chat_history:
        y_position = _draw_wrapped_text(pdf, f"User: {query}", y_position)
        y_position -= 5
        y_position = _draw_wrapped_text(pdf, f"Bot: {response}", y_position)
        y_position -= 25

    pdf.save()
    return pdf_path
41
+
42
def text_to_speech(text, filename, lang="en"):
    """Synthesize ``text`` with gTTS and write the audio to ``filename``.

    Args:
        text: Text to convert to speech.
        filename: Destination path for the generated audio file.
        lang: Language code passed to gTTS (defaults to "en").

    Returns:
        The ``filename`` that was written.
    """
    gTTS(text=text, lang=lang).save(filename)
    return filename
46
+
47
def voice_search(lang_code):
    """Capture one spoken phrase from the microphone and transcribe it.

    Progress and errors are reported through Streamlit widgets.

    Args:
        lang_code: Language code passed to Google Speech Recognition.

    Returns:
        The recognized text, or None when no microphone is usable, nothing
        was said in time, or the audio could not be transcribed.
    """
    # Bound the wait so a silent room cannot block the script run forever.
    listen_timeout_seconds = 10
    phrase_limit_seconds = 15

    recognizer = sr.Recognizer()

    try:
        microphone = sr.Microphone()
    except (OSError, AttributeError) as e:
        # NOTE(review): presumably raised when no input device or audio
        # backend is available (e.g. on a hosted server) - confirm.
        st.error(f"Microphone is not available; {e}")
        return None

    try:
        with microphone as source:
            st.write("Listening...")
            audio = recognizer.listen(
                source,
                timeout=listen_timeout_seconds,
                phrase_time_limit=phrase_limit_seconds,
            )
    except sr.WaitTimeoutError:
        st.error("No speech detected.")
        return None
    except OSError as e:
        st.error(f"Microphone is not available; {e}")
        return None

    try:
        query = recognizer.recognize_google(audio, language=lang_code)
    except sr.UnknownValueError:
        st.error("Could not understand the audio.")
        return None
    except sr.RequestError as e:
        st.error(f"Could not request results; {e}")
        return None

    st.success(f"You said: {query}")
    return query
62
+
63
def main():
    """Run the Streamlit app: search, summarize, speak and export.

    The sidebar holds the language, summary level, character limit, PDF
    export and favorites list. The main area holds the text/voice search,
    the summary, and the favorites and text-to-speech buttons.
    """
    # Local import: only needed here, to clean up the temporary PDF.
    import os

    st.title("Wikipedia Summary & Text-to-Speech")

    lang_map = {
        "English": "en",
        "Spanish": "es",
        "Chinese": "zh",
        "Hindi": "hi",
        "Telugu": "te",
    }
    selected_lang = st.sidebar.selectbox(
        "Wikipedia Language", list(lang_map), key="language_selector"
    )
    summary_levels = ["Brief", "Detailed", "Bullet Points"]
    summary_level = st.sidebar.selectbox("Summarization Level", summary_levels)
    char_limit = st.sidebar.slider(
        "Character Limit", min_value=100, max_value=2000, value=500, step=100
    )
    lang_code = lang_map[selected_lang]

    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "favorites" not in st.session_state:
        st.session_state.favorites = []

    query = st.text_input("Enter a topic to search on Wikipedia:")

    # NOTE(review): a button is only True on the run in which it was
    # clicked, so a voice query is presumably lost on the next rerun.
    if st.button("Voice Search"):
        voice_query = voice_search(lang_code)
        if voice_query:
            query = voice_query

    if query:
        summary = get_wikipedia_summary(query, lang_code, char_limit, summary_level)
        st.markdown(f"### Summary for: {query}")
        st.write(summary)

        # The script reruns on every widget interaction; only record the
        # entry when it differs from the most recent one.
        entry = (query, summary)
        history = st.session_state.chat_history
        if not history or history[-1] != entry:
            history.append(entry)

        if st.button("Add to Favorites"):
            if entry not in st.session_state.favorites:
                st.session_state.favorites.append(entry)
            st.success("Added to favorites!")

        if st.button("Play Text-to-Speech"):
            # Use a fixed name inside a throwaway directory: the raw query
            # may contain path separators, and nothing is left on disk.
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = text_to_speech(
                    summary, os.path.join(tmp_dir, "speech.mp3"), lang=lang_code
                )
                with open(audio_path, "rb") as audio_file:
                    audio_bytes = audio_file.read()
            st.audio(audio_bytes, format="audio/mp3")

    file_name = st.sidebar.text_input("File Name to Save Chat", value="chat_history")
    if st.sidebar.button("Save Chat as PDF"):
        pdf_path = save_chat_history_as_pdf(st.session_state.chat_history, file_name)
        # Load the PDF into memory so the temporary file can be removed.
        try:
            with open(pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
        finally:
            os.remove(pdf_path)
        st.sidebar.download_button(
            "Download PDF",
            pdf_bytes,
            file_name=f"{file_name}.pdf",
            mime="application/pdf",
        )

    st.sidebar.write("### Favorites")
    for i, (fav_query, fav_summary) in enumerate(st.session_state.favorites, 1):
        st.sidebar.write(f"**{i}. {fav_query}**")
        st.sidebar.write(fav_summary[:100] + "...")


if __name__ == "__main__":
    main()