akadymatech committed on
Commit
862b3ea
·
1 Parent(s): 5adbc2a

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +4 -6
  2. app.py +166 -0
  3. requirements.txt +4 -0
  4. styles.css +160 -0
README.md CHANGED
@@ -1,12 +1,10 @@
1
  ---
2
- title: ArabicYoutubeSearch
3
- emoji: 📊
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: streamlit
7
  sdk_version: 1.17.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ArabicYoutube Ask
3
+ emoji: 📈
4
+ colorFrom: gray
5
+ colorTo: blue
6
  sdk: streamlit
7
  sdk_version: 1.17.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
 
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import logging
from pathlib import Path

import pinecone
import streamlit as st
from sentence_transformers import SentenceTransformer
5
+
6
PINECONE_KEY = st.secrets["PINECONE_KEY"]  # API key issued at app.pinecone.io
INDEX_ID = 'youtube-search'

# Inline the stylesheet that ships next to this script. The previous
# <link href='akadymatech/ArabicYoutube-ask/raw/main/styles.css'> was a relative
# URL, so the browser resolved it against the running app's own host and the
# rules were never loaded.
_STYLESHEET = Path(__file__).with_name("styles.css")
if _STYLESHEET.is_file():
    st.markdown(
        f"<style>{_STYLESHEET.read_text(encoding='utf-8')}</style>",
        unsafe_allow_html=True,
    )
10
+
11
@st.experimental_singleton
def init_pinecone():
    """Initialise the Pinecone client and return a handle to the search index.

    Wrapped in ``st.experimental_singleton`` so the handle is created once
    and reused instead of being rebuilt on every call.
    """
    pinecone.init(api_key=PINECONE_KEY, environment="us-central1-gcp")
    index = pinecone.Index(INDEX_ID)
    return index
15
+
16
@st.experimental_singleton
def init_retriever():
    """Load the sentence-embedding model used to encode search queries.

    Wrapped in ``st.experimental_singleton`` so the model is loaded once
    and reused instead of being reloaded on every call.
    """
    model_name = "stsb-xlm-r-multilingual"
    return SentenceTransformer(model_name)
19
+
20
def make_query(query, retriever, top_k=8, include_values=False, include_metadata=True, filter=None, max_attempts=3):
    """Embed *query* and return the matching entries from the Pinecone index.

    Args:
        query: Free-text search string.
        retriever: Object whose ``encode([text])`` result has a ``.tolist()``
            method (presumably the SentenceTransformer from
            ``init_retriever`` -- confirm against callers).
        top_k: Maximum number of matches to request.
        include_values: Forwarded to ``index.query``.
        include_metadata: Forwarded to ``index.query``.
        filter: Optional metadata filter forwarded to ``index.query``. The
            name shadows the builtin but is kept for keyword callers.
        max_attempts: How many times to try the query before giving up.

    Returns:
        The ``'matches'`` entry of the query response, or an empty list when
        every attempt failed.
    """
    xq = retriever.encode([query]).tolist()
    logging.info("Query: %s", query)
    for attempt in range(1, max_attempts + 1):
        try:
            response = st.session_state.index.query(
                xq,
                top_k=top_k,
                include_values=include_values,
                include_metadata=include_metadata,
                filter=filter
            )
            # A successful query with zero hits is a valid result, not a failure.
            return response['matches']
        except Exception:
            # Record the cause instead of swallowing it, then rebuild the
            # connection before the next attempt.
            logging.exception(
                "Pinecone query attempt %d/%d failed; reconnecting",
                attempt, max_attempts,
            )
            pinecone.init(api_key=PINECONE_KEY, environment="us-central1-gcp")
            st.session_state.index = pinecone.Index(INDEX_ID)
    logging.error("Query failed")
    return []
48
+
49
# Keep the index handle in session state: make_query() reads it from there and
# replaces it after a failed request.
st.session_state.index = init_pinecone()
# Embedding model used to encode every search query.
retriever = init_retriever()
52
+
53
+ def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
54
+ meta = [(e, s, u, c) for e, s, u, c in zip(ends, starts, urls, contexts)]
55
+ meta.sort(reverse=False)
56
+ text_content = []
57
+ current_start = 0
58
+ current_end = 0
59
+ for end, start, url, context in meta:
60
+ # reformat seconds to timestamp
61
+ time = start / 60
62
+ mins = f"0{int(time)}"[-2:]
63
+ secs = f"0{int(round((time - int(mins))*60, 0))}"[-2:]
64
+ timestamp = f"{mins}:{secs}"
65
+ if start < current_end and start > current_start:
66
+ # this means it is a continuation of the previous sentence
67
+ text_content[-1][0] = text_content[-1][0].split(context[:10])[0]
68
+ text_content.append([f"[{timestamp}] {context.capitalize()}", url])
69
+ else:
70
+ text_content.append(["xxLINEBREAKxx", ""])
71
+ text_content.append([f"[{timestamp}] {context}", url])
72
+ current_start = start
73
+ current_end = end
74
+ html_text = ""
75
+ for text, url in text_content:
76
+ if text == "xxLINEBREAKxx":
77
+ html_text += "<br>"
78
+ else:
79
+ html_text += f"<small><a href={url}>{text.strip()}... </a></small>"
80
+ print(text)
81
+ html = f"""
82
+ <div class="container-fluid">
83
+ <div class="row align-items-start">
84
+ <div class="col-md-4 col-sm-4">
85
+ <div class="position-relative">
86
+ <a href={urls[0]}><img src={thumbnail} class="img-fluid" style="width: 192px; height: 106px"></a>
87
+ </div>
88
+ </div>
89
+ <div class="col-md-8 col-sm-8">
90
+ <h2>{title}</h2>
91
+ </div>
92
+ <div>
93
+ {html_text}
94
+ <br><br>
95
+ """
96
+ return st.markdown(html, unsafe_allow_html=True)
97
+
98
# Display name -> YouTube channel id, used to build the metadata filter.
channel_map = {
    'Akadyma': 'UCdS-kUpOLFNr2pdtbcncfng',
    'test': 'UCr8O8l5cCX85Oem1d18EezQ'
}

st.write("""
# YouTube ابحث في مقاطع الـ
""")

st.info("""
باحثك الشخصي يسمح لك في البحث في مقاطع اليوتوب. الاصدار الحالي تم إضافة قناة أكاديما
""")

# Bootstrap supplies the grid classes used by card(). The package specifier
# must read "bootstrap@4.0.0"; the integrity hash below belongs to that release.
st.markdown("""
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.0.0/dist/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">
""", unsafe_allow_html=True)

query = st.text_input("Search!", "")

with st.expander("Advanced Options"):
    # Only 'Akadyma' is offered (and pre-selected); 'test' stays hidden.
    channel_options = st.multiselect(
        'Channels to Search',
        ['Akadyma'],
        ['Akadyma']
    )
123
+
124
if query.strip():
    channels = [channel_map[name] for name in channel_options]
    # "Channels to Search" must restrict results TO the chosen channels, so
    # the operator is $in (the previous $nin excluded exactly those channels).
    # With nothing selected, search without a channel restriction.
    channel_filter = {'channel_id': {'$in': channels}} if channels else None
    matches = make_query(query, retriever, top_k=5, filter=channel_filter)

    # Group the matched snippets by video. Dicts keep insertion order, so the
    # cards appear in the order each video was first matched.
    results = {}
    for match in matches:
        metadata = match['metadata']
        start = int(metadata['start'])
        video_id = metadata['url'].split('/')[-1]
        entry = results.setdefault(video_id, {
            'title': metadata['title'],
            'urls': [],
            'contexts': [],
            'starts': [],
            'ends': [],
        })
        entry['urls'].append(f"{metadata['url']}?t={start}")
        entry['contexts'].append(metadata['text'])
        entry['starts'].append(start)
        entry['ends'].append(int(metadata['end']))

    # now display cards
    for video_id, entry in results.items():
        card(
            thumbnail=f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg",
            title=entry['title'],
            urls=entry['urls'],
            contexts=entry['contexts'],
            starts=entry['starts'],
            ends=entry['ends']
        )
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
transformers
sentence-transformers
# app.py uses the legacy pinecone.init()/pinecone.Index module-level API,
# which was removed in pinecone-client 3.0.
pinecone-client<3.0
click==8.0
styles.css ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url("https://fonts.googleapis.com/css?family=Arimo:400,700");
2
+
3
+ section.main[tabindex="0"] {
4
+ overflow: scroll;
5
+ }
6
+
7
+ body {
8
+ height: 100%;
9
+ width: 100%;
10
+ background: #e9e9e9;
11
+ font-family: 'Arimo', Arial, sans-serif;
12
+ font-weight: 400;
13
+ font-size: 14px;
14
+ color: #010b26;
15
+ }
16
+
17
+ * {
18
+ -webkit-transition: 300ms;
19
+ transition: 300ms;
20
+ }
21
+
22
+ .intro {
23
+ text-align: center;
24
+ }
25
+
26
+ ul {
27
+ list-style-type: none;
28
+ }
29
+
30
+ h1,
31
+ h2,
32
+ h3,
33
+ h4,
34
+ h5,
35
+ p {
36
+ font-weight: 400;
37
+ }
38
+
39
+ a {
40
+ text-decoration: none;
41
+ color: inherit;
42
+ }
43
+
44
+ a:hover {
45
+ color: #6ABCEA;
46
+ }
47
+
48
+ .container {
49
+ display: -webkit-box;
50
+ display: -ms-flexbox;
51
+ display: flex;
52
+ -ms-flex-wrap: wrap;
53
+ flex-wrap: wrap;
54
+ max-width: 100%;
55
+ margin-top: 10vh;
56
+ margin-left: auto;
57
+ margin-right: auto;
58
+ -webkit-box-pack: center;
59
+ -ms-flex-pack: center;
60
+ justify-content: center;
61
+ }
62
+
63
+ .movie-card {
64
+ background: #ffffff;
65
+ box-shadow: 0px 6px 18px rgba(0, 0, 0, 0.1);
66
+ width: 100%;
67
+ max-width: 315px;
68
+ margin: 2em;
69
+ border-radius: 10px;
70
+ display: inline-block;
71
+ }
72
+
73
+ .movie-header {
74
+ padding: 0;
75
+ margin: 0;
76
+ height: 367px;
77
+ width: 100%;
78
+ display: block;
79
+ border-top-left-radius: 10px;
80
+ border-top-right-radius: 10px;
81
+ }
82
+
83
+ .header-icon-container {
84
+ position: relative;
85
+ }
86
+
87
+ .header-icon {
88
+ width: 100%;
89
+ height: 367px;
90
+ line-height: 367px;
91
+ text-align: center;
92
+ vertical-align: middle;
93
+ margin: 0 auto;
94
+ color: #ffffff;
95
+ font-size: 54px;
96
+ text-shadow: 0px 0px 20px #6abcea, 0px 5px 20px #6ABCEA;
97
+ opacity: .85;
98
+ }
99
+
100
+ .header-icon:hover {
101
+ background: rgba(0, 0, 0, 0.15);
102
+ font-size: 74px;
103
+ text-shadow: 0px 0px 20px #6abcea, 0px 5px 30px #6ABCEA;
104
+ border-top-left-radius: 10px;
105
+ border-top-right-radius: 10px;
106
+ opacity: 1;
107
+ }
108
+
109
+ .movie-card:hover {
110
+ -webkit-transform: scale(1.03);
111
+ transform: scale(1.03);
112
+ box-shadow: 0px 10px 25px rgba(0, 0, 0, 0.08);
113
+ }
114
+
115
+ .movie-content {
116
+ padding: 18px 18px 24px 18px;
117
+ margin: 0;
118
+ }
119
+
120
+ .movie-content-header,
121
+ .movie-info {
122
+ display: table;
123
+ width: 100%;
124
+ }
125
+
126
+ .movie-title {
127
+ font-size: 24px;
128
+ margin: 0;
129
+ display: table-cell;
130
+ }
131
+
132
+ .movie-info {
133
+ margin-top: 1em;
134
+ }
135
+
136
+ .info-section {
137
+ display: table-cell;
138
+ text-transform: uppercase;
139
+ text-align: center;
140
+ }
141
+
142
+ .info-section:first-of-type {
143
+ text-align: left;
144
+ }
145
+
146
+ .info-section:last-of-type {
147
+ text-align: right;
148
+ }
149
+
150
+ .info-section label {
151
+ display: block;
152
+ color: rgba(0, 0, 0, 0.5);
153
+ margin-bottom: .5em;
154
+ font-size: 9px;
155
+ }
156
+
157
+ .info-section span {
158
+ font-weight: 700;
159
+ font-size: 11px;
160
+ }