Spaces:
Sleeping
Sleeping
GVAmaresh
committed on
Commit
·
e416868
1
Parent(s):
3fc2ce9
dev: check working
Browse files
app.py
CHANGED
@@ -33,41 +33,150 @@ def reencode_audio(input_path, output_path):
|
|
33 |
|
34 |
#-----------------------------------------------------------------------------------------
|
35 |
|
36 |
-
import os
|
37 |
-
from dotenv import load_dotenv
|
38 |
-
from googleapiclient.discovery import build
|
39 |
-
from google.auth.transport.requests import Request
|
40 |
-
from google.oauth2.credentials import Credentials
|
41 |
-
from google.oauth2 import service_account
|
42 |
-
|
43 |
-
SCOPES = ['https://www.googleapis.com/auth/drive']
|
44 |
-
|
45 |
-
|
46 |
-
details = {
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
}
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
def authenticate_with_env_vars(details):
    """Build Google OAuth credentials from ``details`` and refresh them if expired.

    Args:
        details: Mapping passed to ``Credentials.from_authorized_user_info``
            together with the module-level ``SCOPES``.

    Returns:
        The credentials object, refreshed when it was expired and carried a
        refresh token.

    Raises:
        ValueError: If the credentials are not valid and cannot be refreshed.
    """
    credentials = Credentials.from_authorized_user_info(details, SCOPES)

    # Already usable: nothing to refresh.
    if credentials and credentials.valid:
        return credentials

    can_refresh = credentials and credentials.expired and credentials.refresh_token
    if not can_refresh:
        raise ValueError("Credentials are invalid and cannot be refreshed.")

    credentials.refresh(Request())
    return credentials
|
66 |
|
67 |
#-----------------------------------------------------------------------------------------
|
|
|
|
|
68 |
|
69 |
file_id = "1zhisRgRi2qBFX73VFhzh-Ho93MORQqVa"
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
url = f"https://drive.google.com/uc?id={file_id}"
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
#-----------------------------------------------------------------------------------------
|
35 |
|
36 |
+
# import os
|
37 |
+
# from dotenv import load_dotenv
|
38 |
+
# from googleapiclient.discovery import build
|
39 |
+
# from google.auth.transport.requests import Request
|
40 |
+
# from google.oauth2.credentials import Credentials
|
41 |
+
# from google.oauth2 import service_account
|
42 |
+
|
43 |
+
# SCOPES = ['https://www.googleapis.com/auth/drive']
|
44 |
+
|
45 |
+
|
46 |
+
# details = {
|
47 |
+
# "refresh_token": "1//0gYLCF5OE4fTmCgYIARAAGBASNwF-L9Irp3Ik0q5OtsQClcLwW7sxPZSuMboe7wyjteuSuOD_WvavEHfhuTvkSjkLHitkh76XaD4",
|
48 |
+
# "token": "ya29.a0ARW5m753vyDgN_C7kUnnYTkeCfknSnDDj8tuVCe99dL2ieN3IzvCPVoN5kVg49CAYDz-pS5AgpjH7whiy7dr7QhwX4EiGQreJCzu109nlH6kxultrNup5q-_W2dNepbOa5YV8iH7OwP28RjQVR7fs9IlMO7BfnA9hw-WQqXNaCgYKAXMSARMSFQHGX2MieHrC7CpySZFYpoZWln6vxA0175",
|
49 |
+
# "token_uri": "https://oauth2.googleapis.com/token",
|
50 |
+
# "client_id": "573421158717-a2tulr4s7gg6or7sd76336busnmk22vu.apps.googleusercontent.com",
|
51 |
+
# "client_secret": "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",
|
52 |
+
# "scopes": ["https://www.googleapis.com/auth/drive"],
|
53 |
+
# "universe_domain": "googleapis.com",
|
54 |
+
# "account": "",
|
55 |
+
# }
|
56 |
+
|
57 |
+
|
58 |
+
# def authenticate_with_env_vars(details):
|
59 |
+
# creds = Credentials.from_authorized_user_info(details, SCOPES)
|
60 |
+
# if not creds or not creds.valid:
|
61 |
+
# if creds and creds.expired and creds.refresh_token:
|
62 |
+
# creds.refresh(Request())
|
63 |
+
# else:
|
64 |
+
# raise ValueError("Credentials are invalid and cannot be refreshed.")
|
65 |
+
# return creds
|
66 |
+
|
67 |
+
#-----------------------------------------------------------------------------------------
|
68 |
+
from fastapi import UploadFile
|
69 |
+
from googleapiclient.http import MediaIoBaseUpload
|
70 |
+
import io
|
71 |
+
import PyPDF2
|
72 |
+
|
73 |
+
# Name of the Drive folder (directly under the root) that uploads are stored in.
Folder_Name = "Document_DB"
# Template metadata for creating a Drive folder; check_folder overrides "name".
file_metadata = {
    "name": "Fake",
    "mimeType": "application/vnd.google-apps.folder",
}


def check_folder(service):
    """Return the ID of the ``Folder_Name`` folder in the Drive root, creating it if absent.

    Args:
        service: Google Drive API client; only ``service.files()`` is used.

    Returns:
        ``(folder_id, "success")`` when the folder was found or created, or
        ``(None, error_message)`` if any call raised.
    """
    try:
        files_api = service.files()
        page_token = None

        # Walk every result page: the folder may not be on the first one.
        while True:
            params = {
                "q": "mimeType = 'application/vnd.google-apps.folder' and 'root' in parents",
                "fields": "nextPageToken, files(id, name)",
            }
            if page_token:
                params["pageToken"] = page_token
            result = files_api.list(**params).execute()

            # "files" may be missing from the response; treat that as no folders.
            for folder in result.get("files") or []:
                if folder["name"] == Folder_Name:
                    return folder["id"], "success"

            page_token = result.get("nextPageToken")
            if not page_token:
                break

        # Create the folder under the same name the lookup searches for;
        # otherwise it is never found again and a new one is made on every call.
        folder_body = {**file_metadata, "name": Folder_Name}
        created = files_api.create(body=folder_body, fields="id").execute()
        return created["id"], "success"
    except Exception as e:
        print(f"Error occurred while checking folder: {e}")
        return None, str(e)
|
103 |
+
|
104 |
+
def extract_text_from_pdf(pdf_file_content):
    """Return the text of the first PDF page that contains "ABSTRACT".

    Args:
        pdf_file_content: Raw bytes of the PDF document.

    Returns:
        The matching page's text followed by a newline, an empty string when
        no page contains "ABSTRACT", or ``None`` if reading the PDF raised.
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_content))
        for pdf_page in reader.pages:
            text = pdf_page.extract_text()
            # Only the first page mentioning the marker is kept.
            if "ABSTRACT" in text:
                return text + "\n"
        return ""
    except Exception as e:
        print("An error occurred:", e)
        return None
|
119 |
+
|
120 |
+
async def extract_text_url(file: UploadFile):
    """Read an uploaded PDF and extract its abstract page text.

    Args:
        file: The uploaded file; its full content is read into memory.

    Returns:
        ``(text, "success")`` when extraction ran, or ``(None, error_message)``
        when reading the upload raised or the PDF could not be parsed.
    """
    try:
        file_content = await file.read()
        extracted_text = extract_text_from_pdf(file_content)
    except Exception as e:
        print(f"Error occurred while extracting text from file: {e}")
        return None, str(e)

    # extract_text_from_pdf signals a parse failure with None; do not report
    # that as a success.
    if extracted_text is None:
        return None, "Failed to extract text from PDF."

    return extracted_text, "success"
|
128 |
+
|
129 |
+
|
130 |
+
async def push_file_db(service, file: UploadFile):
    """Upload a PDF to the Drive storage folder, share it, and extract its text.

    The file is created inside the folder returned by ``check_folder``, a
    permission with role "reader" and type "anyone" is added to it, and the
    uploaded bytes are passed to ``extract_text_from_pdf``.

    Args:
        service: Google Drive API client.
        file: The uploaded file; its full content is read into memory.

    Returns:
        ``(file_id, extracted_text, "success")`` on success, or
        ``(None, None, error_message)`` on failure. Every path returns a tuple.
    """
    try:
        folder_id, status = check_folder(service)
        if not folder_id:
            # Same tuple shape as every other return path.
            return None, None, status

        file_content = await file.read()

        # Named distinctly so it does not shadow the module-level file_metadata.
        upload_metadata = {"name": file.filename, "parents": [folder_id]}
        media = MediaIoBaseUpload(io.BytesIO(file_content), mimetype="application/pdf")
        new_file = (
            service.files()
            .create(body=upload_metadata, media_body=media, fields="id")
            .execute()
        )

        service.permissions().create(
            fileId=new_file["id"],
            body={"role": "reader", "type": "anyone"},
            fields="id",
        ).execute()

        extracted_text = extract_text_from_pdf(file_content)

        return new_file.get("id"), extracted_text, "success"

    except Exception as e:
        print(f"Error occurred while pushing file to DB: {e}")
        return None, None, str(e)
|
161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
#-----------------------------------------------------------------------------------------
|
164 |
+
import os
|
165 |
+
import gdown
|
166 |
|
167 |
# Google Drive file ID of the .h5 file fetched at import time.
file_id = "1zhisRgRi2qBFX73VFhzh-Ho93MORQqVa"
output_dir = "./downloads"
output_file = "file.h5"

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, output_file)

url = f"https://drive.google.com/uc?id={file_id}"

try:
    download_result = gdown.download(url, output_path, quiet=False)
    # gdown can signal failure by returning None instead of raising, so only
    # report success when a path came back.
    if download_result is None:
        print(f"Error downloading file: no file was retrieved from {url}")
    else:
        print(f"File downloaded successfully to: {output_path}")
except Exception as e:
    print(f"Error downloading file: {e}")
|