GVAmaresh committed on
Commit
e416868
·
1 Parent(s): 3fc2ce9

dev: check working

Browse files
Files changed (1) hide show
  1. app.py +138 -29
app.py CHANGED
@@ -33,41 +33,150 @@ def reencode_audio(input_path, output_path):
33
 
34
  #-----------------------------------------------------------------------------------------
35
 
36
- import os
37
- from dotenv import load_dotenv
38
- from googleapiclient.discovery import build
39
- from google.auth.transport.requests import Request
40
- from google.oauth2.credentials import Credentials
41
- from google.oauth2 import service_account
42
-
43
- SCOPES = ['https://www.googleapis.com/auth/drive']
44
-
45
-
46
import os

# OAuth "authorized user" info for the Drive API.
# Secrets must never be committed: the refresh token, access token, client id
# and client secret are read from the environment (e.g. populated via
# load_dotenv() from an untracked .env file).  A value is None when its
# variable is unset.  The variable names below are this module's convention.
details = {
    "refresh_token": os.environ.get("GOOGLE_REFRESH_TOKEN"),
    "token": os.environ.get("GOOGLE_ACCESS_TOKEN"),
    "token_uri": "https://oauth2.googleapis.com/token",
    "client_id": os.environ.get("GOOGLE_CLIENT_ID"),
    "client_secret": os.environ.get("GOOGLE_CLIENT_SECRET"),
    "scopes": ["https://www.googleapis.com/auth/drive"],
    "universe_domain": "googleapis.com",
    "account": "",
}
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
def authenticate_with_env_vars(details):
    """Build Drive credentials from ``details`` and refresh them if needed.

    Args:
        details: Authorized-user info mapping passed unchanged to
            ``Credentials.from_authorized_user_info`` together with ``SCOPES``.

    Returns:
        The credentials object, refreshed when it was not valid.

    Raises:
        ValueError: If the credentials are not valid and carry no refresh
            token to renew them with.
    """
    creds = Credentials.from_authorized_user_info(details, SCOPES)
    if creds.valid:
        return creds

    # Refresh whenever a refresh token is available.  Gating this on
    # ``creds.expired`` rejected credentials that have a refresh token but no
    # access token / expiry recorded (presumably ``expired`` is False when no
    # expiry is known -- confirm against the google-auth documentation).
    if not creds.refresh_token:
        raise ValueError("Credentials are invalid and cannot be refreshed.")
    creds.refresh(Request())
    return creds
66
 
67
  #-----------------------------------------------------------------------------------------
 
 
68
 
69
import os

file_id = "1zhisRgRi2qBFX73VFhzh-Ho93MORQqVa"
# The previous value "/path/to/save/file.h5" was a placeholder whose directory
# does not exist; save under a local directory that is created on demand.
output = os.path.join("downloads", "file.h5")
url = f"https://drive.google.com/uc?id={file_id}"

os.makedirs(os.path.dirname(output), exist_ok=True)

# This runs at import time, so a failed download must not take the module down.
try:
    gdown.download(url, output, quiet=False)
except Exception as e:
    print(f"Error downloading file: {e}")
 
 
 
 
 
33
 
34
  #-----------------------------------------------------------------------------------------
35
 
36
+ # import os
37
+ # from dotenv import load_dotenv
38
+ # from googleapiclient.discovery import build
39
+ # from google.auth.transport.requests import Request
40
+ # from google.oauth2.credentials import Credentials
41
+ # from google.oauth2 import service_account
42
+
43
+ # SCOPES = ['https://www.googleapis.com/auth/drive']
44
+
45
+
46
+ # details = {
47
+ # "refresh_token": "<REDACTED - load from the environment; revoke the leaked value>",
48
+ # "token": "<REDACTED - load from the environment; revoke the leaked value>",
49
+ # "token_uri": "https://oauth2.googleapis.com/token",
50
+ # "client_id": "573421158717-a2tulr4s7gg6or7sd76336busnmk22vu.apps.googleusercontent.com",
51
+ # "client_secret": "<REDACTED - load from the environment; rotate the leaked value>",
52
+ # "scopes": ["https://www.googleapis.com/auth/drive"],
53
+ # "universe_domain": "googleapis.com",
54
+ # "account": "",
55
+ # }
56
+
57
+
58
+ # def authenticate_with_env_vars(details):
59
+ # creds = Credentials.from_authorized_user_info(details, SCOPES)
60
+ # if not creds or not creds.valid:
61
+ # if creds and creds.expired and creds.refresh_token:
62
+ # creds.refresh(Request())
63
+ # else:
64
+ # raise ValueError("Credentials are invalid and cannot be refreshed.")
65
+ # return creds
66
+
67
+ #-----------------------------------------------------------------------------------------
68
+ from fastapi import UploadFile
69
+ from googleapiclient.http import MediaIoBaseUpload
70
+ import io
71
+ import PyPDF2
72
+
73
# Name of the Drive folder (directly under the root) that stores uploads.
Folder_Name = "Document_DB"

# Metadata used when the folder has to be created.  The name must equal
# Folder_Name: check_folder() looks the folder up by that name, so any other
# name here makes it create a brand-new folder on every call.
file_metadata = {
    "name": Folder_Name,
    "mimeType": "application/vnd.google-apps.folder",
}


def check_folder(service):
    """Return the id of the ``Folder_Name`` folder, creating it if missing.

    Args:
        service: Drive API client exposing ``files().list(...)`` and
            ``files().create(...)`` whose results have ``execute()``.

    Returns:
        ``(folder_id, "success")`` on success, or ``(None, error_message)``
        when any call on ``service`` raises.
    """
    try:
        files_api = service.files()
        # Skip trashed folders so uploads never land in the bin.
        query = (
            "mimeType = 'application/vnd.google-apps.folder' "
            "and 'root' in parents and trashed = false"
        )

        # Walk every result page; the folder may not be on the first one.
        page_token = None
        while True:
            result = files_api.list(
                q=query,
                fields="nextPageToken, files(id, name)",
                pageToken=page_token,
            ).execute()
            for folder in result.get("files") or []:
                if folder["name"] == Folder_Name:
                    return folder["id"], "success"
            page_token = result.get("nextPageToken")
            if not page_token:
                break

        created = files_api.create(body=file_metadata, fields="id").execute()
        return created["id"], "success"
    except Exception as e:
        print(f"Error occurred while locating folder {Folder_Name!r}: {e}")
        return None, str(e)
103
+
104
def extract_text_from_pdf(pdf_file_content):
    """Return the text of the first PDF page that contains ``ABSTRACT``.

    Args:
        pdf_file_content: Raw bytes of a PDF document.

    Returns:
        The matching page's text followed by a newline, ``""`` when no page
        contains the marker, or ``None`` when reading the PDF raises.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_content))
        for page in pdf_reader.pages:
            # Guard against a page yielding no text object at all, so one
            # such page cannot abort the scan of the remaining pages.
            page_text = page.extract_text() or ""
            if "ABSTRACT" in page_text:
                return page_text + "\n"
        return ""
    except Exception as e:
        # Best effort by design: callers treat None as "could not extract".
        print("An error occurred:", e)
        return None
119
+
120
async def extract_text_url(file: UploadFile):
    """Read an uploaded PDF and extract its abstract page text.

    Args:
        file: Uploaded file whose awaited ``read()`` yields the PDF bytes.

    Returns:
        ``(text, "success")`` when extraction worked, otherwise
        ``(None, error_message)``.  Never raises.
    """
    try:
        file_content = await file.read()
        extracted_text = extract_text_from_pdf(file_content)
    except Exception as e:
        print(f"Error occurred while extracting text from file: {e}")
        return None, str(e)

    # extract_text_from_pdf signals failure with None; do not report that
    # as a success to the caller.
    if extracted_text is None:
        return None, "Failed to extract text from PDF"
    return extracted_text, "success"
128
+
129
+
130
async def push_file_db(service, file: UploadFile):
    """Upload a PDF into the Drive document folder and extract its text.

    Args:
        service: Drive API client passed to ``check_folder`` and used for
            ``files().create`` and ``permissions().create``.
        file: Uploaded file; its ``filename`` becomes the Drive file name.

    Returns:
        ``(file_id, extracted_text, "success")`` on success, or
        ``(None, None, error_message)`` on any failure.  Always a 3-tuple.
    """
    try:
        folder_id, status = check_folder(service)
        if not folder_id:
            return None, None, status

        file_content = await file.read()

        # Local name chosen so the module-level ``file_metadata`` (folder
        # creation metadata) is not shadowed.
        upload_metadata = {"name": file.filename, "parents": [folder_id]}
        media = MediaIoBaseUpload(io.BytesIO(file_content), mimetype="application/pdf")
        new_file = (
            service.files()
            .create(body=upload_metadata, media_body=media, fields="id")
            .execute()
        )

        # NOTE(review): this makes every uploaded document readable by anyone
        # who has the link -- confirm that public access is intended.
        service.permissions().create(
            fileId=new_file["id"],
            body={"role": "reader", "type": "anyone"},
            fields="id",
        ).execute()

        extracted_text = extract_text_from_pdf(file_content)

        return new_file.get("id"), extracted_text, "success"

    except Exception as e:
        print(f"Error occurred while pushing file to DB: {e}")
        return None, None, str(e)
161
 
 
 
 
 
 
 
 
 
162
 
163
  #-----------------------------------------------------------------------------------------
164
+ import os
165
+ import gdown
166
 
167
file_id = "1zhisRgRi2qBFX73VFhzh-Ho93MORQqVa"
output_dir = "./downloads"
output_file = "file.h5"

# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, output_file)

url = f"https://drive.google.com/uc?id={file_id}"

# This runs at import time, so a failed download is reported, not raised.
try:
    _downloaded = gdown.download(url, output_path, quiet=False)
except Exception as e:
    print(f"Error downloading file: {e}")
else:
    # NOTE(review): gdown.download presumably returns the output path, and
    # some versions return None instead of raising on failure -- confirm.
    if _downloaded:
        print(f"File downloaded successfully to: {output_path}")
    else:
        print("Error downloading file: no file was written")