Spaces:
Running
Running
Update src/text_processing.py
Browse files
- src/text_processing.py +13 -8
src/text_processing.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
import os
|
2 |
import fitz
|
3 |
from docx import Document
|
4 |
-
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
load_dotenv()
|
8 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
9 |
-
genai.
|
10 |
|
11 |
####################### - TEXT EXTRACTION - #######################
|
12 |
def extract_text_from_pdf(pdf_path):
|
@@ -51,9 +51,11 @@ def split_text_by_semantics(text):
|
|
51 |
"""
|
52 |
|
53 |
try:
|
54 |
-
|
55 |
-
|
|
|
56 |
result_text = response.text.strip()
|
|
|
57 |
|
58 |
chunks = result_text.split("- Phần ")
|
59 |
chunks = [chunk.strip() for chunk in chunks if chunk]
|
@@ -96,9 +98,10 @@ def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_st
|
|
96 |
"""
|
97 |
|
98 |
try:
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
102 |
|
103 |
explanations = []
|
104 |
for idx, chunk in enumerate(chunks, start=1):
|
@@ -111,7 +114,9 @@ def generate_explaination_for_chunks(chunks, analysis_level='basic', writting_st
|
|
111 |
Hãy đảm bảo phần tóm tắt không vượt quá {word_upper_limit} từ và không ít hơn {word_lower_limit}.
|
112 |
"""
|
113 |
|
114 |
-
part_response =
|
|
|
|
|
115 |
explanations.append(part_response.text.strip())
|
116 |
|
117 |
return explanations
|
|
|
1 |
import os
|
2 |
import fitz
|
3 |
from docx import Document
|
4 |
+
from google import genai
|
5 |
from dotenv import load_dotenv
|
6 |
|
7 |
load_dotenv()
|
8 |
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
9 |
+
client = genai.Client(api_key=GOOGLE_API_KEY)
|
10 |
|
11 |
####################### - TEXT EXTRACTION - #######################
|
12 |
def extract_text_from_pdf(pdf_path):
|
|
|
51 |
"""
|
52 |
|
53 |
try:
|
54 |
+
response = client.models.generate_content(
|
55 |
+
model="gemini-2.0-flash", contents=prompt
|
56 |
+
)
|
57 |
result_text = response.text.strip()
|
58 |
+
print(result_text)
|
59 |
|
60 |
chunks = result_text.split("- Phần ")
|
61 |
chunks = [chunk.strip() for chunk in chunks if chunk]
|
|
|
98 |
"""
|
99 |
|
100 |
try:
|
101 |
+
response = client.models.generate_content(
|
102 |
+
model="gemini-2.0-flash", contents=overview_prompt
|
103 |
+
)
|
104 |
+
print(response)
|
105 |
|
106 |
explanations = []
|
107 |
for idx, chunk in enumerate(chunks, start=1):
|
|
|
114 |
Hãy đảm bảo phần tóm tắt không vượt quá {word_upper_limit} từ và không ít hơn {word_lower_limit}.
|
115 |
"""
|
116 |
|
117 |
+
part_response = response = client.models.generate_content(
|
118 |
+
model="gemini-2.0-flash", contents=part_prompt
|
119 |
+
)
|
120 |
explanations.append(part_response.text.strip())
|
121 |
|
122 |
return explanations
|