mohamedrady committed on
Commit
c3d42ed
·
verified ·
1 Parent(s): d478cd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -31
app.py CHANGED
@@ -6,7 +6,8 @@ from transformers import pipeline
6
  import PyPDF2
7
  import gradio as gr
8
 
9
- # Initialize required tools
 
10
  nlp = spacy.load("en_core_web_sm")
11
  nltk.download('punkt')
12
 
@@ -14,7 +15,6 @@ nltk.download('punkt')
14
  device = 0 if torch.cuda.is_available() else -1
15
  analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
16
 
17
- # Define functions for text analysis
18
  def spacy_ner_analysis(text):
19
  doc = nlp(text)
20
  entities = [(ent.text, ent.label_) for ent in doc.ents]
@@ -45,33 +45,6 @@ def extract_pdf_text(file_path):
45
  text += page.extract_text()
46
  return text
47
 
48
- def analyze_text(text):
49
- try:
50
- result = analyzer(text)
51
- return result
52
- except Exception as e:
53
- print(f"Error analyzing text: {str(e)}")
54
- return ""
55
-
56
- def process_text(text, output_directory, filename_prefix):
57
- spacy_entities = spacy_ner_analysis(text)
58
- sentences = nltk_extract_sentences(text)
59
- quotes = nltk_extract_quotes(text)
60
- token_count = count_tokens(text)
61
-
62
- # Save results to files
63
- with open(os.path.join(output_directory, f"{filename_prefix}_spacy_entities.txt"), "w", encoding="utf-8") as file:
64
- file.write(str(spacy_entities))
65
-
66
- with open(os.path.join(output_directory, f"{filename_prefix}_sentences.txt"), "w", encoding="utf-8") as file:
67
- file.write("\n".join(sentences))
68
-
69
- with open(os.path.join(output_directory, f"{filename_prefix}_quotes.txt"), "w", encoding="utf-8") as file:
70
- file.write("\n".join(quotes))
71
-
72
- with open(os.path.join(output_directory, f"{filename_prefix}_token_count.txt"), "w", encoding="utf-8") as file:
73
- file.write(str(token_count))
74
-
75
  def analyze_and_complete(file_path):
76
  if file_path.endswith(".pdf"):
77
  text = extract_pdf_text(file_path)
@@ -81,7 +54,6 @@ def analyze_and_complete(file_path):
81
 
82
  output_directory = "/Users/Home/Library/Mobile Documents/com~apple~CloudDocs/osa/سيناريوهات/ليالي ألف ليلة"
83
  filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
84
- process_text(text, output_directory, filename_prefix)
85
 
86
  spacy_entities = spacy_ner_analysis(text)
87
  sentences = nltk_extract_sentences(text)
@@ -100,4 +72,4 @@ interface = gr.Interface(
100
  )
101
 
102
  if __name__ == "__main__":
103
- interface.launch(share=True)
 
6
  import PyPDF2
7
  import gradio as gr
8
 
9
+ # Download and initialize required tools
10
+ spacy.cli.download("en_core_web_sm")
11
  nlp = spacy.load("en_core_web_sm")
12
  nltk.download('punkt')
13
 
 
15
  device = 0 if torch.cuda.is_available() else -1
16
  analyzer = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", device=device)
17
 
 
18
  def spacy_ner_analysis(text):
19
  doc = nlp(text)
20
  entities = [(ent.text, ent.label_) for ent in doc.ents]
 
45
  text += page.extract_text()
46
  return text
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def analyze_and_complete(file_path):
49
  if file_path.endswith(".pdf"):
50
  text = extract_pdf_text(file_path)
 
54
 
55
  output_directory = "/Users/Home/Library/Mobile Documents/com~apple~CloudDocs/osa/سيناريوهات/ليالي ألف ليلة"
56
  filename_prefix = os.path.splitext(os.path.basename(file_path))[0]
 
57
 
58
  spacy_entities = spacy_ner_analysis(text)
59
  sentences = nltk_extract_sentences(text)
 
72
  )
73
 
74
  if __name__ == "__main__":
75
+ interface.launch()