ravi259 committed on
Commit
c483373
·
1 Parent(s): 5e279a6

final baserag

Browse files
__pycache__/htmlTemplates.cpython-310.pyc ADDED
Binary file (1.01 kB). View file
 
app.py CHANGED
@@ -1,49 +1,14 @@
1
- import easyocr as ocr #OCR
2
  import streamlit as st #Web App
3
- from PIL import Image #Image Processing
4
  import numpy as np #Image Processing
5
-
6
- # To analyze the PDF layout and extract text
7
- from pdfminer.high_level import extract_pages, extract_text
8
- from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure
9
- # To extract text from tables in PDF
10
- import pdfplumber
11
- # To extract the images from the PDFs
12
- from PIL import Image
13
- from pdf2image import convert_from_path
14
-
15
- import streamlit as st
16
  import pandas as pd
17
 
18
- import gradio as gr
19
  import time
20
- from PyPDF2 import PdfReader
21
- import easyocr as ocr #OCR
22
- import streamlit as st #Web App
23
- from PIL import Image #Image Processing
24
- import numpy as np #Image Processing
25
- # To read the PDF
26
- import PyPDF2
27
- # To analyze the PDF layout and extract text
28
- from pdfminer.high_level import extract_pages, extract_text
29
- from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure
30
- # To extract text from tables in PDF
31
- import pdfplumber
32
- # To extract the images from the PDFs
33
- from PIL import Image
34
- from pdf2image import convert_from_path
35
- # To perform OCR to extract text from images
36
- import pytesseract
37
- # To remove the additional created files
38
  import os
39
  import tiktoken
40
- import streamlit as st
41
- import pandas as pd
42
  from io import StringIO
43
  import time
44
  import json
45
- import openai
46
-
47
 
48
  import requests
49
  from langchain_community.document_loaders import TextLoader
@@ -62,17 +27,6 @@ from langchain.schema.output_parser import StrOutputParser
62
  from langchain.memory import ConversationBufferMemory
63
  from langchain.chains import ConversationChain
64
 
65
- from datasets import Dataset
66
-
67
- from ragas import evaluate
68
- from ragas.metrics import (
69
- faithfulness,
70
- answer_relevancy,
71
- context_recall,
72
- context_precision,
73
- )
74
-
75
- import os
76
  from dotenv import load_dotenv
77
  from htmlTemplates import bot_template, user_template, css
78
 
 
 
1
  import streamlit as st #Web App
 
2
  import numpy as np #Image Processing
 
 
 
 
 
 
 
 
 
 
 
3
  import pandas as pd
4
 
 
5
  import time
6
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  import os
8
  import tiktoken
 
 
9
  from io import StringIO
10
  import time
11
  import json
 
 
12
 
13
  import requests
14
  from langchain_community.document_loaders import TextLoader
 
27
  from langchain.memory import ConversationBufferMemory
28
  from langchain.chains import ConversationChain
29
 
 
 
 
 
 
 
 
 
 
 
 
30
  from dotenv import load_dotenv
31
  from htmlTemplates import bot_template, user_template, css
32
 
requirements.txt CHANGED
@@ -1,33 +1,51 @@
1
  aiohttp==3.9.3
2
  aiosignal==1.3.1
 
3
  annotated-types==0.6.0
4
  anyio==4.3.0
5
  async-timeout==4.0.3
6
  attrs==23.2.0
 
 
7
  certifi==2024.2.2
8
  cffi==1.16.0
9
  charset-normalizer==3.3.2
 
10
  cryptography==42.0.5
11
  dataclasses-json==0.6.4
 
 
12
  distro==1.9.0
13
  exceptiongroup==1.2.0
14
  faiss-cpu==1.8.0
 
15
  frozenlist==1.4.1
 
 
 
16
  greenlet==3.0.3
17
  h11==0.14.0
18
  httpcore==1.0.4
19
  httpx==0.27.0
 
20
  idna==3.6
 
21
  jsonpatch==1.33
22
  jsonpointer==2.4
 
 
23
  langchain==0.1.13
24
  langchain-community==0.0.29
25
  langchain-core==0.1.33
26
  langchain-openai==0.1.1
27
  langchain-text-splitters==0.0.1
28
  langsmith==0.1.31
 
 
29
  marshmallow==3.21.1
 
30
  multidict==6.0.5
 
31
  mypy-extensions==1.0.0
32
  numpy==1.26.4
33
  openai==1.14.2
@@ -38,9 +56,14 @@ pdf2image==1.17.0
38
  pdfminer.six==20231228
39
  pdfplumber==0.11.0
40
  pillow==10.2.0
 
 
 
41
  pycparser==2.21
42
  pydantic==2.6.4
43
  pydantic_core==2.16.3
 
 
44
  PyPDF2==3.0.1
45
  pypdfium2==4.28.0
46
  pytesseract==0.3.10
@@ -48,16 +71,25 @@ python-dateutil==2.9.0.post0
48
  python-dotenv==1.0.1
49
  pytz==2024.1
50
  PyYAML==6.0.1
 
51
  regex==2023.12.25
52
  requests==2.31.0
 
 
53
  six==1.16.0
 
54
  sniffio==1.3.1
55
  SQLAlchemy==2.0.28
 
56
  tenacity==8.2.3
57
  tiktoken==0.6.0
 
 
 
58
  tqdm==4.66.2
59
  typing-inspect==0.9.0
60
  typing_extensions==4.10.0
61
  tzdata==2024.1
62
  urllib3==2.2.1
 
63
  yarl==1.9.4
 
1
  aiohttp==3.9.3
2
  aiosignal==1.3.1
3
+ altair==5.2.0
4
  annotated-types==0.6.0
5
  anyio==4.3.0
6
  async-timeout==4.0.3
7
  attrs==23.2.0
8
+ blinker==1.7.0
9
+ cachetools==5.3.3
10
  certifi==2024.2.2
11
  cffi==1.16.0
12
  charset-normalizer==3.3.2
13
+ click==8.1.7
14
  cryptography==42.0.5
15
  dataclasses-json==0.6.4
16
+ datasets==2.18.0
17
+ dill==0.3.8
18
  distro==1.9.0
19
  exceptiongroup==1.2.0
20
  faiss-cpu==1.8.0
21
+ filelock==3.13.1
22
  frozenlist==1.4.1
23
+ fsspec==2024.2.0
24
+ gitdb==4.0.11
25
+ GitPython==3.1.42
26
  greenlet==3.0.3
27
  h11==0.14.0
28
  httpcore==1.0.4
29
  httpx==0.27.0
30
+ huggingface-hub==0.21.4
31
  idna==3.6
32
+ Jinja2==3.1.3
33
  jsonpatch==1.33
34
  jsonpointer==2.4
35
+ jsonschema==4.21.1
36
+ jsonschema-specifications==2023.12.1
37
  langchain==0.1.13
38
  langchain-community==0.0.29
39
  langchain-core==0.1.33
40
  langchain-openai==0.1.1
41
  langchain-text-splitters==0.0.1
42
  langsmith==0.1.31
43
+ markdown-it-py==3.0.0
44
+ MarkupSafe==2.1.5
45
  marshmallow==3.21.1
46
+ mdurl==0.1.2
47
  multidict==6.0.5
48
+ multiprocess==0.70.16
49
  mypy-extensions==1.0.0
50
  numpy==1.26.4
51
  openai==1.14.2
 
56
  pdfminer.six==20231228
57
  pdfplumber==0.11.0
58
  pillow==10.2.0
59
+ protobuf==4.25.3
60
+ pyarrow==15.0.2
61
+ pyarrow-hotfix==0.6
62
  pycparser==2.21
63
  pydantic==2.6.4
64
  pydantic_core==2.16.3
65
+ pydeck==0.8.1b0
66
+ Pygments==2.17.2
67
  PyPDF2==3.0.1
68
  pypdfium2==4.28.0
69
  pytesseract==0.3.10
 
71
  python-dotenv==1.0.1
72
  pytz==2024.1
73
  PyYAML==6.0.1
74
+ referencing==0.34.0
75
  regex==2023.12.25
76
  requests==2.31.0
77
+ rich==13.7.1
78
+ rpds-py==0.18.0
79
  six==1.16.0
80
+ smmap==5.0.1
81
  sniffio==1.3.1
82
  SQLAlchemy==2.0.28
83
+ streamlit==1.32.2
84
  tenacity==8.2.3
85
  tiktoken==0.6.0
86
+ toml==0.10.2
87
+ toolz==0.12.1
88
+ tornado==6.4
89
  tqdm==4.66.2
90
  typing-inspect==0.9.0
91
  typing_extensions==4.10.0
92
  tzdata==2024.1
93
  urllib3==2.2.1
94
+ xxhash==3.4.1
95
  yarl==1.9.4