Commit a08531a by PabloVD · 1 Parent(s): fdb4410

Old version of packages

Files changed (3):
  1. app.py +1 -2
  2. requirements.txt +17 -4
  3. worker.py +13 -22
app.py CHANGED
@@ -10,8 +10,7 @@ url = 'https://camels.readthedocs.io/_/downloads/en/latest/pdf/'
 r = requests.get(url, stream=True)
 document_path = Path('metadata.pdf')
 document_path.write_bytes(r.content)
-
-worker.process_document(document_path)
+worker.process_document(str(document_path))
 
 def handle_prompt(message, history):
     bot_response = worker.process_prompt(message, history)
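
The functional change above is that process_document now receives str(document_path) rather than a pathlib.Path. A plausible motivation, assumed here since the loader code is not part of this hunk, is that the old PyPDFLoader pinned below treats file_path as a plain string:

    from pathlib import Path

    p = Path("metadata.pdf")
    # Hypothetical illustration: Path objects lack plain-string methods that
    # older loader code may call on file_path.
    # p.endswith(".pdf")     # AttributeError: 'PosixPath' object has no attribute 'endswith'
    str(p).endswith(".pdf")  # True, hence the str() cast before process_document()
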
requirements.txt CHANGED
@@ -1,4 +1,17 @@
-langchain
-langchain-community
-langchain-huggingface
-chromadb
+Flask
+Flask_Cors
+pdf2image
+pypdf
+tiktoken
+pandas==1.5
+langchain==0.0.254
+atlassian-python-api==3.36.0
+chromadb==0.3.25
+huggingface-hub==0.16.4
+torch==2.0.1
+sentence-transformers==2.2.2
+InstructorEmbedding==1.0.0
+p4python==2023.1.2454917
+lxml==4.9.2
+bs4==0.0.1
+ibm-watson-machine-learning
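
These pins roll the stack back to releases that predate the langchain-community/langchain-huggingface split and are mutually compatible; in particular, sentence-transformers==2.2.2 generally needs an older huggingface-hub such as the pinned 0.16.4. A minimal smoke test (not part of the repo) to confirm the pinned stack resolved:

    # Sanity check that the pinned versions actually installed together.
    import chromadb
    import huggingface_hub
    import langchain
    import sentence_transformers

    assert langchain.__version__ == "0.0.254"
    assert chromadb.__version__ == "0.3.25"
    assert huggingface_hub.__version__ == "0.16.4"
    assert sentence_transformers.__version__ == "2.2.2"
    print("pinned stack imports cleanly")
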
worker.py CHANGED
@@ -1,21 +1,11 @@
 import torch
 from langchain.chains import RetrievalQA
-from langchain_community.embeddings import HuggingFaceInstructEmbeddings
-from langchain_community.document_loaders import PyPDFLoader
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_huggingface import HuggingFaceEndpoint
-from sentence_transformers import SentenceTransformer # Use SentenceTransformer module to use Hugging face Model
-import pip
-
-def install(package):
-    if hasattr(pip, 'main'):
-        pip.main(['install', package])
-    else:
-        pip._internal.main(['install', package])
-
-# Temporal fix for incompatibility between langchain_huggingface and sentence-transformers<2.6
-# install("sentence-transformers==2.2.2")
+from langchain.vectorstores import Chroma
+from langchain.llms import HuggingFaceHub
+import os
 
 # Check for GPU availability and set the appropriate device for computation.
 DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -30,10 +20,10 @@ embeddings = None
 def init_llm():
     global llm_hub, embeddings
     # Set up the environment variable for HuggingFace and initialize the desired model.
-    # tokenfile = open("api_token.txt")
-    # api_token = tokenfile.readline().replace("\n","")
-    # tokenfile.close()
-    # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
+    tokenfile = open("api_token.txt")
+    api_token = tokenfile.readline().replace("\n","")
+    tokenfile.close()
+    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token
 
     # repo name for the model
     # model_id = "tiiuae/falcon-7b-instruct"
@@ -42,7 +32,8 @@ def init_llm():
     # model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
     # load the model into the HuggingFaceHub
-    llm_hub = HuggingFaceEndpoint(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
+    #llm_hub = HuggingFaceHub(repo_id=model_id, temperature=0.1, max_new_tokens=600, model_kwargs={"max_length":600})
+    llm_hub = HuggingFaceHub(repo_id=model_id, model_kwargs={"temperature": 0.1, "max_new_tokens": 600, "max_length": 600})
     llm_hub.client.api_url = 'https://api-inference.huggingface.co/models/'+model_id
     # llm_hub.invoke('foo bar')
 
@@ -50,10 +41,10 @@ def init_llm():
     embedddings_model = "sentence-transformers/multi-qa-distilbert-cos-v1"
     # embedddings_model = "sentence-transformers/all-MiniLM-L6-v2"
 
-    emb_model = SentenceTransformer(embedddings_model)
+    # emb_model = SentenceTransformer(embedddings_model)
 
     embeddings = HuggingFaceInstructEmbeddings(
-        model_name=emb_model,
+        model_name=embedddings_model,
         model_kwargs={"device": DEVICE}
     )
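
Two things worth noting about this downgrade. First, the old langchain.llms.HuggingFaceHub wrapper passes generation settings through model_kwargs, which is why temperature and max_new_tokens move into that dict in the hunk above. Second, process_document and process_prompt, which app.py calls, sit outside the changed hunks; below is a minimal sketch of how they could use worker.py's imports under the 0.0.254 API, with all names and parameters assumed rather than taken from the repo:

    conversation_retrieval_chain = None  # assumed module-level state

    def process_document(document_path):
        # Index the PDF so later prompts can be answered against it (sketch).
        global conversation_retrieval_chain
        loader = PyPDFLoader(document_path)  # the old loader expects a str path
        documents = loader.load()
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=1024, chunk_overlap=64
        ).split_documents(documents)
        db = Chroma.from_documents(chunks, embedding=embeddings)  # embeddings set by init_llm()
        conversation_retrieval_chain = RetrievalQA.from_chain_type(
            llm=llm_hub,  # llm_hub set by init_llm()
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 4}),
        )

    def process_prompt(prompt, history):
        # Answer a prompt against the indexed document (sketch).
        result = conversation_retrieval_chain({"query": prompt})
        return result["result"]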