mateoluksenberg committed on
Commit
befd71c
·
verified ·
1 Parent(s): 340ee67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -50
app.py CHANGED
@@ -91,62 +91,62 @@ def extract_pptx(path):
91
  return text
92
 
93
 
94
- # def mode_load(path):
95
- # choice = ""
96
- # file_type = path.split(".")[-1]
97
- # print(file_type)
98
- # if file_type in ["pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"]:
99
- # if file_type.endswith("pdf"):
100
- # content = extract_pdf(path)
101
- # elif file_type.endswith("docx"):
102
- # content = extract_docx(path)
103
- # elif file_type.endswith("pptx"):
104
- # content = extract_pptx(path)
105
- # else:
106
- # content = extract_text(path)
107
- # choice = "doc"
108
- # print(content[:100])
109
- # return choice, content[:5000]
110
 
111
 
112
- # elif file_type in ["png", "jpg", "jpeg", "bmp", "tiff", "webp"]:
113
- # content = Image.open(path).convert('RGB')
114
- # choice = "image"
115
- # return choice, content
116
 
117
- # else:
118
- # raise gr.Error("Oops, unsupported files.")
119
 
120
def mode_load(file_obj):
    """Classify a binary file object and extract its content.

    The first four bytes are sniffed for the ``%PDF`` signature; every other
    type is recognised from the suffix of ``file_obj.name``, compared
    case-insensitively so that names such as ``NOTES.TXT`` are accepted.

    Args:
        file_obj: A seekable binary file-like object. Non-PDF inputs must
            also expose a ``name`` attribute holding the file name.

    Returns:
        A ``(choice, content)`` tuple. ``choice`` is ``"doc"`` for PDF, DOCX,
        PPTX and plain-text inputs, and ``"image"`` for image inputs.
        ``content`` is whatever the matching extractor returns, the decoded
        UTF-8 text, or the image converted to RGB.

    Raises:
        ValueError: If the file type is unsupported or any step of the
            processing fails. The original exception is chained as the cause.
    """
    text_suffixes = (".txt", ".py", ".json", ".cpp", ".md")
    image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp")

    try:
        # Make sure the pointer is at the start of the file.
        file_obj.seek(0)

        # Detect the file type from the leading bytes when possible, then
        # rewind so the extractor sees the whole file.
        file_header = file_obj.read(4)
        file_obj.seek(0)

        if file_header.startswith(b'%PDF'):
            return "doc", extract_pdf(file_obj)

        # The name is only consulted after the PDF check, so a PDF stream
        # without a usable name is still handled.
        name = file_obj.name.lower()

        if name.endswith(".docx"):
            return "doc", extract_docx(file_obj)
        if name.endswith(".pptx"):
            return "doc", extract_pptx(file_obj)
        if name.endswith(text_suffixes):
            # Undecodable bytes are dropped rather than aborting the upload.
            return "doc", file_obj.read().decode('utf-8', errors='ignore')
        if name.endswith(image_suffixes):
            return "image", Image.open(file_obj).convert('RGB')

        raise ValueError("Unsupported file type.")

    except Exception as e:
        # Every failure is reported as ValueError with a uniform message;
        # chaining keeps the original traceback for debugging.
        raise ValueError(f"Error processing file: {e}") from e
150
 
151
 
152
  @spaces.GPU()
@@ -324,6 +324,11 @@ def simple_chat(message: dict, temperature: float = 0.8, max_length: int = 4096,
324
  conversation.append({"role": "user", "content": message['text']})
325
 
326
  input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
 
 
 
 
 
327
 
328
  generate_kwargs = dict(
329
  max_length=max_length,
 
91
  return text
92
 
93
 
94
def mode_load(path):
    """Load a file from ``path`` and classify it as a document or an image.

    The type is taken from the text after the last ``.`` in ``path`` and is
    compared case-insensitively, so ``REPORT.PDF`` and ``photo.JPG`` are
    accepted.

    Args:
        path: File path as a string.

    Returns:
        ``("doc", content[:5000])`` for pdf, docx, pptx, txt, py, json, cpp
        and md files, where ``content`` comes from the matching ``extract_*``
        helper (presumably extracted text; the helpers are defined elsewhere
        in the file). ``("image", image)`` for png, jpg, jpeg, bmp, tiff and
        webp files, where ``image`` is the picture converted to RGB.

    Raises:
        gr.Error: If the extension is not one of the supported types.
    """
    file_type = path.split(".")[-1].lower()
    print(file_type)

    if file_type in ("pdf", "txt", "py", "docx", "pptx", "json", "cpp", "md"):
        if file_type == "pdf":
            content = extract_pdf(path)
        elif file_type == "docx":
            content = extract_docx(path)
        elif file_type == "pptx":
            content = extract_pptx(path)
        else:
            content = extract_text(path)
        print(content[:100])
        # Only the first 5000 items of the extracted content are returned.
        return "doc", content[:5000]

    if file_type in ("png", "jpg", "jpeg", "bmp", "tiff", "webp"):
        # Close the underlying file once the converted copy is in memory,
        # so the handle does not linger until garbage collection.
        with Image.open(path) as img:
            content = img.convert('RGB')
        return "image", content

    raise gr.Error("Oops, unsupported files.")
119
 
120
+ # def mode_load(file_obj):
121
+ # try:
122
+ # file_obj.seek(0) # Asegúrate de que el puntero esté al inicio del archivo
123
+
124
+ # # Detecta el tipo de archivo basándote en los primeros bytes si es posible
125
+ # file_header = file_obj.read(4)
126
+ # file_obj.seek(0) # Vuelve al inicio del archivo para procesamiento completo
127
+
128
+ # if file_header.startswith(b'%PDF'):
129
+ # content = extract_pdf(file_obj)
130
+ # choice = "doc"
131
+ # elif file_obj.name.endswith(".docx"):
132
+ # content = extract_docx(file_obj)
133
+ # choice = "doc"
134
+ # elif file_obj.name.endswith(".pptx"):
135
+ # content = extract_pptx(file_obj)
136
+ # choice = "doc"
137
+ # elif file_obj.name.endswith(".txt") or file_obj.name.endswith(".py") or file_obj.name.endswith(".json") or file_obj.name.endswith(".cpp") or file_obj.name.endswith(".md"):
138
+ # content = file_obj.read().decode('utf-8', errors='ignore')
139
+ # choice = "doc"
140
+ # elif file_obj.name.endswith((".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp")):
141
+ # content = Image.open(file_obj).convert('RGB')
142
+ # choice = "image"
143
+ # else:
144
+ # raise ValueError("Unsupported file type.")
145
 
146
+ # return choice, content
147
 
148
+ # except Exception as e:
149
+ # raise ValueError(f"Error processing file: {str(e)}")
150
 
151
 
152
  @spaces.GPU()
 
324
  conversation.append({"role": "user", "content": message['text']})
325
 
326
  input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
327
+
328
+ print(" ")
329
+ print("Conv: ")
330
+ print(conversation)
331
+ print(" ")
332
 
333
  generate_kwargs = dict(
334
  max_length=max_length,