# Hugging Face Space status: Running
from fastapi import FastAPI,Request,File,UploadFile | |
from fastapi.templating import Jinja2Templates | |
from fastapi.staticfiles import StaticFiles | |
from fastapi.responses import HTMLResponse,JSONResponse | |
from fastapi.middleware.cors import CORSMiddleware | |
import pandas as pd | |
import re | |
import io | |
import base64 | |
import matplotlib.pyplot as plt | |
import torch | |
import tensorflow | |
from transformers import pipeline,VisionEncoderDecoderModel,ViTImageProcessor,AutoTokenizer | |
from transformers import BartForConditionalGeneration, BartTokenizer | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import fitz | |
from docx import Document | |
from pptx import Presentation | |
import seaborn as sns | |
import PIL.Image as Image | |
import fitz | |
# Application object that every middleware, mount and route below attaches to.
app = FastAPI()

# Cross-origin policy: every origin, HTTP method and request header is
# accepted, and credentialed requests are allowed.
# NOTE(review): FastAPI's CORS guide advises against combining wildcard
# values with allow_credentials=True — confirm this is intentional before
# exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer | |
# Load the image-captioning checkpoint at import time. A failure is only
# printed, so the application still starts; any name assigned after the
# failing line stays undefined.
try:
    # Placeholder value: the one-call "image-to-text" pipeline is disabled and
    # the model, image processor and tokenizer are loaded individually below.
    interpreter = 1  # pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
    interpreter_model = VisionEncoderDecoderModel.from_pretrained(
        "nlpconnect/vit-gpt2-image-captioning"
    )
    interpreter_processor = ViTImageProcessor.from_pretrained(
        "nlpconnect/vit-gpt2-image-captioning"
    )
    interpreter_tokenizer = AutoTokenizer.from_pretrained(
        "nlpconnect/vit-gpt2-image-captioning"
    )
except Exception as load_error:
    print("[ERROR] Can't load nlpconnect/vit-gpt2-image-captioning")
    print(str(load_error))
# Build the summarization pipeline at import time. A failure is only printed,
# in which case `summarizer` is left undefined.
try:
    summarizer = pipeline(
        "summarization",
        model="facebook/bart-large-cnn",
    )
except Exception as load_error:
    print("[ERROR] Can't load facebook/bart-large-cnn ")
    print(str(load_error))
#try: | |
# summarizer_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn") | |
#except OSError as e: | |
# print(f"[INFO] PyTorch weights not found. Falling back to TensorFlow weights.\n{e}") | |
# summarizer_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn", from_tf=True) | |
#summarizer_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn") | |
# Build the text-generation pipeline used to write plotting code. Device
# placement is delegated to the library via device_map="auto". A failure is
# only printed, in which case `generator` is left undefined.
try:
    generator = pipeline(
        "text-generation",
        model="deepseek-ai/deepseek-coder-1.3b-instruct",
        device_map="auto",
    )
except Exception as load_error:
    print("[ERROR] Can't load deepseek-ai/deepseek-coder-1.3b-instruct ")
    print(str(load_error))
#try: | |
# generator_model = AutoModelForCausalLM.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True) | |
# tokengenerator_modelizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True) | |
#except Exception as exp : | |
# print("[ERROR] Can't load deepseek-ai/deepseek-coder-1.3b-instruct ") | |
# print(str(exp)) | |
# Serve the contents of the local "static" directory under the /static URL
# prefix; the mount is registered under the name "static".
static_files = StaticFiles(directory="static")
app.mount("/static", static_files, name="static")

# Template engine that renders the HTML pages found in the "templates" directory.
templates = Jinja2Templates(directory="templates")
def index(req: Request):
    """Render the ``index.html`` template for the incoming request.

    NOTE(review): no route decorator is visible on this function, and other
    functions in this file reuse the name ``index`` — confirm how this page
    is registered with ``app``.
    """
    page_context = {'request': req}
    return templates.TemplateResponse('index.html', page_context)
def index(req: Request):
    """Render the ``Summarization.html`` template for the incoming request.

    NOTE(review): no route decorator is visible on this function, and other
    functions in this file reuse the name ``index`` — confirm how this page
    is registered with ``app``.
    """
    page_context = {'request': req}
    return templates.TemplateResponse('Summarization.html', page_context)
def index(req: Request):
    """Render the ``DataVisualisation.html`` template for the incoming request.

    NOTE(review): no route decorator is visible on this function, and other
    functions in this file reuse the name ``index`` — confirm how this page
    is registered with ``app``.
    """
    page_context = {'request': req}
    return templates.TemplateResponse('DataVisualisation.html', page_context)
def index(req: Request):
    """Render the ``ImageInterpretation.html`` template for the incoming request.

    NOTE(review): no route decorator is visible on this function, and other
    functions in this file reuse the name ``index`` — confirm how this page
    is registered with ``app``.
    """
    page_context = {'request': req}
    return templates.TemplateResponse('ImageInterpretation.html', page_context)
# The leading "@" matters: a bare `app.post("/caption2")` statement builds the
# route decorator and throws it away, so the handler would never be registered.
@app.post("/caption2")
async def generate_caption(file: UploadFile = File(...)):
    """Generate a caption for an uploaded image.

    Args:
        file: The uploaded image; its bytes are read fully into memory.

    Returns:
        A dict of the form ``{"caption": <generated text>}``.
    """
    contents = await file.read()
    # Force three-channel RGB so images with an alpha channel or a palette
    # are handed to the image processor in a uniform format.
    image = Image.open(io.BytesIO(contents)).convert("RGB")

    # Generate the caption: preprocess the image, run beam-search decoding
    # and turn the resulting token ids back into text.
    pixel_values = interpreter_processor(images=image, return_tensors="pt").pixel_values
    output_ids = interpreter_model.generate(pixel_values, max_length=16, num_beams=4)
    caption_text = interpreter_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"caption": caption_text}
def caption(file: UploadFile = File(...)):
    """Generate a caption for an uploaded PNG/JPEG image.

    The module-level ``interpreter`` is an integer placeholder rather than a
    pipeline, so calling it cannot work; the caption is produced with the
    separately loaded model, image processor and tokenizer instead.

    Args:
        file: The uploaded image. Its filename extension must be one of
            ``png``, ``jpg`` or ``jpeg`` (matched case-insensitively).

    Returns:
        ``{"caption": <generated text>}`` on success, or
        ``{"error": "Unsupported file type"}`` for any other extension.

    NOTE(review): no route decorator is visible on this function — confirm
    how it is registered with ``app``.
    """
    supported_extensions = {"png", "jpg", "jpeg"}
    # Lower-case the extension so "PHOTO.JPG" is accepted like "photo.jpg".
    extension = (file.filename or "").rsplit(".", 1)[-1].lower()
    if extension not in supported_extensions:
        return {"error": "Unsupported file type"}

    # Force three-channel RGB so PNGs with an alpha channel are handled.
    image = Image.open(file.file).convert("RGB")
    pixel_values = interpreter_processor(images=image, return_tensors="pt").pixel_values
    output_ids = interpreter_model.generate(pixel_values, max_length=16, num_beams=4)
    caption_text = interpreter_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"caption": caption_text}
def summerzation(file: UploadFile = File(...)):
    """Summarize the text of an uploaded PDF, DOCX, PPTX or XLSX file.

    The extracted text is split into fixed-size character chunks, each chunk
    is summarized on its own, and the partial summaries are joined with a
    single space.

    Args:
        file: The uploaded document; the extractor is chosen from the
            filename extension (matched case-insensitively).

    Returns:
        ``{"summary": <text>}`` on success, or
        ``{"error": "Unsupported file type"}`` for any other extension.

    NOTE(review): no route decorator is visible on this function — confirm
    how it is registered with ``app``.
    """
    extension = (file.filename or "").rsplit(".", 1)[-1].lower()
    if extension == "pdf":
        text = get_text_from_PDF(file.file)
    elif extension == "docx":
        text = get_text_from_DOC(file.file)
    elif extension == "pptx":
        text = get_text_from_PPT(file.file)
    elif extension == "xlsx":
        text = get_text_from_EXCEL(file.file)
    else:
        return {"error": "Unsupported file type"}

    chunk_size = 1024  # characters handed to the summarizer per call
    summaries = []
    for start in range(0, len(text), chunk_size):
        # Summarize only the current slice; passing the whole text on every
        # iteration would repeat the same summary once per chunk.
        chunk = text[start:start + chunk_size]
        if not chunk.strip():
            continue
        summaries.append(
            summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
        )
    return {"summary": " ".join(summaries)}
def plot(prompt: str, file: UploadFile = File(...)):
    """Generate a chart from an uploaded Excel file according to a text prompt.

    The workbook is loaded into a DataFrame, a code-generation model is asked
    to write plotting code for it, the first fenced ``python`` block of the
    model output is executed, and the current matplotlib figure is returned
    as a base64-encoded PNG data URI.

    Args:
        prompt: Free-text description of the chart to draw.
        file: The uploaded workbook (``xlsx`` or ``xls``, matched
            case-insensitively).

    Returns:
        ``{"plot": "data:image/png;base64,..."}`` on success, otherwise
        ``{"error": <message>}``.

    NOTE(review): no route decorator is visible on this function — confirm
    how it is registered with ``app``.
    """
    try:
        supported_extensions = {"xlsx", "xls"}
        extension = (file.filename or "").rsplit(".", 1)[-1].lower()
        if extension not in supported_extensions:
            return {"error": "Unsupported file type"}
        df = pd.read_excel(file.file)
        message = f"""
You are a helpful assistant that helps users write Python code.
## Requirements:
-you will be given a task and you will write the code to solve the task.
-you have a dataset called **df** contains the following information:
df.columns:{df.columns.to_list()}
df.dtypes:{df.dtypes.to_dict()}
-you have to write the code to solve the task using the dataset df.
-you can use pandas to manipulate the dataframe.
-you can use matplotlib to plot the data.
-you can use seaborn to plot the data.
-don't use print or input statements in the code.
-don't use any other libraries except pandas, matplotlib, seaborn.
-don't use any other functions except the ones provided in the libraries.
-don't write the code for the dataframe creation.
-exclude plt.show() from the code.
-you have to write the code in a markdown code block.
-make sure that the type of the chart is compatible with the dtypes of the columns
-use only the column specified in the task.
-you have to extract the column names and the plot type from the prompt bellow and use them in the code.
-if the user task is not clear or there is an error like the column names are not in the dataframe, raise an
error.
##Prompt: {prompt}.
"""
        output = generator(message, max_length=1000)
        match = re.search(r'```python(.*?)```', output[0]["generated_text"], re.DOTALL)
        if not match:
            return {"error": "Can't generate the plot"}
        code = match.group(1).replace("plt.show()\n", "")
        safe_globals = {
            "plt": plt,
            "sns": sns,
            "pd": pd,
            "df": df,
        }
        # pyplot keeps its figures in module-level state; start from a clean
        # slate so artists drawn for an earlier request cannot show up here.
        plt.close("all")
        try:
            # SECURITY: this executes model-generated code that is steered by
            # the user-supplied prompt. Restricting the globals dict does not
            # sandbox it (builtins remain reachable) — run only in an isolated
            # environment.
            exec(code, safe_globals)
            buf = io.BytesIO()
            plt.savefig(buf, format='png')
            base64_image = base64.b64encode(buf.getvalue()).decode('utf-8')
            return {"plot": f"data:image/png;base64,{base64_image}"}
        except Exception as e:
            return {"error": str(e)}
        finally:
            # Release the figure(s) whether or not rendering succeeded.
            plt.close("all")
    except Exception as exp:
        return {"error": "Internel Server Error:" + str(exp)}
def get_text_from_PDF(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: Either a binary file-like object (anything with ``read()``,
            such as an upload's underlying file) or a filesystem path.

    Returns:
        The text of all pages, in page order, as one string.
    """
    if hasattr(file, "read"):
        # An open file object is not a valid filename for fitz.open, so the
        # raw bytes are passed through the `stream` argument instead.
        doc = fitz.open(stream=file.read(), filetype="pdf")
    else:
        doc = fitz.open(file, filetype="pdf")
    try:
        return "".join(page.get_text() for page in doc)
    finally:
        # Always release the document, even if text extraction fails.
        doc.close()
def get_text_from_PPT(file):
    """Return the text of every text-bearing shape in a PowerPoint file.

    Args:
        file: A path or binary file-like object that ``Presentation`` accepts.

    Returns:
        The shape texts in slide order, one per line. Joining with a newline
        keeps the last word of one shape from fusing with the first word of
        the next.
    """
    prs = Presentation(file)
    return "\n".join(
        shape.text
        for slide in prs.slides
        for shape in slide.shapes
        if hasattr(shape, "text")  # pictures, connectors etc. carry no text
    )
def get_text_from_DOC(file):
    """Return the text of every paragraph in a Word (.docx) document.

    Args:
        file: A path or binary file-like object that ``Document`` accepts.

    Returns:
        The paragraph texts in document order, one per line. Joining with a
        newline keeps the end of one paragraph from fusing with the start of
        the next.
    """
    doc = Document(file)
    return "\n".join(paragraph.text for paragraph in doc.paragraphs)
def get_text_from_EXCEL(file):
    """Return an Excel workbook rendered as plain text.

    The input is read with ``pandas.read_excel`` using its default arguments,
    and the resulting DataFrame is rendered with ``DataFrame.to_string``.

    Args:
        file: A path or binary file-like object that ``pandas.read_excel``
            accepts.

    Returns:
        The string rendering of the loaded DataFrame.
    """
    return pd.read_excel(file).to_string()