import requests
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
img = "input_data/ocr_input/japan1.jpg"
# Sample Korean text (song-lyric style) to extract vocabulary from; roughly:
# "Is it wrong that I'm clumsy at expressing myself? You're a warm woman in a cold city.
#  Can't I just say I like you? I want to say it all honestly."
text = "표현이 서툰 것도 잘못인가요. 넌 차가운 도시의 따뜻한 여잔데. 그냥 좋아한단 말도 안 되는가요. 솔직하게 다 말하고 싶어요"
model_id = "deepseek-ai/deepseek-llm-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
def text_inference(text, language):
    system_prompt = (
        f"Given the following {language} text, extract all words in their base (dictionary) form, including verbs, adjectives, nouns, and particles. "
        "Remove all duplicates. Return the base form words as a comma-separated list, and nothing else."
    )
    user_prompt = f"{system_prompt}\n\nText:\n{text}"
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_prompt}],
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(input_ids, max_new_tokens=256)
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Parse the response: the word list is expected on the last line, comma-separated
    last_line = output_text.strip().split("\n")[-1]
    words = [w.strip() for w in last_line.split(",") if w.strip()]
    return words
def ocr_inference(img, lang):
    ocr = PaddleOCR(use_angle_cls=True, lang=lang, use_gpu=False)
    result = ocr.ocr(img, cls=True)[0]
    # image, boxes, and scores are only needed if the detections are visualized with draw_ocr
    image = Image.open(img).convert("RGB")
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    return txts
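
# Usage sketch for ocr_inference (not exercised in the original flow; assumes the
# sample image above exists and that "japan" is the PaddleOCR code for Japanese):
# japanese_lines = ocr_inference(img, "japan")
# print("OCR lines:", japanese_lines)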
def make_flashcards(words, language):
    system_prompt = (
        f"Given the following {language} words, define each word and create an example sentence using it, with an explanation. "
        "Do this for every word. Respond with the word, the definition, the example sentence, and the translation of the example sentence."
    )
    user_prompt = f"{system_prompt}\n\nWords:\n{', '.join(words)}"
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_prompt}],
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(input_ids, max_new_tokens=256)
    output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Parse the response: take the last line and split on the colons separating the fields
    last_line = output_text.strip().split("\n")[-1]
    output = [w.strip() for w in last_line.split(":") if w.strip()]
    return output
words = text_inference(text, "korean")
print("extracted words:", words)
print("flashcard output:", make_flashcards(words, "korean"))