abdoelsayed commited on
Commit
55e3d6c
·
1 Parent(s): 2dbfde8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +75 -1
README.md CHANGED
@@ -1 +1,75 @@
1
- "hello"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: llama2
3
+ language:
4
+ - en
5
+ - ar
6
+ metrics:
7
+ - accuracy
8
+ - f1
9
+ ---
10
+ # llama-7b-v1-Receipt-Key-Extraction
11
+
12
+ llama-7b-v1-Receipt-Key-Extraction is a 7-billion-parameter model based on LLaMA v1.
13
+
14
+
15
+ ## Uses
16
+
17
+ The model is intended for research use only. It extracts key information from receipt line items written in English or Arabic.
18
+
19
+ ## How to Get Started with the Model
20
+
21
+ Use the code below to get started with the model.
22
+
23
+ ```python
24
# pip install -q transformers

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

checkpoint = "abdoelsayed/llama-7b-v1-Receipt-Key-Extraction"
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(
    checkpoint,
    model_max_length=512,
    padding_side="right",
    use_fast=False,
)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)


def generate_response(
    instruction,
    input_text,
    max_new_tokens=100,
    temperature=0.1,
    num_beams=4,
    top_k=40,
    top_p=0.75,
) -> str:
    """Generate the model's answer for one instruction/input pair.

    Builds an instruction-style prompt, runs ``model.generate`` on it and
    returns only the text that follows the final ``### Response:`` marker.

    Args:
        instruction: Task description inserted under ``### Instruction:``.
        input_text: Text to process, inserted under ``### Input:``.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Forwarded to ``GenerationConfig``.
        num_beams: Forwarded to ``GenerationConfig``.
        top_k: Forwarded to ``GenerationConfig``.
        top_p: Forwarded to ``GenerationConfig``. The original snippet used
            this name without defining it; the 0.75 default is an assumption
            -- TODO confirm the value the authors intended.

    Returns:
        The decoded response with the ``</s>`` marker removed and surrounding
        whitespace stripped.
    """
    prompt = f"Below is an instruction that describes a task, paired with an input that provides further context.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:"
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    # NOTE(review): do_sample is not set here, so temperature/top_k/top_p may
    # have no effect on the beam search -- confirm whether sampling was intended.
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
    )
    with torch.no_grad():
        # Without return_dict_in_generate=True, generate() returns the token-id
        # tensor directly, so there is no `.sequences` attribute to read.
        output_ids = model.generate(
            input_ids,
            generation_config=generation_config,
            max_new_tokens=max_new_tokens,
        )
    decoded = tokenizer.decode(output_ids[0])
    # Remove the end-of-sequence marker before stripping so that whitespace
    # preceding "</s>" does not survive in the result.
    return decoded.split("### Response:")[-1].replace("</s>", "").strip()


instruction = "Extract the class, Brand, Weight, Number of units, Size of units, Price, T.Price, Pack, Unit from the following sentence"
input_text = "Americana Okra zero 400 gm"

response = generate_response(instruction, input_text)
print(response)
57
+
58
+ ```
59
+
60
+
61
+
62
+ ## How to Cite
63
+
64
+ Please cite this model using this format.
65
+
66
+ ```bibtex
67
+ @misc{abdallah2023amurd,
68
+ title={AMuRD: Annotated Multilingual Receipts Dataset for Cross-lingual Key Information Extraction and Classification},
69
+ author={Abdelrahman Abdallah and Mahmoud Abdalla and Mohamed Elkasaby and Yasser Elbendary and Adam Jatowt},
70
+ year={2023},
71
+ eprint={2309.09800},
72
+ archivePrefix={arXiv},
73
+ primaryClass={cs.CL}
74
+ }
75
+ ```