skaltenp committed
Commit 03281f9 · verified · 1 Parent(s): 2347b5f

Create app.py

Files changed (1): app.py (+70, -0)
app.py ADDED
import gradio as gr
import os
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, BitsAndBytesConfig
from datasets import load_dataset
from huggingface_hub import login

# Authenticate with the Hugging Face Hub; HF_TOKEN is read from the environment.
login(token=os.environ.get("HF_TOKEN", None))

model_name = "skaltenp/Meta-Llama-3-8B-sepsis_cases-199900595"

# Optional 4-bit quantized loading for GPU deployments, kept commented out here:
"""
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
base_model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="cuda:0",
    trust_remote_code=True,
    #token=True,
)
"""
# Load the fine-tuned PEFT adapter and its tokenizer.
model = AutoPeftModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

dataset_name = "skaltenp/sepsis_cases"

def prepare_sample_text(example, tokenizer, remove_indent=False, start=None, end=None):
    """Prepare the text from a sample of the dataset."""
    thread = example["event_list"]
    # Explicit None checks so start=0 (used by the examples below) still triggers slicing.
    if start is not None and end is not None:
        thread = thread[start:end]
    text = ""
    for message in thread:
        text += f"{message}{tokenizer.eos_token}\n"
    return text

dataset = load_dataset(
    dataset_name,
    token=True,
    download_mode="force_redownload",
)
# Recreate the splits used during fine-tuning: 80/20 train/test,
# then 80/20 train/validation, with a fixed seed.
train_data = dataset["train"].train_test_split(train_size=0.8, shuffle=True, seed=199900595)
test_data = train_data["test"]
train_data = train_data["train"].train_test_split(train_size=0.8, shuffle=True, seed=199900595)
valid_data = train_data["test"]
train_data = train_data["train"]

def generate_answer(question):
    #inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
    inputs = tokenizer(question, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=250, num_return_sequences=1, do_sample=True)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

iface = gr.Interface(
    fn=generate_answer,
    inputs="text",
    outputs="text",
    title="Straight Outta Logs",
    examples=[
        prepare_sample_text(test_data[0], tokenizer, start=0, end=3),
        prepare_sample_text(test_data[4], tokenizer, start=0, end=5),
        prepare_sample_text(test_data[50], tokenizer, start=0, end=1),
    ],
    description="Use the examples or paste your own Sepsis Case example",
)

iface.launch(share=True)  # Deploy the interface