ThomasBlumet committed
Commit 38d893e · 2 Parent(s): 1dd0e8c 10a6a3a

Merge branch 'main' of https://huggingface.co/spaces/TeLLMyStory/story-generation-docker

Files changed (1):
  app.py +11 -11
app.py CHANGED
@@ -14,23 +14,23 @@ model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_
 
 #transfer model on GPU
 #model.to("cuda")
-# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
-#                 max_new_tokens=512,
-#                 do_sample=True,
-#                 temperature=0.7,
-#                 top_p=0.95,
-#                 top_k=40,
-#                 repetition_penalty=1.1)
+pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95,
+                top_k=40,
+                repetition_penalty=1.1)
 
 # Generate text using the model and tokenizer
 #@spaces.GPU(duration=60)
 def generate_text(input_text):
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
+    #input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
     #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
-    output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
+    #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
-    return tokenizer.decode(output[0])
-    #return pipe(input_text)[0]["generated_text"]
+    #return tokenizer.decode(output[0])
+    return pipe(input_text)[0]["generated_text"]
 
 interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
 interface.launch()
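
In effect, the merge switches generation from the hand-rolled tokenizer.encode / model.generate path to a transformers text-generation pipeline, which bundles tokenization, sampling, and decoding in one call. Below is a minimal sketch of what app.py looks like after the merge; the imports and the model_name value do not appear in this hunk, so both are assumptions, not the Space's verbatim code.

# Minimal sketch of app.py after the merge (assumed imports; "gpt2" is a
# placeholder for the Space's real model id, which this hunk does not show).
import gradio as gr
from transformers import AutoTokenizer, pipeline

model_name = "gpt2"  # placeholder model id

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sampling parameters copied from the merged diff. Because model=model_name
# is a string, the pipeline loads the model weights itself.
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                top_k=40,
                repetition_penalty=1.1)

def generate_text(input_text):
    # The pipeline returns a list of dicts, each with a "generated_text" key.
    return pipe(input_text)[0]["generated_text"]

interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
                         title="TeLLMyStory",
                         description="Enter your story idea and the model will generate the story based on it.")
interface.launch()

One side effect worth noting: since the pipeline is given the model name rather than the model object, it loads its own copy of the weights, so the AutoModelForCausalLM.from_pretrained(...) call at line 14 of the real file now loads the model a second time and its result goes unused. Passing the existing model object (model=model) to pipeline would avoid the duplicate load.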