ThomasBlumet committed
Commit 38d893e · 2 Parent(s): 1dd0e8c 10a6a3a

Merge branch 'main' of https://huggingface.co/spaces/TeLLMyStory/story-generation-docker

Files changed (1):
  app.py +11 -11
app.py CHANGED
@@ -14,23 +14,23 @@ model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto",trust_
 
 #transfer model on GPU
 #model.to("cuda")
-# pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
-#                 max_new_tokens=512,
-#                 do_sample=True,
-#                 temperature=0.7,
-#                 top_p=0.95,
-#                 top_k=40,
-#                 repetition_penalty=1.1)
+pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.95,
+                top_k=40,
+                repetition_penalty=1.1)
 
 # Generate text using the model and tokenizer
 #@spaces.GPU(duration=60)
 def generate_text(input_text):
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
+    #input_ids = tokenizer.encode(input_text, return_tensors="pt")#.to("cuda")
     #attention_mask = input_ids.ne(tokenizer.pad_token_id).long()
-    output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
+    #output = model.generate(input_ids, max_new_tokens=512, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)# attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
     #output = model.generate(input_ids) #, attention_mask=attention_mask, max_length=100, num_return_sequences=1, no_repeat_ngram_size=2, top_k=50, top_p=0.95, temperature=0.7, do_sample=True)
-    return tokenizer.decode(output[0])
-    #return pipe(input_text)[0]["generated_text"]
+    #return tokenizer.decode(output[0])
+    return pipe(input_text)[0]["generated_text"]
 
 interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",title="TeLLMyStory",description="Enter your story idea and the model will generate the story based on it.")
 interface.launch()
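
In effect, the merge switches generation from the hand-rolled tokenizer.encode / model.generate path to a transformers text-generation pipeline, which bundles tokenization, sampling, and decoding in one call. Below is a minimal sketch of what app.py looks like after the merge; the imports and the model_name value do not appear in this hunk, so both are assumptions, not the Space's verbatim code.

# Minimal sketch of app.py after the merge (assumed imports; "gpt2" is a
# placeholder for the Space's real model id, which this hunk does not show).
import gradio as gr
from transformers import AutoTokenizer, pipeline

model_name = "gpt2"  # placeholder model id

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sampling parameters copied from the merged diff. Because model=model_name
# is a string, the pipeline loads the model weights itself.
pipe = pipeline("text-generation", model=model_name, tokenizer=tokenizer,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.95,
                top_k=40,
                repetition_penalty=1.1)

def generate_text(input_text):
    # The pipeline returns a list of dicts, each with a "generated_text" key.
    return pipe(input_text)[0]["generated_text"]

interface = gr.Interface(fn=generate_text, inputs="text", outputs="text",
                         title="TeLLMyStory",
                         description="Enter your story idea and the model will generate the story based on it.")
interface.launch()

One side effect worth noting: since the pipeline is given the model name rather than the model object, it loads its own copy of the weights, so the AutoModelForCausalLM.from_pretrained(...) call at line 14 of the real file now loads the model a second time and its result goes unused. Passing the existing model object (model=model) to pipeline would avoid the duplicate load.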