Spaces:

nicolasni1977
/

Talking_Head_Generator

Runtime error

File size: 4,701 Bytes

efe586f

from config import *
from speech import generate_speech
# from image import generate_image

from animate_face import animate_face
from lips import modify_lips
from improve import improve, vid2frames, restore_frames

import humanize
import datetime as dt
from argparse import ArgumentParser

import shutil
import os
import glob


# message = """Over the holiday season, capturing photos and videos of the festivities with family and friends 
# is an important activity for many. The iPhone has a suite of camera features that can significantly elevate 
# the quality and creativity of your holiday photos and videos."""
# message = """Happy 2024! Here's to a year where your coffee is as strong as your WiFi signal, your resolutions last longer than your phone battery. Let's make 2024 the year we all perfectly time our microwaves to stop at 1 second, just to feel like defusing a bomb. Happy New Year!!"""

def Persona(message, voice, speed, imgfile, driverfile):
	"""Run the talking-head pipeline: speech, face animation, lip sync, optional upscaling.

	Working files go to ``temp/<path_id>/`` and videos to ``results/``.
	Command-line flags (``--improve``, ``--skipgen``, ``--path_id``,
	``--speech``, ``--image``) are read from ``sys.argv`` and tune the run.

	Args:
		message: Text handed to ``generate_speech``.
		voice: Voice selector handed to ``generate_speech``.
		speed: Speed setting handed to ``generate_speech``.
		imgfile: Avatar image path; also the default for ``--image``.
		driverfile: Driver video handed to ``animate_face``.

	Returns:
		Path of the lip-synced video, ``results/<path_id>_small.mp4``. This is
		returned even when ``--improve`` also writes ``<path_id>_large.mp4``.
	"""
	# Local import so the function does not rely on `time` leaking into the
	# module namespace through `from config import *`.
	import time

	def elapsed(since):
		# Human-readable duration since the `since` timestamp.
		return humanize.naturaldelta(dt.timedelta(seconds=int(time.time() - since)))

	parser = ArgumentParser()
	parser.add_argument("--improve", action="store_true", help="use Real ESRGAN to improve the video")
	parser.add_argument("--skipgen", action="store_true", help="improve the video only")
	parser.add_argument("--path_id", default=str(int(time.time())), help="set the path id to use")
	parser.add_argument("--speech", default=audiofile, help="path to WAV speech file")
	parser.add_argument("--image", default=imgfile, help="path to avatar file")
	# This function is called as a library entry point, so the process may have
	# been started with arguments meant for the host application. Ignore the
	# ones we do not know instead of exiting with a usage error.
	args, _unknown = parser.parse_known_args()
	tstart = time.time()

	## SET PATH
	path_id = args.path_id
	path = os.path.join("temp", path_id)
	print("path_id:", path_id, "path:", path)
	os.makedirs(path, exist_ok=True)
	# Created up front so that a `--skipgen --improve` run can still write finalfile.
	os.makedirs("results", exist_ok=True)
	outfile = os.path.join("results", path_id + "_small.mp4")
	finalfile = os.path.join("results", path_id + "_large.mp4")

	if not args.skipgen:
		## GENERATE SPEECH
		tspeech = "None"
		if args.speech == audiofile:
			print("-----------------------------------------")
			print("generating speech")
			t0 = time.time()
			generate_speech(path, audiofile, voice, message, speed)
			tspeech = elapsed(t0)
			print("\ngenerating speech:", tspeech)
		else:
			print("using:", args.speech)
			# Later steps read the speech as `audiofile` inside `path` (see the
			# restore_frames call below), so store the custom file under that
			# name rather than under its original basename.
			speech_target = os.path.join(path, audiofile)
			if os.path.abspath(args.speech) != os.path.abspath(speech_target):
				shutil.copy(args.speech, speech_target)

		print("-------------Copy Image File----------------------------")

		if os.path.abspath(args.image) != os.path.abspath(imgfile):
			shutil.copy(args.image, imgfile)
		else:
			print(f"Skipped copying as '{args.image}' and '{imgfile}' are the same file.")

		## ANIMATE AVATAR IMAGE

		print("-----------------------------------------")
		print("animating face with driver")
		t2 = time.time()
		# audiofile determines the length of the driver movie to trim
		# driver movie is imposed on the image file to produce the animated file
		animate_face(path_id, audiofile, driverfile, imgfile, animatedfile)
		tanimate = elapsed(t2)
		print("\nanimating face:", tanimate)

		## MODIFY LIPS TO FIT THE SPEECH

		print("-----------------------------------------")
		print("modifying lips")
		t3 = time.time()

		modify_lips(path_id, audiofile, animatedfile, outfile)
		tlips = elapsed(t3)
		print("\nmodifying lips:", tlips)

	## IMPROVE THE OUTPUT VIDEO
	if args.improve:
		t4 = time.time()
		improve_dir = os.path.join(path, "improve")
		frames_in = os.path.join(improve_dir, "disassembled")
		frames_out = os.path.join(improve_dir, "improved")

		print("-----------------------------------------")
		print("converting video to frames")
		# Start from a clean slate so frames from an earlier run are not mixed in.
		shutil.rmtree(improve_dir, ignore_errors=True)
		os.makedirs(frames_in, exist_ok=True)
		os.makedirs(frames_out, exist_ok=True)

		vid2frames(outfile, frames_in)
		print("-----------------------------------------")
		print("improving face")
		improve(frames_in, frames_out)
		print("-----------------------------------------")
		print("restoring frames")

		restore_frames(os.path.join(path, audiofile), finalfile, frames_out)
		timprove = elapsed(t4)
		print("\nimproving video:", timprove)

	print("done")
	print("Overall timing")
	print("--------------")
	if not args.skipgen:
		# print("generating speech:", tspeech)
		# print("generating avatar image:", timage)
		print("animating face:", tanimate)
		print("modifying lips:", tlips)
	if args.improve:
		print("improving finished video:", timprove)
	print("total time:", elapsed(tstart))

	return outfile

# if __name__ == '__main__':
# 	main()