Spaces:

haepada
/

roots

Running

App Files Files Community

roots / app.py

haepada

Update app.py

8b6ea6c verified 5 months ago

raw

history blame

8.46 kB

	import gradio as gr
	import numpy as np
	import librosa
	from transformers import pipeline
	from datetime import datetime
	import os
	import requests

	# Inference API settings.
	API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
	# The access token is read from the HF_TOKEN environment variable (for
	# example a Space secret) so that it never has to be committed to the
	# repository. When it is not set, no Authorization header is sent at all
	# instead of sending an invalid placeholder token.
	_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
	headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

	# AI model initialisation: three Hugging Face pipelines, created once when
	# the module is imported and shared by every handler.

	# Speech-to-text (the checkpoint name indicates a Korean wav2vec2 model).
	speech_recognizer = pipeline(
	    task="automatic-speech-recognition",
	    model="kresnik/wav2vec2-large-xlsr-korean",
	)

	# Audio classifier whose top label the UI presents as the "voice emotion".
	# NOTE(review): the checkpoint name suggests a speech-commands classifier
	# rather than an emotion model - confirm its labels are what is intended.
	emotion_classifier = pipeline(
	    task="audio-classification",
	    model="MIT/ast-finetuned-speech-commands-v2",
	)

	# Text sentiment classifier used for typed reflections and transcriptions.
	text_analyzer = pipeline(
	    task="sentiment-analysis",
	    model="nlptown/bert-base-multilingual-uncased-sentiment",
	)

	def create_interface():
	"""Build the Gradio Blocks UI (layout, handlers, event wiring) and return the Blocks object."""
	with gr.Blocks(theme=gr.themes.Soft()) as app:
	# State dict passed to and returned from the handlers; it starts with an
	# empty user name, no reflections, no voice analysis and an empty prompt.
	state = gr.State({
	"user_name": "",
	"reflections": [],
	"voice_analysis": None,
	"final_prompt": ""
	})
	def generate_image_from_prompt(prompt):
	    """Generate an image for *prompt* through the Hugging Face Inference API.

	    The request is sent to ``API_URL`` with the module-level ``headers``.
	    The API answers with the encoded image bytes; those are written to a
	    temporary file because a ``gr.Image`` output accepts a file path (or a
	    numpy array / PIL image) but not raw ``bytes``.

	    Args:
	        prompt: Text prompt describing the image.

	    Returns:
	        Path (``str``) of the temporary image file, or ``None`` when the
	        request fails, the API answers with a non-200 status, or the file
	        cannot be written.
	    """
	    import mimetypes
	    import tempfile

	    payload = {
	        "inputs": prompt,
	        "parameters": {
	            "negative_prompt": "ugly, blurry, poor quality, distorted",
	            "num_inference_steps": 30,
	            "guidance_scale": 7.5,
	        },
	    }

	    try:
	        # A timeout keeps the UI handler from hanging forever if the
	        # endpoint never answers.
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
	    except requests.RequestException as e:
	        print(f"Error generating image: {e}")
	        return None

	    if response.status_code != 200:
	        print(f"Error generating image: HTTP {response.status_code}")
	        return None

	    # Pick the file extension from the response content type; fall back to
	    # .png when the header is missing or unknown.
	    content_type = response.headers.get("Content-Type", "").split(";")[0].strip()
	    suffix = mimetypes.guess_extension(content_type) or ".png"

	    try:
	        # delete=False: the file has to outlive this function so that the
	        # image component can read it.
	        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as image_file:
	            image_file.write(response.content)
	    except OSError as e:
	        print(f"Error generating image: {e}")
	        return None

	    return image_file.name



	# Header: page title plus an initially empty Markdown slot that the start
	# button handler fills with a greeting.
	header = gr.Markdown("# 디지털 굿판")
	user_display = gr.Markdown("")

	with gr.Tabs() as tabs:
	# "입장" (entrance) tab: name entry and the start button.
	with gr.Tab("입장"):
	gr.Markdown("""# 디지털 굿판에 오신 것을 환영합니다""")
	name_input = gr.Textbox(label="이름을 알려주세요")
	start_btn = gr.Button("여정 시작하기")

	# "청신" tab: an auto-playing, non-interactive audio player plus a form
	# for writing and saving reflections.
	with gr.Tab("청신"):
	with gr.Row():
	# Changed to an absolute path (resolved against the current working directory).
	audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
	audio = gr.Audio(
	value=audio_path,
	type="filepath",
	label="온천천의 소리",
	interactive=False,
	autoplay=True
	)
	with gr.Column():
	reflection_input = gr.Textbox(
	label="현재 순간의 감상을 적어주세요",
	lines=3
	)
	save_btn = gr.Button("감상 저장하기")
	# Table of saved reflections; columns are time, reflection text and
	# sentiment analysis result.
	reflections_display = gr.Dataframe(
	headers=["시간", "감상", "감정 분석"],
	label="기록된 감상들"
	)

	# "기원" tab: microphone recording on one side, read-only analysis
	# results (transcription, voice label, text sentiment) on the other.
	with gr.Tab("기원"):
	gr.Markdown("## 기원 - 목소리로 전하기")
	with gr.Row():
	with gr.Column():
	# Microphone-only recorder; handlers receive the recording as a file path.
	voice_input = gr.Audio(
	label="나누고 싶은 이야기를 들려주세요",
	sources=["microphone"],
	type="filepath",
	interactive=True
	)
	clear_btn = gr.Button("녹음 지우기")

	with gr.Column():
	# Output-only text boxes filled by the analyze handler.
	transcribed_text = gr.Textbox(
	label="인식된 텍스트",
	interactive=False
	)
	voice_emotion = gr.Textbox(
	label="음성 감정 분석",
	interactive=False
	)
	text_emotion = gr.Textbox(
	label="텍스트 감정 분석",
	interactive=False
	)
	analyze_btn = gr.Button("분석하기")

	# "송신" tab: shows the generated prompt and the image created from it.
	with gr.Tab("송신"):
	    gr.Markdown("## 송신 - 시각화 결과")
	    with gr.Column():
	        final_prompt = gr.Textbox(
	            label="생성된 프롬프트",
	            interactive=False,
	            lines=3,
	        )
	        generate_btn = gr.Button("이미지 생성하기")
	        result_image = gr.Image(
	            label="생성된 이미지",
	            type="pil",
	        )

	# Image generation handler. It must be registered after the tab above has
	# created generate_btn, final_prompt and result_image; wiring it up before
	# those assignments raises NameError while the UI is being built.
	generate_btn.click(
	    fn=generate_image_from_prompt,
	    inputs=[final_prompt],
	    outputs=[result_image],
	)

	def clear_voice_input():
	    """Reset the voice recorder.

	    Returns:
	        ``None``; the click handler that uses this function writes the
	        returned value to the ``voice_input`` component.
	    """
	    cleared_value = None
	    return cleared_value

	def analyze_voice(audio_path, state):
	    """Transcribe a recording, classify it and build the image prompt.

	    Args:
	        audio_path: Path of the recorded audio file, or ``None`` when
	            nothing has been recorded.
	        state: Session state; passed through unchanged.

	    Returns:
	        A 5-tuple ``(state, transcription, voice label line, text
	        sentiment line, prompt)``. When there is no recording, or any step
	        raises, the second item carries a message and the last three items
	        are empty strings.
	    """
	    # Guard clause: nothing to analyse yet.
	    if audio_path is None:
	        return state, "음성을 먼저 녹음해주세요.", "", "", ""

	    try:
	        # Load the recording resampled to 16 kHz; the returned rate is unused.
	        waveform, _ = librosa.load(audio_path, sr=16000)

	        # Speech recognition.
	        recognized = speech_recognizer(waveform)["text"]

	        # Audio classification and text sentiment; only the first result
	        # of each is used.
	        voice_scores = emotion_classifier(waveform)
	        sentiment = text_analyzer(recognized)[0]
	        top_voice = voice_scores[0]

	        # Prompt for the image generator.
	        image_prompt = generate_prompt(recognized, top_voice, sentiment)

	        voice_label, voice_score = top_voice["label"], top_voice["score"]
	        voice_line = f"음성 감정: {voice_label} ({voice_score:.2f})"
	        text_label, text_score = sentiment["label"], sentiment["score"]
	        text_line = f"텍스트 감정: {text_label} ({text_score:.2f})"
	    except Exception as e:
	        # UI boundary: report the failure in the transcription box instead
	        # of letting the handler crash.
	        return state, f"오류 발생: {e!s}", "", "", ""

	    return state, recognized, voice_line, text_line, image_prompt

	def generate_prompt(text, voice_emotion, text_sentiment):
	    """Compose the image-generation prompt from the analysis results.

	    Args:
	        text: Transcribed utterance, quoted at the end of the prompt.
	        voice_emotion: Mapping with a ``'label'`` key from the audio
	            classifier; the label selects the colour palette.
	        text_sentiment: Mapping with a ``'label'`` key from the text
	            sentiment classifier.

	    Returns:
	        The prompt string.
	    """
	    palette_by_emotion = {
	        "happy": "따뜻한 노란색과 주황색",
	        "sad": "깊은 파랑색과 보라색",
	        "angry": "강렬한 빨강색과 검정색",
	        "neutral": "부드러운 회색과 베이지색",
	    }

	    voice_label = voice_emotion["label"]
	    # Labels without a dedicated palette fall back to a generic phrase.
	    palette = palette_by_emotion.get(voice_label, "자연스러운 색상")
	    text_label = text_sentiment["label"]

	    segments = (
	        f"한국 전통 민화 스타일의 추상화, {palette} 사용. ",
	        f"음성의 감정({voice_label})과 텍스트의 감정({text_label})이 조화를 이루며, ",
	        f"음성의 특징을 반영한 동적인 구도. 발화 내용: '{text}'",
	    )
	    return "".join(segments)

	def save_reflection(text, state):
	    """Append a timestamped, sentiment-tagged reflection to the session state.

	    Args:
	        text: Reflection typed by the user; ``None`` or whitespace-only
	            input is ignored.
	        state: Session state dict. Its ``"reflections"`` list is created
	            if missing and extended in place.

	    Returns:
	        ``(state, reflections)`` where ``reflections`` is the list of
	        ``[time, text, sentiment]`` rows shown in the table.
	    """
	    # Make sure the list exists before any path reads it, so the early
	    # return below cannot raise KeyError on a state without the key.
	    reflections = state.setdefault("reflections", [])

	    # Nothing to store for missing or blank input.
	    if text is None or not text.strip():
	        return state, reflections

	    current_time = datetime.now().strftime("%H:%M:%S")
	    sentiment = text_analyzer(text)[0]
	    reflections.append(
	        [current_time, text, f"{sentiment['label']} ({sentiment['score']:.2f})"]
	    )
	    return state, reflections

	# Event wiring.
	# Start button: writes a greeting containing the entered name to
	# user_display and asks the Tabs container to select "청신".
	# NOTE(review): gr.update(selected=...) selects a tab by its id, and the
	# tabs created in this file are not given explicit ids - confirm that this
	# tab switch actually works.
	start_btn.click(
	fn=lambda name: (f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")),
	inputs=[name_input],
	outputs=[user_display, tabs]
	)

	# Save button: stores the typed reflection and refreshes the table.
	save_btn.click(
	fn=save_reflection,
	inputs=[reflection_input, state],
	outputs=[state, reflections_display]
	)

	# Clear button: resets the recorder to None.
	clear_btn.click(
	fn=clear_voice_input,
	inputs=[],
	outputs=[voice_input]
	)

	# Analyze button: the handler's five return values map, in order, onto
	# the five outputs listed here.
	analyze_btn.click(
	fn=analyze_voice,
	inputs=[voice_input, state],
	outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt]
	)

	return app

	if __name__ == "__main__":
	    # Script entry point: build the UI, then start the Gradio server with
	    # its default launch settings.
	    demo = create_interface()
	    demo.launch()