Spaces:

haepada
/

roots

Sleeping

App Files Files Community

roots / app.py

haepada

Update app.py

44d14de verified 8 months ago

raw

history blame

12.7 kB

	import gradio as gr
	import numpy as np
	import librosa
	from transformers import pipeline
	from datetime import datetime
	import os
	import requests

	# Read the API token from the "roots" environment variable; fail fast if unset.
	HF_API_TOKEN = os.environ.get("roots")
	if not HF_API_TOKEN:
	    raise ValueError("roots token not found in environment variables")

	# Inference API endpoint and authorization header used for image generation.
	API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-base-1.0"
	headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

	# Build the speech-recognition and sentiment pipelines once, at import time.
	speech_recognizer = pipeline(
	    task="automatic-speech-recognition",
	    model="kresnik/wav2vec2-large-xlsr-korean",
	)
	text_analyzer = pipeline(
	    task="sentiment-analysis",
	    model="nlptown/bert-base-multilingual-uncased-sentiment",
	)

	def map_acoustic_to_emotion(features):
	    """Map acoustic features onto an emotion summary.

	    Args:
	        features: Mapping with numeric "energy", "tempo", "pitch" and
	            "volume" entries.

	    Returns:
	        A dict with the keys "primary" (emotion label), "intensity"
	        (energy * 100, capped at 100), "confidence", "secondary" (always
	        an empty string), "characteristics" (a one-element list describing
	        the voice) and "details" (formatted per-feature descriptions).
	    """
	    # Rescale the raw features; each one is only capped from above.
	    intensity = min(features["energy"] * 100, 100)
	    rate = min(features["tempo"] / 200, 1)
	    pitch = min(features["pitch"] * 2, 1)

	    def classify():
	        """Return (label, characteristic, confidence) for the scaled features."""
	        if intensity > 70:
	            # High energy: tempo separates joy from anger.
	            if rate > 0.6:
	                return "기쁨/열정", "빠르고 활기찬 말하기 패턴", intensity / 100
	            return "분노/강조", "강한 음성 강도", intensity / 100
	        if pitch > 0.6:
	            # High pitch: energy separates surprise from curiosity.
	            if intensity > 50:
	                return "놀람/흥분", "높은 음고와 강한 강세", pitch
	            return "관심/호기심", "음고 변화가 큼", pitch
	        if intensity < 30:
	            # Low energy: tempo separates sadness from fatigue.
	            weakness = (30 - intensity) / 30
	            if rate < 0.4:
	                return "슬픔/우울", "느리고 약한 음성", weakness
	            return "피로/무기력", "낮은 에너지 레벨", weakness
	        # Everything else is treated as a neutral state with fixed confidence.
	        if rate > 0.5:
	            return "평온/안정", "균형잡힌 말하기 패턴", 0.5
	        return "차분/진지", "안정적인 음성 특성", 0.5

	    def grade(value, upper, lower, names):
	        """Pick the high/medium/low name for *value* using strict thresholds."""
	        high, medium, low = names
	        if value > upper:
	            return high
	        if value > lower:
	            return medium
	        return low

	    label, trait, certainty = classify()

	    return {
	        "primary": label,
	        "intensity": intensity,
	        "confidence": certainty,
	        "secondary": "",
	        "characteristics": [trait],
	        "details": {
	            "energy_level": f"{intensity:.1f}%",
	            "speech_rate": grade(rate, 0.6, 0.4, ("빠름", "보통", "느림")),
	            "pitch_variation": grade(pitch, 0.6, 0.3, ("높음", "보통", "낮음")),
	            "voice_volume": grade(features["volume"], 0.7, 0.3, ("큼", "보통", "작음")),
	        },
	    }

	def generate_image_from_prompt(prompt, timeout=120):
	    """Request an image for *prompt* from the configured Inference API URL.

	    Args:
	        prompt: Text prompt. An empty or ``None`` prompt returns ``None``
	            without issuing a request.
	        timeout: Seconds to wait for the HTTP request before giving up.
	            Without a timeout a stalled connection would block forever.

	    Returns:
	        The raw response body (``bytes``) when the API answers with HTTP
	        200, otherwise ``None``. Failures are printed, not raised.

	    NOTE(review): elsewhere in this file the result is routed to a
	    ``gr.Image`` output; raw bytes may not be an accepted value there -
	    confirm and convert at the call site if needed.
	    """
	    print(f"Generating image with prompt: {prompt}")
	    if not prompt:
	        print("No prompt provided")
	        return None

	    payload = {
	        "inputs": prompt,
	        "parameters": {
	            "negative_prompt": "ugly, blurry, poor quality, distorted",
	            "num_inference_steps": 30,
	            "guidance_scale": 7.5,
	        },
	    }

	    # Only the network call can fail here, so keep the try body minimal.
	    try:
	        response = requests.post(
	            API_URL,
	            headers=headers,
	            json=payload,
	            timeout=timeout,
	        )
	    except requests.RequestException as e:
	        print(f"Error generating image: {str(e)}")
	        return None

	    if response.status_code == 200:
	        print("Image generated successfully")
	        return response.content

	    print(f"Error: {response.status_code}")
	    print(f"Response: {response.text}")
	    return None

	def generate_detailed_prompt(text, emotions, text_sentiment):
	    """Compose the image-generation prompt from the analysis results.

	    Args:
	        text: Transcribed utterance, quoted verbatim in the prompt.
	        emotions: Dict with "primary" (emotion label), "intensity"
	            (number compared against 40 and 70) and "characteristics"
	            (list of strings).
	        text_sentiment: Sentiment result dict. When it has a "label"
	            starting with a number (e.g. "4 stars") that number is used
	            as the x/5 strength; otherwise the "score" value is used.

	    Returns:
	        The prompt string.
	    """
	    emotion_colors = {
	        "기쁨/열정": "밝은 노랑과 따뜻한 주황색",
	        "분노/강조": "강렬한 빨강과 짙은 검정",
	        "놀람/흥분": "선명한 파랑과 밝은 보라",
	        "관심/호기심": "연한 하늘색과 민트색",
	        "슬픔/우울": "어두운 파랑과 회색",
	        "피로/무기력": "탁한 갈색과 짙은 회색",
	        "평온/안정": "부드러운 초록과 베이지",
	        "차분/진지": "차분한 남색과 깊은 보라",
	    }

	    # Visual style follows the emotion intensity.
	    if emotions["intensity"] > 70:
	        visual_style = "역동적인 붓질과 강한 대비"
	    elif emotions["intensity"] > 40:
	        visual_style = "균형잡힌 구도와 중간 톤의 조화"
	    else:
	        visual_style = "부드러운 그라데이션과 차분한 톤"

	    palette = emotion_colors.get(emotions["primary"], "자연스러운 색상")
	    rating = _sentiment_rating(text_sentiment)

	    # Assemble the prompt sentence by sentence.
	    parts = [
	        f"한국 전통 민화 스타일의 추상화, {palette} 기반. ",
	        f"{visual_style}로 표현된 {emotions['primary']}의 감정. ",
	        f"음성의 특징({', '.join(emotions['characteristics'])})을 화면의 동적 요소로 표현. ",
	        f"발화 내용 '{text}'에서 느껴지는 감정(강도: {rating}/5)을 은유적 이미지로 담아내기.",
	    ]
	    return "".join(parts)


	def _sentiment_rating(text_sentiment):
	    """Return the numeric rating at the start of the label, else the raw score.

	    The "score" field was previously printed as "x/5".
	    NOTE(review): this assumes the sentiment model reports its 1-5 rating
	    in "label" (as "N stars") and a 0-1 confidence in "score" - confirm
	    against the model card.
	    """
	    leading = str(text_sentiment.get("label", "")).split(" ", 1)[0]
	    if leading.isdigit():
	        return int(leading)
	    return text_sentiment["score"]

	def create_interface():
	    """Build the Gradio Blocks app and wire up its callbacks.

	    The layout has four tabs (entrance, reflection, voice input, image
	    result). Callbacks use the module-level ``speech_recognizer``,
	    ``text_analyzer``, ``map_acoustic_to_emotion``,
	    ``generate_detailed_prompt`` and ``generate_image_from_prompt``.

	    NOTE(review): the source this was recovered from had lost its
	    indentation, so the Row/Column nesting below is a reconstruction -
	    confirm the intended layout.

	    Returns:
	        The constructed ``gr.Blocks`` instance, ready for ``launch()``.
	    """
	    import tempfile  # only needed to hand generated images to gr.Image

	    with gr.Blocks(theme=gr.themes.Soft()) as app:
	        state = gr.State({
	            "user_name": "",
	            "reflections": [],
	            "voice_analysis": None,
	            "final_prompt": "",
	        })

	        # Header
	        gr.Markdown("# 디지털 굿판")
	        user_display = gr.Markdown("")

	        # Each tab gets an explicit id so callbacks can select it.
	        with gr.Tabs() as tabs:
	            # Entrance
	            with gr.Tab("입장", id="입장"):
	                gr.Markdown("""# 디지털 굿판에 오신 것을 환영합니다""")
	                name_input = gr.Textbox(label="이름을 알려주세요")
	                start_btn = gr.Button("여정 시작하기")

	            # Reflection: background audio plus written notes
	            with gr.Tab("청신", id="청신"):
	                with gr.Row():
	                    audio_path = os.path.abspath(os.path.join("assets", "main_music.mp3"))
	                    gr.Audio(
	                        value=audio_path,
	                        type="filepath",
	                        label="온천천의 소리",
	                        interactive=False,
	                        autoplay=True,
	                    )
	                    with gr.Column():
	                        reflection_input = gr.Textbox(
	                            label="현재 순간의 감상을 적어주세요",
	                            lines=3,
	                        )
	                        save_btn = gr.Button("감상 저장하기")
	                        reflections_display = gr.Dataframe(
	                            headers=["시간", "감상", "감정 분석"],
	                            label="기록된 감상들",
	                        )

	            # Voice input and analysis
	            with gr.Tab("기원", id="기원"):
	                gr.Markdown("## 기원 - 목소리로 전하기")
	                with gr.Row():
	                    with gr.Column():
	                        voice_input = gr.Audio(
	                            label="나누고 싶은 이야기를 들려주세요",
	                            sources=["microphone"],
	                            type="filepath",
	                            interactive=True,
	                        )
	                        clear_btn = gr.Button("녹음 지우기")

	                    with gr.Column():
	                        transcribed_text = gr.Textbox(
	                            label="인식된 텍스트",
	                            interactive=False,
	                        )
	                        voice_emotion = gr.Textbox(
	                            label="음성 감정 분석",
	                            interactive=False,
	                        )
	                        text_emotion = gr.Textbox(
	                            label="텍스트 감정 분석",
	                            interactive=False,
	                        )
	                        analyze_btn = gr.Button("분석하기")

	            # Visualization result
	            with gr.Tab("송신", id="송신"):
	                gr.Markdown("## 송신 - 시각화 결과")
	                with gr.Column():
	                    final_prompt = gr.Textbox(
	                        label="생성된 프롬프트",
	                        interactive=False,
	                        lines=3,
	                    )
	                    generate_btn = gr.Button("이미지 생성하기")
	                    result_image = gr.Image(
	                        label="생성된 이미지",
	                        type="pil",
	                    )

	        # ---- Callbacks ----
	        def start_journey(name):
	            """Greet the user and switch to the reflection tab."""
	            return f"# 환영합니다, {name}님의 디지털 굿판", gr.update(selected="청신")

	        def save_reflection(text, state):
	            """Append a timestamped reflection and its sentiment to the state.

	            Returns the (possibly updated) state and the rows for the
	            reflections table. Blank input leaves the state untouched.
	            """
	            if not text or not text.strip():
	                return state, state["reflections"]

	            timestamp = datetime.now().strftime("%H:%M:%S")
	            try:
	                sentiment = text_analyzer(text)[0]
	                sentiment_text = f"{sentiment['label']} ({sentiment['score']:.2f})"
	            except Exception as e:
	                # Keep the note even when the sentiment model fails.
	                sentiment_text = f"오류 발생: {str(e)}"

	            # Build a new dict/list rather than mutating the stored state.
	            rows = state["reflections"] + [[timestamp, text, sentiment_text]]
	            updated_state = {**state, "reflections": rows}
	            return updated_state, rows

	        def clear_voice_input():
	            """Reset the recorded audio."""
	            return None

	        def analyze_voice(recording_path, state):
	            """Transcribe a recording and analyse its acoustic/text emotion.

	            Returns (state, transcript, voice summary, text summary,
	            prompt). On failure the transcript slot carries the error
	            message and the remaining slots are empty strings.
	            """
	            if recording_path is None:
	                return state, "음성을 먼저 녹음해주세요.", "", "", ""

	            try:
	                y, sr = librosa.load(recording_path, sr=16000)

	                # Acoustic features. tempo takes keyword arguments and must
	                # be told the actual sample rate instead of its default.
	                acoustic_features = {
	                    "energy": float(np.mean(librosa.feature.rms(y=y))),
	                    "tempo": float(librosa.beat.tempo(y=y, sr=sr)[0]),
	                    "pitch": float(np.mean(librosa.feature.zero_crossing_rate(y))),
	                    "volume": float(np.mean(np.abs(y))),
	                }

	                emotions = map_acoustic_to_emotion(acoustic_features)

	                # Speech recognition
	                text = speech_recognizer(y)["text"]

	                # Text sentiment
	                text_sentiment = text_analyzer(text)[0]

	                voice_result = (
	                    f"음성 감정: {emotions['primary']} "
	                    f"(강도: {emotions['intensity']:.1f}%, 신뢰도: {emotions['confidence']:.2f})\n"
	                    f"특징: {', '.join(emotions['characteristics'])}\n"
	                    f"상세 분석:\n"
	                    f"- 에너지 레벨: {emotions['details']['energy_level']}\n"
	                    f"- 말하기 속도: {emotions['details']['speech_rate']}\n"
	                    f"- 음높이 변화: {emotions['details']['pitch_variation']}\n"
	                    f"- 음성 크기: {emotions['details']['voice_volume']}"
	                )

	                # Prefer the leading number of the label ("N stars") for the
	                # 1-5 display; fall back to the raw score when absent.
	                # NOTE(review): assumes "score" is a confidence rather than
	                # the rating - confirm against the model card.
	                stars = str(text_sentiment.get("label", "")).split(" ", 1)[0]
	                rating = int(stars) if stars.isdigit() else text_sentiment["score"]
	                text_result = f"텍스트 감정 분석 (1-5): {rating}"

	                prompt = generate_detailed_prompt(text, emotions, text_sentiment)

	                return state, text, voice_result, text_result, prompt
	            except Exception as e:
	                return state, f"오류 발생: {str(e)}", "", "", ""

	        def generate_image_file(prompt):
	            """Generate an image and return something gr.Image can display.

	            Raw bytes from the generator are written to a temporary file
	            and its path is returned; any other result is passed through.
	            The temporary file is not deleted here.
	            """
	            result = generate_image_from_prompt(prompt)
	            if not isinstance(result, (bytes, bytearray)):
	                return result
	            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
	                tmp.write(result)
	            return tmp.name

	        # ---- Event wiring ----
	        start_btn.click(
	            fn=start_journey,
	            inputs=[name_input],
	            outputs=[user_display, tabs],
	        )

	        save_btn.click(
	            fn=save_reflection,
	            inputs=[reflection_input, state],
	            outputs=[state, reflections_display],
	        )

	        clear_btn.click(
	            fn=clear_voice_input,
	            inputs=[],
	            outputs=[voice_input],
	        )

	        analyze_btn.click(
	            fn=analyze_voice,
	            inputs=[voice_input, state],
	            outputs=[state, transcribed_text, voice_emotion, text_emotion, final_prompt],
	        )

	        generate_btn.click(
	            fn=generate_image_file,
	            inputs=[final_prompt],
	            outputs=[result_image],
	        )

	    return app

	if __name__ == "__main__":
	    # Build the interface and launch it with debug=True when run as a script.
	    demo = create_interface()
	    demo.launch(debug=True)