Spaces:
Running
Running
fixed transcription
Browse files
- __pycache__/analyze.cpython-310.pyc +0 -0
- __pycache__/process.cpython-310.pyc +0 -0
- __pycache__/transcription.cpython-310.pyc +0 -0
- app.py +9 -2
- static/feedback.js +0 -2
- transcription.py +14 -38
__pycache__/analyze.cpython-310.pyc
CHANGED
Binary files a/__pycache__/analyze.cpython-310.pyc and b/__pycache__/analyze.cpython-310.pyc differ
|
|
__pycache__/process.cpython-310.pyc
CHANGED
Binary files a/__pycache__/process.cpython-310.pyc and b/__pycache__/process.cpython-310.pyc differ
|
|
__pycache__/transcription.cpython-310.pyc
CHANGED
Binary files a/__pycache__/transcription.cpython-310.pyc and b/__pycache__/transcription.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -59,6 +59,7 @@ def reset_member():
|
|
59 |
global total_audio
|
60 |
print(total_audio)
|
61 |
process.delete_files_in_directory(total_audio)
|
|
|
62 |
try:
|
63 |
data = request.get_json()
|
64 |
if not data or "names" not in data:
|
@@ -97,11 +98,10 @@ def transcription():
|
|
97 |
global transcription_text
|
98 |
global total_audio
|
99 |
try:
|
100 |
-
audio_directory = transcripter.merge_segments(total_audio)
|
101 |
transcription_text = transcripter.create_transcription(audio_directory)
|
102 |
with open(transcription_text,'r',encoding='utf-8') as file:
|
103 |
file_content = file.read()
|
104 |
-
print(file_content)
|
105 |
return jsonify({'transcription': file_content}),200
|
106 |
except Exception as e:
|
107 |
return jsonify({"error": str(e)}),500
|
@@ -110,6 +110,13 @@ def transcription():
|
|
110 |
@app.route('/analyze',methods =['GET','POST'])
|
111 |
def analyze():
|
112 |
global transcription_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
analyzer = TextAnalyzer(transcription_text, harassment_keywords)
|
114 |
api_key = os.environ.get("DEEPSEEK")
|
115 |
if api_key is None:
|
|
|
59 |
global total_audio
|
60 |
print(total_audio)
|
61 |
process.delete_files_in_directory(total_audio)
|
62 |
+
process.delete_files_in_directory('/tmp/data/transcription_audio')
|
63 |
try:
|
64 |
data = request.get_json()
|
65 |
if not data or "names" not in data:
|
|
|
98 |
global transcription_text
|
99 |
global total_audio
|
100 |
try:
|
101 |
+
audio_directory = transcripter.merge_segments(total_audio,'/tmp/data/transcription_audio')
|
102 |
transcription_text = transcripter.create_transcription(audio_directory)
|
103 |
with open(transcription_text,'r',encoding='utf-8') as file:
|
104 |
file_content = file.read()
|
|
|
105 |
return jsonify({'transcription': file_content}),200
|
106 |
except Exception as e:
|
107 |
return jsonify({"error": str(e)}),500
|
|
|
110 |
@app.route('/analyze',methods =['GET','POST'])
|
111 |
def analyze():
|
112 |
global transcription_text
|
113 |
+
if transcription_text == "":
|
114 |
+
try:
|
115 |
+
audio_directory = transcripter.merge_segments(total_audio,'/tmp/data/transcription_audio')
|
116 |
+
transcription_text = transcripter.create_transcription(audio_directory)
|
117 |
+
except Exception as e:
|
118 |
+
return jsonify({'error making transcription': e})
|
119 |
+
|
120 |
analyzer = TextAnalyzer(transcription_text, harassment_keywords)
|
121 |
api_key = os.environ.get("DEEPSEEK")
|
122 |
if api_key is None:
|
static/feedback.js
CHANGED
@@ -15,8 +15,6 @@ async function getAnalysis() {
|
|
15 |
const loader = document.getElementById("loader");
|
16 |
loader.style.display = "block";
|
17 |
try {
|
18 |
-
await getTranscription();
|
19 |
-
|
20 |
const response = await fetch("/analyze");
|
21 |
if (!response.ok) {
|
22 |
throw new Error(`HTTP error! status: ${response.status}`);
|
|
|
15 |
const loader = document.getElementById("loader");
|
16 |
loader.style.display = "block";
|
17 |
try {
|
|
|
|
|
18 |
const response = await fetch("/analyze");
|
19 |
if (!response.ok) {
|
20 |
throw new Error(`HTTP error! status: ${response.status}`);
|
transcription.py
CHANGED
@@ -19,7 +19,6 @@ class TranscriptionMaker():
|
|
19 |
self.output_dir = output_dir
|
20 |
os.makedirs(self.output_dir, exist_ok=True)
|
21 |
|
22 |
-
|
23 |
#音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
|
24 |
def create_transcription(self,audio_directory):
|
25 |
results = []
|
@@ -45,6 +44,7 @@ class TranscriptionMaker():
|
|
45 |
"end": segment.end,
|
46 |
"text": segment.text
|
47 |
})
|
|
|
48 |
#ファイルの書き込み。ファイル名は"transcription.txt"
|
49 |
output_file=os.path.join(self.output_dir,"transcription.txt")
|
50 |
try:
|
@@ -56,54 +56,30 @@ class TranscriptionMaker():
|
|
56 |
raise
|
57 |
return output_file
|
58 |
|
59 |
-
|
60 |
-
def merge_segments(self,segments_dir,output_dir
|
61 |
if not os.path.exists(output_dir):
|
62 |
os.makedirs(output_dir, exist_ok=True)
|
63 |
|
64 |
files = sorted([f for f in os.listdir(segments_dir) if f.endswith('.wav')])
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
|
70 |
for file in files:
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
# 番号が連続していない場合、新しいグループを作成
|
75 |
-
if previous_index is not None and file_index != previous_index + 1:
|
76 |
-
# 現在のグループを結合して保存
|
77 |
-
if current_group:
|
78 |
-
merged_files.append(current_group)
|
79 |
-
current_group = []
|
80 |
-
|
81 |
-
# 現在のファイルをグループに追加
|
82 |
-
current_group.append(file)
|
83 |
-
previous_index = file_index
|
84 |
|
85 |
-
|
86 |
-
if current_group:
|
87 |
-
merged_files.append(current_group)
|
88 |
-
|
89 |
-
# グループごとに結合して保存
|
90 |
-
for i, group in enumerate(merged_files):
|
91 |
-
combined_audio = AudioSegment.empty()
|
92 |
-
for file in group:
|
93 |
-
file_path = os.path.join(segments_dir, file)
|
94 |
-
segment = AudioSegment.from_file(file_path)
|
95 |
-
combined_audio += segment
|
96 |
-
# 出力ファイル名を設定して保存
|
97 |
-
output_file = os.path.join(output_dir, self.generate_filename(3))
|
98 |
-
combined_audio.export(output_file, format='wav')
|
99 |
|
|
|
100 |
return output_dir
|
101 |
-
|
102 |
-
def generate_random_string(self,length):
|
103 |
-
letters = string.ascii_letters + string.digits
|
104 |
-
return ''.join(random.choice(letters) for i in range(length))
|
105 |
|
106 |
-
def generate_filename(self
|
107 |
current_time = datetime.now().strftime("%Y%m%d%H%M%S")
|
108 |
filename = f"{current_time}.wav"
|
109 |
return filename
|
|
|
19 |
self.output_dir = output_dir
|
20 |
os.makedirs(self.output_dir, exist_ok=True)
|
21 |
|
|
|
22 |
#音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
|
23 |
def create_transcription(self,audio_directory):
|
24 |
results = []
|
|
|
44 |
"end": segment.end,
|
45 |
"text": segment.text
|
46 |
})
|
47 |
+
|
48 |
#ファイルの書き込み。ファイル名は"transcription.txt"
|
49 |
output_file=os.path.join(self.output_dir,"transcription.txt")
|
50 |
try:
|
|
|
56 |
raise
|
57 |
return output_file
|
58 |
|
59 |
+
#ディレクトリ内の音声ファイルをくっつける
|
60 |
+
def merge_segments(self, segments_dir, output_dir="/tmp/data/merged_segment"):
    """Concatenate the ``.wav`` files in ``segments_dir`` into one WAV file.

    Segments are joined in sorted filename order and exported to
    ``output_dir`` under a timestamp-based name from ``generate_filename``.

    Args:
        segments_dir: Directory containing the ``.wav`` segments to join.
        output_dir: Directory that receives the merged file; created if
            it does not exist.

    Returns:
        ``output_dir``. When ``segments_dir`` holds no ``.wav`` files,
        nothing is written and the (possibly empty) directory is returned.
    """
    os.makedirs(output_dir, exist_ok=True)

    files = sorted(f for f in os.listdir(segments_dir) if f.endswith('.wav'))

    # Skip only when there is nothing to export. A lone segment must still
    # be written to output_dir: the returned directory is what gets
    # transcribed, so returning early for one file would leave it empty.
    if not files:
        print('No need to merge')
        return output_dir

    combined_audio = AudioSegment.empty()
    for file in files:
        file_path = os.path.join(segments_dir, file)
        combined_audio += AudioSegment.from_file(file_path)

    output_file = os.path.join(output_dir, self.generate_filename())
    combined_audio.export(output_file, format="wav")
    return output_dir
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
def generate_filename(self):
    """Return a WAV filename derived from the current time.

    The name has the form ``YYYYmmddHHMMSS.wav``, using the value of
    ``datetime.now()`` at call time.
    """
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    return stamp + ".wav"
|