A-yum1 committed on
Commit
34cbda6
·
1 Parent(s): c106db4

fixed transcription

Browse files
__pycache__/analyze.cpython-310.pyc CHANGED
Binary files a/__pycache__/analyze.cpython-310.pyc and b/__pycache__/analyze.cpython-310.pyc differ
 
__pycache__/process.cpython-310.pyc CHANGED
Binary files a/__pycache__/process.cpython-310.pyc and b/__pycache__/process.cpython-310.pyc differ
 
__pycache__/transcription.cpython-310.pyc CHANGED
Binary files a/__pycache__/transcription.cpython-310.pyc and b/__pycache__/transcription.cpython-310.pyc differ
 
app.py CHANGED
@@ -59,6 +59,7 @@ def reset_member():
59
  global total_audio
60
  print(total_audio)
61
  process.delete_files_in_directory(total_audio)
 
62
  try:
63
  data = request.get_json()
64
  if not data or "names" not in data:
@@ -97,11 +98,10 @@ def transcription():
97
  global transcription_text
98
  global total_audio
99
  try:
100
- audio_directory = transcripter.merge_segments(total_audio)
101
  transcription_text = transcripter.create_transcription(audio_directory)
102
  with open(transcription_text,'r',encoding='utf-8') as file:
103
  file_content = file.read()
104
- print(file_content)
105
  return jsonify({'transcription': file_content}),200
106
  except Exception as e:
107
  return jsonify({"error": str(e)}),500
@@ -110,6 +110,13 @@ def transcription():
110
  @app.route('/analyze',methods =['GET','POST'])
111
  def analyze():
112
  global transcription_text
 
 
 
 
 
 
 
113
  analyzer = TextAnalyzer(transcription_text, harassment_keywords)
114
  api_key = os.environ.get("DEEPSEEK")
115
  if api_key is None:
 
59
  global total_audio
60
  print(total_audio)
61
  process.delete_files_in_directory(total_audio)
62
+ process.delete_files_in_directory('/tmp/data/transcription_audio')
63
  try:
64
  data = request.get_json()
65
  if not data or "names" not in data:
 
98
  global transcription_text
99
  global total_audio
100
  try:
101
+ audio_directory = transcripter.merge_segments(total_audio,'/tmp/data/transcription_audio')
102
  transcription_text = transcripter.create_transcription(audio_directory)
103
  with open(transcription_text,'r',encoding='utf-8') as file:
104
  file_content = file.read()
 
105
  return jsonify({'transcription': file_content}),200
106
  except Exception as e:
107
  return jsonify({"error": str(e)}),500
 
110
  @app.route('/analyze',methods =['GET','POST'])
111
  def analyze():
112
  global transcription_text
113
+ if transcription_text == "":
114
+ try:
115
+ audio_directory = transcripter.merge_segments(total_audio,'/tmp/data/transcription_audio')
116
+ transcription_text = transcripter.create_transcription(audio_directory)
117
+ except Exception as e:
118
+ return jsonify({'error making transcription': e})
119
+
120
  analyzer = TextAnalyzer(transcription_text, harassment_keywords)
121
  api_key = os.environ.get("DEEPSEEK")
122
  if api_key is None:
static/feedback.js CHANGED
@@ -15,8 +15,6 @@ async function getAnalysis() {
15
  const loader = document.getElementById("loader");
16
  loader.style.display = "block";
17
  try {
18
- await getTranscription();
19
-
20
  const response = await fetch("/analyze");
21
  if (!response.ok) {
22
  throw new Error(`HTTP error! status: ${response.status}`);
 
15
  const loader = document.getElementById("loader");
16
  loader.style.display = "block";
17
  try {
 
 
18
  const response = await fetch("/analyze");
19
  if (!response.ok) {
20
  throw new Error(`HTTP error! status: ${response.status}`);
transcription.py CHANGED
@@ -19,7 +19,6 @@ class TranscriptionMaker():
19
  self.output_dir = output_dir
20
  os.makedirs(self.output_dir, exist_ok=True)
21
 
22
-
23
  #音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
24
  def create_transcription(self,audio_directory):
25
  results = []
@@ -45,6 +44,7 @@ class TranscriptionMaker():
45
  "end": segment.end,
46
  "text": segment.text
47
  })
 
48
  #ファイルの書き込み。ファイル名は"transcription.txt"
49
  output_file=os.path.join(self.output_dir,"transcription.txt")
50
  try:
@@ -56,54 +56,30 @@ class TranscriptionMaker():
56
  raise
57
  return output_file
58
 
59
- #ファイル名が連続しているならくっつける
60
- def merge_segments(self,segments_dir,output_dir = "/tmp/data/merged_segment"):
61
  if not os.path.exists(output_dir):
62
  os.makedirs(output_dir, exist_ok=True)
63
 
64
  files = sorted([f for f in os.listdir(segments_dir) if f.endswith('.wav')])
65
 
66
- merged_files = []
67
- current_group = []
68
- previous_index = None
 
 
69
 
70
  for file in files:
71
- # ファイル名から番号を抽出(例: "0.wav" -> 0)
72
- file_index = int(file.split('.')[0])
73
-
74
- # 番号が連続していない場合、新しいグループを作成
75
- if previous_index is not None and file_index != previous_index + 1:
76
- # 現在のグループを結合して保存
77
- if current_group:
78
- merged_files.append(current_group)
79
- current_group = []
80
-
81
- # 現在のファイルをグループに追加
82
- current_group.append(file)
83
- previous_index = file_index
84
 
85
- # 最後のグループを追加
86
- if current_group:
87
- merged_files.append(current_group)
88
-
89
- # グループごとに結合して保存
90
- for i, group in enumerate(merged_files):
91
- combined_audio = AudioSegment.empty()
92
- for file in group:
93
- file_path = os.path.join(segments_dir, file)
94
- segment = AudioSegment.from_file(file_path)
95
- combined_audio += segment
96
- # 出力ファイル名を設定して保存
97
- output_file = os.path.join(output_dir, self.generate_filename(3))
98
- combined_audio.export(output_file, format='wav')
99
 
 
100
  return output_dir
101
-
102
- def generate_random_string(self,length):
103
- letters = string.ascii_letters + string.digits
104
- return ''.join(random.choice(letters) for i in range(length))
105
 
106
- def generate_filename(self,random_length):
107
  current_time = datetime.now().strftime("%Y%m%d%H%M%S")
108
  filename = f"{current_time}.wav"
109
  return filename
 
19
  self.output_dir = output_dir
20
  os.makedirs(self.output_dir, exist_ok=True)
21
 
 
22
  #音声ファイルのディレクトリを受け取り、書き起こしファイルを作成する
23
  def create_transcription(self,audio_directory):
24
  results = []
 
44
  "end": segment.end,
45
  "text": segment.text
46
  })
47
+
48
  #ファイルの書き込み。ファイル名は"transcription.txt"
49
  output_file=os.path.join(self.output_dir,"transcription.txt")
50
  try:
 
56
  raise
57
  return output_file
58
 
59
+ #ディレクトリ内の音声ファイルをくっつける
60
+ def merge_segments(self, segments_dir, output_dir="/tmp/data/merged_segment"):
61
  if not os.path.exists(output_dir):
62
  os.makedirs(output_dir, exist_ok=True)
63
 
64
  files = sorted([f for f in os.listdir(segments_dir) if f.endswith('.wav')])
65
 
66
+ if len(files) <= 1:
67
+ print('No need to merge')
68
+ return output_dir
69
+
70
+ combined_audio = AudioSegment.empty()
71
 
72
  for file in files:
73
+ file_path = os.path.join(segments_dir, file)
74
+ segment = AudioSegment.from_file(file_path)
75
+ combined_audio += segment
 
 
 
 
 
 
 
 
 
 
76
 
77
+ output_file = os.path.join(output_dir, self.generate_filename())
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ combined_audio.export(output_file, format="wav")
80
  return output_dir
 
 
 
 
81
 
82
+ def generate_filename(self):
83
  current_time = datetime.now().strftime("%Y%m%d%H%M%S")
84
  filename = f"{current_time}.wav"
85
  return filename