Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Files Community

Hammad712 committed on Mar 16

Commit

1dbeaf5

verified ·

1 Parent(s): 94ba3d3

Update main.py

Browse files

Files changed (1) hide show

main.py +91 -12

main.py CHANGED Viewed

@@ -1,16 +1,15 @@
-from fastapi import FastAPI, HTTPException, UploadFile, File, Form
 from pydantic import BaseModel
-from typing import Optional
 import torch
 import librosa
 import numpy as np
 import os
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
-from librosa.sequence import dtw
 import tempfile
 import shutil
 from dotenv import load_dotenv
 import uvicorn
 # Load environment variables
 load_dotenv()
@@ -22,16 +21,73 @@ class ComparisonResult(BaseModel):
     similarity_score: float
     interpretation: str
 class QuranRecitationComparer:
     def __init__(self, model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic", token=None):
         """Initialize the Quran recitation comparer with a specific Wav2Vec2 model."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Load model and processor once during initialization
         if token:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token)
         else:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
@@ -40,18 +96,21 @@ class QuranRecitationComparer:
         # Cache for embeddings to avoid recomputation
         self.embedding_cache = {}
     def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
         """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
         y, sr = librosa.load(file_path, sr=target_sr)
         if normalize:
             y = librosa.util.normalize(y)
         if trim_silence:
             y, _ = librosa.effects.trim(y, top_db=30)
         return y
@@ -74,7 +133,7 @@ class QuranRecitationComparer:
     def compute_dtw_distance(self, features1, features2):
         """Compute the DTW distance between two sequences of features."""
-        D, wp = dtw(X=features1, Y=features2, metric='euclidean')
         distance = D[-1, -1]
         normalized_distance = distance / len(wp)
         return normalized_distance
@@ -105,13 +164,16 @@ class QuranRecitationComparer:
     def get_embedding_for_file(self, file_path):
         """Get embedding for a file, using cache if available."""
         if file_path in self.embedding_cache:
             return self.embedding_cache[file_path]
         audio = self.load_audio(file_path)
         embedding = self.get_deep_embedding(audio)
         # Store in cache for future use
         self.embedding_cache[file_path] = embedding
         return embedding
@@ -128,21 +190,26 @@ class QuranRecitationComparer:
             float: Similarity score
             str: Interpretation of similarity
         """
         # Get embeddings (using cache if available)
         embedding1 = self.get_embedding_for_file(file_path1)
         embedding2 = self.get_embedding_for_file(file_path2)
         # Compute DTW distance
         norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
         # Interpret results
         interpretation, similarity_score = self.interpret_similarity(norm_distance)
         return similarity_score, interpretation
     def clear_cache(self):
         """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
 # Global variable for the comparer instance
 comparer = None
@@ -152,11 +219,15 @@ async def startup_event():
     """Initialize the model when the application starts."""
     global comparer
     print("Initializing model... This may take a moment.")
-    comparer = QuranRecitationComparer(
-        model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
-        token=HF_TOKEN
-    )
-    print("Model initialized and ready for predictions!")
 @app.get("/")
 async def root():
@@ -179,7 +250,9 @@ async def compare_files(
     if not comparer:
         raise HTTPException(status_code=500, detail="Model not initialized. Please try again later.")
     temp_dir = tempfile.mkdtemp()
     try:
         # Save uploaded files to temporary directory
@@ -187,10 +260,14 @@ async def compare_files(
         temp_file2 = os.path.join(temp_dir, file2.filename)
         with open(temp_file1, "wb") as f:
-            shutil.copyfileobj(file1.file, f)
         with open(temp_file2, "wb") as f:
-            shutil.copyfileobj(file2.file, f)
         # Compare the files
         similarity_score, interpretation = comparer.predict(temp_file1, temp_file2)
@@ -201,10 +278,12 @@ async def compare_files(
         )
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error processing files: {str(e)}")
     finally:
         # Clean up temporary files
         shutil.rmtree(temp_dir, ignore_errors=True)
 @app.post("/clear-cache")
@@ -217,4 +296,4 @@ async def clear_cache():
     return {"message": "Embedding cache cleared successfully"}
 if __name__ == "__main__":
-    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)

+from fastapi import FastAPI, HTTPException, UploadFile, File
 from pydantic import BaseModel
 import torch
 import librosa
 import numpy as np
 import os
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import tempfile
 import shutil
 from dotenv import load_dotenv
 import uvicorn
+import scipy.spatial.distance as distance
 # Load environment variables
 load_dotenv()
     similarity_score: float
     interpretation: str
+# Custom implementation of DTW to replace librosa.sequence.dtw
def custom_dtw(X, Y, metric='euclidean'):
    """
    Custom Dynamic Time Warping implementation.

    Aligns two sequences using the classic step pattern (diagonal match,
    step in X only, step in Y only) and returns the accumulated-cost matrix
    together with the optimal warping path.

    NOTE(review): rows are treated as frames here, i.e. inputs are
    (frames, features). librosa.sequence.dtw, which this function replaces,
    documents the opposite (features, frames) layout -- confirm that callers
    pass the orientation expected by this function.

    Args:
        X: First sequence, array-like of shape (n, d); X[i] is one frame.
           A 1-D input is treated as n scalar frames.
        Y: Second sequence, array-like of shape (m, d).
        metric: Distance metric ('euclidean' or 'cosine')
    Returns:
        D: Accumulated cost matrix of shape (n + 1, m + 1). Row 0 and
           column 0 are padding; D[-1, -1] is the total alignment cost.
        wp: Warping path, a list of (i, j) tuples running from (1, 1) to
            (n, m). The indices address D, so step (i, j) aligns frame
            X[i-1] with frame Y[j-1].
    Raises:
        ValueError: If metric is not supported, if either sequence is
            empty, or if the frames of X and Y differ in dimensionality.
    """
    if metric not in ('euclidean', 'cosine'):
        raise ValueError(
            f"Unsupported metric {metric!r}; expected 'euclidean' or 'cosine'"
        )

    # Get sequence lengths
    n, m = len(X), len(Y)
    if n == 0 or m == 0:
        raise ValueError("custom_dtw requires two non-empty sequences")

    # One row per frame; scalar frames become rows of length 1.
    X = np.asarray(X, dtype=float).reshape(n, -1)
    Y = np.asarray(Y, dtype=float).reshape(m, -1)

    # Pairwise frame distances computed in one vectorized call. 'euclidean'
    # is the true (square-rooted) distance, matching the librosa routine
    # this function replaces.
    cost = distance.cdist(X, Y, metric=metric)

    # Initialize cost matrix: the padded border is infinite so that every
    # path is forced to start at the (1, 1) cell.
    D = np.full((n + 1, m + 1), np.inf)
    D[0, 0] = 0.0

    # Fill cost matrix
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            D[i, j] = cost[i - 1, j - 1] + min(
                D[i - 1, j], D[i, j - 1], D[i - 1, j - 1]
            )

    # Backtracking: walk from (n, m) back to (1, 1). The padding cell
    # (0, 0) is deliberately excluded so len(wp) equals the number of
    # aligned frame pairs.
    i, j = n, m
    wp = [(i, j)]
    while i > 1 or j > 1:
        if i == 1:
            j -= 1
        elif j == 1:
            i -= 1
        else:
            # Ties resolve in favour of the diagonal, then the step in X.
            step = np.argmin([D[i - 1, j - 1], D[i - 1, j], D[i, j - 1]])
            if step == 0:
                i -= 1
                j -= 1
            elif step == 1:
                i -= 1
            else:
                j -= 1
        wp.append((i, j))
    wp.reverse()
    return D, wp
 class QuranRecitationComparer:
     def __init__(self, model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic", token=None):
         """Initialize the Quran recitation comparer with a specific Wav2Vec2 model."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Using device: {self.device}")
         # Load model and processor once during initialization
         if token:
+            print(f"Loading model {model_name} with token...")
             self.processor = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=token)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token)
         else:
+            print(f"Loading model {model_name} without token...")
             self.processor = Wav2Vec2Processor.from_pretrained(model_name)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
         # Cache for embeddings to avoid recomputation
         self.embedding_cache = {}
+        print("Model loaded successfully!")
     def load_audio(self, file_path, target_sr=16000, trim_silence=True, normalize=True):
         """Load and preprocess an audio file."""
         if not os.path.exists(file_path):
             raise FileNotFoundError(f"Audio file not found: {file_path}")
+        print(f"Loading audio: {file_path}")
         y, sr = librosa.load(file_path, sr=target_sr)
         if normalize:
             y = librosa.util.normalize(y)
         if trim_silence:
+            # Use librosa.effects.trim which should be available in most versions
             y, _ = librosa.effects.trim(y, top_db=30)
         return y
     def compute_dtw_distance(self, features1, features2):
         """Compute the DTW distance between two sequences of features."""
+        D, wp = custom_dtw(X=features1, Y=features2, metric='euclidean')
         distance = D[-1, -1]
         normalized_distance = distance / len(wp)
         return normalized_distance
     def get_embedding_for_file(self, file_path):
         """Get embedding for a file, using cache if available."""
         if file_path in self.embedding_cache:
+            print(f"Using cached embedding for {file_path}")
             return self.embedding_cache[file_path]
+        print(f"Computing new embedding for {file_path}")
         audio = self.load_audio(file_path)
         embedding = self.get_deep_embedding(audio)
         # Store in cache for future use
         self.embedding_cache[file_path] = embedding
+        print(f"Embedding shape: {embedding.shape}")
         return embedding
             float: Similarity score
             str: Interpretation of similarity
         """
+        print(f"Comparing {file_path1} and {file_path2}")
         # Get embeddings (using cache if available)
         embedding1 = self.get_embedding_for_file(file_path1)
         embedding2 = self.get_embedding_for_file(file_path2)
         # Compute DTW distance
+        print("Computing DTW distance...")
         norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
+        print(f"Normalized distance: {norm_distance}")
         # Interpret results
         interpretation, similarity_score = self.interpret_similarity(norm_distance)
+        print(f"Similarity score: {similarity_score}, Interpretation: {interpretation}")
         return similarity_score, interpretation
     def clear_cache(self):
         """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
+        print("Embedding cache cleared")
 # Global variable for the comparer instance
 comparer = None
     """Initialize the model when the application starts."""
     global comparer
     print("Initializing model... This may take a moment.")
+    try:
+        comparer = QuranRecitationComparer(
+            model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic",
+            token=HF_TOKEN
+        )
+        print("Model initialized and ready for predictions!")
+    except Exception as e:
+        print(f"Error initializing model: {str(e)}")
+        raise
 @app.get("/")
 async def root():
     if not comparer:
         raise HTTPException(status_code=500, detail="Model not initialized. Please try again later.")
+    print(f"Received files: {file1.filename} and {file2.filename}")
     temp_dir = tempfile.mkdtemp()
+    print(f"Created temporary directory: {temp_dir}")
     try:
         # Save uploaded files to temporary directory
         temp_file2 = os.path.join(temp_dir, file2.filename)
         with open(temp_file1, "wb") as f:
+            content = await file1.read()
+            f.write(content)
         with open(temp_file2, "wb") as f:
+            content = await file2.read()
+            f.write(content)
+        print(f"Files saved to: {temp_file1} and {temp_file2}")
         # Compare the files
         similarity_score, interpretation = comparer.predict(temp_file1, temp_file2)
         )
     except Exception as e:
+        print(f"Error processing files: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Error processing files: {str(e)}")
     finally:
         # Clean up temporary files
+        print(f"Cleaning up temporary directory: {temp_dir}")
         shutil.rmtree(temp_dir, ignore_errors=True)
 @app.post("/clear-cache")
     return {"message": "Embedding cache cleared successfully"}
 if __name__ == "__main__":
+    uvicorn.run("main:app", host="0.0.0.0", port=7860, log_level="info")