Spaces:

Hammad712
/

recitation-compare

Sleeping

App Files Files Community

Hammad712 committed on Mar 16

Commit

94ba3d3

verified ·

1 Parent(s): 1d61cef

Update main.py

Browse files

Files changed (1) hide show

main.py +91 -54

main.py CHANGED Viewed

@@ -1,24 +1,36 @@
-import os
 import torch
 import librosa
 import numpy as np
-import tempfile
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 from librosa.sequence import dtw
-from fastapi import FastAPI, UploadFile, File, HTTPException
-from fastapi.responses import JSONResponse
 import shutil
-# Define the QuranRecitationComparer class as provided
 class QuranRecitationComparer:
-    def __init__(self, model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic", auth_token=None):
         """Initialize the Quran recitation comparer with a specific Wav2Vec2 model."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Load model and processor once during initialization
-        if auth_token:
-            self.processor = Wav2Vec2Processor.from_pretrained(model_name, token=auth_token)
-            self.model = Wav2Vec2ForCTC.from_pretrained(model_name, token=auth_token)
         else:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
@@ -107,77 +119,102 @@ class QuranRecitationComparer:
         """
         Predict the similarity between two audio files.
         This method can be called repeatedly without reloading the model.
         """
         # Get embeddings (using cache if available)
         embedding1 = self.get_embedding_for_file(file_path1)
         embedding2 = self.get_embedding_for_file(file_path2)
-        # Compute DTW distance (transposing so that each column represents a frame)
         norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
         # Interpret results
         interpretation, similarity_score = self.interpret_similarity(norm_distance)
-        print(f"Similarity Score: {similarity_score:.1f}/100")
-        print(f"Interpretation: {interpretation}")
         return similarity_score, interpretation
     def clear_cache(self):
         """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
-# Create FastAPI application
-app = FastAPI(
-    title="Quran Recitation Comparison API",
-    description="API for comparing similarity between Quran recitations",
-    version="1.0.0"
-)
-# Global instance of the comparer
 comparer = None
 @app.on_event("startup")
 async def startup_event():
     global comparer
-    # Optionally, set the HF authentication token from an environment variable
-    auth_token = os.getenv("HF_TOKEN", None)
-    comparer = QuranRecitationComparer(auth_token=auth_token)
-    print("Model initialized and ready for predictions.")
-# Root endpoint
 @app.get("/")
 async def root():
-    return {"message": "Welcome to the Quran Recitation Comparison API"}
-# Compare endpoint that accepts two audio files
-@app.post("/compare")
-async def compare_recitations(file1: UploadFile = File(...), file2: UploadFile = File(...)):
-    if comparer is None:
-        raise HTTPException(status_code=503, detail="Model not initialized")
     try:
-        # Save the uploaded files to temporary files
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp1:
-            tmp1.write(await file1.read())
-            file_path1 = tmp1.name
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp2:
-            tmp2.write(await file2.read())
-            file_path2 = tmp2.name
-        # Use the comparer to predict similarity
-        similarity_score, interpretation = comparer.predict(file_path1, file_path2)
         # Clean up temporary files
-        os.remove(file_path1)
-        os.remove(file_path2)
-        return {"similarity_score": similarity_score, "interpretation": interpretation}
-    except Exception as e:
-        raise HTTPException(status_code=400, detail=str(e))
-# Run the application with uvicorn if this module is executed directly.
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run("main:app", host="0.0.0.0", port=7860, reload=False)

+from fastapi import FastAPI, HTTPException, UploadFile, File, Form
+from pydantic import BaseModel
+from typing import Optional
 import torch
 import librosa
 import numpy as np
+import os
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 from librosa.sequence import dtw
+import tempfile
 import shutil
+from dotenv import load_dotenv
+import uvicorn
+# Load environment variables
+load_dotenv()
+HF_TOKEN = os.getenv("HF_TOKEN")  # None when the variable is unset; handed to the comparer as `token` at startup
+app = FastAPI(title="Quran Recitation Comparer API")  # application object the route decorators below attach to
+class ComparisonResult(BaseModel):  # response schema declared as response_model on POST /compare
+    similarity_score: float  # first value returned by comparer.predict
+    interpretation: str  # second value returned by comparer.predict
 class QuranRecitationComparer:
+    def __init__(self, model_name="jonatasgrosman/wav2vec2-large-xlsr-53-arabic", token=None):
         """Initialize the Quran recitation comparer with a specific Wav2Vec2 model."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Load model and processor once during initialization
+        if token:
+            self.processor = Wav2Vec2Processor.from_pretrained(model_name, use_auth_token=token)
+            self.model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token)
         else:
             self.processor = Wav2Vec2Processor.from_pretrained(model_name)
             self.model = Wav2Vec2ForCTC.from_pretrained(model_name)
         """
         Predict the similarity between two audio files.
         This method can be called repeatedly without reloading the model.
+        Args:
+            file_path1 (str): Path to first audio file
+            file_path2 (str): Path to second audio file
+        Returns:
+            float: Similarity score
+            str: Interpretation of similarity
         """
         # Get embeddings (using cache if available)
         embedding1 = self.get_embedding_for_file(file_path1)
         embedding2 = self.get_embedding_for_file(file_path2)
+        # Compute DTW distance
         norm_distance = self.compute_dtw_distance(embedding1.T, embedding2.T)
         # Interpret results
         interpretation, similarity_score = self.interpret_similarity(norm_distance)
         return similarity_score, interpretation
     def clear_cache(self):
         """Clear the embedding cache to free memory."""
         self.embedding_cache = {}
+# Global variable for the comparer instance
 comparer = None
 @app.on_event("startup")
 async def startup_event():
+    """Build the shared comparer once, when the application starts up."""
     global comparer
+    print("Initializing model... This may take a moment.")
+    # Name the checkpoint separately so the constructor call stays on one line.
+    arabic_checkpoint = "jonatasgrosman/wav2vec2-large-xlsr-53-arabic"
+    comparer = QuranRecitationComparer(model_name=arabic_checkpoint, token=HF_TOKEN)
+    print("Model initialized and ready for predictions!")
 @app.get("/")
 async def root():
+    """Root endpoint to check if the API is running."""
+    # Docstring kept verbatim: FastAPI publishes it as the route description.
+    liveness_payload = {
+        "message": "Quran Recitation Comparer API is running",
+        "status": "active",
+    }
+    return liveness_payload
+@app.post("/compare", response_model=ComparisonResult)
+async def compare_files(
+    file1: UploadFile = File(...),
+    file2: UploadFile = File(...)
+):
+    """
+    Compare two audio files and return similarity metrics.
+    - **file1**: First audio file (MP3, WAV, etc.)
+    - **file2**: Second audio file (MP3, WAV, etc.)
+    Returns similarity score and interpretation.
+    """
+    # Responds with HTTP 500 both when the startup hook has not populated
+    # `comparer` yet and when saving or comparing the uploads fails.
+    if not comparer:
+        raise HTTPException(status_code=500, detail="Model not initialized. Please try again later.")
+    # Per-request scratch directory; always removed in the `finally` below.
+    temp_dir = tempfile.mkdtemp()
     try:
+        # Save uploaded files to temporary directory
+        # The client-supplied filename is untrusted. Keep only its final path
+        # component so a name like "../x" cannot land outside temp_dir, fall
+        # back to a fixed name when none was sent, and prefix each upload with
+        # its slot number so two uploads sharing a name do not overwrite each
+        # other (which would silently compare a file with itself). The
+        # original extension is preserved at the end of the name.
+        safe_name1 = os.path.basename(file1.filename or "") or "upload"
+        safe_name2 = os.path.basename(file2.filename or "") or "upload"
+        temp_file1 = os.path.join(temp_dir, f"1_{safe_name1}")
+        temp_file2 = os.path.join(temp_dir, f"2_{safe_name2}")
+        with open(temp_file1, "wb") as f:
+            shutil.copyfileobj(file1.file, f)
+        with open(temp_file2, "wb") as f:
+            shutil.copyfileobj(file2.file, f)
+        # Compare the files
+        similarity_score, interpretation = comparer.predict(temp_file1, temp_file2)
+        return ComparisonResult(
+            similarity_score=similarity_score,
+            interpretation=interpretation
+        )
+    except Exception as e:
+        # Chain the cause so the server-side traceback keeps the real failure.
+        raise HTTPException(status_code=500, detail=f"Error processing files: {str(e)}") from e
+    finally:
         # Clean up temporary files
+        shutil.rmtree(temp_dir, ignore_errors=True)
+@app.post("/clear-cache")
+async def clear_cache():
+    """Clear the embedding cache to free memory."""
+    # Docstring kept verbatim: FastAPI publishes it as the route description.
+    # Happy path first; the error is raised only when no comparer exists yet.
+    if comparer:
+        comparer.clear_cache()
+        return {"message": "Embedding cache cleared successfully"}
+    raise HTTPException(status_code=500, detail="Model not initialized.")
 if __name__ == "__main__":
+    # The import string must name this module. The file is main.py, so it is
+    # "main:app"; "app:app" would make uvicorn look for a module named `app`
+    # and fail to start. With reload=True uvicorn requires the application as
+    # an import string rather than an object.
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)