Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	
		Rivalcoder
		
	commited on
		
		
					Commit 
							
							·
						
						ec96972
	
1
								Parent(s):
							
							9715d9d
								
Add application file
Browse files- .dockerignore +26 -0
- .gitignore +61 -0
- Dockerfile +23 -0
- HUGGINGFACE_DEPLOYMENT.md +112 -0
- README_HF.md +112 -0
- app.py +150 -0
- embedder.py +12 -0
- llm.py +69 -0
- main.py +151 -0
- parser.py +27 -0
- requirements.txt +10 -0
- retriever.py +9 -0
- test_deployment.py +75 -0
    	
        .dockerignore
    ADDED
    
    | @@ -0,0 +1,26 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            .git
         | 
| 2 | 
            +
            .gitignore
         | 
| 3 | 
            +
            README.md
         | 
| 4 | 
            +
            DEPLOYMENT.md
         | 
| 5 | 
            +
            render.yaml
         | 
| 6 | 
            +
            start.sh
         | 
| 7 | 
            +
            __pycache__
         | 
| 8 | 
            +
            *.pyc
         | 
| 9 | 
            +
            *.pyo
         | 
| 10 | 
            +
            *.pyd
         | 
| 11 | 
            +
            .Python
         | 
| 12 | 
            +
            env
         | 
| 13 | 
            +
            pip-log.txt
         | 
| 14 | 
            +
            pip-delete-this-directory.txt
         | 
| 15 | 
            +
            .tox
         | 
| 16 | 
            +
            .coverage
         | 
| 17 | 
            +
            .coverage.*
         | 
| 18 | 
            +
            .cache
         | 
| 19 | 
            +
            nosetests.xml
         | 
| 20 | 
            +
            coverage.xml
         | 
| 21 | 
            +
            *.cover
         | 
| 22 | 
            +
            *.log
         | 
| 23 | 
            +
            .git
         | 
| 24 | 
            +
            .mypy_cache
         | 
| 25 | 
            +
            .pytest_cache
         | 
| 26 | 
            +
            .hypothesis 
         | 
    	
        .gitignore
    ADDED
    
    | @@ -0,0 +1,61 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Environment variables
         | 
| 2 | 
            +
            .env
         | 
| 3 | 
            +
            .env.local
         | 
| 4 | 
            +
            .env.production
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Python
         | 
| 7 | 
            +
            __pycache__/
         | 
| 8 | 
            +
            *.py[cod]
         | 
| 9 | 
            +
            *$py.class
         | 
| 10 | 
            +
            *.so
         | 
| 11 | 
            +
            .Python
         | 
| 12 | 
            +
            build/
         | 
| 13 | 
            +
            develop-eggs/
         | 
| 14 | 
            +
            dist/
         | 
| 15 | 
            +
            downloads/
         | 
| 16 | 
            +
            eggs/
         | 
| 17 | 
            +
            .eggs/
         | 
| 18 | 
            +
            lib/
         | 
| 19 | 
            +
            lib64/
         | 
| 20 | 
            +
            parts/
         | 
| 21 | 
            +
            sdist/
         | 
| 22 | 
            +
            var/
         | 
| 23 | 
            +
            wheels/
         | 
| 24 | 
            +
            *.egg-info/
         | 
| 25 | 
            +
            .installed.cfg
         | 
| 26 | 
            +
            *.egg
         | 
| 27 | 
            +
            MANIFEST
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            # Virtual environments
         | 
| 30 | 
            +
            venv/
         | 
| 31 | 
            +
            env/
         | 
| 32 | 
            +
            ENV/
         | 
| 33 | 
            +
            env.bak/
         | 
| 34 | 
            +
            venv.bak/
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            # IDE
         | 
| 37 | 
            +
            .vscode/
         | 
| 38 | 
            +
            .idea/
         | 
| 39 | 
            +
            *.swp
         | 
| 40 | 
            +
            *.swo
         | 
| 41 | 
            +
            *~
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            # OS
         | 
| 44 | 
            +
            .DS_Store
         | 
| 45 | 
            +
            Thumbs.db
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            # Logs
         | 
| 48 | 
            +
            *.log
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            # Temporary files
         | 
| 51 | 
            +
            *.tmp
         | 
| 52 | 
            +
            *.temp
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            # FAISS index files
         | 
| 55 | 
            +
            *.index
         | 
| 56 | 
            +
            *.faiss
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            # PDF files (if you don't want to commit them)
         | 
| 59 | 
            +
            *.pdf 
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            DEPLOYMENT.md
         | 
    	
        Dockerfile
    ADDED
    
    | @@ -0,0 +1,23 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            FROM python:3.9-slim
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            WORKDIR /app
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Install system dependencies
         | 
| 6 | 
            +
            RUN apt-get update && apt-get install -y \
         | 
| 7 | 
            +
                build-essential \
         | 
| 8 | 
            +
                && rm -rf /var/lib/apt/lists/*
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            # Copy requirements first for better caching
         | 
| 11 | 
            +
            COPY requirements.txt .
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # Install Python dependencies
         | 
| 14 | 
            +
            RUN pip install --no-cache-dir -r requirements.txt
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            # Copy application code
         | 
| 17 | 
            +
            COPY . .
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            # Expose port
         | 
| 20 | 
            +
            EXPOSE 7860
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            # Run the application
         | 
| 23 | 
            +
            CMD ["python", "app.py"] 
         | 
    	
        HUGGINGFACE_DEPLOYMENT.md
    ADDED
    
    | @@ -0,0 +1,112 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Hugging Face Spaces Deployment Guide
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            This guide will help you deploy your HackRx Insurance Policy Assistant to Hugging Face Spaces.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ## Prerequisites
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            1. A Hugging Face account (free at https://huggingface.co)
         | 
| 8 | 
            +
            2. A Google Gemini API key
         | 
| 9 | 
            +
            3. Your code pushed to a Git repository (GitHub, GitLab, etc.)
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            ## Step 1: Prepare Your Repository
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            Your repository should contain the following files:
         | 
| 14 | 
            +
            - `app.py` - Main application entry point
         | 
| 15 | 
            +
            - `Dockerfile` - Docker configuration
         | 
| 16 | 
            +
            - `requirements.txt` - Python dependencies
         | 
| 17 | 
            +
            - `parser.py`, `embedder.py`, `retriever.py`, `llm.py` - Application modules
         | 
| 18 | 
            +
            - `.dockerignore` - Docker build optimization
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            ## Step 2: Create a Hugging Face Space
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            1. Go to https://huggingface.co/spaces
         | 
| 23 | 
            +
            2. Click "Create new Space"
         | 
| 24 | 
            +
            3. Choose the following settings:
         | 
| 25 | 
            +
               - **Owner**: Your username
         | 
| 26 | 
            +
               - **Space name**: `hackrx-insurance-assistant` (or your preferred name)
         | 
| 27 | 
            +
               - **Space SDK**: `Docker`
         | 
| 28 | 
            +
               - **License**: Choose appropriate license
         | 
| 29 | 
            +
               - **Visibility**: Public or Private (your choice)
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            ## Step 3: Connect Your Repository
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            1. In your new Space, go to the "Settings" tab
         | 
| 34 | 
            +
            2. Under "Repository", click "Connect to existing repository"
         | 
| 35 | 
            +
            3. Select your Git provider (GitHub, GitLab, etc.)
         | 
| 36 | 
            +
            4. Choose your repository
         | 
| 37 | 
            +
            5. Click "Connect"
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            ## Step 4: Configure Environment Variables
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            1. In your Space settings, go to the "Repository secrets" section
         | 
| 42 | 
            +
            2. Add the following secret:
         | 
| 43 | 
            +
               - **Name**: `GOOGLE_API_KEY`
         | 
| 44 | 
            +
               - **Value**: Your Google Gemini API key
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            ## Step 5: Deploy
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            1. Push your code to your Git repository
         | 
| 49 | 
            +
            2. Hugging Face Spaces will automatically detect the changes and start building
         | 
| 50 | 
            +
            3. You can monitor the build progress in the "Logs" tab
         | 
| 51 | 
            +
            4. Once built successfully, your API will be available at `https://your-space-name.hf.space`
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            ## Step 6: Test Your Deployment
         | 
| 54 | 
            +
             | 
| 55 | 
            +
            ### Health Check
         | 
| 56 | 
            +
            ```bash
         | 
| 57 | 
            +
            curl https://your-space-name.hf.space/
         | 
| 58 | 
            +
            ```
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            ### Test API Endpoint
         | 
| 61 | 
            +
            ```bash
         | 
| 62 | 
            +
            curl -X POST https://your-space-name.hf.space/api/v1/hackrx/run \
         | 
| 63 | 
            +
              -H "Content-Type: application/json" \
         | 
| 64 | 
            +
              -H "Authorization: Bearer your_token_here" \
         | 
| 65 | 
            +
              -d '{
         | 
| 66 | 
            +
                "documents": "https://example.com/insurance-policy.pdf",
         | 
| 67 | 
            +
                "questions": ["What is the coverage amount?"]
         | 
| 68 | 
            +
              }'
         | 
| 69 | 
            +
            ```
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            ## Troubleshooting
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            ### Common Issues
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            1. **Build Fails**: Check the logs in the "Logs" tab for error messages
         | 
| 76 | 
            +
            2. **Environment Variable Not Set**: Ensure `GOOGLE_API_KEY` is set in Space secrets
         | 
| 77 | 
            +
            3. **Port Issues**: The application runs on port 7860 (default for Hugging Face Spaces)
         | 
| 78 | 
            +
            4. **Memory Issues**: If you encounter memory issues, consider optimizing the Dockerfile
         | 
| 79 | 
            +
             | 
| 80 | 
            +
            ### Debugging
         | 
| 81 | 
            +
             | 
| 82 | 
            +
            1. Check the build logs in the "Logs" tab
         | 
| 83 | 
            +
            2. Monitor the application logs for runtime errors
         | 
| 84 | 
            +
            3. Test locally first to ensure everything works
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            ## API Documentation
         | 
| 87 | 
            +
             | 
| 88 | 
            +
            Once deployed, your API will have the following endpoints:
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            - `GET /` - Health check
         | 
| 91 | 
            +
            - `GET /health` - API status
         | 
| 92 | 
            +
            - `POST /api/v1/hackrx/run` - Process PDF from URL
         | 
| 93 | 
            +
            - `POST /api/v1/hackrx/local` - Process local PDF file
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            ## Cost Considerations
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            - Hugging Face Spaces offers free hosting for public spaces
         | 
| 98 | 
            +
            - Private spaces may have usage limits
         | 
| 99 | 
            +
            - Consider the cost of Google Gemini API calls
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            ## Security Notes
         | 
| 102 | 
            +
             | 
| 103 | 
            +
            - Keep your API keys secure
         | 
| 104 | 
            +
            - Use appropriate authentication for production use
         | 
| 105 | 
            +
            - Consider rate limiting for public APIs
         | 
| 106 | 
            +
             | 
| 107 | 
            +
            ## Updates
         | 
| 108 | 
            +
             | 
| 109 | 
            +
            To update your deployment:
         | 
| 110 | 
            +
            1. Push changes to your Git repository
         | 
| 111 | 
            +
            2. Hugging Face Spaces will automatically rebuild and deploy
         | 
| 112 | 
            +
            3. Monitor the build process in the "Logs" tab 
         | 
    	
        README_HF.md
    ADDED
    
    | @@ -0,0 +1,112 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # HackRx Insurance Policy Assistant
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            A FastAPI application that processes PDF documents and answers questions using AI, deployed on Hugging Face Spaces.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            ## Features
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            - PDF document parsing and text extraction
         | 
| 8 | 
            +
            - Vector-based document search using FAISS
         | 
| 9 | 
            +
            - AI-powered question answering using Google Gemini
         | 
| 10 | 
            +
            - RESTful API endpoints for document processing
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            ## API Endpoints
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            ### Health Check
         | 
| 15 | 
            +
            - `GET /` - Root endpoint
         | 
| 16 | 
            +
            - `GET /health` - API status check
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            ### Process PDF from URL
         | 
| 19 | 
            +
            - `POST /api/v1/hackrx/run`
         | 
| 20 | 
            +
            - **Headers**: `Authorization: Bearer <your_token>`
         | 
| 21 | 
            +
            - **Body**:
         | 
| 22 | 
            +
            ```json
         | 
| 23 | 
            +
            {
         | 
| 24 | 
            +
              "documents": "https://example.com/document.pdf",
         | 
| 25 | 
            +
              "questions": ["What is the coverage amount?", "What are the exclusions?"]
         | 
| 26 | 
            +
            }
         | 
| 27 | 
            +
            ```
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            ### Process Local PDF File
         | 
| 30 | 
            +
            - `POST /api/v1/hackrx/local`
         | 
| 31 | 
            +
            - **Body**:
         | 
| 32 | 
            +
            ```json
         | 
| 33 | 
            +
            {
         | 
| 34 | 
            +
              "document_path": "/app/files/document.pdf",
         | 
| 35 | 
            +
              "questions": ["What is the coverage amount?", "What are the exclusions?"]
         | 
| 36 | 
            +
            }
         | 
| 37 | 
            +
            ```
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            ## Environment Variables
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            Set these in your Hugging Face Space settings:
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            - `GOOGLE_API_KEY` - Your Google Gemini API key
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            ## Usage Examples
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            ### Using curl
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            ```bash
         | 
| 50 | 
            +
            # Health check
         | 
| 51 | 
            +
            curl https://your-space-name.hf.space/
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            # Process PDF from URL
         | 
| 54 | 
            +
            curl -X POST https://your-space-name.hf.space/api/v1/hackrx/run \
         | 
| 55 | 
            +
              -H "Content-Type: application/json" \
         | 
| 56 | 
            +
              -H "Authorization: Bearer your_token_here" \
         | 
| 57 | 
            +
              -d '{
         | 
| 58 | 
            +
                "documents": "https://example.com/insurance-policy.pdf",
         | 
| 59 | 
            +
                "questions": ["What is the coverage amount?", "What are the exclusions?"]
         | 
| 60 | 
            +
              }'
         | 
| 61 | 
            +
            ```
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            ### Using Python
         | 
| 64 | 
            +
             | 
| 65 | 
            +
            ```python
         | 
| 66 | 
            +
            import requests
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            # Health check
         | 
| 69 | 
            +
            response = requests.get("https://your-space-name.hf.space/")
         | 
| 70 | 
            +
            print(response.json())
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            # Process PDF
         | 
| 73 | 
            +
            url = "https://your-space-name.hf.space/api/v1/hackrx/run"
         | 
| 74 | 
            +
            headers = {
         | 
| 75 | 
            +
                "Content-Type": "application/json",
         | 
| 76 | 
            +
                "Authorization": "Bearer your_token_here"
         | 
| 77 | 
            +
            }
         | 
| 78 | 
            +
            data = {
         | 
| 79 | 
            +
                "documents": "https://example.com/insurance-policy.pdf",
         | 
| 80 | 
            +
                "questions": ["What is the coverage amount?", "What are the exclusions?"]
         | 
| 81 | 
            +
            }
         | 
| 82 | 
            +
             | 
| 83 | 
            +
            response = requests.post(url, headers=headers, json=data)
         | 
| 84 | 
            +
            print(response.json())
         | 
| 85 | 
            +
            ```
         | 
| 86 | 
            +
             | 
| 87 | 
            +
            ## Local Development
         | 
| 88 | 
            +
             | 
| 89 | 
            +
            To run the application locally:
         | 
| 90 | 
            +
             | 
| 91 | 
            +
            ```bash
         | 
| 92 | 
            +
            pip install -r requirements.txt
         | 
| 93 | 
            +
            python app.py
         | 
| 94 | 
            +
            ```
         | 
| 95 | 
            +
             | 
| 96 | 
            +
            The API will be available at `http://localhost:7860`
         | 
| 97 | 
            +
             | 
| 98 | 
            +
            ## Deployment
         | 
| 99 | 
            +
             | 
| 100 | 
            +
            This application is configured for deployment on Hugging Face Spaces using Docker. The following files are included:
         | 
| 101 | 
            +
             | 
| 102 | 
            +
            - `app.py` - Main application entry point
         | 
| 103 | 
            +
            - `Dockerfile` - Docker configuration
         | 
| 104 | 
            +
            - `.dockerignore` - Docker build optimization
         | 
| 105 | 
            +
            - `requirements.txt` - Python dependencies
         | 
| 106 | 
            +
             | 
| 107 | 
            +
            ## Model Information
         | 
| 108 | 
            +
             | 
| 109 | 
            +
            - **Framework**: FastAPI
         | 
| 110 | 
            +
            - **AI Model**: Google Gemini
         | 
| 111 | 
            +
            - **Vector Database**: FAISS
         | 
| 112 | 
            +
            - **Document Processing**: PyMuPDF 
         | 
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,150 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import os
         | 
| 2 | 
            +
            import warnings
         | 
| 3 | 
            +
            import logging
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            # Suppress TensorFlow warnings
         | 
| 6 | 
            +
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
         | 
| 7 | 
            +
            os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
         | 
| 8 | 
            +
            os.environ['TF_LOGGING_LEVEL'] = 'ERROR'
         | 
| 9 | 
            +
            os.environ['TF_ENABLE_DEPRECATION_WARNINGS'] = '0'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            # Suppress specific TensorFlow deprecation warnings
         | 
| 12 | 
            +
            warnings.filterwarnings('ignore', category=DeprecationWarning, module='tensorflow')
         | 
| 13 | 
            +
            logging.getLogger('tensorflow').setLevel(logging.ERROR)
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            from fastapi import FastAPI, Request, HTTPException, Depends, Header
         | 
| 16 | 
            +
            from fastapi.middleware.cors import CORSMiddleware
         | 
| 17 | 
            +
            from pydantic import BaseModel
         | 
| 18 | 
            +
            from parser import parse_pdf_from_url, parse_pdf_from_file
         | 
| 19 | 
            +
            from embedder import build_faiss_index
         | 
| 20 | 
            +
            from retriever import retrieve_chunks
         | 
| 21 | 
            +
            from llm import query_gemini
         | 
| 22 | 
            +
            import uvicorn
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            app = FastAPI(title="HackRx Insurance Policy Assistant", version="1.0.0")
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            # Add CORS middleware
         | 
| 27 | 
            +
            app.add_middleware(
         | 
| 28 | 
            +
                CORSMiddleware,
         | 
| 29 | 
            +
                allow_origins=["*"],
         | 
| 30 | 
            +
                allow_credentials=True,
         | 
| 31 | 
            +
                allow_methods=["*"],
         | 
| 32 | 
            +
                allow_headers=["*"],
         | 
| 33 | 
            +
            )
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            @app.get("/")
         | 
| 36 | 
            +
            async def root():
         | 
| 37 | 
            +
                return {"message": "HackRx Insurance Policy Assistant API is running!"}
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            @app.get("/health")
         | 
| 40 | 
            +
            async def health_check():
         | 
| 41 | 
            +
                return {"status": "healthy", "message": "API is ready to process requests"}
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            class QueryRequest(BaseModel):
         | 
| 44 | 
            +
                documents: str
         | 
| 45 | 
            +
                questions: list[str]
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            class LocalQueryRequest(BaseModel):
         | 
| 48 | 
            +
                document_path: str
         | 
| 49 | 
            +
                questions: list[str]
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            def verify_token(authorization: str = Header(None)):
         | 
| 52 | 
            +
                if not authorization or not authorization.startswith("Bearer "):
         | 
| 53 | 
            +
                    raise HTTPException(status_code=401, detail="Invalid authorization header")
         | 
| 54 | 
            +
                
         | 
| 55 | 
            +
                token = authorization.replace("Bearer ", "")
         | 
| 56 | 
            +
                # For demo purposes, accept any token. In production, validate against a database
         | 
| 57 | 
            +
                if not token:
         | 
| 58 | 
            +
                    raise HTTPException(status_code=401, detail="Invalid token")
         | 
| 59 | 
            +
                
         | 
| 60 | 
            +
                return token
         | 
| 61 | 
            +
             | 
@app.post("/api/v1/hackrx/run")
async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
    """Answer a batch of questions against a PDF fetched from a URL.

    Requires a bearer token (any non-empty token is accepted in this demo).
    Returns {"answers": [...]} with exactly one answer per question.

    Raises:
        HTTPException: 500 on any processing failure.
    """
    try:
        print(f"Processing {len(request.questions)} questions...")

        text_chunks = parse_pdf_from_url(request.documents)
        print(f"Extracted {len(text_chunks)} text chunks from PDF")

        index, texts = build_faiss_index(text_chunks)

        # Union of the top chunks across every question, so one LLM call
        # can see all the context it needs.
        all_chunks = set()
        for question in request.questions:
            all_chunks.update(retrieve_chunks(index, texts, question))

        # Process all questions in a single LLM call.
        print(f"Processing all {len(request.questions)} questions in batch...")
        response = query_gemini(request.questions, list(all_chunks))

        # Normalize the LLM output to exactly one answer per question:
        # pad short lists with "Not Found", truncate long ones.  A non-list
        # "answers" value (malformed LLM output) falls through to the
        # fallback instead of crashing with a 500.
        raw = response.get("answers") if isinstance(response, dict) else None
        if isinstance(raw, list):
            answers = raw
        else:
            # Fallback if the response is not in the expected format.
            answers = [response] if isinstance(response, str) else []
        n = len(request.questions)
        answers = (answers + ["Not Found"] * n)[:n]

        print(f"Generated {len(answers)} answers")
        return { "answers": answers }

    except Exception as e:
        print(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
| 103 | 
            +
             | 
@app.post("/api/v1/hackrx/local")
async def run_local_query(request: LocalQueryRequest):
    """Answer a batch of questions against a PDF on the local filesystem.

    Same pipeline as /api/v1/hackrx/run but reads the document from
    request.document_path instead of downloading it.

    Raises:
        HTTPException: 500 on any processing failure.
    """
    try:
        print(f"Processing local document: {request.document_path}")
        print(f"Processing {len(request.questions)} questions...")

        # Parse local PDF file.
        text_chunks = parse_pdf_from_file(request.document_path)
        print(f"Extracted {len(text_chunks)} text chunks from local PDF")

        index, texts = build_faiss_index(text_chunks)

        # Union of the top chunks across every question, so one LLM call
        # can see all the context it needs.
        all_chunks = set()
        for question in request.questions:
            all_chunks.update(retrieve_chunks(index, texts, question))

        # Process all questions in a single LLM call.
        print(f"Processing all {len(request.questions)} questions in batch...")
        response = query_gemini(request.questions, list(all_chunks))

        # Normalize the LLM output to exactly one answer per question:
        # pad short lists with "Not Found", truncate long ones.  A non-list
        # "answers" value (malformed LLM output) falls through to the
        # fallback instead of crashing with a 500.
        raw = response.get("answers") if isinstance(response, dict) else None
        if isinstance(raw, list):
            answers = raw
        else:
            # Fallback if the response is not in the expected format.
            answers = [response] if isinstance(response, str) else []
        n = len(request.questions)
        answers = (answers + ["Not Found"] * n)[:n]

        print(f"Generated {len(answers)} answers")
        return { "answers": answers }

    except Exception as e:
        print(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
| 147 | 
            +
             | 
if __name__ == "__main__":
    # Hugging Face Spaces injects PORT; 7860 is the Spaces default for
    # local runs.
    uvicorn.run("app:app", host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
    	
        embedder.py
    ADDED
    
    | @@ -0,0 +1,12 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
import faiss
from sentence_transformers import SentenceTransformer
import numpy as np

# Shared sentence-embedding model used to vectorize document chunks.
model = SentenceTransformer("all-MiniLM-L6-v2")

def build_faiss_index(chunks):
    """Embed `chunks` and build an exact L2 FAISS index over the vectors.

    Returns:
        (index, chunks): the populated FAISS index and the chunk list,
        in the same order the vectors were added.
    """
    vectors = np.array(model.encode(chunks))
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index, chunks
    	
        llm.py
    ADDED
    
    | @@ -0,0 +1,69 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
import google.generativeai as genai
import os
import json
from dotenv import load_dotenv
load_dotenv()

api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("GOOGLE_API_KEY environment variable is not set. Please add it to your .env file")

# Do not echo any part of the key itself: secret material does not belong in logs.
print("Google API Key loaded successfully")
genai.configure(api_key=api_key)

def _strip_code_fences(text):
    """Strip one leading/trailing markdown code fence (``` or ```json).

    Unlike blanket str.replace(), this only removes the outer fence
    markers, so backtick sequences inside the JSON payload survive.
    """
    text = text.strip()
    if text.startswith("```"):
        newline = text.find("\n")
        # Drop the opening fence line, including any language tag.
        text = text[newline + 1:] if newline != -1 else ""
        if text.rstrip().endswith("```"):
            text = text.rstrip()[:-3]
    return text.strip()

def query_gemini(questions, contexts):
    """Answer all `questions` in a single Gemini call grounded on `contexts`.

    Args:
        questions: list of question strings.
        contexts: list of document snippets used as grounding context.

    Returns:
        dict parsed from the model's JSON reply (expected to contain an
        "answers" list). On parse failure or any other error, a dict whose
        "answers" entries are placeholder/error strings, one per question.
    """
    try:
        context = "\n\n".join(contexts)

        # Number the questions so the model answers them in order.
        questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])

        prompt = f"""You are an insurance policy assistant. Based on the below document snippets, answer the following questions precisely.

IMPORTANT INSTRUCTIONS:
1. Only respond based on the context provided. If information is not found in the context, respond with "Not Found".
2. Provide clear, concise answers that directly address each question.
3. Return your response in the exact JSON format shown below.
4. Give complete, informative responses based on the provided context.
5. Answer each question in the order provided.

Context:
{context}

Questions:
{questions_text}

Return your response in this exact JSON format:
{{
    "answers": [
        "Answer to question 1",
        "Answer to question 2",
        "Answer to question 3",
        ...
    ]
}}

Ensure each answer is comprehensive and directly addresses the corresponding question. If information is not found in the context for any question, respond with "Not Found" for that question."""

        model = genai.GenerativeModel('gemini-2.0-flash-exp')
        response = model.generate_content(prompt)
        response_text = _strip_code_fences(response.text)

        # Try to parse the response as JSON.
        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            # If JSON parsing fails, return a structured placeholder response.
            print(f"Failed to parse JSON response: {response_text}")
            return {"answers": ["Error parsing response"] * len(questions)}

    except Exception as e:
        print(f"Error in query_gemini: {str(e)}")
        return {"answers": [f"Error generating response: {str(e)}"] * len(questions)}
    	
        main.py
    ADDED
    
    | @@ -0,0 +1,151 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
import os
import warnings
import logging

# Silence TensorFlow's startup noise. These must be set BEFORE any module
# that pulls in TensorFlow is imported below.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'TF_ENABLE_ONEDNN_OPTS': '0',
    'TF_LOGGING_LEVEL': 'ERROR',
    'TF_ENABLE_DEPRECATION_WARNINGS': '0',
})

# Also hide TensorFlow's deprecation chatter at the Python level.
warnings.filterwarnings('ignore', category=DeprecationWarning, module='tensorflow')
logging.getLogger('tensorflow').setLevel(logging.ERROR)

from fastapi import FastAPI, Request, HTTPException, Depends, Header
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from parser import parse_pdf_from_url, parse_pdf_from_file
from embedder import build_faiss_index
from retriever import retrieve_chunks
from llm import query_gemini
import uvicorn

app = FastAPI(title="HackRx Insurance Policy Assistant", version="1.0.0")

# Open CORS policy: any origin/method/header may call this demo API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """Liveness endpoint: confirms the API process is up."""
    payload = {"message": "HackRx Insurance Policy Assistant API is running!"}
    return payload
| 38 | 
            +
             | 
@app.get("/health")
async def health_check():
    """Readiness endpoint used by deployment health probes."""
    status = {"status": "healthy", "message": "API is ready to process requests"}
    return status
| 42 | 
            +
             | 
class QueryRequest(BaseModel):
    """Request body for /api/v1/hackrx/run."""
    # URL of the PDF document to download and query.
    documents: str
    # Questions to answer against the document, in order.
    questions: list[str]
| 46 | 
            +
             | 
class LocalQueryRequest(BaseModel):
    """Request body for /api/v1/hackrx/local."""
    # Filesystem path of the PDF document to query.
    document_path: str
    # Questions to answer against the document, in order.
    questions: list[str]
| 50 | 
            +
             | 
def verify_token(authorization: str = Header(None)):
    """Validate the Authorization header and return the bearer token.

    Raises:
        HTTPException: 401 if the header is missing, malformed, or the
            token part is empty.
    """
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Invalid authorization header")

    # Slice off the prefix instead of str.replace(): replace() removes
    # EVERY occurrence of "Bearer " and would corrupt a token that happens
    # to contain that substring.
    token = authorization[len("Bearer "):]
    # For demo purposes, accept any non-empty token. In production, validate
    # against a database or token service.
    if not token:
        raise HTTPException(status_code=401, detail="Invalid token")

    return token
| 61 | 
            +
             | 
@app.post("/api/v1/hackrx/run")
async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
    """Answer a batch of questions against a PDF fetched from a URL.

    Requires a bearer token (any non-empty token is accepted in this demo).
    Returns {"answers": [...]} with exactly one answer per question.

    Raises:
        HTTPException: 500 on any processing failure.
    """
    try:
        print(f"Processing {len(request.questions)} questions...")

        text_chunks = parse_pdf_from_url(request.documents)
        print(f"Extracted {len(text_chunks)} text chunks from PDF")

        index, texts = build_faiss_index(text_chunks)

        # Union of the top chunks across every question, so one LLM call
        # can see all the context it needs.
        all_chunks = set()
        for question in request.questions:
            all_chunks.update(retrieve_chunks(index, texts, question))

        # Process all questions in a single LLM call.
        print(f"Processing all {len(request.questions)} questions in batch...")
        response = query_gemini(request.questions, list(all_chunks))

        # Normalize the LLM output to exactly one answer per question:
        # pad short lists with "Not Found", truncate long ones.  A non-list
        # "answers" value (malformed LLM output) falls through to the
        # fallback instead of crashing with a 500.
        raw = response.get("answers") if isinstance(response, dict) else None
        if isinstance(raw, list):
            answers = raw
        else:
            # Fallback if the response is not in the expected format.
            answers = [response] if isinstance(response, str) else []
        n = len(request.questions)
        answers = (answers + ["Not Found"] * n)[:n]

        print(f"Generated {len(answers)} answers")
        return { "answers": answers }

    except Exception as e:
        print(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
| 103 | 
            +
             | 
@app.post("/api/v1/hackrx/local")
async def run_local_query(request: LocalQueryRequest):
    """Answer a batch of questions against a PDF on the local filesystem.

    Same pipeline as /api/v1/hackrx/run but reads the document from
    request.document_path instead of downloading it.

    Raises:
        HTTPException: 500 on any processing failure.
    """
    try:
        print(f"Processing local document: {request.document_path}")
        print(f"Processing {len(request.questions)} questions...")

        # Parse local PDF file.
        text_chunks = parse_pdf_from_file(request.document_path)
        print(f"Extracted {len(text_chunks)} text chunks from local PDF")

        index, texts = build_faiss_index(text_chunks)

        # Union of the top chunks across every question, so one LLM call
        # can see all the context it needs.
        all_chunks = set()
        for question in request.questions:
            all_chunks.update(retrieve_chunks(index, texts, question))

        # Process all questions in a single LLM call.
        print(f"Processing all {len(request.questions)} questions in batch...")
        response = query_gemini(request.questions, list(all_chunks))

        # Normalize the LLM output to exactly one answer per question:
        # pad short lists with "Not Found", truncate long ones.  A non-list
        # "answers" value (malformed LLM output) falls through to the
        # fallback instead of crashing with a 500.
        raw = response.get("answers") if isinstance(response, dict) else None
        if isinstance(raw, list):
            answers = raw
        else:
            # Fallback if the response is not in the expected format.
            answers = [response] if isinstance(response, str) else []
        n = len(request.questions)
        answers = (answers + ["Not Found"] * n)[:n]

        print(f"Generated {len(answers)} answers")
        return { "answers": answers }

    except Exception as e:
        print(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
| 147 | 
            +
             | 
if __name__ == "__main__":
    # Deployment platform injects PORT; fall back to 10000 for local runs.
    uvicorn.run("main:app", host="0.0.0.0", port=int(os.environ.get("PORT", 10000)))
    	
        parser.py
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
import fitz  # PyMuPDF
import requests
from io import BytesIO

def parse_pdf_from_url(url):
    """Download a PDF from `url` and return a list of per-page text chunks.

    Pages whose extracted text is empty or whitespace-only are skipped.

    Raises:
        requests.HTTPError: if the download returns a non-2xx status.
        requests.Timeout: if the download takes longer than 60 seconds.
    """
    res = requests.get(url, timeout=60)
    # Fail fast on HTTP errors instead of handing an error page to fitz.
    res.raise_for_status()
    chunks = []
    # Context manager closes the document even if text extraction fails.
    with fitz.open(stream=BytesIO(res.content), filetype="pdf") as doc:
        for page in doc:
            text = page.get_text()
            if text.strip():
                chunks.append(text)
    return chunks

def parse_pdf_from_file(file_path):
    """Parse a local PDF file and extract per-page text chunks.

    Raises:
        Exception: wrapping the underlying error, with the file path included.
    """
    try:
        chunks = []
        with fitz.open(file_path) as doc:
            for page in doc:
                text = page.get_text()
                if text.strip():
                    chunks.append(text)
        return chunks
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise Exception(f"Error parsing PDF file {file_path}: {str(e)}") from e
    	
        requirements.txt
    ADDED
    
    | @@ -0,0 +1,10 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            fastapi
         | 
| 2 | 
            +
            uvicorn
         | 
| 3 | 
            +
            requests
         | 
| 4 | 
            +
            faiss-cpu
         | 
| 5 | 
            +
            sentence-transformers
         | 
| 6 | 
            +
            PyMuPDF
         | 
| 7 | 
            +
            python-dotenv
         | 
| 8 | 
            +
            tf-keras
         | 
| 9 | 
            +
            google-generativeai
         | 
| 10 | 
            +
             | 
    	
        retriever.py
    ADDED
    
    | @@ -0,0 +1,9 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
from sentence_transformers import SentenceTransformer
import numpy as np

# Shared embedding model; must match the model used to build the index.
model = SentenceTransformer("all-MiniLM-L6-v2")

def retrieve_chunks(index, texts, query, k=5):
    """Return up to `k` chunks from `texts` most similar to `query`.

    Clamps k to the corpus size and drops FAISS's -1 padding indices
    (returned when the index holds fewer than k vectors); the original
    code would have mapped -1 to texts[-1], silently returning the wrong
    chunk for small documents.
    """
    k = min(k, len(texts))
    if k <= 0:
        return []
    query_vec = model.encode([query])
    distances, indices = index.search(np.array(query_vec), k)
    return [texts[i] for i in indices[0] if i >= 0]
    	
        test_deployment.py
    ADDED
    
    | @@ -0,0 +1,75 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/usr/bin/env python3
         | 
| 2 | 
            +
            """
         | 
| 3 | 
            +
            Test script for Hugging Face Spaces deployment
         | 
| 4 | 
            +
            """
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            import requests
         | 
| 7 | 
            +
            import json
         | 
| 8 | 
            +
            import sys
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            def test_health_check(base_url):
         | 
| 11 | 
            +
                """Test the health check endpoint"""
         | 
| 12 | 
            +
                try:
         | 
| 13 | 
            +
                    response = requests.get(f"{base_url}/")
         | 
| 14 | 
            +
                    print(f"Health check status: {response.status_code}")
         | 
| 15 | 
            +
                    print(f"Response: {response.json()}")
         | 
| 16 | 
            +
                    return response.status_code == 200
         | 
| 17 | 
            +
                except Exception as e:
         | 
| 18 | 
            +
                    print(f"Health check failed: {e}")
         | 
| 19 | 
            +
                    return False
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            def test_api_endpoint(base_url, api_key):
         | 
| 22 | 
            +
                """Test the main API endpoint"""
         | 
| 23 | 
            +
                try:
         | 
| 24 | 
            +
                    url = f"{base_url}/api/v1/hackrx/run"
         | 
| 25 | 
            +
                    headers = {
         | 
| 26 | 
            +
                        "Content-Type": "application/json",
         | 
| 27 | 
            +
                        "Authorization": f"Bearer {api_key}"
         | 
| 28 | 
            +
                    }
         | 
| 29 | 
            +
                    data = {
         | 
| 30 | 
            +
                        "documents": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
         | 
| 31 | 
            +
                        "questions": ["What is this document about?"]
         | 
| 32 | 
            +
                    }
         | 
| 33 | 
            +
                    
         | 
| 34 | 
            +
                    response = requests.post(url, headers=headers, json=data)
         | 
| 35 | 
            +
                    print(f"API test status: {response.status_code}")
         | 
| 36 | 
            +
                    print(f"Response: {response.json()}")
         | 
| 37 | 
            +
                    return response.status_code == 200
         | 
| 38 | 
            +
                except Exception as e:
         | 
| 39 | 
            +
                    print(f"API test failed: {e}")
         | 
| 40 | 
            +
                    return False
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            def main():
         | 
| 43 | 
            +
                if len(sys.argv) < 2:
         | 
| 44 | 
            +
                    print("Usage: python test_deployment.py <base_url> [api_key]")
         | 
| 45 | 
            +
                    print("Example: python test_deployment.py https://your-space-name.hf.space your_api_key")
         | 
| 46 | 
            +
                    sys.exit(1)
         | 
| 47 | 
            +
                
         | 
| 48 | 
            +
                base_url = sys.argv[1].rstrip('/')
         | 
| 49 | 
            +
                api_key = sys.argv[2] if len(sys.argv) > 2 else "test_token"
         | 
| 50 | 
            +
                
         | 
| 51 | 
            +
                print(f"Testing deployment at: {base_url}")
         | 
| 52 | 
            +
                print("=" * 50)
         | 
| 53 | 
            +
                
         | 
| 54 | 
            +
                # Test health check
         | 
| 55 | 
            +
                print("1. Testing health check...")
         | 
| 56 | 
            +
                health_ok = test_health_check(base_url)
         | 
| 57 | 
            +
                
         | 
| 58 | 
            +
                # Test API endpoint
         | 
| 59 | 
            +
                print("\n2. Testing API endpoint...")
         | 
| 60 | 
            +
                api_ok = test_api_endpoint(base_url, api_key)
         | 
| 61 | 
            +
                
         | 
| 62 | 
            +
                # Summary
         | 
| 63 | 
            +
                print("\n" + "=" * 50)
         | 
| 64 | 
            +
                print("DEPLOYMENT TEST SUMMARY")
         | 
| 65 | 
            +
                print("=" * 50)
         | 
| 66 | 
            +
                print(f"Health check: {'✅ PASS' if health_ok else '❌ FAIL'}")
         | 
| 67 | 
            +
                print(f"API endpoint: {'✅ PASS' if api_ok else '❌ FAIL'}")
         | 
| 68 | 
            +
                
         | 
| 69 | 
            +
                if health_ok and api_ok:
         | 
| 70 | 
            +
                    print("\n🎉 Deployment is working correctly!")
         | 
| 71 | 
            +
                else:
         | 
| 72 | 
            +
                    print("\n⚠️  Some tests failed. Check the logs above for details.")
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            if __name__ == "__main__":
         | 
| 75 | 
            +
                main() 
         | 
