Spaces: Runtime error

Commit 836bc0e · Parent(s): 6dd4fed

New Version Updated

Browse files
- pdf_parser.py → Old_Files/pdf_parser.py  +0 -0
- app.py  +77 -66
- content_readers/__init__.py  +52 -0
- content_readers/image_extractor.py  +11 -0
- content_readers/pdf_extractor.py  +42 -0
- content_readers/web_extractor.py  +11 -0
- content_readers/zip_extractor.py  +39 -0
- db_logger.py  +47 -0
- embedder.py  +1 -1
- requirements.txt  +1 -0
- utils.py  +10 -0
pdf_parser.py → Old_Files/pdf_parser.py
RENAMED
File without changes
app.py
CHANGED
@@ -4,7 +4,6 @@ import logging
 import time
 import json
 import hashlib
-from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
 from threading import Lock
 import re
@@ -13,7 +12,6 @@ import re
 cache_dir = os.path.join(os.getcwd(), ".cache")
 os.makedirs(cache_dir, exist_ok=True)
 os.environ['HF_HOME'] = cache_dir
-os.environ['TRANSFORMERS_CACHE'] = cache_dir
 
 # Suppress TensorFlow warnings
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
@@ -24,16 +22,41 @@ os.environ['TF_ENABLE_DEPRECATION_WARNINGS'] = '0'
 warnings.filterwarnings('ignore', category=DeprecationWarning, module='tensorflow')
 logging.getLogger('tensorflow').setLevel(logging.ERROR)
 
-from fastapi import FastAPI, HTTPException, Depends, Header, Query
+from fastapi import FastAPI, HTTPException, Depends, Header, Query, Request
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from
+from content_readers import parse_document_url, parse_document_file
 from embedder import build_faiss_index, preload_model
 from retriever import retrieve_chunks
 from llm import query_gemini
 import uvicorn
+from contextlib import asynccontextmanager
 
+# Import Supabase logger
+from db_logger import log_query
+
+
+# Helper to get real client IP
+def get_client_ip(request: Request):
+    forwarded_for = request.headers.get("x-forwarded-for")
+    if forwarded_for:
+        return forwarded_for.split(",")[0].strip()
+    real_ip = request.headers.get("x-real-ip")
+    if real_ip:
+        return real_ip
+    return request.client.host
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    print("Starting up HackRx Insurance Policy Assistant...")
+    print("Preloading sentence transformer model...")
+    preload_model()
+    print("Model preloading completed. API is ready to serve requests!")
+    yield
+
+
+app = FastAPI(title="HackRx Insurance Policy Assistant", version="3.2.6", lifespan=lifespan)
 
 app.add_middleware(
     CORSMiddleware,
@@ -43,12 +66,6 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-@app.on_event("startup")
-async def startup_event():
-    print("Starting up HackRx Insurance Policy Assistant...")
-    print("Preloading sentence transformer model...")
-    preload_model()
-    print("Model preloading completed. API is ready to serve requests!")
 
 @app.get("/")
 async def root():
@@ -58,6 +75,7 @@ async def root():
 async def health_check():
     return {"status": "healthy"}
 
+
 class QueryRequest(BaseModel):
     documents: str
     questions: list[str]
@@ -66,6 +84,7 @@ class LocalQueryRequest(BaseModel):
     document_path: str
     questions: list[str]
 
+
 def verify_token(authorization: str = Header(None)):
     if not authorization or not authorization.startswith("Bearer "):
         raise HTTPException(status_code=401, detail="Invalid authorization header")
@@ -83,25 +102,17 @@ def get_document_id_from_url(url: str) -> str:
 def question_has_https_link(q: str) -> bool:
     return bool(re.search(r"https://[^\s]+", q))
 
+
 # Document cache with thread safety
 doc_cache = {}
 doc_cache_lock = Lock()
 
+
 @app.delete("/api/v1/cache/clear")
-async def clear_cache(doc_id: str = Query(None
-                      url: str = Query(None
-                      doc_only: bool = Query(False
-    """
-    Clear cache data.
-    - No params: Clears ALL caches.
-    - doc_id: Clears caches for that document only.
-    - url: Same as doc_id but computed automatically from URL.
-    - doc_only: Clears only document cache.
-    """
+async def clear_cache(doc_id: str = Query(None),
+                      url: str = Query(None),
+                      doc_only: bool = Query(False)):
     cleared = {}
-
-    # If URL is provided, convert to doc_id
     if url:
         doc_id = get_document_id_from_url(url)
 
@@ -119,19 +130,20 @@ async def clear_cache(doc_id: str = Query(None, description="Optional document I
 
     return {"status": "success", "cleared": cleared}
 
+
 @app.post("/api/v1/hackrx/run")
-async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
+async def run_query(request: QueryRequest, fastapi_request: Request, token: str = Depends(verify_token)):
     start_time = time.time()
     timing_data = {}
     try:
+        user_ip = get_client_ip(fastapi_request)
+        user_agent = fastapi_request.headers.get("user-agent", "Unknown")
+
         print("=== INPUT JSON ===")
         print(json.dumps({"documents": request.documents, "questions": request.questions}, indent=2))
         print("==================\n")
 
-
-
-        # PDF Parsing and FAISS Caching (keep document caching for speed)
-        doc_id = get_document_id_from_url(request.documents)
+        doc_id = get_document_id_from_url(request.documents or "")
         with doc_cache_lock:
             if doc_id in doc_cache:
                 print("✅ Using cached document...")
@@ -142,7 +154,7 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
             else:
                 print("⚙️ Parsing and indexing new document...")
                 pdf_start = time.time()
-                text_chunks =
+                text_chunks = parse_document_url(request.documents)
                 timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
 
                 index_start = time.time()
@@ -155,18 +167,13 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
                     "texts": texts
                 }
 
-        # Retrieve chunks for all questions - no QA caching
        retrieval_start = time.time()
         all_chunks = set()
-        question_positions = {}
         for idx, question in enumerate(request.questions):
             top_chunks = retrieve_chunks(index, texts, question)
             all_chunks.update(top_chunks)
-            question_positions.setdefault(question, []).append(idx)
         timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
-        print(f"Retrieved {len(all_chunks)} unique chunks for all questions")
 
-        # Query Gemini LLM fresh for all questions
         context_chunks = list(all_chunks)
         batch_size = 10
         batches = [(i, request.questions[i:i + batch_size]) for i in range(0, len(request.questions), batch_size)]
@@ -190,38 +197,41 @@ async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
         timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
         responses = [results_dict.get(i, "Not Found") for i in range(len(request.questions))]
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+
+        # Log to Supabase with user_agent + geo_location
+        for q, a in zip(request.questions, responses):
+            log_query(
+                document_source=request.documents or "UNKNOWN",
+                question=q,
+                answer=a,
+                ip_address=user_ip,
+                user_agent=user_agent,
+                response_time=total_time
+            )
 
         return {"answers": responses}
 
     except Exception as e:
-        print(f"Error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
+
 @app.post("/api/v1/hackrx/local")
-async def run_local_query(request: LocalQueryRequest):
+async def run_local_query(request: LocalQueryRequest, fastapi_request: Request):
     start_time = time.time()
     timing_data = {}
     try:
+        user_ip = get_client_ip(fastapi_request)
+        user_agent = fastapi_request.headers.get("user-agent", "Unknown")
+
         print("=== INPUT JSON ===")
         print(json.dumps({"document_path": request.document_path, "questions": request.questions}, indent=2))
         print("==================\n")
 
-        print(f"Processing {len(request.questions)} questions locally...")
-
         pdf_start = time.time()
-        text_chunks =
+        text_chunks = parse_document_file(request.document_path)
         timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
-        print(f"Extracted {len(text_chunks)} text chunks from PDF")
 
         index_start = time.time()
         index, texts = build_faiss_index(text_chunks)
@@ -233,12 +243,10 @@ async def run_local_query(request: LocalQueryRequest):
             top_chunks = retrieve_chunks(index, texts, question)
             all_chunks.update(top_chunks)
         timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
-        print(f"Retrieved {len(all_chunks)} unique chunks")
 
-        questions = request.questions
         context_chunks = list(all_chunks)
         batch_size = 20
-        batches = [(i, questions[i:i + batch_size]) for i in range(0, len(questions), batch_size)]
+        batches = [(i, request.questions[i:i + batch_size]) for i in range(0, len(request.questions), batch_size)]
 
         llm_start = time.time()
         results_dict = {}
@@ -258,24 +266,27 @@ async def run_local_query(request: LocalQueryRequest):
                 results_dict[start_idx + j] = f"Error: {str(e)}"
         timing_data['llm_processing'] = round(time.time() - llm_start, 2)
 
-        responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
+        responses = [results_dict.get(i, "Not Found") for i in range(len(request.questions))]
+        total_time = time.time() - start_time
+        timing_data['total_time'] = round(total_time, 2)
+
+        # Log to Supabase with user_agent + geo_location
+        for q, a in zip(request.questions, responses):
+            log_query(
+                document_source=request.document_path or "UNKNOWN",
+                question=q,
+                answer=a,
+                ip_address=user_ip,
+                user_agent=user_agent,
+                response_time=total_time
+            )
 
         return {"answers": responses}
 
     except Exception as e:
-        print(f"Error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
+
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
     uvicorn.run("app:app", host="0.0.0.0", port=port)
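For reference, a minimal client sketch against the updated /api/v1/hackrx/run endpoint. The host assumes the default PORT of 7860 from app.py; the bearer token and the document URL are placeholders, not values taken from this commit.

import requests

API_URL = "http://localhost:7860/api/v1/hackrx/run"       # assumes the default PORT=7860
headers = {"Authorization": "Bearer <your-team-token>"}    # placeholder token

payload = {
    "documents": "https://example.com/sample-policy.pdf",  # placeholder document URL
    "questions": ["What is the grace period for premium payment?"],
}

resp = requests.post(API_URL, json=payload, headers=headers)
print(resp.json())  # expected shape: {"answers": ["..."]}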
content_readers/__init__.py
ADDED
@@ -0,0 +1,52 @@
+from io import BytesIO
+import requests
+import os
+from .pdf_extractor import parse_pdf_from_url_multithreaded, parse_pdf_from_file_multithreaded
+from .image_extractor import is_image, extract_text_from_image_bytes
+from .web_extractor import extract_text_from_html
+from .zip_extractor import extract_from_zip_bytes
+
+def parse_document_url(url):
+    try:
+        res = requests.get(url)
+        content = res.content
+        content_type = res.headers.get("content-type", "").lower()
+    except Exception as e:
+        return [f"Download error: {str(e)}"]
+
+    if "text/html" in content_type or url.endswith(".html"):
+        return extract_text_from_html(content)
+
+    if "zip" in content_type or url.endswith(".zip"):
+        zip_results = extract_from_zip_bytes(content)
+        return [f"{name}: {text}" for name, texts in zip_results.items() for text in texts]
+
+    if "image" in content_type or is_image(content):
+        text = extract_text_from_image_bytes(content)
+        return [text] if text else ["No data found (image empty)"]
+
+    if "pdf" in content_type or url.endswith(".pdf"):
+        return parse_pdf_from_url_multithreaded(BytesIO(content))
+
+    return ["Unsupported file type"]
+
+def parse_document_file(file_path):
+    if file_path.lower().endswith(".zip"):
+        with open(file_path, "rb") as f:
+            zip_results = extract_from_zip_bytes(f.read())
+        return [f"{name}: {text}" for name, texts in zip_results.items() for text in texts]
+
+    if file_path.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff", ".webp")):
+        with open(file_path, "rb") as f:
+            text = extract_text_from_image_bytes(f.read())
+        return [text] if text else ["No data found (image empty)"]
+
+    if file_path.lower().endswith(".pdf"):
+        return parse_pdf_from_file_multithreaded(file_path)
+
+    if file_path.lower().endswith(".html"):
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+        return extract_text_from_html(content)
+
+    return ["Unsupported file type"]
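A short usage sketch of the new dispatcher, assuming the package is importable from the app's working directory; the URL and file path below are placeholders.

from content_readers import parse_document_url, parse_document_file

# Remote documents: the reader is chosen from the response content type (PDF, HTML, ZIP, image).
chunks = parse_document_url("https://example.com/policy.pdf")   # placeholder URL

# Local documents: dispatch is based on the file extension.
local_chunks = parse_document_file("samples/policy.pdf")        # placeholder path

print(len(chunks), len(local_chunks))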
content_readers/image_extractor.py
ADDED
@@ -0,0 +1,11 @@
+import imghdr
+from PIL import Image
+import pytesseract
+from io import BytesIO
+
+def is_image(content):
+    return imghdr.what(None, h=content) in ["jpeg", "png", "bmp", "gif", "tiff", "webp"]
+
+def extract_text_from_image_bytes(image_bytes):
+    image = Image.open(BytesIO(image_bytes))
+    return pytesseract.image_to_string(image).strip()
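A minimal sketch of the OCR helpers on local bytes; the path is a placeholder, and pytesseract assumes a Tesseract binary is installed on the host.

from content_readers.image_extractor import is_image, extract_text_from_image_bytes

with open("samples/scan.png", "rb") as f:   # placeholder path
    data = f.read()

if is_image(data):
    print(extract_text_from_image_bytes(data))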
content_readers/pdf_extractor.py
ADDED
@@ -0,0 +1,42 @@
+import fitz  # PyMuPDF
+from concurrent.futures import ThreadPoolExecutor
+
+def _extract_text(page):
+    text = page.get_text()
+    return text.strip() if text and text.strip() else None
+
+def parse_pdf_from_url_multithreaded(content, max_workers=2, chunk_size=1):
+    try:
+        with fitz.open(stream=content, filetype="pdf") as doc:
+            pages = list(doc)
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                texts = list(executor.map(_extract_text, pages))
+        if chunk_size > 1:
+            chunks = []
+            for i in range(0, len(texts), chunk_size):
+                chunk = ' '.join([t for t in texts[i:i+chunk_size] if t])
+                if chunk:
+                    chunks.append(chunk)
+            return chunks if chunks else ["No data found in this document (empty PDF)"]
+        return [t for t in texts if t] or ["No data found in this document (empty PDF)"]
+    except Exception as e:
+        print(f"❌ Failed to parse as PDF: {str(e)}")
+        return [f"No data found in this document (not PDF or corrupted)"]
+
+def parse_pdf_from_file_multithreaded(file_path, max_workers=2, chunk_size=1):
+    try:
+        with fitz.open(file_path) as doc:
+            pages = list(doc)
+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                texts = list(executor.map(_extract_text, pages))
+        if chunk_size > 1:
+            chunks = []
+            for i in range(0, len(texts), chunk_size):
+                chunk = ' '.join([t for t in texts[i:i+chunk_size] if t])
+                if chunk:
+                    chunks.append(chunk)
+            return chunks if chunks else ["No data found in this document (local PDF empty)"]
+        return [t for t in texts if t] or ["No data found in this document (local PDF empty)"]
+    except Exception as e:
+        print(f"❌ Failed to open local file: {str(e)}")
+        return [f"No data found in this document (local file error)"]
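A small sketch of the chunking knob on the local parser; the path is a placeholder.

from content_readers.pdf_extractor import parse_pdf_from_file_multithreaded

# Default chunk_size=1: one chunk per non-empty page.
pages = parse_pdf_from_file_multithreaded("samples/policy.pdf")                  # placeholder path

# chunk_size=3: every three consecutive pages are joined into a single chunk.
grouped = parse_pdf_from_file_multithreaded("samples/policy.pdf", chunk_size=3)

print(len(pages), len(grouped))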
content_readers/web_extractor.py
ADDED
@@ -0,0 +1,11 @@
+from bs4 import BeautifulSoup
+
+def extract_text_from_html(content):
+    try:
+        soup = BeautifulSoup(content, "html.parser")
+        text = soup.get_text(separator="\n")
+        lines = [t.strip() for t in text.splitlines() if t.strip()]
+        return lines if lines else ["No data found in this document (empty HTML)"]
+    except Exception as e:
+        print(f"❌ HTML parse failed: {str(e)}")
+        return [f"No data found in this document (HTML error)"]
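A quick sketch of the HTML reader on an inline snippet (the snippet itself is illustrative).

from content_readers.web_extractor import extract_text_from_html

html = "<html><body><h1>Policy</h1><p>Grace period: 30 days.</p></body></html>"
print(extract_text_from_html(html))   # ['Policy', 'Grace period: 30 days.']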
content_readers/zip_extractor.py
ADDED
@@ -0,0 +1,39 @@
+import zipfile
+from io import BytesIO
+from .pdf_extractor import parse_pdf_from_url_multithreaded
+from .image_extractor import is_image, extract_text_from_image_bytes
+
+def extract_from_zip_bytes(zip_bytes):
+    """
+    Extract and process files inside a ZIP archive.
+    Returns a dictionary: {filename: extracted_text_list}
+    """
+    results = {}
+    try:
+        with zipfile.ZipFile(BytesIO(zip_bytes)) as z:
+            for file_name in z.namelist():
+                try:
+                    file_data = z.read(file_name)
+                except Exception as e:
+                    results[file_name] = [f"❌ Failed to read file: {e}"]
+                    continue
+
+                # PDF files
+                if file_name.lower().endswith(".pdf"):
+                    results[file_name] = parse_pdf_from_url_multithreaded(BytesIO(file_data))
+
+                # Image files
+                elif is_image(file_data):
+                    text = extract_text_from_image_bytes(file_data)
+                    results[file_name] = [text] if text else ["No data found (image empty)"]
+
+                # Unsupported files
+                else:
+                    results[file_name] = ["❌ Unsupported file type inside ZIP"]
+
+        return results if results else {"ZIP": ["No supported files found in archive"]}
+
+    except zipfile.BadZipFile:
+        return {"ZIP": ["Invalid or corrupted ZIP file"]}
+    except Exception as e:
+        return {"ZIP": [f"Error processing ZIP: {e}"]}
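A usage sketch of the ZIP reader; the archive path is a placeholder.

from content_readers.zip_extractor import extract_from_zip_bytes

with open("samples/bundle.zip", "rb") as f:   # placeholder path
    results = extract_from_zip_bytes(f.read())

for name, texts in results.items():
    print(name, "->", len(texts), "text chunk(s)")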
db_logger.py
ADDED
@@ -0,0 +1,47 @@
+import os
+from datetime import datetime
+from supabase import create_client, Client
+import requests
+
+SUPABASE_URL = os.getenv("SUPABASE_URL")
+SUPABASE_KEY = os.getenv("SUPABASE_KEY")
+
+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
+
+def get_geo_location(ip: str) -> str:
+    """
+    Fetch approximate geo-location for the given IP address.
+    Returns 'Unknown' if lookup fails.
+    """
+    try:
+        if ip.startswith("127.") or ip == "localhost":
+            return "Localhost"
+        resp = requests.get(f"https://ipapi.co/{ip}/country_name/", timeout=3)
+        if resp.status_code == 200:
+            return resp.text.strip() or "Unknown"
+    except Exception:
+        pass
+    return "Unknown"
+
+def log_query(document_source: str, question: str, answer: str,
+              ip_address: str, response_time: float,
+              user_agent: str = None):
+    """
+    Store a question-answer log in Supabase with geo-location and user-agent.
+    """
+    now_str = datetime.utcnow().isoformat()
+    geo_location = get_geo_location(ip_address)
+
+    try:
+        supabase.table("qa_logs").insert({
+            "document_source": document_source,
+            "question": question,
+            "answer": answer,
+            "ip_address": ip_address,
+            "geo_location": geo_location,
+            "user_agent": user_agent or "Unknown",
+            "response_time_sec": round(response_time, 2),
+            "created_at": now_str
+        }).execute()
+    except Exception as e:
+        print(f"Failed to log query to Supabase: {e}")
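The logger builds its Supabase client at import time from SUPABASE_URL and SUPABASE_KEY, so a sketch like the one below assumes both are already set in the environment (on Spaces, typically as repository secrets) and that a qa_logs table exists in the project; all argument values are placeholders.

# Assumes SUPABASE_URL and SUPABASE_KEY are set before this import.
from db_logger import log_query

log_query(
    document_source="https://example.com/policy.pdf",   # placeholder source
    question="What is the waiting period?",
    answer="Not Found",
    ip_address="127.0.0.1",
    user_agent="curl/8.0",                               # placeholder client
    response_time=1.23,
)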
embedder.py
CHANGED
@@ -24,7 +24,7 @@ def preload_model(model_name="paraphrase-MiniLM-L3-v2"):
             print(f"Trying fallback: {fallback_name}")
             _model = SentenceTransformer(fallback_name, cache_folder=cache_dir)
 
-    print("
+    print(" π Model ready.")
     return _model
 
 def get_model():
requirements.txt
CHANGED
@@ -10,3 +10,4 @@ google-generativeai
 pytesseract
 Pillow
 beautifulsoup4
+supabase
utils.py
ADDED
@@ -0,0 +1,10 @@
+from fastapi import Request
+
+def get_client_ip(request: Request):
+    forwarded_for = request.headers.get("x-forwarded-for")
+    if forwarded_for:
+        return forwarded_for.split(",")[0].strip()
+    real_ip = request.headers.get("x-real-ip")
+    if real_ip:
+        return real_ip
+    return request.client.host
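A sketch of how the helper in utils.py could be wired into a route; the /whoami path is illustrative and not part of this commit.

from fastapi import FastAPI, Request
from utils import get_client_ip

app = FastAPI()

@app.get("/whoami")   # illustrative route, not in the commit
async def whoami(request: Request):
    # Prefers X-Forwarded-For / X-Real-IP so a reverse proxy does not mask the caller.
    return {"ip": get_client_ip(request)}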