File size: 1,421 Bytes
1a3b3aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import re
import ast
import requests
from typing import Union

def _find_nctid(text: str) -> Union[str,None]:
    "return nct string if found in text else none"
    match = re.search(r"[Nn][Cc][Tt]0*[1-9]\d{0,7}", text)
    return match[0] if match is not None else match

def _get_registry_outcomes(nct_id: str, timeout: float = 30.0) -> Union[dict,None]:
    """Fetch a study's outcomes module from the ClinicalTrials.gov v2 API.

    Args:
        nct_id: Study identifier inserted into the request URL.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The ``outcomesModule`` value found under ``protocolSection`` in the
        JSON response, or None when the status code is not 200 or either
        key is missing.

    Raises:
        requests.exceptions.RequestException: on connection failures,
            timeouts, or a body that is not valid JSON.
    """
    r = requests.get(
        f"https://clinicaltrials.gov/api/v2/studies/{nct_id}",
        params={"fields": "OutcomesModule"},
        timeout=timeout,
    )
    if r.status_code != 200:
        return None
    # Decode the body as JSON exactly once. ast.literal_eval only understands
    # Python literals and fails on the JSON tokens true/false/null.
    payload = r.json()
    # `or {}` also covers a protocolSection that is present but null.
    return (payload.get("protocolSection") or {}).get("outcomesModule")

def _reformat_outcomes(outcomes: dict) -> list[dict[str,str]]:
    new_outcomes = []
    for outcome_type, outcome_list in outcomes.items() :
        outcome_type = outcome_type.replace("Outcomes","")
        for outcome_item in outcome_list :
            outcome_item["type"] = outcome_type
            new_outcomes.append(outcome_item)
    return new_outcomes

def extract_nct_outcomes(text:str) -> Union[None,list[dict[str,str]]]:
    """Extract registry outcomes for the first NCT id found in *text*.

    Args:
        text: Free text that may contain an NCT identifier; may be None.

    Returns:
        The outcomes fetched through the CTGOV API v2 and flattened into a
        list of dicts, or None when *text* is empty/None, no NCT id is
        found, or the registry lookup yields no outcomes.
    """
    if not text:
        return None
    nct_id = _find_nctid(text)
    if nct_id is None:
        return None
    outcomes = _get_registry_outcomes(nct_id)
    # The lookup returns None for a non-200 response or a study without an
    # outcomes module; reformatting None would raise AttributeError.
    if outcomes is None:
        return None
    return _reformat_outcomes(outcomes)