File size: 5,088 Bytes
a84c0b0
 
 
0920957
 
 
a84c0b0
0920957
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a84c0b0
0920957
 
 
 
 
a84c0b0
0920957
 
 
 
 
 
 
 
 
 
 
 
 
 
a84c0b0
 
0920957
a84c0b0
 
 
 
 
 
 
 
 
0920957
a84c0b0
 
eb06dda
 
0920957
 
a84c0b0
0920957
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb06dda
0920957
 
a84c0b0
 
 
0920957
eb06dda
0920957
eb06dda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0920957
 
 
 
 
eb06dda
0920957
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a84c0b0
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, Dataset

# OSINT functions
def get_github_stars_forks(owner, repo, *, timeout=10):
    """Return the star and fork counts of a GitHub repository.

    Args:
        owner: Account or organisation name used in the API URL.
        repo: Repository name used in the API URL.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        A ``(stargazers_count, forks_count)`` tuple read from the API response.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}"
    # Without a timeout a stalled connection would hang the caller forever.
    response = requests.get(url, timeout=timeout)
    # Surface the real HTTP failure instead of a KeyError on the error payload.
    response.raise_for_status()
    data = response.json()
    return data['stargazers_count'], data['forks_count']

def get_github_issues(owner, repo, *, timeout=10):
    """Count the open issues (excluding pull requests) of a GitHub repository.

    Only the first page of results is inspected, so the count is capped at
    100; pull requests sharing that page reduce the cap further.

    Args:
        owner: Account or organisation name used in the API URL.
        repo: Repository name used in the API URL.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        The number of entries on the first result page that are not pull
        requests.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    response = requests.get(
        url,
        params={"state": "open", "per_page": 100},
        timeout=timeout,
    )
    # An error payload is a dict; len() on it would silently return the
    # number of keys, so reject non-success responses explicitly.
    response.raise_for_status()
    issues = response.json()
    # The issues endpoint also lists pull requests; those entries carry a
    # "pull_request" key and must not be counted as issues.
    return sum(1 for issue in issues if "pull_request" not in issue)

def get_github_pull_requests(owner, repo, *, timeout=10):
    """Count the open pull requests of a GitHub repository.

    Only the first page of results is inspected, so the count is capped
    at 100.

    Args:
        owner: Account or organisation name used in the API URL.
        repo: Repository name used in the API URL.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        The number of pull requests on the first result page.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
    response = requests.get(
        url,
        params={"state": "open", "per_page": 100},
        timeout=timeout,
    )
    # An error payload is a dict; len() on it would silently return the
    # number of keys, so reject non-success responses explicitly.
    response.raise_for_status()
    pulls = response.json()
    return len(pulls)

def get_github_license(owner, repo, *, timeout=10):
    """Return the license name of a GitHub repository.

    Args:
        owner: Account or organisation name used in the API URL.
        repo: Repository name used in the API URL.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        The ``license.name`` field of the API response, or
        ``"No license found"`` when the API answers 404 or the response
        carries no license name.

    Raises:
        requests.HTTPError: If the API answers with a non-404 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/license"
    response = requests.get(url, timeout=timeout)
    # NOTE(review): GitHub appears to answer 404 both for a repository without
    # a detected license and for an unknown repository - confirm against the
    # API docs. Either way there is no license to report.
    if response.status_code == 404:
        return "No license found"
    response.raise_for_status()
    data = response.json()
    # Guard against a missing or null "license" object in the payload.
    license_info = data.get('license') or {}
    return license_info.get('name') or "No license found"

def get_last_commit(owner, repo, *, timeout=10):
    """Return the committer date of the first commit the API lists.

    Args:
        owner: Account or organisation name used in the API URL.
        repo: Repository name used in the API URL.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        The ``commit.committer.date`` string of the first listed commit, or
        ``"No commits found"`` when the repository has no commits.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status other
            than 409.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    # Only the first entry is used, so request a single commit.
    response = requests.get(url, params={"per_page": 1}, timeout=timeout)
    # NOTE(review): GitHub is understood to answer 409 Conflict for an empty
    # repository - confirm against the API docs.
    if response.status_code == 409:
        return "No commits found"
    response.raise_for_status()
    commits = response.json()
    if not commits:
        return "No commits found"
    return commits[0]['commit']['committer']['date']

def get_github_workflow_status(owner, repo, *, timeout=10):
    """Return the status of the first workflow run the API lists.

    Args:
        owner: Account or organisation name used in the API URL.
        repo: Repository name used in the API URL.
        timeout: Seconds to wait for the GitHub API before giving up.

    Returns:
        The ``status`` field of the first entry in ``workflow_runs``, or
        ``"No workflows found"`` when that list is empty or absent.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
    # Only the first run is used, so request a single entry.
    response = requests.get(url, params={"per_page": 1}, timeout=timeout)
    # Surface the real HTTP failure instead of a KeyError on the error payload.
    response.raise_for_status()
    workflow_runs = response.json().get('workflow_runs') or []
    return workflow_runs[0]['status'] if workflow_runs else "No workflows found"

# Function to fetch page title from a URL
def fetch_page_title(url, *, timeout=10):
    """Fetch a web page and return the text of its ``<title>`` element.

    The HTTP status code is also written to the Streamlit page. Failures are
    reported through the returned string rather than raised, so the caller
    can display the result directly.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The stripped title text; ``'No title found'`` when the page has no
        title or an empty one; or an error message string when the request
        fails or the status code is not 200.
    """
    try:
        # Without a timeout an unresponsive host would freeze the app.
        response = requests.get(url, timeout=timeout)
        st.write(f"Fetching URL: {url} - Status Code: {response.status_code}")
        if response.status_code != 200:
            return f"Error: Received status code {response.status_code}"

        soup = BeautifulSoup(response.text, 'html.parser')
        title_tag = soup.title
        # `.string` is None for an empty <title> or one with nested markup;
        # get_text() always yields a string.
        title = title_tag.get_text(strip=True) if title_tag else ''
        return title or 'No title found'
    except Exception as e:
        # Deliberately broad: this is a best-effort UI helper, so any failure
        # (bad URL, network error, parser rejection) becomes a message.
        return f"An error occurred: {e}"

# Main Streamlit app
def main():
    """Render the Streamlit OSINT tool page.

    The page has three sections: GitHub repository analysis, a URL title
    fetcher, and dataset preview plus model fine-tuning.
    """
    st.title("OSINT Tool")
    _render_repo_analysis()
    _render_title_fetcher()
    _render_fine_tuning()


def _render_repo_analysis():
    """Ask for a GitHub owner/repo pair and display its repository metrics."""
    st.write("### GitHub Repository OSINT Analysis")
    st.write("Enter the GitHub repository owner and name:")

    owner = st.text_input("Repository Owner").strip()
    repo = st.text_input("Repository Name").strip()
    if not (owner and repo):
        return

    try:
        stars, forks = get_github_stars_forks(owner, repo)
        open_issues = get_github_issues(owner, repo)
        open_pulls = get_github_pull_requests(owner, repo)
        license_type = get_github_license(owner, repo)
        last_commit = get_last_commit(owner, repo)
        workflow_status = get_github_workflow_status(owner, repo)
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as exc:
        # Network failures, rate limiting, unknown repositories and unexpected
        # payload shapes all end up here; show a message instead of crashing.
        st.error(f"Could not fetch repository data for {owner}/{repo}: {exc}")
        return

    st.write(f"Stars: {stars}, Forks: {forks}")
    st.write(f"Open Issues: {open_issues}, Open Pull Requests: {open_pulls}")
    st.write(f"License: {license_type}")
    st.write(f"Last Commit: {last_commit}")
    st.write(f"Workflow Status: {workflow_status}")


def _render_title_fetcher():
    """Ask for a URL and display the title of the page it points to."""
    st.write("### URL Title Fetcher")
    url = st.text_input("Enter a URL to fetch its title:")
    if url:
        title = fetch_page_title(url)
        st.write(f"Title: {title}")


def _render_fine_tuning():
    """Preview a hub dataset and fine-tune a classifier on an uploaded CSV."""
    st.write("### Dataset Upload & Model Fine-Tuning")
    st.write("#### Available OSINT Datasets for Fine-Tuning:")
    osint_datasets = [
        "gonferspanish/OSINT",
        "Inforensics/missing-persons-clue-analysis-osint",
        "jester6136/osint",
        "originalbox/osint",
    ]

    selected_dataset = st.selectbox("Choose a dataset for fine-tuning:", osint_datasets)
    try:
        dataset = load_dataset(selected_dataset)
    except Exception as exc:
        # Broad on purpose: loading can fail for network, access or parsing
        # reasons, and none of them should take the rest of the page down.
        st.error(f"Could not load dataset {selected_dataset}: {exc}")
    else:
        st.write(f"Dataset {selected_dataset} loaded successfully!")
        # Not every dataset ships a "train" split; fall back to the first one.
        split_name = "train" if "train" in dataset else next(iter(dataset), None)
        if split_name is None:
            st.warning("The selected dataset has no splits to preview.")
        else:
            st.write("First few records:")
            # datasets.Dataset has no .head(); slicing yields a dict of
            # columns, which pandas can render as a table.
            st.write(pd.DataFrame(dataset[split_name][:5]))

    # Upload CSV for fine-tuning
    dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
    df = None
    if dataset_file:
        try:
            df = pd.read_csv(dataset_file)
        except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as exc:
            st.error(f"Could not read the uploaded CSV: {exc}")
        else:
            st.dataframe(df.head())

    # Fine-tuning Model Selection
    st.write("Select a model for fine-tuning:")
    model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])
    if not st.button("Fine-tune Model"):
        return

    if df is None:
        st.warning("Upload a CSV file before fine-tuning.")
        return

    # Sequence classification needs the input text and a target to compute a
    # loss; without a label column Trainer.train() cannot run.
    missing_columns = {"text", "label"} - set(df.columns)
    if missing_columns:
        st.error(
            "The CSV must contain 'text' and 'label' columns; missing: "
            + ", ".join(sorted(missing_columns))
        )
        return

    train_df = df[["text", "label"]].dropna().copy()
    if train_df.empty:
        st.error("The CSV contains no rows with both 'text' and 'label' set.")
        return

    train_df["text"] = train_df["text"].astype(str)
    # Map arbitrary label values onto contiguous integer class ids.
    label_ids, label_values = pd.factorize(train_df["label"], sort=True)
    train_df["label"] = label_ids
    # Keep at least two output classes so a single-label CSV is not treated
    # as a regression problem by the model head.
    num_labels = max(len(label_values), 2)

    # preserve_index=False keeps the post-dropna index out of the dataset.
    train_dataset = Dataset.from_pandas(train_df, preserve_index=False)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

    def tokenize_function(examples):
        return tokenizer(examples['text'], padding="max_length", truncation=True)

    tokenized_dataset = train_dataset.map(tokenize_function, batched=True)
    training_args = TrainingArguments(
        output_dir="./results",
        num_train_epochs=1,
        per_device_train_batch_size=8,
    )
    trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)
    with st.spinner("Fine-tuning model..."):
        trainer.train()
    st.success("Model fine-tuned successfully!")

# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()