NCTC_OSINT

Sleeping

App Files Files Community

Canstralian commited on Jan 11

Commit

4315682

verified ·

1 Parent(s): c053342

Update app.py

Browse files

Files changed (1) hide show

app.py +130 -9

app.py CHANGED Viewed

@@ -1,23 +1,30 @@
 import yaml
 import streamlit as st
 from components.sidebar import sidebar
 from components.chat_box import chat_box
 from components.chat_loop import chat_loop
 from components.init_state import init_state
 from components.prompt_engineering_dashboard import prompt_engineering_dashboard
 with open("config.yaml", "r") as file:
     config = yaml.safe_load(file)
 st.set_page_config(
     page_title="( -_•)▄︻テ═一💥 (´༎ຶٹ༎ຶ)NCTC OSINT AGENT BY TRHACKNON ╭∩╮( •̀_•́ )╭∩╮",
     page_icon="𓃮",
 )
 init_state(st.session_state, config)
 html_title = '''
 <style>
 .stTitle {
@@ -30,16 +37,130 @@ html_title = '''
 <h1 class="stTitle">( -_•)▄︻テ═一💥(´༎ຶٹ༎ຶ)NCTC OSINT AGENT💥╾━╤デ╦︻(•̀⤙•́)</h1>
 '''
 st.write(html_title, unsafe_allow_html=True)
-# st.write("# NCTC OSINT AGENT ")
-# Prompt Engineering Dashboard is working but not for production, works great for testing.
-prompt_engineering_dashboard(st.session_state, config)
-sidebar(st.session_state, config)
-chat_box(st.session_state, config)
-chat_loop(st.session_state, config)

 import yaml
 import streamlit as st
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
+from datasets import load_dataset, Dataset
 from components.sidebar import sidebar
 from components.chat_box import chat_box
 from components.chat_loop import chat_loop
 from components.init_state import init_state
 from components.prompt_engineering_dashboard import prompt_engineering_dashboard
+# Load config.yaml
 with open("config.yaml", "r") as file:
     config = yaml.safe_load(file)
+# Streamlit page configuration
 st.set_page_config(
     page_title="( -_•)▄︻テ═一💥 (´༎ຶٹ༎ຶ)NCTC OSINT AGENT BY TRHACKNON ╭∩╮( •̀_•́ )╭∩╮",
     page_icon="𓃮",
 )
+# Initialize session state
 init_state(st.session_state, config)
+# Custom HTML for title styling
 html_title = '''
 <style>
 .stTitle {
 <h1 class="stTitle">( -_•)▄︻テ═一💥(´༎ຶٹ༎ຶ)NCTC OSINT AGENT💥╾━╤デ╦︻(•̀⤙•́)</h1>
 '''
+# Display HTML title
 st.write(html_title, unsafe_allow_html=True)
+# OSINT functions
+def get_github_stars_forks(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}"
+    response = requests.get(url)
+    data = response.json()
+    return data['stargazers_count'], data['forks_count']
+def get_github_issues(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
+    response = requests.get(url)
+    issues = response.json()
+    return len(issues)
+def get_github_pull_requests(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
+    response = requests.get(url)
+    pulls = response.json()
+    return len(pulls)
+def get_github_license(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/license"
+    response = requests.get(url)
+    data = response.json()
+    return data['license']['name']
+def get_last_commit(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
+    response = requests.get(url)
+    commits = response.json()
+    return commits[0]['commit']['committer']['date']
+def get_github_workflow_status(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
+    response = requests.get(url)
+    runs = response.json()
+    return runs['workflow_runs'][0]['status'] if runs['workflow_runs'] else "No workflows found"
+# Function to fetch page title from a URL
+def fetch_page_title(url):
+    try:
+        response = requests.get(url)
+        st.write(f"Fetching URL: {url} - Status Code: {response.status_code}")
+        if response.status_code == 200:
+            soup = BeautifulSoup(response.text, 'html.parser')
+            title = soup.title.string if soup.title else 'No title found'
+            return title
+        else:
+            return f"Error: Received status code {response.status_code}"
+    except Exception as e:
+        return f"An error occurred: {e}"
+# Main Streamlit app
+def main():
+    # Display Prompt Engineering Dashboard (testing phase)
+    prompt_engineering_dashboard(st.session_state, config)
+    # Display sidebar and chat box
+    sidebar(st.session_state, config)
+    chat_box(st.session_state, config)
+    chat_loop(st.session_state, config)
+    # GitHub OSINT Analysis
+    st.write("### GitHub Repository OSINT Analysis")
+    st.write("Enter the GitHub repository owner and name:")
+    owner = st.text_input("Repository Owner")
+    repo = st.text_input("Repository Name")
+    if owner and repo:
+        stars, forks = get_github_stars_forks(owner, repo)
+        open_issues = get_github_issues(owner, repo)
+        open_pulls = get_github_pull_requests(owner, repo)
+        license_type = get_github_license(owner, repo)
+        last_commit = get_last_commit(owner, repo)
+        workflow_status = get_github_workflow_status(owner, repo)
+        st.write(f"Stars: {stars}, Forks: {forks}")
+        st.write(f"Open Issues: {open_issues}, Open Pull Requests: {open_pulls}")
+        st.write(f"License: {license_type}")
+        st.write(f"Last Commit: {last_commit}")
+        st.write(f"Workflow Status: {workflow_status}")
+    # URL Title Fetcher
+    st.write("### URL Title Fetcher")
+    url = st.text_input("Enter a URL to fetch its title:")
+    if url:
+        title = fetch_page_title(url)
+        st.write(f"Title: {title}")
+    # Dataset Upload & Model Fine-Tuning Section
+    st.write("### Dataset Upload & Model Fine-Tuning")
+    dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
+    if dataset_file:
+        df = pd.read_csv(dataset_file)
+        st.dataframe(df.head())
+    st.write("Select a model for fine-tuning:")
+    model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])
+    if st.button("Fine-tune Model"):
+        if dataset_file:
+            dataset = Dataset.from_pandas(df)
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForSequenceClassification.from_pretrained(model_name)
+            def tokenize_function(examples):
+                return tokenizer(examples['text'], padding="max_length", truncation=True)
+            tokenized_datasets = dataset.map(tokenize_function, batched=True)
+            training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
+            trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
+            trainer.train()
+            st.write("Model fine-tuned successfully!")
+    # Load and display OSINT dataset
+    st.write("### OSINT Dataset")
+    dataset = load_dataset("originalbox/osint")  # Replace with the correct dataset name
+    # Convert to pandas DataFrame for display
+    df = dataset['train'].to_pandas()  # Make sure to use the appropriate split ('train', 'test', etc.)
+    st.write(df.head())
+if __name__ == "__main__":
+    main()