Canstralian committed on
Commit
4315682
·
verified ·
1 Parent(s): c053342

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -9
app.py CHANGED
@@ -1,23 +1,30 @@
1
  import yaml
2
  import streamlit as st
 
 
 
 
 
3
  from components.sidebar import sidebar
4
  from components.chat_box import chat_box
5
  from components.chat_loop import chat_loop
6
  from components.init_state import init_state
7
  from components.prompt_engineering_dashboard import prompt_engineering_dashboard
8
 
9
-
10
-
11
  with open("config.yaml", "r") as file:
12
  config = yaml.safe_load(file)
13
 
 
14
  st.set_page_config(
15
  page_title="( -_•)▄︻テ═一💥 (´༎ຶٹ༎ຶ)NCTC OSINT AGENT BY TRHACKNON ╭∩╮( •̀_•́ )╭∩╮",
16
  page_icon="𓃮",
17
  )
18
 
19
-
20
  init_state(st.session_state, config)
 
 
21
  html_title = '''
22
  <style>
23
  .stTitle {
@@ -30,16 +37,130 @@ html_title = '''
30
  <h1 class="stTitle">( -_•)▄︻テ═一💥(´༎ຶٹ༎ຶ)NCTC OSINT AGENT💥╾━╤デ╦︻(•̀⤙•́)</h1>
31
  '''
32
 
 
33
  st.write(html_title, unsafe_allow_html=True)
34
 
35
- # st.write("# NCTC OSINT AGENT ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Prompt Engineering Dashboard is working but not for production, works great for testing.
38
- prompt_engineering_dashboard(st.session_state, config)
 
 
 
39
 
 
 
40
 
41
- sidebar(st.session_state, config)
 
 
 
 
42
 
43
- chat_box(st.session_state, config)
 
 
 
 
 
 
44
 
45
- chat_loop(st.session_state, config)
 
 
1
  import yaml
2
  import streamlit as st
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ import pandas as pd
6
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
7
+ from datasets import load_dataset, Dataset
8
  from components.sidebar import sidebar
9
  from components.chat_box import chat_box
10
  from components.chat_loop import chat_loop
11
  from components.init_state import init_state
12
  from components.prompt_engineering_dashboard import prompt_engineering_dashboard
13
 
14
+ # Load config.yaml
 
15
  with open("config.yaml", "r") as file:
16
  config = yaml.safe_load(file)
17
 
18
+ # Streamlit page configuration
19
  st.set_page_config(
20
  page_title="( -_•)▄︻テ═一💥 (´༎ຶٹ༎ຶ)NCTC OSINT AGENT BY TRHACKNON ╭∩╮( •̀_•́ )╭∩╮",
21
  page_icon="𓃮",
22
  )
23
 
24
+ # Initialize session state
25
  init_state(st.session_state, config)
26
+
27
+ # Custom HTML for title styling
28
  html_title = '''
29
  <style>
30
  .stTitle {
 
37
  <h1 class="stTitle">( -_•)▄︻テ═一💥(´༎ຶٹ༎ຶ)NCTC OSINT AGENT💥╾━╤デ╦︻(•̀⤙•́)</h1>
38
  '''
39
 
40
+ # Display HTML title
41
  st.write(html_title, unsafe_allow_html=True)
42
 
43
+ # OSINT functions
44
def get_github_stars_forks(owner, repo):
    """Return the star and fork counts of a GitHub repository.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        A ``(stargazers_count, forks_count)`` tuple read from the GitHub
        REST API response for the repository.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx response (unknown repository, rate limit, ...).
    """
    url = f"https://api.github.com/repos/{owner}/{repo}"
    # A timeout keeps a stalled connection from hanging the Streamlit run.
    response = requests.get(url, timeout=10)
    # Surface HTTP errors directly instead of a KeyError on the error payload.
    response.raise_for_status()
    data = response.json()
    return data['stargazers_count'], data['forks_count']
49
+
50
def get_github_issues(owner, repo):
    """Return the number of open issues of a GitHub repository.

    The GitHub issues endpoint lists pull requests alongside issues; entries
    carrying a ``pull_request`` key are excluded so only real issues count.

    Only the first page of results is inspected (at most 100 items), so the
    count is a lower bound for repositories with more open items than that.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        The number of open issues found on the first result page.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    response = requests.get(
        url,
        # 100 is the largest page size the API accepts (the default is 30).
        params={"state": "open", "per_page": 100},
        timeout=10,
    )
    # Without this an error payload (a dict) would be "counted" by its keys.
    response.raise_for_status()
    issues = response.json()
    return sum(1 for issue in issues if "pull_request" not in issue)
55
+
56
def get_github_pull_requests(owner, repo):
    """Return the number of open pull requests of a GitHub repository.

    Only the first page of results is inspected (at most 100 items), so the
    count is a lower bound for repositories with more open pull requests.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        The number of open pull requests found on the first result page.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
    response = requests.get(
        url,
        # 100 is the largest page size the API accepts (the default is 30).
        params={"state": "open", "per_page": 100},
        timeout=10,
    )
    # Without this an error payload (a dict) would be "counted" by its keys.
    response.raise_for_status()
    pulls = response.json()
    return len(pulls)
61
+
62
def get_github_license(owner, repo):
    """Return the name of the license GitHub detected for a repository.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        The license name, or ``"No license found"`` when the license
        endpoint answers 404 (no license detected, or repository unknown)
        or the payload carries no license name.

    Raises:
        requests.RequestException: On a network failure, a timeout, or any
            other non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/license"
    response = requests.get(url, timeout=10)
    # The endpoint answers 404 for repositories without a detected license;
    # treat that as a normal result rather than crashing the page.
    if response.status_code == 404:
        return "No license found"
    response.raise_for_status()
    license_info = response.json().get('license') or {}
    return license_info.get('name', "No license found")
67
+
68
def get_last_commit(owner, repo):
    """Return the committer date of the most recent commit of a repository.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        The ``commit.committer.date`` value of the first commit returned by
        the GitHub commits endpoint, or ``"No commits found"`` when the
        endpoint returns an empty list.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    # Only the first entry is used, so ask for a single commit.
    response = requests.get(url, params={"per_page": 1}, timeout=10)
    response.raise_for_status()
    commits = response.json()
    if not commits:
        return "No commits found"
    return commits[0]['commit']['committer']['date']
73
+
74
def get_github_workflow_status(owner, repo):
    """Return the status of the first listed GitHub Actions run of a repository.

    Args:
        owner: Account or organisation that owns the repository.
        repo: Repository name.

    Returns:
        The ``status`` field of the first entry in ``workflow_runs``, or
        ``"No workflows found"`` when the repository has no runs.

    Raises:
        requests.RequestException: On a network failure, a timeout, or a
            non-2xx response.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
    # Only the first run is used, so ask for a single entry.
    response = requests.get(url, params={"per_page": 1}, timeout=10)
    response.raise_for_status()
    workflow_runs = response.json().get('workflow_runs')
    if not workflow_runs:
        return "No workflows found"
    return workflow_runs[0]['status']
79
+
80
+ # Function to fetch page title from a URL
81
def fetch_page_title(url):
    """Fetch *url* and return the text of its HTML ``<title>`` element.

    The fetched URL and its HTTP status code are written to the Streamlit
    page as a side effect.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page title; ``'No title found'`` when the page has no title or
        an empty one; or a human-readable error string when the request
        fails or the response status is not 200.
    """
    # NOTE(review): the URL comes straight from user input and is fetched
    # server-side; consider restricting schemes/hosts if this app is exposed.
    try:
        # A timeout keeps a stalled server from hanging the Streamlit run.
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        return f"An error occurred: {e}"

    st.write(f"Fetching URL: {url} - Status Code: {response.status_code}")
    if response.status_code != 200:
        return f"Error: Received status code {response.status_code}"

    soup = BeautifulSoup(response.text, 'html.parser')
    if soup.title is None:
        return 'No title found'
    # ``.string`` is None for an empty title or one with nested markup;
    # ``get_text`` always yields a string.
    title = soup.title.get_text(strip=True)
    return title or 'No title found'
93
+
94
+ # Main Streamlit app
95
def main():
    """Render the Streamlit page body.

    Draws, in order: the prompt engineering dashboard, the sidebar and chat
    components, the GitHub repository analysis form, the URL title fetcher,
    the dataset upload / fine-tuning section, and a preview of the OSINT
    dataset.
    """
    # Display Prompt Engineering Dashboard (testing phase)
    prompt_engineering_dashboard(st.session_state, config)

    # Display sidebar and chat box
    sidebar(st.session_state, config)
    chat_box(st.session_state, config)
    chat_loop(st.session_state, config)

    # GitHub OSINT Analysis
    st.write("### GitHub Repository OSINT Analysis")
    st.write("Enter the GitHub repository owner and name:")

    owner = st.text_input("Repository Owner")
    repo = st.text_input("Repository Name")

    if owner and repo:
        try:
            stars, forks = get_github_stars_forks(owner, repo)
            open_issues = get_github_issues(owner, repo)
            open_pulls = get_github_pull_requests(owner, repo)
            license_type = get_github_license(owner, repo)
            last_commit = get_last_commit(owner, repo)
            workflow_status = get_github_workflow_status(owner, repo)
        except (requests.RequestException, KeyError, IndexError,
                TypeError, ValueError) as exc:
            # A mistyped repository, a rate limit or a network error should
            # show a message instead of crashing the whole page.
            st.error(f"Could not retrieve GitHub data for {owner}/{repo}: {exc}")
        else:
            st.write(f"Stars: {stars}, Forks: {forks}")
            st.write(f"Open Issues: {open_issues}, Open Pull Requests: {open_pulls}")
            st.write(f"License: {license_type}")
            st.write(f"Last Commit: {last_commit}")
            st.write(f"Workflow Status: {workflow_status}")

    # URL Title Fetcher
    st.write("### URL Title Fetcher")
    url = st.text_input("Enter a URL to fetch its title:")
    if url:
        title = fetch_page_title(url)
        st.write(f"Title: {title}")

    # Dataset Upload & Model Fine-Tuning Section
    st.write("### Dataset Upload & Model Fine-Tuning")
    dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
    uploaded_df = None
    if dataset_file:
        uploaded_df = pd.read_csv(dataset_file)
        st.dataframe(uploaded_df.head())

    st.write("Select a model for fine-tuning:")
    model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])

    if st.button("Fine-tune Model"):
        if uploaded_df is None:
            # Tell the user why nothing happens rather than ignoring the click.
            st.warning("Upload a CSV file before fine-tuning.")
        elif 'text' not in uploaded_df.columns:
            # The tokenizer step below reads the 'text' column.
            st.error("The uploaded CSV must contain a 'text' column.")
        else:
            # NOTE(review): Trainer presumably also needs a label column to
            # compute a loss; confirm the expected CSV schema.
            train_data = Dataset.from_pandas(uploaded_df)
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name)

            def tokenize_function(examples):
                return tokenizer(examples['text'], padding="max_length", truncation=True)

            tokenized_datasets = train_data.map(tokenize_function, batched=True)
            training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
            trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
            trainer.train()
            st.write("Model fine-tuned successfully!")

    # Load and display OSINT dataset
    # NOTE(review): indentation was lost in the diff view; this section is
    # assumed to belong to main() -- confirm against the real file.
    st.write("### OSINT Dataset")
    dataset = load_dataset("originalbox/osint")  # Replace with the correct dataset name

    # Convert to pandas DataFrame for display
    df = dataset['train'].to_pandas()  # Make sure to use the appropriate split ('train', 'test', etc.)
    st.write(df.head())


if __name__ == "__main__":
    main()