Spaces:
Sleeping
Sleeping
File size: 20,887 Bytes
6f61bb9 96a6d6a 6f61bb9 2f6f53f 6f61bb9 103f38e 6f61bb9 a4aef81 6f61bb9 a4aef81 6f61bb9 347bbc4 6f61bb9 a4aef81 6f61bb9 347bbc4 6f61bb9 103f38e 6f61bb9 103f38e 347bbc4 a4aef81 6f61bb9 2f6f53f 6f61bb9 2f6f53f 6f61bb9 347bbc4 6f61bb9 347bbc4 6f61bb9 a4aef81 6f61bb9 a4aef81 6f61bb9 103f38e 6f61bb9 103f38e a4aef81 6f61bb9 a4aef81 6f61bb9 96a6d6a 6f61bb9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 |
import json
from everything2text4prompt.everything2text4prompt import Everything2Text4Prompt
from everything2text4prompt.util import BaseData, YoutubeData, PodcastData
from digester.chatgpt_service import LLMService, ChatGPTService
from digester.util import Prompt, provide_text_with_css, GradioInputs
# Status text shown in the target-source markdown panel before the user has supplied a source.
WAITING_FOR_TARGET_INPUT = "Waiting for target source input"
# Branding suffix appended to the final full-summary message rendered in the chatbot.
RESPONSE_SUFFIX = "⚡by DigestEverythingGPT"
class GradioMethodService:
    """
    GradioMethodService is defined as gradio functions
    Therefore all methods here will fulfill
    - gradio.inputs as signature
    - gradio.outputs as return
    Detailed-level methods called by methods in GradioMethodService will be in other classes (e.g. DigesterService)
    """

    @staticmethod
    def write_results_to_file(history, file_name=None):
        """
        Writes the conversation history to a file in Markdown format.
        If no filename is specified, the filename is generated using the current time.

        :param history: alternating chat entries; even indices are rendered as '## ' headings
        :param file_name: optional markdown file name; auto-generated from current local time when None
        :return: human-readable message containing the absolute path of the written file
        """
        import os, time
        if file_name is None:
            file_name = 'chatGPT_report' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.md'
        os.makedirs('./analyzer_logs/', exist_ok=True)
        with open(f'./analyzer_logs/{file_name}', 'w', encoding='utf8') as f:
            f.write('# chatGPT report\n')
            for i, content in enumerate(history):
                # Coerce non-string entries explicitly instead of the previous
                # bare `except: continue`, which silently dropped entries.
                if not isinstance(content, str):
                    content = str(content)
                if i % 2 == 0:
                    f.write('## ')
                f.write(content)
                f.write('\n\n')
        res = 'The above material has been written in ' + os.path.abspath(f'./analyzer_logs/{file_name}')
        print(res)
        return res

    @staticmethod
    def fetch_and_summarize(apikey_textbox, source_textbox, source_target_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history):
        """Gradio handler: fetch the target source as text, then run the summarization prompt chain.

        Yields (chatbot, history, status, target_md) tuples for streaming UI updates.
        """
        g_inputs = GradioInputs(apikey_textbox, source_textbox, source_target_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history)
        # Every fetch starts a fresh conversation.
        g_inputs.history = []
        g_inputs.chatbot = []
        if g_inputs.apikey_textbox == "" or g_inputs.source_textbox == "" or g_inputs.source_target_textbox == "":
            LLMService.report_exception(g_inputs.chatbot, g_inputs.history,
                                        chat_input=f"Source target: [{g_inputs.source_textbox}] {g_inputs.source_target_textbox}",
                                        chat_output=f"{provide_text_with_css('ERROR', 'red')} Please provide api key, source and target source")
            yield g_inputs.chatbot, g_inputs.history, 'Error', WAITING_FOR_TARGET_INPUT
            return
        # TODO: invalid input checking
        is_success, text_data = yield from DigesterService.fetch_text(g_inputs)
        if not is_success:
            return  # TODO: error handling testing
        yield from PromptEngineeringStrategy.execute_prompt_chain(g_inputs, text_data)

    @staticmethod
    def ask_question(apikey_textbox, source_textbox, target_source_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history):
        """Gradio handler for the Q&A box. Currently a stub that only echoes an unsupported message."""
        g_inputs = GradioInputs(apikey_textbox, source_textbox, target_source_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history)
        prompt = f"ask_question(`{qa_textbox}`)"
        g_inputs.chatbot.append((prompt, "Currently the ask feature is not supported yet"))
        yield g_inputs.chatbot, g_inputs.history, 'Normal'

    @staticmethod
    def test_formatting(apikey_textbox, source_textbox, target_source_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history):
        """Dev-only handler: renders a fixed message exercising markdown tables, code, LaTeX and CJK text."""
        g_inputs = GradioInputs(apikey_textbox, source_textbox, target_source_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history)
        # Raw string: contains Windows paths (backslashes) and LaTeX that must not be escaped.
        msg = r"""
# ASCII, table, code test
Overall, this program consists of the following files:
- `main.py`: This is the primary script of the program which uses NLP to analyze and summarize Python code.
- `model.py`: This file defines the `CodeModel` class that is used by `main.py` to model the code as graphs and performs operations on them.
- `parser.py`: This file contains custom parsing functions used by `model.py`.
- `test/`: This directory contains test scripts for `model.py` and `util.py`
- `util.py`: This file provides utility functions for the program such as getting the root directory of the project and reading configuration files.
`util.py` specifically has two functions:
| Function | Input | Output | Functionality |
|----------|-------|--------|---------------|
| `get_project_root()` | None | String containing the path of the parent directory of the script itself | Finds the path of the parent directory of the script itself |
| `get_config()` | None | Dictionary containing the contents of `config.yaml` and `config_secret.yaml`, merged together (with `config_secret.yaml` overwriting any keys with the same name in `config.yaml`) | Reads and merges two YAML configuration files (`config.yaml` and `config_secret.yaml`) located in the `config` directory in the parent directory of the script. Returns the resulting dictionary. |The above material has been written in C:\github\!CodeAnalyzerGPT\CodeAnalyzerGPT\analyzer_logs\chatGPT_report2023-04-07-14-11-55.md
The Hessian matrix is a square matrix that contains information about the second-order partial derivatives of a function. Suppose we have a function $f(x_1,x_2,...,x_n)$ which is twice continuously differentiable. Then the Hessian matrix $H(f)$ of $f$ is defined as the $n\times n$ matrix:
$$H(f) = \begin{bmatrix} \frac{\partial^2 f}{\partial x_1^2} & \frac{\partial^2 f}{\partial x_1 \partial x_2} & \cdots & \frac{\partial^2 f}{\partial x_1 \partial x_n} \ \frac{\partial^2 f}{\partial x_2 \partial x_1} & \frac{\partial^2 f}{\partial x_2^2} & \cdots & \frac{\partial^2 f}{\partial x_2 \partial x_n} \ \vdots & \vdots & \ddots & \vdots \ \frac{\partial^2 f}{\partial x_n \partial x_1} & \frac{\partial^2 f}{\partial x_n \partial x_2} & \cdots & \frac{\partial^2 f}{\partial x_n^2} \ \end{bmatrix}$$
Each element in the Hessian matrix is the second-order partial derivative of the function with respect to a pair of variables, as shown in the matrix above
Here's an example Python code using SymPy module to get the derivative of a mathematical function:
```
import sympy as sp
x = sp.Symbol('x')
f = input('Enter a mathematical function in terms of x: ')
expr = sp.sympify(f)
dfdx = sp.diff(expr, x)
print('The derivative of', f, 'is:', dfdx)
```
This code will prompt the user to enter a mathematical function in terms of x and then use the `diff()` function from SymPy to calculate its derivative with respect to x. The result will be printed on the screen.
# Non-ASCII test
程序整体功能:CodeAnalyzerGPT工程是一个用于自动化代码分析和评审的工具。它使用了OpenAI的GPT模型对代码进行分析,然后根据一定的规则和标准来评价代码的质量和合规性。
程序的构架包含以下几个模块:
1. CodeAnalyzerGPT: 主程序模块,包含了代码分析和评审的主要逻辑。
2. analyzer: 包含了代码分析程序的具体实现。
每个文件的功能可以总结为下表:
| 文件名 | 功能描述 |
| --- | --- |
| C:\github\!CodeAnalyzerGPT\CodeAnalyzerGPT\CodeAnalyzerGPT.py | 主程序入口,调用各种处理逻辑和输出结果 |
| C:\github\!CodeAnalyzerGPT\CodeAnalyzerGPT\analyzer\code_analyzer.py | 代码分析器,包含了对代码文本的解析和分析逻辑 |
| C:\github\!CodeAnalyzerGPT\CodeAnalyzerGPT\analyzer\code_segment.py | 对代码文本进行语句和表达式的分段处理 |
"""
        g_inputs.chatbot.append(("test prompt query", msg))
        yield g_inputs.chatbot, g_inputs.history, 'Normal'

    @staticmethod
    def test_asking(apikey_textbox, source_textbox, target_source_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history):
        """Dev-only handler: exercises chatbot rendering edge cases (empty / None / HTML messages)."""
        g_inputs = GradioInputs(apikey_textbox, source_textbox, target_source_textbox, qa_textbox, gpt_model_textbox, language_textbox, chatbot, history)
        msg = f"test_ask(`{qa_textbox}`)"
        g_inputs.chatbot.append(("test prompt query", msg))
        g_inputs.chatbot.append(("test prompt query 2", msg))
        g_inputs.chatbot.append(("", "test empty message"))
        g_inputs.chatbot.append(("test empty message 2", ""))
        g_inputs.chatbot.append((None, "output msg, test no input msg"))
        g_inputs.chatbot.append(("input msg, , test no output msg", None))
        g_inputs.chatbot.append((None, '<span style="background-color: yellow; color: black; padding: 3px; border-radius: 8px;">WARN</span>'))
        yield g_inputs.chatbot, g_inputs.history, 'Normal'
class DigesterService:
    """Detail-level services called by the Gradio handlers (text fetching + UI streaming)."""

    @staticmethod
    def update_ui(chatbot_input, chatbot_output, status, target_md, chatbot, history, is_append=True):
        """
        For instant chatbot_input+output
        Not suitable if chatbot_output have delay / processing time

        Appends (or replaces the last) chatbot message pair, records both sides in
        history, and yields one (chatbot, history, status, target_md) UI update.
        """
        if is_append:
            chatbot.append((chatbot_input, chatbot_output))
        else:
            chatbot[-1] = (chatbot_input, chatbot_output)
        history.append(chatbot_input)
        history.append(chatbot_output)
        yield chatbot, history, status, target_md

    @staticmethod
    def fetch_text(g_inputs: "GradioInputs") -> "(bool, BaseData)":
        """Fetch text from source using everything2text4prompt. No OpenAI call here"""
        converter = Everything2Text4Prompt(openai_api_key=g_inputs.apikey_textbox)
        text_data, is_success, error_msg = converter.convert_text(g_inputs.source_textbox, g_inputs.source_target_textbox)
        chatbot_input = f"Converting source to text for [{g_inputs.source_textbox}] {g_inputs.source_target_textbox} ..."
        target_md = f"[{g_inputs.source_textbox}] {g_inputs.source_target_textbox}"
        if is_success:
            # Only touch text_data on success: on failure the converter may not
            # return a usable data object, and the content was unused anyway.
            text_content = text_data.full_content
            chatbot_output = f"""
Extracted text successfully:
{text_content}
"""
            yield from DigesterService.update_ui(chatbot_input, chatbot_output, "Success", target_md, g_inputs.chatbot, g_inputs.history)
        else:
            chatbot_output = f"""
{provide_text_with_css("ERROR", "red")} Text extraction failed ({error_msg})
"""
            yield from DigesterService.update_ui(chatbot_input, chatbot_output, "Error", target_md, g_inputs.chatbot, g_inputs.history)
        return is_success, text_data
class PromptEngineeringStrategy:
    """Dispatches the fetched text to the prompt chain matching its source type."""

    @staticmethod
    def execute_prompt_chain(g_inputs: "GradioInputs", text_data: "BaseData"):
        """Route to the youtube/podcast chain based on the selected source; yields UI updates."""
        if g_inputs.source_textbox == 'youtube':
            yield from PromptEngineeringStrategy.execute_prompt_chain_youtube(g_inputs, text_data)
        elif g_inputs.source_textbox == 'podcast':
            yield from PromptEngineeringStrategy.execute_prompt_chain_podcast(g_inputs, text_data)

    @staticmethod
    def execute_prompt_chain_youtube(g_inputs: "GradioInputs", text_data: "YoutubeData"):
        yield from YoutubeChain.execute_chain(g_inputs, text_data)

    @staticmethod
    def execute_prompt_chain_podcast(g_inputs: "GradioInputs", text_data: "PodcastData"):
        # Podcast support is not implemented yet. This must still be a *generator*
        # (the caller does `yield from` on it); a plain `pass` function returns None
        # and would raise TypeError. The unreachable `yield` makes it an empty generator.
        return
        yield
class Chain:
    """Abstract base for prompt chains; subclasses (e.g. YoutubeChain) drive the GPT calls."""

    @staticmethod
    def execute_chain(g_inputs: GradioInputs, text_data: YoutubeData):
        """Run the full chain for this source type, yielding UI updates. Subclasses must override."""
        raise NotImplementedError
class YoutubeChain(Chain):
    """Prompt chain for YouTube videos: timestamped summary -> type classification -> final summary."""

    # Classifies the video into one of the known types; `{title}`/`{transcript}` are
    # filled via .format(). The literal `{"type": ...}` example lives in the suffix,
    # which is deliberately NOT passed through .format().
    CLASSIFIER_PROMPT = Prompt(
        prompt_prefix="""
[Youtube Video types]
N things: The youtube will shows N items that will be described in the video. For example "17 cheap purchases that save me time", "10 AMAZING Ways AutoGPT Is Being Used RIGHT NOW". Usually the title starts with a number.
Tutorials: how to do or make something in order to teach a skill or how to use a product or software
How-to and DIY: People show how to make or do something yourself, like crafts, recipes, projects, etc
Interview: Interviewee shows their standpoint with a topic.
Others: If the video type is not listed above
[TITLE]
{title}
[TRANSCRIPT]
""",
        prompt_main="""
{transcript}
""",
        prompt_suffix="""
[TASK]
From the above title, transcript, classify the youtube video type listed above.
Give the video type with JSON format like {"type": "N things"}, and exclude other text.
""")

    # Produces a per-chunk summary prefixed with timestamps. The suffix keeps literal
    # `{first_timestamp}` etc. as few-shot formatting examples; only `{language}` and
    # `{word_limit}` are substituted (via .replace, not .format, to preserve the rest).
    TIMESTAMPED_SUMMARY_PROMPT = Prompt(
        prompt_prefix="""
[TITLE]
{title}
[Transcript with timestamp]
""",
        prompt_main="""
{transcript_with_ts}
""",
        prompt_suffix="""
[TASK]
Convert this into youtube summary.
Combine and merge timestamp to for 2-5 minutes chunk. Maximum {word_limit} using noun for one line. Must not exceed the limit
Start with the timestamp followed by the summarized text for that chunk.
Must use language: {language}
Strictly follow the task rules especially for language and character limit
Maximum {word_limit} using noun for one line. Using noun, not sentence
Example format:
{first_timestamp} - This is the first part
{second_minute}:44 - This is the second part
{third_minute}:02 - This is the third part
""")

    # Final whole-video summary; task/format constraints are injected per video type.
    FINAL_SUMMARY_PROMPT = Prompt(
        prompt_prefix="""
[TITLE]
{title}
[TRANSCRIPT]
""",
        prompt_main="""
{transcript}
""",
        prompt_suffix="""
[TASK]
Summarize the above points under 30 words. Step by step showing points for the main concepts.
Use markdown format.
Must use language: {language}
Strictly follow the task rules and use {language} language
{task_constraint}
The format is like:
Summary: (content of summary)
{format_constraint}
""")

    # Extra task instructions per video type (keys double as the set of valid types).
    FINAL_SUMMARY_TASK_CONSTRAINTS = {
        "N things": """
Additionally, since it is a N things video, the summary should include the N items stated in the video.
""",
        "Tutorials": """
Additionally, since it is a Tutorial video, provide step by step instructions for the tutorial.
""",
        "Others": """
""",
    }

    # Output-format instructions per video type; "Others" keeps `{char_limit}`/`{language}`
    # placeholders that are substituted via .replace at call time.
    FINAL_SUMMARY_FORMAT_CONSTRAINTS = {
        "N things": """
Items mentioned in the video: (content of N things. Put different appropriate emoji in the beginning for each bullet point)
""",
        "Tutorials": """
Instructions: (step by step instructions, up to five concise bullet points, less than 20 words. Put different appropriate emoji for each bullet point)
""",
        "Others": """
Highlights:
- [Emoji] (content of highlights)
- [Emoji] (content of highlights)
- [Emoji] (content of highlights)
- [Emoji] (content of highlights)
- [Emoji] (content of highlights)
For highlight, up to five concise bullet points, less than {char_limit} for each bullet point. Put different appropriate emoji for each bullet point
Must use language {language} as output
""",
    }

    @staticmethod
    def _summary_length_limit(language: str, char_count: int, word_count: int) -> str:
        """If Chinese/Japan/Korean, use character limit. Otherwise, use word limit.

        Shared by the timestamped and final summary chains (previously duplicated
        as nested helpers in both methods).
        """
        if 'zh' in language or language in ["ja-JP", "ko-KR"]:
            return f"{char_count} {language} characters"
        else:
            return f"{word_count} words"

    @staticmethod
    def execute_chain(g_inputs: "GradioInputs", text_data: "YoutubeData"):
        """Run the three sub-chains and render the combined summary as the final chatbot message."""
        text_content = text_data.full_content
        timestamped_summary = yield from YoutubeChain.execute_timestamped_summary_chain(g_inputs, text_data)
        video_type = yield from YoutubeChain.execute_classifer_chain(g_inputs, text_data)
        final_summary = yield from YoutubeChain.execute_final_summary_chain(g_inputs, text_data, video_type)
        full_summary = f"""
{provide_text_with_css("✅DONE", "green")}
🎞️Video: {text_data.title}
📝Timestamped summary
{timestamped_summary}
📝Summary
{final_summary}
{RESPONSE_SUFFIX}
"""
        prompt_show_user = "Full summary"
        # Replace the last in-progress message rather than appending a new one.
        g_inputs.chatbot[-1] = (prompt_show_user, full_summary)
        g_inputs.history.append(prompt_show_user)
        g_inputs.history.append(full_summary)
        yield g_inputs.chatbot, g_inputs.history, "Success", f"[{g_inputs.source_textbox}] {g_inputs.source_target_textbox}"

    @classmethod
    def execute_classifer_chain(cls, g_inputs: "GradioInputs", youtube_data: "YoutubeData"):
        """Ask GPT to classify the video type; fall back to 'Others' (with a UI warning) on any failure.

        NOTE: the 'classifer' spelling is kept because callers use this name.
        """
        TRANSCRIPT_CHAR_LIMIT = 200  # Because classifier doesn't need to see the whole transcript
        prompt = Prompt(cls.CLASSIFIER_PROMPT.prompt_prefix.format(title=youtube_data.title),
                        cls.CLASSIFIER_PROMPT.prompt_main.format(transcript=youtube_data.full_content[:TRANSCRIPT_CHAR_LIMIT]),
                        cls.CLASSIFIER_PROMPT.prompt_suffix
                        )
        prompt_show_user = "Classify the video type for me"
        response, len_prompts = yield from ChatGPTService.trigger_callgpt_pipeline(prompt, prompt_show_user, g_inputs)
        try:
            video_type = json.loads(response)['type']
            if video_type not in cls.FINAL_SUMMARY_TASK_CONSTRAINTS:
                raise Exception(f"Video type is not valid: {video_type}. Use default: Others")
        except Exception as e:
            # Covers malformed JSON, a missing 'type' key, and unknown types alike.
            yield from ChatGPTService.say_using_ginputs(None, f"{provide_text_with_css('WARN', 'yellow')} {e}", "Success", g_inputs)
            video_type = 'Others'
        return video_type

    @classmethod
    def execute_timestamped_summary_chain(cls, g_inputs: "GradioInputs", youtube_data: "YoutubeData"):
        """Build an 'mm:ss text' transcript and ask GPT for a timestamped chunk summary."""
        transcript_with_ts = ""
        for entry in youtube_data.ts_transcript_list:
            transcript_with_ts += f"{int(entry['start'] // 60)}:{int(entry['start'] % 60):02d} {entry['text']}\n"
        prompt = Prompt(cls.TIMESTAMPED_SUMMARY_PROMPT.prompt_prefix.format(title=youtube_data.title),
                        cls.TIMESTAMPED_SUMMARY_PROMPT.prompt_main.format(transcript_with_ts=transcript_with_ts),
                        cls.TIMESTAMPED_SUMMARY_PROMPT.prompt_suffix.replace("{language}", g_inputs.language_textbox)
                        .replace("{word_limit}", cls._summary_length_limit(g_inputs.language_textbox, 15, 8))
                        )
        prompt_show_user = "Generate the timestamped summary"
        response, len_prompts = yield from ChatGPTService.trigger_callgpt_pipeline(prompt, prompt_show_user, g_inputs, is_timestamp=True)
        return response

    @classmethod
    def execute_final_summary_chain(cls, g_inputs: "GradioInputs", youtube_data: "YoutubeData", video_type):
        """Generate the final summary, with type-specific constraints; long videos get a second
        summary-of-summaries pass when the transcript spanned multiple prompts."""
        format_constraint = cls.FINAL_SUMMARY_FORMAT_CONSTRAINTS[video_type]
        # Unknown types get no extra task constraint (same effect as the previous if/else).
        task_constraint = cls.FINAL_SUMMARY_TASK_CONSTRAINTS.get(video_type, "")
        prompt = Prompt(
            cls.FINAL_SUMMARY_PROMPT.prompt_prefix.format(title=youtube_data.title),
            cls.FINAL_SUMMARY_PROMPT.prompt_main.format(transcript=youtube_data.full_content),
            cls.FINAL_SUMMARY_PROMPT.prompt_suffix.format(task_constraint=task_constraint,
                                                          format_constraint=format_constraint.replace("{char_limit}", cls._summary_length_limit(g_inputs.language_textbox, 30, 15)).replace("{language}", g_inputs.language_textbox),
                                                          language=g_inputs.language_textbox)
        )
        prompt_show_user = "Generate the final summary"
        response, len_prompts = yield from ChatGPTService.trigger_callgpt_pipeline(prompt, prompt_show_user, g_inputs)
        if len_prompts > 1:
            # Give summary of summaries if the video is long
            prompt = Prompt(
                cls.FINAL_SUMMARY_PROMPT.prompt_prefix.format(title=youtube_data.title),
                cls.FINAL_SUMMARY_PROMPT.prompt_main.format(transcript=response),
                cls.FINAL_SUMMARY_PROMPT.prompt_suffix.format(task_constraint=task_constraint, format_constraint=format_constraint, language=g_inputs.language_textbox)
            )
            prompt_show_user = "Since the video is long, generating the final summary of the summaries"
            response, len_prompts = yield from ChatGPTService.trigger_callgpt_pipeline(prompt, prompt_show_user, g_inputs)
        return response
if __name__ == '__main__':
    # Manual smoke test: fires two sequential requests at ChatGPTService, the second
    # one carrying the first exchange as conversation history. Requires a real API key.
    model_name = "gpt-3.5-turbo-16k"
    api_key = ""
    first_prompt = """Give me 2 ideas for the summer"""
    # first_prompt = """Explain more on the first idea"""
    first_reply = ChatGPTService.single_rest_call_chatgpt(api_key, first_prompt, model_name)
    print(first_reply)
    followup_prompt = """
For the first idea, suggest some step by step planning for me
"""
    followup_reply = ChatGPTService.single_rest_call_chatgpt(api_key, followup_prompt, model_name,
                                                            history=[first_prompt, first_reply])
    print(followup_reply)
|