import tempfile

import openai
import streamlit as st
import whisper
from audiorecorder import audiorecorder


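# Split long input into fixed-size character chunks so each chunk can be sent to the API separately.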
def chunk_text(text, chunk_size=2000):
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunk = text[start:end]
        chunks.append(chunk)
        start = end
    return chunks


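# st.session_state persists values across Streamlit reruns (the script reruns on every interaction).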
if 'claims_extraction' not in st.session_state:
    st.session_state.claims_extraction = ""
if 'learning_objectives' not in st.session_state:
    st.session_state.learning_objectives = ""

st.title("Patent Claims Extraction")

api_key = st.text_input("Enter your OpenAI API Key:", type="password")

image = st.camera_input("Camera input")

# Save the captured image (if any) to a temporary file and keep its path.
if image:
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tf:
        tf.write(image.read())
        temp_image_path = tf.name
else:
    temp_image_path = None

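# Record audio in the browser; audiorecorder returns a pydub AudioSegment that is transcribed with Whisper below.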
audio = audiorecorder("Click to record audio", "Click to stop recording")

submit_button = st.button("Use this audio")

transcript = ""
if submit_button and len(audio) > 0:
    st.info("Transcribing...")
    model = whisper.load_model("base")
    # Whisper expects a file path (or audio array), so export the recording first.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as af:
        audio.export(af.name, format="wav")
    result = model.transcribe(af.name)
    st.success("Transcription complete")
    transcript = result['text']

    with st.expander("See transcript"):
        st.markdown(transcript)

model_choice = st.selectbox(
    "Select the model you want to use:",
    ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-3.5-turbo", "gpt-4-0314", "gpt-4-0613", "gpt-4"]
)

context = "You are a patent claims identifier and extractor. You will be given freeform text, identify any claims contained therein that may be patentable, extract and print those claims, and briefly explain why each claim is patentable."
userinput = st.text_input("Input Text:", "Freeform text here!")

if api_key:
    openai.api_key = api_key

st.write("### Patentable Claims:")

claims_extraction = ""

learning_status_placeholder = st.empty()
disable_button_bool = False

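# Extract claims from the typed text: send each chunk to the chat model and collect the responses.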
if userinput and api_key and st.button("Extract Claims", key="claims_extraction_button", disabled=disable_button_bool):
    input_chunks = chunk_text(userinput)

    all_extracted_claims = ""
    for i, chunk in enumerate(input_chunks, start=1):
        learning_status_placeholder.text(f"Extracting Patentable Claims for chunk {i}...")

        claims_extraction_response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {"role": "system", "content": context},
                {"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
            ]
        )

        claims_extraction = claims_extraction_response['choices'][0]['message']['content']
        all_extracted_claims += claims_extraction.strip() + "\n"

    st.session_state.claims_extraction = all_extracted_claims
    learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")

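# Do the same for the audio transcript, if one was produced above.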
if st.button("Extract Claims from Transcript", key="claims_extraction_transcript") and api_key and transcript:
    input_chunks = chunk_text(transcript)

    all_extracted_claims = ""
    for i, chunk in enumerate(input_chunks, start=1):
        learning_status_placeholder.text(f"Extracting Patentable Claims for chunk {i}...")

        claims_extraction_response = openai.ChatCompletion.create(
            model=model_choice,
            messages=[
                {"role": "system", "content": context},
                {"role": "user", "content": f"Extract any patentable claims from the following: \n {chunk}. \n Extract each claim. Briefly explain why you extracted this word phrase. Exclude any additional commentary."}
            ]
        )

        claims_extraction = claims_extraction_response['choices'][0]['message']['content']
        all_extracted_claims += claims_extraction.strip() + "\n"

    st.session_state.claims_extraction = all_extracted_claims
    learning_status_placeholder.text(f"Patentable Claims Extracted!\n{all_extracted_claims.strip()}")

# Display the most recently extracted claims, if any.
if st.session_state.claims_extraction:
    st.text(st.session_state.claims_extraction.strip())

st.markdown("<sub>This app was created by [Taylor Ennen](https://github.com/taylor-ennen/GPT-Streamlit-MVP) & [Tonic](https://huggingface.co/tonic)</sub>", unsafe_allow_html=True)