Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| from openai import AzureOpenAI, OpenAIError | |
| import csv | |
def get_first_column(csv_filepath):
    """
    Collect the first cell of every data row in a CSV file.

    The first row of the file is always treated as a header and discarded.
    Rows that the csv reader yields as empty lists (blank lines) are ignored.

    Args:
        csv_filepath: The path to the CSV file. The file is read as UTF-8.

    Returns:
        A list of strings, one per non-empty data row, in file order.
        If the file cannot be opened the list is empty. If an error
        interrupts reading part-way through, the values gathered before the
        error are returned. In both cases a message is printed to the
        console and no exception is raised.
    """
    values = []
    try:
        with open(csv_filepath, 'r', newline='', encoding='utf-8') as handle:
            rows = csv.reader(handle)
            # Discard the header; the default makes this a no-op on an empty file.
            next(rows, None)
            for record in rows:
                if not record:  # blank line in the file
                    continue
                values.append(record[0])
    except FileNotFoundError:
        print(f"Error: File not found at {csv_filepath}")
    except Exception as e:  # any other failure, e.g. UnicodeDecodeError
        print(f"An error occurred: {e}")
    return values
def add_text_to_csv(csv_filepath, text_to_add, index=0):
    """
    Append (index, text) rows to a two-column UTF-8 CSV file.

    The file is opened in append mode, so it is created if it does not
    exist. When the file is empty at the time of the call, a header row
    ``#,Text`` is written first.

    Args:
        csv_filepath: The path to the CSV file.
        text_to_add: Either a single value, written as one row, or a list
            of values, written as one row per item.
        index: Value stored in the ``#`` column. When ``text_to_add`` is a
            list, every row written by that call gets this same value.

    Returns:
        None. Any exception raised while opening or writing the file is
        caught and reported with a console message instead of propagating.
    """
    try:
        with open(csv_filepath, 'a', newline='', encoding='utf-8') as out:
            # Position at the end explicitly so tell() reports the file size.
            out.seek(0, 2)
            needs_header = out.tell() == 0

            sink = csv.writer(out)
            if needs_header:
                sink.writerow(["#", "Text"])

            # Treat a lone value as a one-item batch so a single loop suffices.
            items = text_to_add if isinstance(text_to_add, list) else [text_to_add]
            for item in items:
                sink.writerow([index, item])
    except Exception as e:
        print(f"An error occurred: {e}")
# ---------------------------------------------------------------------------
# Script body: paraphrase news items with an Azure OpenAI chat deployment and
# append each paraphrase to an output CSV.
# ---------------------------------------------------------------------------

load_dotenv()  # load AZURE_OPENAI_* settings from a local .env file, if present

AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
# Endpoint and API version come from the environment when set; the literals
# are fallbacks so an environment without these variables keeps working.
AZURE_OPENAI_ENDPOINT = (
    os.getenv("AZURE_OPENAI_ENDPOINT") or "https://quoc-nguyen.openai.azure.com/"
)
AZURE_OPENAI_API_VERSION = (
    os.getenv("AZURE_OPENAI_API_VERSION") or "2024-05-01-preview"
)

azure_client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version=AZURE_OPENAI_API_VERSION,
)

# Name of the Azure deployment to call, e.g. "o1-mini" or "gpt-4o".
deployment_name = "gpt-4o"
deplopment_name = deployment_name  # misspelled original name, kept as an alias

TEXT_PROMPT = """
Paraphrase the following news, only output the paraphrased text:
"""

INPUT_CSV = "data/bbc_news.csv"
OUTPUT_CSV = "data/bbc_news_4o.csv"
MAX_PARAPHRASES = 1000  # stop once this many paraphrases have been saved

text = get_first_column(INPUT_CSV)
count = 0  # number of paraphrases saved so far; also used as the output row id
for index, news in enumerate(text):
    # ">=" so that exactly MAX_PARAPHRASES rows are produced, not one extra.
    if count >= MAX_PARAPHRASES:
        break

    prompt = TEXT_PROMPT + news
    print(f"{index:5}:\t{news[:50]}")

    try:
        response = azure_client.chat.completions.create(
            model=deployment_name,  # for Azure, "model" is the deployment name
            messages=[
                {"role": "user", "content": prompt},
            ],
        )
    except OpenAIError as e:
        # Best effort: report the failure and move on to the next item.
        print(f"Error interacting with OpenAI API: {e}")
        continue

    # Guard against a response with no choices or with no message content,
    # so the loop neither crashes nor records an empty paraphrase.
    paraphrased_news = (
        response.choices[0].message.content if response.choices else None
    )
    if not paraphrased_news:
        print(f"Error: no paraphrase returned for item {index}; skipping")
        continue

    # NOTE(review): rows are numbered by success count, not by the source
    # index, so skipped items break the 1:1 mapping to the input file.
    count += 1
    add_text_to_csv(OUTPUT_CSV, paraphrased_news, count)