Spaces:

Deaksh
/

Persona-postgenerator

Sleeping

App Files Files Community

Persona-postgenerator / few_shot.py

Deaksh

Upload 14 files

6d0c6c2 verified 8 months ago

raw

history blame

1.92 kB

	import pandas as pd
	import json
	import os


	#class FewShotPosts:
	# def __init__(self, file_path="data/processed_posts.json"):
	# self.df = None
	# self.unique_tags = None
	# self.load_posts(file_path)


	class FewShotPosts:
	    """Few-shot example posts for a single persona, loaded from processed JSON.

	    The JSON file is read into a pandas DataFrame (``self.df``). Each post is
	    expected to provide ``line_count``, ``language`` and ``tags`` fields, since
	    those columns are read when loading and filtering. A derived ``length``
	    column ("Short" / "Medium" / "Long") is added on load.
	    """

	    def __init__(self, persona_name: str) -> None:
	        """Dynamically load JSON based on the selected persona.

	        Args:
	            persona_name: Persona whose posts should be loaded. It is
	                lower-cased and inserted into
	                ``data/processed_<persona>_posts.json``.

	        Raises:
	            FileNotFoundError: If that processed JSON file does not exist.
	        """
	        self.df = None
	        self.unique_tags = None
	        self.file_path = f"data/processed_{persona_name.lower()}_posts.json"

	        # Fail early with a persona-specific message instead of a bare
	        # "No such file" error coming from open().
	        if not os.path.exists(self.file_path):
	            raise FileNotFoundError(f"Processed JSON file not found for persona: {persona_name}")

	        self.load_posts(self.file_path)

	    def load_posts(self, file_path: str) -> None:
	        """Read posts from ``file_path`` and populate ``df`` and ``unique_tags``.

	        Args:
	            file_path: Path to a UTF-8 encoded JSON file containing the posts.
	        """
	        with open(file_path, encoding="utf-8") as f:
	            posts = json.load(f)

	        self.df = pd.json_normalize(posts)

	        # A file with no posts yields a frame without any columns; stop here
	        # rather than failing with a KeyError on 'line_count' below.
	        if self.df.empty:
	            self.unique_tags = []
	            return

	        self.df['length'] = self.df['line_count'].apply(self.categorize_length)

	        # Collect unique tags in a single pass. Sorting makes the result
	        # deterministic across runs (plain set order is not).
	        # NOTE(review): assumes tags are mutually comparable (e.g. all
	        # strings) -- confirm against the processed data.
	        self.unique_tags = sorted(
	            {tag for post_tags in self.df['tags'] for tag in post_tags}
	        )

	    def get_filtered_posts(self, length, language, tag) -> list:
	        """Return the posts matching the given length, language and tag.

	        Args:
	            length: Length category to match against the ``length`` column
	                ("Short", "Medium" or "Long").
	            language: Value to match against the ``language`` column.
	            tag: Tag that must be contained in the post's ``tags``.

	        Returns:
	            A list with one dict per matching post (empty if nothing matches
	            or no posts are loaded).
	        """
	        if self.df is None or self.df.empty:
	            return []

	        has_tag = self.df['tags'].apply(lambda tags: tag in tags)
	        same_language = self.df['language'] == language
	        same_length = self.df['length'] == length

	        df_filtered = self.df[has_tag & same_language & same_length]
	        return df_filtered.to_dict(orient='records')

	    def categorize_length(self, line_count) -> str:
	        """Map a line count to "Short" (< 5), "Medium" (5-10) or "Long" (> 10)."""
	        if line_count < 5:
	            return "Short"
	        elif 5 <= line_count <= 10:
	            return "Medium"
	        else:
	            return "Long"

	    def get_tags(self):
	        """Return the unique tags collected by ``load_posts``."""
	        return self.unique_tags


	# Example usage (the constructor requires a persona name):
	# if __name__ == "__main__":
	#     fs = FewShotPosts("<persona_name>")
	#     # print(fs.get_tags())
	#     posts = fs.get_filtered_posts("Short", "English", "Economy")
	#     print(posts)