File size: 5,519 Bytes
7e54f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28e7059
 
7e54f17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cac175
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from setup import *
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from pydantic import BaseModel,ValidationError
from typing import List
from langchain_community.tools import TavilySearchResults


# Module-level search tool shared by the functions below (dataset_search calls
# keyword_search.invoke). The options are forwarded to TavilySearchResults
# unchanged; their exact meaning is defined by that class.
_TAVILY_SEARCH_OPTIONS = {
    "max_results": 3,
    "search_depth": "advanced",
    "include_answer": True,
    "include_raw_content": True,
    "include_images": True,
}
keyword_search = TavilySearchResults(**_TAVILY_SEARCH_OPTIONS)


# One generated record: a use case, a short description of it, and the keyword
# query produced for it.
# NOTE(review): documented with comments instead of a class docstring on
# purpose -- pydantic exposes a model's docstring as the schema description,
# and this model's schema is passed to the LLM in keyword_generation, so a
# docstring here would change what the model is shown.
class UseCaseKeywords(BaseModel):
    use_case: str  # use case title
    description: str  # short summary of the use case
    keyword: str  # search query string for this use case

    def as_dict(self) -> dict:
        """Return this record's fields as a plain dictionary (via model_dump)."""
        field_values = self.model_dump()
        return field_values

# Wrapper schema for the structured LLM reply: a list of UseCaseKeywords
# records under the single field `data`.
# NOTE(review): comments are used instead of a class docstring because
# pydantic exposes the docstring as the schema description, and this schema is
# handed to the LLM through with_structured_output.
class KeywordGenerationResponse(BaseModel):
    data: List[UseCaseKeywords]

    def as_list_of_dicts(self) -> List[dict]:
        """Return every record in `data` as a plain dictionary, in order."""
        records = []
        for entry in self.data:
            records.append(entry.as_dict())
        return records



def keyword_generation(report):
    """Ask the LLM for one dataset-search keyword query per use case.

    The system prompt describes the task and shows a worked example; the
    report is sent as the human message. The reply is parsed into a
    KeywordGenerationResponse via ``with_structured_output``.

    Args:
        report: Text describing the use cases; it is interpolated into the
            human message as-is.

    Returns:
        A list of dictionaries, one per use case, each with the keys
        ``use_case``, ``description`` and ``keyword``.

    Notes:
        ``llm`` is not defined in this file; presumably it is provided by the
        wildcard import from ``setup`` -- confirm.
        NOTE(review): if the structured-output call ever yields ``None``
        instead of a parsed object, the conversion below raises
        AttributeError -- confirm whether callers need a guard.
    """
    # The instruction list and the example output must describe the same
    # three-field record that KeywordGenerationResponse enforces; otherwise
    # the prompt contradicts the schema the model is asked to fill in.
    query_generation_sys_prompt = SystemMessage(content='''You are an expert in creating precise and relevant keyword queries to search for datasets. Your task is to generate a keyword query for each use case provided below. These queries should be optimized for searching datasets on platforms such as GitHub, Kaggle, and Hugging Face.  

  **Instructions:**  
  1. Extract the key concepts from the use case (e.g., objectives, AI application, and domain).  
  2. Formulate a concise, descriptive query using relevant terms and synonyms.  
  3. Include terms related to data types (e.g., "customer data," "chat logs," "shopping behavior"), AI techniques (e.g., "machine learning," "recommendation systems"), and target domain (e.g., "e-commerce," "retail").  
  4. For each use case, output one entry containing the use case title ('use_case'), a one-sentence summary ('description'), and the keyword query ('keyword').

  **Use Cases: Examples** 
  ## Use Case 1: Personalized Shopping Experiences with GenAI  
  **Objective/Use Case:** Create tailored shopping experiences for individual customers based on their browsing history, purchasing behavior, and preferences.  
  **AI Application:** Implement machine learning algorithms that analyze customer data to generate personalized offers, marketing communications, and product recommendations.  
  **Cross-Functional Benefit:**  
  - **Marketing:** Increases customer satisfaction and loyalty through targeted marketing efforts.  
  - **Sales:** Boosts sales by offering relevant products to customers.  
  - **Customer Service:** Enhances customer experience through personalized support.  

  ## Use Case 2: AI-Powered Chatbots for Customer Service  
  **Objective/Use Case:** Improve in-store customer service by providing instant assistance and directing customers to relevant products.  
  **AI Application:** Develop GenAI-powered chatbots that analyze customer queries and provide accurate responses, suggesting related products and services.  
  **Cross-Functional Benefit:**  
  - **Customer Service:** Reduces wait times and improves customer satisfaction.  
  - **Sales:** Increases sales by suggesting relevant products to customers.  
  - **Operations:** Enhances employee productivity by automating routine tasks.  

  Example output:
    [{'use_case': "Personalized Shopping Experiences with GenAI",
      'description': "AI-driven personalization enhances customer satisfaction through tailored offers, recommendations, and marketing based on individual preferences.",
      'keyword': "e-commerce personalized shopping data customer behavior recommendation system offers dataset"},
     {'use_case': "AI-Powered Chatbots for Customer Service",
      'description': "AI chatbots provide instant, accurate assistance, improving customer service, increasing sales, and boosting operational efficiency.",
      'keyword': "customer service chatbot dataset customer queries retail e-commerce AI automation"}]''')

    # Bind the response schema so the reply comes back as a
    # KeywordGenerationResponse instead of free text.
    structured_llm = llm.with_structured_output(KeywordGenerationResponse)

    # The use cases to process are delivered in the human turn.
    report_msg = HumanMessage(content=f'The usecases are as follows {report}')

    response = structured_llm.invoke([query_generation_sys_prompt, report_msg])

    # Downstream steps work on plain dictionaries, not pydantic objects.
    return response.as_list_of_dicts()



def dataset_search(output_list, search_tool=None):
    """Run a web search for every use case and attach the raw results.

    Args:
        output_list: List of dictionaries, each holding a ``'keyword'`` entry
            with the search keywords for one use case.
        search_tool: Optional object exposing ``invoke({'query': ...})``.
            When omitted, the module-level ``keyword_search`` tool is used.

    Returns:
        The same list object. Each dictionary is mutated in place and gains a
        ``'links'`` entry holding whatever the search tool returned.

    Raises:
        KeyError: If a dictionary has no ``'keyword'`` entry.
    """
    site_filter = (
        ' AND dataset OR implementation guide '
        'site:(kaggle.com OR github.com OR huggingface.co OR '
        'paperswithcode.com OR arxiv.org OR research.google.com)'
    )
    for usecase_dict in output_list:
        # Resolved lazily so an empty list never touches the global tool.
        tool = keyword_search if search_tool is None else search_tool
        # Build the query exactly once. Running str.format over already
        # interpolated text would treat any '{' or '}' in the LLM-generated
        # keyword as a replacement field and crash or corrupt the query.
        query = f"{usecase_dict['keyword']}{site_filter}"
        usecase_dict['links'] = tool.invoke({'query': query})
    return output_list



def grouping_urls(output_list):
    """Collect the result URLs of every use case into a flat ``'urls_list'``.

    Args:
        output_list: List of dictionaries, each holding a ``'links'`` entry
            with the search results for one use case.

    Returns:
        The same list object. Each dictionary is mutated in place and gains a
        ``'urls_list'`` entry with the ``'url'`` value of every result, in
        order. The ``'links'`` entry itself is left untouched.

    Raises:
        KeyError: If a dictionary has no ``'links'`` entry.
    """
    for dict_item in output_list:
        links = dict_item['links']
        # NOTE(review): the Tavily tool appears to return an error string
        # rather than a list when the search fails; iterating that string
        # would index characters with 'url' and raise TypeError. Treat any
        # non-list result as "no links found".
        if not isinstance(links, (list, tuple)):
            links = []
        dict_item['urls_list'] = [
            hit['url']
            for hit in links
            if isinstance(hit, dict) and 'url' in hit
        ]
    return output_list



def delete_columns(output_list):
    """Strip the intermediate ``'links'`` and ``'keyword'`` entries.

    Args:
        output_list: List of dictionaries to clean up.

    Returns:
        The same list object; each dictionary is mutated in place. Entries
        that are already absent are ignored.
    """
    for record in output_list:
        # A default of None makes pop a no-op when the key is missing.
        record.pop('links', None)
        record.pop('keyword', None)
    return output_list


def feasibility_agent_func(report):
    """Run the full pipeline on a report and return the cleaned records.

    The stages run in order, each receiving the previous stage's result:
    keyword generation, dataset search, URL grouping, and removal of the
    intermediate entries.

    Args:
        report: Use-case report passed to ``keyword_generation``.

    Returns:
        The list of dictionaries returned by ``delete_columns``.
    """
    pipeline = (keyword_generation, dataset_search, grouping_urls, delete_columns)
    result = report
    for stage in pipeline:
        result = stage(result)
    return result