LuxyR committed on
Commit
a0fbe53
·
verified ·
1 Parent(s): 93b0380

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -105
app.py CHANGED
@@ -1,134 +1,101 @@
1
  import gradio as gr
2
- import re
3
- import torch
4
  import requests
5
- import random
6
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
7
 
8
# TMDb API key.
# NOTE(review): the key is hard-coded in source; consider loading it from
# configuration instead of committing it.
TMDB_API_KEY = "364ab334807e87805d326b6dc0f1905c"

# Genre name -> TMDb genre id.
GENRE_MAP = {
    "Action": 28,
    "Adventure": 12,
    "Animation": 16,
    "Comedy": 35,
    "Crime": 80,
    "Documentary": 99,
    "Drama": 18,
    "Family": 10751,
    "Fantasy": 14,
    "History": 36,
    "Horror": 27,
    "Music": 10402,
    "Mystery": 9648,
    "Romance": 10749,
    "Sci-Fi": 878,
    "TV Movie": 10770,
    "Thriller": 53,
    "War": 10752,
    "Western": 37,
}
18
 
19
# Models.
# The "gpt4" / "claude" / "arb" prefixes are only labels used by this app;
# the checkpoints actually loaded are the ones named in each call below.

# "GPT-4" contestant: causal LM, run on GPU 0 when CUDA is available.
gpt4_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b")
gpt4_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b")
gpt4_pipe = pipeline(
    "text-generation",
    model=gpt4_model,
    tokenizer=gpt4_tokenizer,
    device=0 if torch.cuda.is_available() else -1,
)

# "Claude" contestant: sequence-to-sequence model.
claude_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
claude_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
claude_pipe = pipeline(
    "text2text-generation",
    model=claude_model,
    tokenizer=claude_tokenizer,
)

# Arbiter that judges the two contestants.
arb_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
arb_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
arb_pipe = pipeline(
    "text2text-generation",
    model=arb_model,
    tokenizer=arb_tokenizer,
)
32
-
33
# Recognized genre names and the regex used to pull them out of model output.
GENRE_LIST = list(GENRE_MAP.keys())
# Inside a raw string a word boundary is written r"\b". The previous r"\\b"
# is the regex for a literal backslash followed by "b", so the pattern could
# never match a genre name. Names are escaped so they always match literally.
GENRE_PATTERN = r"\b(" + "|".join(re.escape(name) for name in GENRE_LIST) + r")\b"
36
 
37
def extract_genres(output):
    """Return the sorted, de-duplicated genres mentioned in ``output``.

    Args:
        output: Text produced by one of the classifier pipelines.

    Returns:
        A sorted list of genre names, spelled exactly as the keys of
        ``GENRE_MAP``. Matching against ``GENRE_PATTERN`` is case-insensitive.
    """
    # Map matches back to the canonical key through a case-insensitive lookup.
    # str.title() is not reliable for this: it turns "tv movie" into
    # "Tv Movie", which is not a GENRE_MAP key, so that genre was always lost.
    canonical = {name.casefold(): name for name in GENRE_MAP}
    matches = re.findall(GENRE_PATTERN, output, flags=re.IGNORECASE)
    folded = (match.casefold() for match in matches)
    return sorted({canonical[key] for key in folded if key in canonical})
40
 
41
# Genre classifiers

def gpt4_genre_classifier(text):
    """Ask the ``gpt4_pipe`` text-generation pipeline for the genres of ``text``.

    Args:
        text: Free-form description of a game or movie.

    Returns:
        The list of recognized genres returned by ``extract_genres`` for the
        model's continuation.
    """
    prompt = (
        "List only the relevant genres present in the following game or movie description. Do not list all possible genres.\n"
        f"Description: \"{text}\"\nGenres:"
    )
    # A text-generation pipeline returns prompt + continuation by default.
    # Without return_full_text=False, genre words that merely appear in the
    # user's description would be picked up as if the model had produced them.
    output = gpt4_pipe(prompt, max_new_tokens=20, return_full_text=False)[0]['generated_text']
    return extract_genres(output)
51
-
52
def claude_genre_classifier(text):
    """Ask the ``claude_pipe`` text2text pipeline for the genres of ``text``.

    Args:
        text: Free-form description of a game or movie.

    Returns:
        The list of recognized genres extracted from the model's answer.
    """
    instruction = "List only the relevant genres present in the following game or movie description. Do not list all possible genres.\n"
    question = f"Description: \"{text}\"\nGenres:"
    first_candidate = claude_pipe(instruction + question, max_new_tokens=20)[0]
    return extract_genres(first_candidate['generated_text'])
60
-
61
# Find a not-yet-used movie for at least one of the identified genres

def recommend_movie(genres, used_ids):
    """Return a top-rated TMDb movie for one of ``genres``, or ``None``.

    Genres are tried in random order; for each one the TMDb discover endpoint
    is queried (sorted by average vote, at least 100 votes) and the first
    result whose id is not in ``used_ids`` is returned.

    Args:
        genres: Genre names (keys of ``GENRE_MAP``). Not modified.
        used_ids: Set of TMDb movie ids already recommended. The id of the
            returned movie is added to it.

    Returns:
        A dict with ``title``, ``rating`` and ``genre`` keys, or ``None`` when
        no genre yields an unused movie.
    """
    # random.sample gives a shuffled copy. random.shuffle reordered the
    # caller's list in place, which scrambled the genre order shown later.
    for genre in random.sample(list(genres), k=len(genres)):
        genre_id = GENRE_MAP.get(genre)
        if not genre_id:
            continue
        try:
            resp = requests.get(
                "https://api.themoviedb.org/3/discover/movie",
                params={
                    "api_key": TMDB_API_KEY,
                    "with_genres": genre_id,
                    "sort_by": "vote_average.desc",
                    "vote_count.gte": 100,
                },
                timeout=10,  # never hang the UI on a stalled connection
            )
        except requests.RequestException:
            # Network trouble for this genre: fall through to the next one.
            continue
        if resp.status_code != 200:
            continue
        try:
            results = resp.json().get("results", [])
        except ValueError:
            # Body was not valid JSON.
            continue
        for movie in results:
            if movie['id'] not in used_ids:
                used_ids.add(movie['id'])
                return {
                    "title": movie['title'],
                    "rating": movie['vote_average'],
                    "genre": genre,
                }
    return None
82
 
83
# Arbiter

def llama_judge(text, gpt_genres, claude_genres):
    """Ask ``arb_pipe`` which classifier labelled ``text`` more accurately.

    Args:
        text: The description that was classified.
        gpt_genres: Genres produced by the "GPT-4" classifier.
        claude_genres: Genres produced by the "Claude" classifier.

    Returns:
        ``"GPT-4"``, ``"Claude"`` or ``"Tie"``. "gpt" is checked before
        "claude", and anything else counts as a tie.
    """
    gpt_listing = ', '.join(gpt_genres)
    claude_listing = ', '.join(claude_genres)
    prompt = (
        f"You are an expert genre classifier.\n"
        f"Description: \"{text}\"\n"
        f"GPT-4 genres: {gpt_listing}\n"
        f"Claude genres: {claude_listing}\n"
        "Who identified the genres more accurately? Reply with only 'GPT-4', 'Claude', or 'Tie'."
    )
    verdict = arb_pipe(prompt, max_new_tokens=10)[0]['generated_text'].lower()
    for needle, label in (("gpt", "GPT-4"), ("claude", "Claude")):
        if needle in verdict:
            return label
    return "Tie"
99
-
100
# Main entry point

def process_input(text):
    """Run the full genre duel for ``text`` and format the result.

    Both classifiers label the description, a distinct movie is looked up for
    each genre list, the arbiter picks the better classifier, and the movie
    with the higher rating is highlighted (the "GPT-4" movie wins ties).

    Args:
        text: Description of a game or movie, in English.

    Returns:
        A multi-line report, or a warning message when a movie could not be
        found for either classifier.
    """
    gpt_genres = gpt4_genre_classifier(text)
    claude_genres = claude_genre_classifier(text)

    # One shared set, so the two recommendations can never be the same movie.
    used_ids = set()
    gpt_movie = recommend_movie(gpt_genres, used_ids)
    claude_movie = recommend_movie(claude_genres, used_ids)

    if not (gpt_movie and claude_movie):
        return "⚠️ Não foi possível encontrar recomendações de filmes para os gêneros identificados."

    winner = llama_judge(text, gpt_genres, claude_genres)
    if gpt_movie['rating'] >= claude_movie['rating']:
        best_movie = gpt_movie
    else:
        best_movie = claude_movie

    report = [
        f"GPT-4 Genres: {', '.join(gpt_genres)}\n",
        f"Claude Genres: {', '.join(claude_genres)}\n\n",
        f"GPT-4 Movie: {gpt_movie['title']} ({gpt_movie['rating']})\n",
        f"Claude Movie: {claude_movie['title']} ({claude_movie['rating']})\n\n",
        f"🏆 Winner: {winner}\n",
        f"🎬 Best Rated Movie: {best_movie['title']}",
    ]
    return "".join(report)
124
 
125
# Gradio interface: one text box in, the plain-text report out.
iface = gr.Interface(
    title="AI Genre Duel + Movie Recommendation",
    description="Dois modelos de IA identificam os gêneros de uma descrição e recomendam um filme com base nisso. Um juiz decide o melhor classificador e destaca o filme com maior nota.",
    fn=process_input,
    inputs=gr.Textbox(lines=3, label="Enter game or movie description in English"),
    outputs="text",
)

# share=True asks Gradio for a public share link in addition to the local server.
iface.launch(share=True)
 
1
  import gradio as gr
 
 
2
  import requests
3
+ import re
4
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
5
 
 
6
# TMDb API key.
# NOTE(review): the key is hard-coded in source; consider loading it from
# configuration instead of committing it.
TMDB_API_KEY = "364ab334807e87805d326b6dc0f1905c"

# Genre name -> TMDb genre id.
GENRE_MAP = {
    "Action": 28,
    "Adventure": 12,
    "Animation": 16,
    "Comedy": 35,
    "Crime": 80,
    "Documentary": 99,
    "Drama": 18,
    "Family": 10751,
    "Fantasy": 14,
    "History": 36,
    "Horror": 27,
    "Music": 10402,
    "Mystery": 9648,
    "Romance": 10749,
    "Sci-Fi": 878,
    "TV Movie": 10770,
    "Thriller": 53,
    "War": 10752,
    "Western": 37,
}
14
 
15
# Small models that fit on Hugging Face hosting.
# The "gpt" / "claude" prefixes are only labels used by this app; the
# checkpoints actually loaded are the ones named in each call below.
gpt_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
gpt_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
gpt_pipe = pipeline(
    "text2text-generation",
    model=gpt_model,
    tokenizer=gpt_tokenizer,
)

claude_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
claude_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
claude_pipe = pipeline(
    "text2text-generation",
    model=claude_model,
    tokenizer=claude_tokenizer,
)

# The judge reuses the same pipeline object as the "Claude" contestant.
arb_pipe = claude_pipe
 
 
 
 
 
 
25
 
26
def extract_genres(output):
    """Return the sorted, de-duplicated genres mentioned in ``output``.

    Args:
        output: Text produced by a classifier pipeline.

    Returns:
        A sorted list of genre names spelled exactly as the keys of
        ``GENRE_MAP``, whatever casing the model used.
    """
    # The search is case-insensitive, so a raw match can be "action" or
    # "SCI-FI". Returning raw matches made later `g in GENRE_MAP` checks fail
    # and let the same genre appear twice with different casing, so every
    # match is mapped back to its canonical key.
    canonical = {name.casefold(): name for name in GENRE_MAP}
    pattern = r'\b(?:' + '|'.join(re.escape(name) for name in GENRE_MAP) + r')\b'
    matches = re.findall(pattern, output, flags=re.IGNORECASE)
    folded = (match.casefold() for match in matches)
    return sorted({canonical[key] for key in folded if key in canonical})
29
 
30
def classify_genres(pipe, text):
    """Ask ``pipe`` to label ``text`` and return the genres it names.

    Args:
        pipe: Callable pipeline returning a list of dicts with a
            ``"generated_text"`` entry.
        text: Description of a movie or game.

    Returns:
        The genres found in the model's answer, as returned by
        ``extract_genres``.
    """
    valid_genres = ", ".join(GENRE_MAP.keys())
    instruction = "List the genres (only) for this movie or game description.\n"
    options = "Valid genres: " + valid_genres + ".\n"
    question = f"Description: \"{text}\"\nGenres:"
    first_candidate = pipe(instruction + options + question, max_new_tokens=30)[0]
    return extract_genres(first_candidate["generated_text"])
38
+
39
def get_movie_by_genres(genres, exclude_titles=None):
    """Return the top-rated TMDb movie matching ``genres``, or ``None``.

    All recognized genres are sent together in one comma-separated
    ``with_genres`` parameter to the discover endpoint, sorted by average
    vote with at least 50 votes.

    Args:
        genres: Genre names; entries that are not ``GENRE_MAP`` keys are
            ignored.
        exclude_titles: Optional iterable of movie titles to skip.

    Returns:
        A dict with ``title`` and ``rating`` keys, or ``None`` when no genre
        is recognized, the request fails, or every result is excluded.
    """
    # None instead of a shared mutable [] default.
    excluded = set(exclude_titles or ())
    genre_ids = [GENRE_MAP[g] for g in genres if g in GENRE_MAP]
    if not genre_ids:
        return None
    params = {
        "api_key": TMDB_API_KEY,
        "with_genres": ",".join(map(str, genre_ids)),
        "sort_by": "vote_average.desc",
        "vote_count.gte": 50,
        "language": "en-US",
    }
    try:
        response = requests.get(
            "https://api.themoviedb.org/3/discover/movie",
            params=params,
            timeout=10,  # never hang the UI on a stalled connection
        )
        response.raise_for_status()
        movies = response.json().get("results", [])
    except (requests.RequestException, ValueError):
        # Network failure, HTTP error status, or a non-JSON body: report
        # "no movie" so the caller shows its normal warning.
        return None
    for movie in movies:
        if movie["title"] not in excluded:
            return {"title": movie["title"], "rating": movie["vote_average"]}
    return None
57
 
58
def judge_and_decide(text, gpt_genres, claude_genres, gpt_movie, claude_movie):
    """Pick the better genre list via ``arb_pipe`` and the better-rated movie.

    Args:
        text: The description that was classified.
        gpt_genres: Genres from the "GPT" classifier.
        claude_genres: Genres from the "Claude" classifier.
        gpt_movie: Movie dict (with a ``"rating"`` key) for the GPT genres.
        claude_movie: Movie dict (with a ``"rating"`` key) for the Claude genres.

    Returns:
        A dict with ``"decision"`` (``"GPT"``, ``"Claude"`` or ``"Tie"``) and
        ``"winner_movie"`` (the movie with the higher rating; ``gpt_movie``
        on equal ratings).
    """
    prompt = (
        f"You are a genre classification judge.\n"
        f"Description: {text}\n"
        f"GPT Genres: {', '.join(gpt_genres)}\nClaude Genres: {', '.join(claude_genres)}\n"
        f"Which list better matches the description? Reply with GPT, Claude or Tie."
    )
    raw_verdict = arb_pipe(prompt, max_new_tokens=10)[0]["generated_text"]
    # Compare case-insensitively: an answer such as "claude" or "gpt" would
    # otherwise fall through to "Tie". "Claude" is still checked first.
    verdict = raw_verdict.casefold()
    if "claude" in verdict:
        decision = "Claude"
    elif "gpt" in verdict:
        decision = "GPT"
    else:
        decision = "Tie"

    best_movie = max([gpt_movie, claude_movie], key=lambda m: m["rating"])
    return {
        "decision": decision,
        "winner_movie": best_movie,
    }
73
+
74
def main(description):
    """Run the genre duel for ``description`` and return the text report.

    Both pipelines classify the description, one movie is fetched per genre
    list (the second lookup excludes the first movie's title), and the judge
    result is formatted together with the best-rated movie.

    Args:
        description: Movie or game description, in English.

    Returns:
        A multi-line report, or a warning message when either movie lookup
        comes back empty.
    """
    gpt_genres = classify_genres(gpt_pipe, description)
    claude_genres = classify_genres(claude_pipe, description)

    gpt_movie = get_movie_by_genres(gpt_genres)
    # Keep the two recommendations distinct when the first lookup succeeded.
    already_taken = [gpt_movie["title"]] if gpt_movie else []
    claude_movie = get_movie_by_genres(claude_genres, exclude_titles=already_taken)

    if not (gpt_movie and claude_movie):
        return "⚠️ Não foi possível encontrar recomendações de filmes para os gêneros identificados."

    result = judge_and_decide(description, gpt_genres, claude_genres, gpt_movie, claude_movie)
    top = result['winner_movie']

    report = [
        f"🎯 GPT-4 Genres: {', '.join(gpt_genres)}\n",
        f"🎯 Claude Genres: {', '.join(claude_genres)}\n\n",
        f"🎬 GPT Movie: {gpt_movie['title']} ({gpt_movie['rating']})\n",
        f"🎬 Claude Movie: {claude_movie['title']} ({claude_movie['rating']})\n\n",
        f"🏆 Winner: {result['decision']}\n",
        f"🎖️ Best Rated Movie: {top['title']} ({top['rating']})",
    ]
    return "".join(report)
94
 
95
# Gradio UI: one text box in, the plain-text report out; launched immediately.
demo = gr.Interface(
    title="🎬 AI Genre Duel + Movie Recommendation",
    description="Dois modelos AI classificam o gênero de uma descrição e recomendam filmes. O juiz escolhe o vencedor com base nos gêneros e na melhor nota de filme.",
    fn=main,
    inputs=gr.Textbox(lines=3, placeholder="Enter a movie/game description in English"),
    outputs="text",
)
demo.launch()