Update app.py
Browse files
app.py
CHANGED
@@ -1,134 +1,101 @@
|
|
1 |
import gradio as gr
|
2 |
-
import re
|
3 |
-
import torch
|
4 |
import requests
|
5 |
-
import
|
6 |
-
from transformers import pipeline, AutoTokenizer,
|
7 |
|
8 |
-
# TMDb API key
|
9 |
TMDB_API_KEY = "364ab334807e87805d326b6dc0f1905c"
|
10 |
-
|
11 |
-
# Mapeamento dos gêneros TMDb
|
12 |
GENRE_MAP = {
|
13 |
-
"Action": 28, "Adventure": 12, "Animation": 16, "Comedy": 35,
|
14 |
-
"Documentary": 99, "Drama": 18, "Family": 10751,
|
15 |
-
"
|
16 |
-
"
|
|
|
17 |
}
|
18 |
|
19 |
-
# Modelos
|
|
|
|
|
|
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
gpt4_pipe = pipeline("text-generation", model=gpt4_model, tokenizer=gpt4_tokenizer, device=0 if torch.cuda.is_available() else -1)
|
24 |
-
|
25 |
-
claude_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
|
26 |
-
claude_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
|
27 |
claude_pipe = pipeline("text2text-generation", model=claude_model, tokenizer=claude_tokenizer)
|
28 |
|
29 |
-
|
30 |
-
arb_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
|
31 |
-
arb_pipe = pipeline("text2text-generation", model=arb_model, tokenizer=arb_tokenizer)
|
32 |
-
|
33 |
-
# Extrai gêneros reconhecidos
|
34 |
-
GENRE_LIST = list(GENRE_MAP.keys())
|
35 |
-
GENRE_PATTERN = r"\\b(" + "|".join(GENRE_LIST) + r")\\b"
|
36 |
|
37 |
def extract_genres(output):
|
38 |
-
|
39 |
-
return sorted(set(
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
def gpt4_genre_classifier(text):
|
44 |
prompt = (
|
45 |
-
"List
|
|
|
46 |
f"Description: \"{text}\"\nGenres:"
|
47 |
)
|
48 |
-
output =
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
for
|
66 |
-
|
67 |
-
|
68 |
-
continue
|
69 |
-
url = f"https://api.themoviedb.org/3/discover/movie?api_key={TMDB_API_KEY}&with_genres={genre_id}&sort_by=vote_average.desc&vote_count.gte=100"
|
70 |
-
resp = requests.get(url)
|
71 |
-
if resp.status_code == 200:
|
72 |
-
results = resp.json().get("results", [])
|
73 |
-
for movie in results:
|
74 |
-
if movie['id'] not in used_ids:
|
75 |
-
used_ids.add(movie['id'])
|
76 |
-
return {
|
77 |
-
"title": movie['title'],
|
78 |
-
"rating": movie['vote_average'],
|
79 |
-
"genre": genre
|
80 |
-
}
|
81 |
return None
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
def llama_judge(text, gpt_genres, claude_genres):
|
86 |
prompt = (
|
87 |
-
f"You are
|
88 |
-
f"Description:
|
89 |
-
f"GPT
|
90 |
-
f"
|
91 |
-
"Who identified the genres more accurately? Reply with only 'GPT-4', 'Claude', or 'Tie'."
|
92 |
)
|
93 |
-
|
94 |
-
if "
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
def
|
103 |
-
gpt_genres =
|
104 |
-
claude_genres =
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
claude_movie = recommend_movie(claude_genres, used_ids)
|
109 |
|
110 |
if not gpt_movie or not claude_movie:
|
111 |
return "⚠️ Não foi possível encontrar recomendações de filmes para os gêneros identificados."
|
112 |
|
113 |
-
|
114 |
-
best_movie = gpt_movie if gpt_movie['rating'] >= claude_movie['rating'] else claude_movie
|
115 |
|
116 |
return (
|
117 |
-
f"GPT-4 Genres: {', '.join(gpt_genres)}\n"
|
118 |
-
f"Claude Genres: {', '.join(claude_genres)}\n\n"
|
119 |
-
f"GPT
|
120 |
-
f"Claude Movie: {claude_movie['title']} ({claude_movie['rating']})\n\n"
|
121 |
-
f"🏆 Winner: {
|
122 |
-
f"
|
123 |
)
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
inputs=gr.Textbox(lines=3, label="Enter game or movie description in English"),
|
129 |
outputs="text",
|
130 |
-
title="AI Genre Duel + Movie Recommendation",
|
131 |
-
description="Dois modelos
|
132 |
-
)
|
133 |
-
|
134 |
-
iface.launch(share=True)
|
|
|
1 |
import os
import re

import gradio as gr
import requests
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
|
|
|
6 |
# TMDb API key. Set the TMDB_API_KEY environment variable to override it; the
# literal fallback keeps existing deployments working unchanged.
# NOTE(review): the fallback key is committed in plain text -- it should be
# rotated and supplied only through the environment / a secret store.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY") or "364ab334807e87805d326b6dc0f1905c"

# Genre display name -> numeric TMDb genre id.
GENRE_MAP = {
    "Action": 28, "Adventure": 12, "Animation": 16, "Comedy": 35,
    "Crime": 80, "Documentary": 99, "Drama": 18, "Family": 10751,
    "Fantasy": 14, "History": 36, "Horror": 27, "Music": 10402,
    "Mystery": 9648, "Romance": 10749, "Sci-Fi": 878, "TV Movie": 10770,
    "Thriller": 53, "War": 10752, "Western": 37,
}
|
14 |
|
15 |
+
# Small models for Hugging Face.
def _load_text2text(checkpoint):
    """Load tokenizer, model and text2text-generation pipeline for a checkpoint."""
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
    return tokenizer, model, generator


gpt_tokenizer, gpt_model, gpt_pipe = _load_text2text("google/flan-t5-small")

claude_tokenizer, claude_model, claude_pipe = _load_text2text("google/flan-t5-base")

# The judge uses the same model (and pipeline object) as the "claude" contestant.
arb_pipe = claude_pipe
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
def extract_genres(output):
    """Return the known genres mentioned in a model's raw text output.

    Matching is whole-word and case-insensitive. Every hit is mapped back to
    its canonical ``GENRE_MAP`` key, so later ``GENRE_MAP`` lookups succeed
    whatever casing the model produced, and case variants of the same genre
    collapse into one entry.

    Args:
        output: Free-form text generated by a model.

    Returns:
        Alphabetically sorted list of unique canonical genre names; empty
        when nothing matches.
    """
    # Lower-cased name -> canonical key ("action" / "ACTION" -> "Action").
    canonical = {name.lower(): name for name in GENRE_MAP}
    # re.escape keeps each name literal even if it contains a regex metacharacter.
    pattern = r"\b(?:" + "|".join(re.escape(name) for name in GENRE_MAP) + r")\b"
    hits = re.findall(pattern, output, flags=re.IGNORECASE)
    found = {canonical.get(hit.lower()) for hit in hits}
    # Drop any hit whose lower-cased form is not a known key.
    found.discard(None)
    return sorted(found)
|
29 |
|
30 |
+
def classify_genres(pipe, text):
    """Ask a text2text pipeline which genres fit a description.

    Args:
        pipe: Callable invoked as ``pipe(prompt, max_new_tokens=30)``; the
            first item of its result must carry a ``"generated_text"`` entry.
        text: Movie or game description to classify.

    Returns:
        The result of ``extract_genres`` applied to the generated text.
    """
    valid_names = ", ".join(GENRE_MAP.keys())
    prompt_parts = [
        "List the genres (only) for this movie or game description.\n",
        "Valid genres: " + valid_names + ".\n",
        f"Description: \"{text}\"\nGenres:",
    ]
    prompt = "".join(prompt_parts)
    generations = pipe(prompt, max_new_tokens=30)
    first_generation = generations[0]
    return extract_genres(first_generation["generated_text"])
|
38 |
+
|
39 |
+
def get_movie_by_genres(genres, exclude_titles=None):
    """Fetch the best-rated TMDb movie for the given genres.

    Queries TMDb's ``/discover/movie`` endpoint sorted by descending vote
    average and returns the first result whose title is not excluded.

    Args:
        genres: Iterable of genre names; names missing from ``GENRE_MAP`` are
            ignored.
        exclude_titles: Optional collection of movie titles to skip. Defaults
            to excluding nothing.

    Returns:
        ``{"title": ..., "rating": ...}`` for the first acceptable movie, or
        ``None`` when no genre is recognised, the request fails, the response
        is not valid JSON, or every result is excluded.
    """
    import logging

    # Avoid a shared mutable default; ``None`` means "exclude nothing".
    excluded = exclude_titles if exclude_titles is not None else ()

    genre_ids = [GENRE_MAP[g] for g in genres if g in GENRE_MAP]
    if not genre_ids:
        return None

    url = "https://api.themoviedb.org/3/discover/movie"
    params = {
        "api_key": TMDB_API_KEY,
        # NOTE(review): TMDb documents comma-separated ids as AND (a movie must
        # have every genre); long genre lists may yield no results -- confirm
        # this is the intended matching.
        "with_genres": ",".join(map(str, genre_ids)),
        "sort_by": "vote_average.desc",
        "vote_count.gte": 50,
        "language": "en-US",
    }

    try:
        # A timeout prevents a stalled connection from hanging the UI handler.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Log only the exception type: the message can embed the request URL,
        # which carries the API key as a query parameter.
        logging.getLogger(__name__).warning(
            "TMDb discover request failed: %s", type(exc).__name__
        )
        return None

    for movie in payload.get("results", []):
        if movie["title"] not in excluded:
            return {"title": movie["title"], "rating": movie["vote_average"]}
    return None
|
57 |
|
58 |
+
def judge_and_decide(text, gpt_genres, claude_genres, gpt_movie, claude_movie):
    """Ask the judge pipeline which genre list fits best and pick the top movie.

    Args:
        text: The description that was classified.
        gpt_genres: Genre names produced by the "GPT" contestant.
        claude_genres: Genre names produced by the "Claude" contestant.
        gpt_movie: Dict with at least a ``"rating"`` entry.
        claude_movie: Dict with at least a ``"rating"`` entry.

    Returns:
        ``{"decision": "Claude" | "GPT" | "Tie", "winner_movie": <movie dict>}``.
        ``winner_movie`` is the higher-rated of the two movies; on equal
        ratings ``gpt_movie`` is returned because ``max`` keeps the first
        maximal item.
    """
    prompt = (
        "You are a genre classification judge.\n"
        f"Description: {text}\n"
        f"GPT Genres: {', '.join(gpt_genres)}\nClaude Genres: {', '.join(claude_genres)}\n"
        "Which list better matches the description? Reply with GPT, Claude or Tie."
    )
    reply = arb_pipe(prompt, max_new_tokens=10)[0]["generated_text"]

    # Compare case-insensitively: the judge's free-form reply may be lower- or
    # upper-cased. "Claude" keeps priority over "GPT" when both are mentioned;
    # anything else counts as a tie.
    normalized = reply.casefold()
    if "claude" in normalized:
        decision = "Claude"
    elif "gpt" in normalized:
        decision = "GPT"
    else:
        decision = "Tie"

    best_movie = max([gpt_movie, claude_movie], key=lambda m: m["rating"])
    return {
        "decision": decision,
        "winner_movie": best_movie,
    }
|
73 |
+
|
74 |
+
def main(description):
    """Run the genre duel for one description and format the text report.

    Both pipelines classify the description, a movie is looked up for each
    genre list (the second lookup excludes the first movie's title), and the
    judge's verdict plus the better-rated movie are rendered as text.

    Args:
        description: Movie or game description entered by the user.

    Returns:
        The multi-line report, or a warning message when either movie lookup
        returned nothing.
    """
    genres_from_gpt = classify_genres(gpt_pipe, description)
    genres_from_claude = classify_genres(claude_pipe, description)

    gpt_movie = get_movie_by_genres(genres_from_gpt)
    # Keep the second recommendation distinct from the first one, if there is one.
    if gpt_movie:
        already_picked = [gpt_movie["title"]]
    else:
        already_picked = []
    claude_movie = get_movie_by_genres(genres_from_claude, exclude_titles=already_picked)

    if not (gpt_movie and claude_movie):
        return "⚠️ Não foi possível encontrar recomendações de filmes para os gêneros identificados."

    verdict = judge_and_decide(
        description, genres_from_gpt, genres_from_claude, gpt_movie, claude_movie
    )
    top_movie = verdict["winner_movie"]

    return (
        f"🎯 GPT-4 Genres: {', '.join(genres_from_gpt)}\n"
        f"🎯 Claude Genres: {', '.join(genres_from_claude)}\n\n"
        f"🎬 GPT Movie: {gpt_movie['title']} ({gpt_movie['rating']})\n"
        f"🎬 Claude Movie: {claude_movie['title']} ({claude_movie['rating']})\n\n"
        f"🏆 Winner: {verdict['decision']}\n"
        f"🎖️ Best Rated Movie: {top_movie['title']} ({top_movie['rating']})"
    )
|
94 |
|
95 |
+
# Build the single-textbox UI around main() and start serving it.
demo = gr.Interface(
    fn=main,
    inputs=gr.Textbox(lines=3, placeholder="Enter a movie/game description in English"),
    outputs="text",
    title="🎬 AI Genre Duel + Movie Recommendation",
    description="Dois modelos AI classificam o gênero de uma descrição e recomendam filmes. O juiz escolhe o vencedor com base nos gêneros e na melhor nota de filme.",
)
demo.launch()
|
|
|
|