Spaces:
Runtime error
Commit a1e77ee · Parent(s): 5e3004a
Upload 8 files
- app.py +20 -0
- config.yaml +4 -0
- requirements.txt +8 -0
- result.csv +0 -0
- skeletal.npy +3 -0
- src/__pycache__/skeletal.cpython-39.pyc +0 -0
- src/scrape.py +126 -0
- src/skeletal.py +79 -0
app.py
ADDED
@@ -0,0 +1,20 @@
import gradio as gr
from src.skeletal import FaceMesh

facemesh = FaceMesh("config.yaml")

with gr.Blocks() as demo:
    gr.Markdown("Estimate a similar person using this demo.")
    with gr.Row():
        with gr.Column(scale=1):
            input = gr.Image(type="filepath", label="Input image")
            dropdown = gr.Dropdown([5, 10, 20, 30, 40, 50], value=20, label="Top K")
            button = gr.Button("Estimate")
        with gr.Column(scale=2):
            output = gr.Dataframe()

    button.click(
        facemesh.estimate_similar_person, inputs=[input, dropdown], outputs=output
    )

demo.launch()
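The Blocks demo above passes the uploaded image path and the Top-K selection straight to FaceMesh.estimate_similar_person and renders the returned DataFrame, so it assumes result.csv and skeletal.npy (added below) were produced beforehand by the scripts in src/. Note that src/skeletal.py imports mediapipe, which is not listed in requirements.txt; that omission is one plausible cause of the Space's "Runtime error" status. A minimal sketch of exercising the same method outside Gradio (the input image path here is hypothetical):

# Minimal sketch: call the estimator directly, without the Gradio UI.
# "sample_face.png" is a hypothetical local image, not part of this commit.
from src.skeletal import FaceMesh

facemesh = FaceMesh("config.yaml")   # reads result.csv and skeletal.npy
top = facemesh.estimate_similar_person("sample_face.png", 20)
print(top)                           # DataFrame with "name" and "url" columns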
config.yaml
ADDED
@@ -0,0 +1,4 @@
path_data: data
path_garbage: garbage
path_csv: result.csv
path_skeletal: skeletal.npy
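These four paths are the only configuration the code reads; both src/scrape.py and src/skeletal.py load the file with OmegaConf and access the keys as attributes. A short sketch of how they resolve:

# Sketch: how config.yaml is consumed (mirrors the OmegaConf.load calls in src/).
from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")
print(cfg.path_data)      # "data"         - directory of scraped portrait images
print(cfg.path_garbage)   # "garbage"      - images with zero or multiple detected faces
print(cfg.path_csv)       # "result.csv"   - filepath / name / url table
print(cfg.path_skeletal)  # "skeletal.npy" - cached, normalized face-mesh landmarks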
requirements.txt
ADDED
@@ -0,0 +1,8 @@
notebook==6.5.3
requests==2.30.0
bs4==0.0.1
pandas==2.0.1
omegaconf==2.3.0
opencv-python==4.7.0
matplotlib==3.7.1
gradio==3.28.3
result.csv
ADDED
The diff for this file is too large to render.
skeletal.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:18d7a3a30feb38ed39755e90b1cb6b2f1a578d7ad80c8f4be7a1bae009958085
size 13835360
src/__pycache__/skeletal.cpython-39.pyc
ADDED
Binary file (2.68 kB).
src/scrape.py
ADDED
@@ -0,0 +1,126 @@
import argparse
import glob
import os
import shutil
import time
import urllib.request

import cv2
import mediapipe as mp
import pandas as pd
import requests
from bs4 import BeautifulSoup
from omegaconf import OmegaConf


class Scraper:
    def __init__(self, config):
        self.config = OmegaConf.load(config)
        self.base_url = "https://hominis.media/person/"
        if os.path.exists(self.config.path_csv):
            self.df = pd.read_csv(self.config.path_csv)
            self.idx = len(self.df)
        else:
            self.df = pd.DataFrame([], columns=["filepath", "name", "url"])
            self.idx = 0
        os.makedirs(self.config.path_data, exist_ok=True)
        os.makedirs(self.config.path_garbage, exist_ok=True)

    def run(self):
        html = requests.get(self.base_url, timeout=5)
        soup = BeautifulSoup(html.content, "html.parser")
        pages = soup.find_all("input", class_="selectButton")
        before = 0

        for page in pages:
            url = self.base_url + page.get("onclick").split("'")[1].replace(
                "/person/", ""
            )
            html = requests.get(url, timeout=5)
            soup = BeautifulSoup(html.content, "html.parser")
            people = soup.find_all("li", class_="card people")
            for person in people:
                name = person.find("p", class_="name").text
                img_url = (
                    person.find("p", class_="thumbnail")
                    .get("style")
                    .replace("background-image:url('", "")
                    .replace("');", "")
                )
                img_path = os.path.join(self.config.path_data, name + ".png")
                if os.path.exists(img_path):
                    continue
                try:
                    urllib.request.urlretrieve(img_url, img_path)
                    self.df.loc[self.idx] = {
                        "filepath": img_path,
                        "name": name,
                        "url": img_url,
                    }
                    self.idx += 1
                    time.sleep(1)
                except Exception:
                    continue

            imgs = glob.glob(os.path.join(self.config.path_data, "*.png"))
            assert len(imgs) == len(self.df)
            print(f"Get {len(imgs) - before} images")
            before = len(imgs)

        self.df.to_csv(self.config.path_csv, index=False)

    def post_processing(self):
        mp_face_mesh = mp.solutions.face_mesh
        with mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=10,
            refine_landmarks=True,
            min_detection_confidence=0.5,
        ) as face_mesh:
            for file in glob.glob(os.path.join(self.config.path_data, "*.png")):
                image = cv2.imread(file)
                results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
                if not results.multi_face_landmarks:
                    shutil.move(
                        file,
                        os.path.join(self.config.path_garbage, os.path.split(file)[-1]),
                    )
                elif len(results.multi_face_landmarks) > 1:
                    shutil.move(
                        file,
                        os.path.join(self.config.path_garbage, os.path.split(file)[-1]),
                    )

        idx = []
        for path in glob.glob(os.path.join(self.config.path_garbage, "*.png")):
            idx.append(
                self.df[
                    self.df["filepath"]
                    == os.path.join(self.config.path_data, os.path.split(path)[-1])
                ].index.values[0]
            )
        self.df = self.df.drop(idx)
        assert len(glob.glob(os.path.join(self.config.path_data, "*.png"))) == len(
            self.df
        )
        self.df.to_csv(self.config.path_csv, index=False)


def argparser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        default="config.yaml",
        help="File path for config file.",
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = argparser()
    scraper = Scraper(args.config)
    scraper.run()
    scraper.post_processing()
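The scraper is an offline step rather than part of the Space runtime: run() downloads one portrait per person listed on hominis.media and appends filepath/name/url rows to result.csv, and post_processing() quarantines any image in which MediaPipe detects zero or more than one face, dropping the matching rows from the table. A sketch of the intended local invocation, following the script's own __main__ block:

# Offline data collection (run locally; the Space itself only serves app.py).
from src.scrape import Scraper

scraper = Scraper("config.yaml")
scraper.run()              # download portraits and record them in result.csv
scraper.post_processing()  # move images with 0 or >1 faces to the garbage folder

Equivalently, python src/scrape.py --config config.yaml from the repository root.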
src/skeletal.py
ADDED
@@ -0,0 +1,79 @@
import argparse
import os

import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
from omegaconf import OmegaConf


class FaceMesh:
    def __init__(self, config):
        self.config = OmegaConf.load(config)
        self.df = pd.read_csv(self.config.path_csv)
        if os.path.exists(self.config.path_skeletal):
            self.reference = np.load(self.config.path_skeletal)

    def normalize(self, landmarks):
        output = []
        for landmark in landmarks:
            landmark = np.array(landmark)
            landmark = (landmark - landmark.min()) / (landmark.max() - landmark.min())
            output.append(landmark)
        return np.array(output)

    def get_facemesh(self, path):
        mp_face_mesh = mp.solutions.face_mesh

        with mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
        ) as face_mesh:
            results = face_mesh.process(
                cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
            )
            x, y, z = [], [], []
            result = results.multi_face_landmarks[0]
            for lands in result.landmark:
                x.append(lands.x)
                y.append(lands.y)
                z.append(lands.z)
            landmark = self.normalize([x, y, z])
            return landmark

    def create_dataset(self):
        landmarks = []
        for i in range(len(self.df)):
            landmark = self.get_facemesh(self.df.iloc[i]["filepath"])
            landmarks.append(landmark)
        np.save(self.config.path_skeletal, np.array(landmarks))

    def estimate_similar_person(self, path, topK):
        print(path)
        facemesh = self.get_facemesh(path)
        diff = abs(self.reference - facemesh).mean((1, 2))
        rank = np.argsort(diff)[0 : int(topK)]
        top = self.df.iloc[rank]
        return top.drop("filepath", axis=1)


def argparser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        type=str,
        default="config.yaml",
        help="File path for config file.",
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = argparser()
    facemesh = FaceMesh(args.config)
    facemesh.create_dataset()
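The similarity score behind estimate_similar_person is the mean absolute difference between min-max-normalized landmark arrays: each portrait is reduced to a (3, num_landmarks) array whose x, y and z coordinate lists are independently scaled to [0, 1], and the uploaded face is compared against every row of the cached reference array from skeletal.npy. A self-contained sketch of that ranking step with random stand-ins for the MediaPipe landmarks:

# Sketch of the ranking math in estimate_similar_person, using random stand-in data.
# 478 landmarks per axis is an assumption (MediaPipe FaceMesh with refine_landmarks=True);
# the real arrays come from skeletal.npy and get_facemesh().
import numpy as np

rng = np.random.default_rng(0)
reference = rng.random((100, 3, 478))   # stands in for the skeletal.npy array (one row per person)
query = rng.random((3, 478))            # stands in for one get_facemesh() result

diff = np.abs(reference - query).mean(axis=(1, 2))  # mean absolute landmark difference per person
top_k = np.argsort(diff)[:20]                       # indices of the 20 closest matches
print(top_k)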