import os # import clip import torch import open_clip import numpy as np from sklearn.linear_model import LogisticRegression from torchvision.datasets import CIFAR100 from tqdm import tqdm from joblib import dump, load from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize import torchvision.transforms as transforms import torchvision import pandas as pd from pathlib import Path from PIL import Image from torch.utils.data import Dataset, DataLoader import pickle class PHASE(Dataset): """PHASE dataset.""" def __init__(self, csv_file, root_dir, transform=None, resolution=224): """ Arguments: csv_file (string): Path to the csv file with annotations. root_dir (string): Directory with all the images. transform (callable, optional): Optional transform to be applied on a sample. """ self.annotations = pd.read_csv(csv_file, sep=' ', header=None) # print(self.annotations) self.root_dir = root_dir self.transform = transform self.base_transforms = Compose([ Resize((resolution, resolution), interpolation=Image.BICUBIC) ]) def __len__(self): return len(self.annotations) def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() img_name = os.path.join(self.root_dir, self.annotations.iloc[idx, 0]) image = Image.open(img_name).convert('RGB') label = self.annotations.iloc[idx, 1] image = self.base_transforms(image) if self.transform: image = self.transform(image) sample = {'image': image, 'label': label} # print(image, label) return image, label class FACET(Dataset): """Face Landmarks dataset.""" def __init__(self, csv_file, root_dir, transform=None): """ Arguments: csv_file (string): Path to the csv file with annotations. root_dir (string): Directory with all the images. transform (callable, optional): Optional transform to be applied on a sample. """ self.annotations = pd.read_csv(csv_file, sep=' ', header=None) self.root_dir = root_dir self.transform = transform def __len__(self): return len(self.annotations) def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() img_name = os.path.join(self.root_dir, self.annotations.iloc[idx, 0]) image = Image.open(img_name).convert('RGB') label = self.annotations.iloc[idx, 1] base_transforms = Compose([ Resize((224, 224), interpolation=Image.BICUBIC) ]) image = base_transforms(image) if self.transform: image = self.transform(image) return image, label class MORPH(Dataset): """MORPH dataset.""" def __init__(self, csv_file, root_dir, transform=None): """ Arguments: csv_file (string): Path to the csv file with annotations. root_dir (string): Directory with all the images. transform (callable, optional): Optional transform to be applied on a sample. """ self.annotations = pd.read_csv(csv_file, sep=',', header=0) self.root_dir = root_dir self.transform = transform def __len__(self): return len(self.annotations) def __getitem__(self, idx): if torch.is_tensor(idx): idx = idx.tolist() img_name = os.path.join(self.annotations.iloc[idx]["filepath"]) image = Image.open(f"{img_name}").convert('RGB') label = self.annotations.iloc[idx]["gender"] base_transforms = Compose([ Resize((224, 224), interpolation=Image.BICUBIC) ]) image = base_transforms(image) if self.transform: image = self.transform(image) return image, label # Load the model device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu" resnet_model = torchvision.models.resnet50(pretrained=False) resnet_model.fc = torch.nn.Identity() resnet_model.eval() features_root = "features_facet_training_set" cls_root = "classifiers_facet" models = ( # # Supervised # "resnet18", # "resnet34", # "resnet50", # "resnet101", # "resnet152", # "vit_b_16", # "vit_b_32", # "vit_l_16", # "vit_l_32", # # Self-Supervised RN50 # 'swav', # 'simclr', # 'moco_v2', # 'npid', # 'deepcluster_v2', # 'jigsaw', # 'odc', # # DINO v1 # 'dino_v1_cnn', # 'dino_v1_vit_b_16', # "dino_v1_vit_s_16", # # DINO v2 # "dino_v2_vit_s_14", # 'dino_v2_vit_b_14', # "dino_v2_vit_l_14", # "dino_v2_vit_g_14", # # CLIP OpenAI # "ViT-B/16", # "ViT-B/32", # "ViT-L/14", # "ViT-L/14@336px", # "RN50", # "RN101", # # CLIP OpenCLIP #"vit_b_16_400m", # "vit_b_16_2b", # "vit_l_14_400m", # "vit_l_14_2b", # "vit_b_32_400m", # "vit_b_32_2b", # OpenCLIP CC3M - ours # "vit_b_16_cc3m_50_28ep", "vit_b_16_cc3m_50_30ep", # "vit_b_16_cc3m_50", "vit_b_16_cc3m_original", "vit_b_16_cc3m_50_30ep_difficult_batches", # OpenCLIP CC3M - full regeneration "rn50_cc3m_mix_000", "rn50_cc3m_mix_100", ) weights = ( # # ResNet's # "supervised_torch_hub", # "supervised_torch_hub", # "supervised_torch_hub", # "supervised_torch_hub", # "supervised_torch_hub", # # ViT's # "supervised_torch_hub", # "supervised_torch_hub", # "supervised_torch_hub", # "supervised_torch_hub", # # SSL # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/swav_backbone.pth", # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/simclr_backbone.pth", # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/moco_v2_backbone.pth", # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/npid_backbone.pth", # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/deepcluster_v2_backbone.pth", # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/jigsaw_backbone.pth", # "/home/kis/Desktop/rhome/kis/code/mmselfsup/pretrained_models/official_weights/mmselfsup_format/odc_r50_v1-5af5dd0c.pth", # # DINO-v1 # "dino_facebook_hub", # "dino_facebook_hub", # "dino_facebook_hub", # # DINO-v2 # "dino_facebook_hub", # "dino_facebook_hub", # "dino_facebook_hub", # "dino_facebook_hub", # # CLIP OpenAI # "OpenAI hub", # "OpenAI hub", # "OpenAI hub", # "OpenAI hub", # "OpenAI hub", # "OpenAI hub", # # CLIP OpenCLIP #"OpenCLIP hub", # "OpenCLIP hub", # "OpenCLIP hub", # "OpenCLIP hub", # "OpenCLIP hub", # "OpenCLIP hub", # OpenCLIP CC3M - ours # "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_08_27-11_48_49-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_28.pt", "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_08_27-11_48_49-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_30.pt", # "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_08_27-11_48_49-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_16.pt", "/home/kis/Desktop/rhome/kis/code/open_clip/logs/2024_07_12-19_17_23-model_ViT-B-16-lr_0.001-b_410-j_4-p_amp/checkpoints/epoch_30.pt", "/home/kis/Desktop/rhome/kis/code/open_clip_latest/open_clip/logs/2024_09_15-14_07_26-model_ViT-B-16-lr_0.001-b_410-j_8-p_amp/checkpoints/epoch_30.pt", # OpenCLIP CC3M - full regeneration "/home/kis/code/models/models/cc3m_mix_000/epoch_50.pt", "/home/kis/code/models/models/cc3m_mix_100/epoch_50.pt", ) model_idx=0 model_type = 'transformer' for model_name, weight in zip(models, weights): print( "\n\n",model_name) preprocess = None clip_like = False if model_name == 'dino_v1_cnn': model = torch.hub.load('facebookresearch/dino:main', 'dino_resnet50') model.fc = torch.nn.Identity() model.eval() model_type = 'cnn' elif model_name == "vit_b_16_cc3m_50": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight) elif model_name == "vit_b_16_cc3m_50_28ep": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight) elif model_name == "vit_b_16_cc3m_50_30ep_difficult_batches": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight) elif model_name == "vit_b_16_cc3m_50_30ep": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight) elif model_name == "rn50_cc3m_mix_000": model, _, preprocess = open_clip.create_model_and_transforms('RN50', pretrained=weight) elif model_name == "vit_b_16_cc3m_future_models": model, _, preprocess = open_clip.create_model_and_transforms('RN50', pretrained=weight) elif model_name == "vit_b_16_cc3m_original": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained=weight) elif model_name == 'dino_v1_vit_s_16': model = torch.hub.load('facebookresearch/dino:main', 'dino_vits16') elif model_name == 'dino_v1_vit_b_16': model = torch.hub.load('facebookresearch/dino:main', 'dino_vitb16') elif model_name == 'dino_v2_vit_s_14': model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14') elif model_name == 'dino_v2_vit_b_14': model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14') elif model_name == 'dino_v2_vit_l_14': model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14') elif model_name == 'dino_v2_vit_g_14': model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitg14') elif model_name == "vit_b_16_400m": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion400m_e32') clip_like = True elif model_name == "vit_b_16_2b": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion2b_s34b_b88k') clip_like = True elif model_name == "vit_b_32_400m": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion400m_e32') clip_like = True elif model_name == "vit_b_32_2b": model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k') clip_like = True elif model_name == "vit_l_14_400m": model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion400m_e32') clip_like = True elif model_name == "vit_l_14_2b": model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='laion2b_s32b_b82k') clip_like = True elif "resnet" in model_name: model = torch.hub.load('pytorch/vision:v0.10.0', model_name, pretrained=True) model.fc = torch.nn.Identity() model.eval() model_type = 'cnn' elif "vit" in model_name: model = torch.hub.load('pytorch/vision', model_name, weights='IMAGENET1K_V1') model.heads = torch.nn.Identity() elif "ViT" in model_name: model, preprocess = clip.load(model_name, device) clip_like = True elif "RN" in model_name: model, preprocess = clip.load(model_name, device) model.visual.attnpool = torch.nn.AdaptiveAvgPool2d((1,1)) # replace Attention pool with Avgpool clip_like = True model_type = 'cnn' else: w = torch.load(weight) model = torchvision.models.resnet50(pretrained=False) model.fc = torch.nn.Identity() model.eval() model.load_state_dict(update_keys(w['state_dict']), strict=True) model_type = 'cnn' model.cuda() if 'simclr' in model_name: img_norm_cfg = dict(mean=[0., 0., 0.], std=[1., 1., 1.]) else: img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transform_test = transforms.Compose([ transforms.CenterCrop((224, 224)), transforms.ToTensor(), transforms.Normalize(**img_norm_cfg) ]) # # model_name = 'CLIP_RN50' # features_root = "../../features/MORPH/" # # features_root = "../../features/FACET/" # train_dataset = MORPH(csv_file=f'/home/kis/Desktop/rhome/kis/datasets/morph/Dataset/Index/Train.csv', # root_dir='/home/kis/Desktop/rhome/kis/datasets/morph/Dataset/', # transform=transform_test # ) # val_dataset = MORPH(csv_file=f'/home/kis/Desktop/rhome/kis/datasets/morph/Dataset/Index/Validation.csv', # root_dir='/home/kis/Desktop/rhome/kis/datasets/morph/Dataset/', # transform=transform_test # ) # def get_features(dataset): # all_features = [] # all_labels = [] # with torch.no_grad(): # for images, labels in tqdm(DataLoader(dataset, batch_size=512)): # if model_type == 'cnn': # features = model(images.to(device)) # else: # features = model.encode_image(images.to(device)) # print(features.shape) # all_features.append(features) # all_labels.append(labels) # return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy() # # Calculate the image features # feat_path = Path(f"{features_root}/{model_name}_features.pkl") # if feat_path.exists(): # print("Already extracted!") # with open(f"{features_root}/{model_name}_features.pkl", 'rb') as f: # features = pickle.load(f) # with open(f"{features_root}/{model_name}_labels.pkl", 'rb') as f: # labels = pickle.load(f) # with open(f"{features_root}/{model_name}_features_val.pkl", 'rb') as f: # features_val = pickle.load(f) # with open(f"{features_root}/{model_name}_labels_val.pkl", 'rb') as f: # labels_val = pickle.load(f) # else: # features, labels = get_features(train_dataset) # print(labels) # with open(f"{features_root}/{model_name}_features.pkl", 'wb') as f: # pickle.dump(features, f) # with open(f"{features_root}/{model_name}_labels.pkl", 'wb') as f: # pickle.dump(labels, f) # features_val, labels_val = get_features(val_dataset) # with open(f"{features_root}/{model_name}_features_val.pkl", 'wb') as f: # pickle.dump(features_val, f) # with open(f"{features_root}/{model_name}_labels_val.pkl", 'wb') as f: # pickle.dump(labels_val, f) # print("Done!") # for i in range(1, 10): # c = i * 0.1 # classifier = LogisticRegression(random_state=0, C=c, max_iter=10000, verbose=0) # classifier.fit(features, labels) # # classifier = load(f'{model_name}_logistic_regression_classifier_c_{c}.joblib') # predictions = classifier.predict(features_val) # dump(classifier, f'{features_root}/{model_name}_logistic_regression_classifier_c_{c}.joblib') # with open(f'{features_root}/{model_name}_predictions_c_{c}.pkl', 'wb') as f: # pickle.dump(predictions, f) # pd.DataFrame(predictions).to_csv(f"{features_root}/{model_name}_predictions_c_{c}.txt") # accuracy = np.mean((labels_val == predictions).astype(float)) * 100. # print(f"C={c}, Accuracy = {accuracy:.3f}") # features_root = "../../features/FACET/" # train_dataset = FACET(csv_file=f'/home/kis/Desktop/rhome/kis/datasets/facet/train_seed_0.csv', # root_dir='/home/kis/Desktop/rhome/kis/datasets/facet/images_bb/', # transform=preprocess) # val_dataset = FACET(csv_file=f'/home/kis/Desktop/rhome/kis/datasets/facet/test_val_seed_0.csv', # root_dir='/home/kis/Desktop/rhome/kis/datasets/facet/images_bb/', # transform=preprocess) # def get_features(dataset): # all_features = [] # all_labels = [] # with torch.no_grad(): # for images, labels in tqdm(DataLoader(dataset, batch_size=512)): # if model_type == 'cnn': # features = model(images.to(device)) # else: # features = model.encode_image(images.to(device)) # print(features.shape) # all_features.append(features) # all_labels.append(labels) # return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy() # # Calculate the image features # feat_path = Path(f"{features_root}/{model_name}_features.pkl") # if feat_path.exists(): # print("Already extracted!") # with open(f"{features_root}/{model_name}_features.pkl", 'rb') as f: # features = pickle.load(f) # with open(f"{features_root}/{model_name}_labels.pkl", 'rb') as f: # labels = pickle.load(f) # with open(f"{features_root}/{model_name}_features_val.pkl", 'rb') as f: # features_val = pickle.load(f) # with open(f"{features_root}/{model_name}_labels_val.pkl", 'rb') as f: # labels_val = pickle.load(f) # else: # features, labels = get_features(train_dataset) # with open(f"{features_root}/{model_name}_features.pkl", 'wb') as f: # pickle.dump(features, f) # with open(f"{features_root}/{model_name}_labels.pkl", 'wb') as f: # pickle.dump(labels, f) # features_val, labels_val = get_features(val_dataset) # with open(f"{features_root}/{model_name}_features_val.pkl", 'wb') as f: # pickle.dump(features_val, f) # with open(f"{features_root}/{model_name}_labels_val.pkl", 'wb') as f: # pickle.dump(labels_val, f) # print("Done!") # for i in range(1, 10): # c = i * 0.1 # classifier = LogisticRegression(random_state=0, C=c, max_iter=10000, verbose=0) # classifier.fit(features, labels) # # classifier = load(f'{model_name}_logistic_regression_classifier_c_{c}.joblib') # predictions = classifier.predict(features_val) # dump(classifier, f'{features_root}/{model_name}_logistic_regression_classifier_c_{c}.joblib') # with open(f'{features_root}/{model_name}_predictions_c_{c}.pkl', 'wb') as f: # pickle.dump(predictions, f) # pd.DataFrame(predictions).to_csv(f"{features_root}/{model_name}_predictions_c_{c}.txt") # accuracy = np.mean((labels_val == predictions).astype(float)) * 100. # print(f"C={c}, Accuracy = {accuracy:.3f}") features_root = "../../features/PHASE_EMOTIONS/" train_dataset = PHASE(csv_file=f'/home/kis/Desktop/rhome/kis/datasets/phase/phase_annotations/train_annotations_emotion.txt', root_dir='/home/kis/Desktop/rhome/kis/datasets/phase/images/train_bb/', transform=transform_test ) val_dataset = PHASE(csv_file=f'/home/kis/Desktop/rhome/kis/datasets/phase/phase_annotations/val_annotations_emotion.txt', root_dir='/home/kis/Desktop/rhome/kis/datasets/phase/images/val_bb/', transform=transform_test ) def get_features(dataset): all_features = [] all_labels = [] with torch.no_grad(): for images, labels in tqdm(DataLoader(dataset, batch_size=512)): if model_type == 'cnn': features = model(images.to(device)) else: features = model.encode_image(images.to(device)) print(features.shape) all_features.append(features) all_labels.append(labels) return torch.cat(all_features).cpu().numpy(), torch.cat(all_labels).cpu().numpy() # Calculate the image features feat_path = Path(f"{features_root}/{model_name}_features.pkl") if feat_path.exists(): print("Already extracted!") with open(f"{features_root}/{model_name}_features.pkl", 'rb') as f: features = pickle.load(f) with open(f"{features_root}/{model_name}_labels.pkl", 'rb') as f: labels = pickle.load(f) with open(f"{features_root}/{model_name}_features_val.pkl", 'rb') as f: features_val = pickle.load(f) with open(f"{features_root}/{model_name}_labels_val.pkl", 'rb') as f: labels_val = pickle.load(f) else: features, labels = get_features(train_dataset) with open(f"{features_root}/{model_name}_features.pkl", 'wb') as f: pickle.dump(features, f) with open(f"{features_root}/{model_name}_labels.pkl", 'wb') as f: pickle.dump(labels, f) features_val, labels_val = get_features(val_dataset) with open(f"{features_root}/{model_name}_features_val.pkl", 'wb') as f: pickle.dump(features_val, f) with open(f"{features_root}/{model_name}_labels_val.pkl", 'wb') as f: pickle.dump(labels_val, f) print("Done!") for i in range(1, 10): c = i * 0.1 classifier = LogisticRegression(random_state=0, C=c, max_iter=10000, verbose=0, class_weight="balanced") classifier.fit(features, labels) # classifier = load(f'{model_name}_logistic_regression_classifier_c_{c}.joblib') predictions = classifier.predict(features_val) dump(classifier, f'{features_root}/{model_name}_logistic_regression_classifier_c_{c}.joblib') with open(f'{features_root}/{model_name}_predictions_c_{c}.pkl', 'wb') as f: pickle.dump(predictions, f) pd.DataFrame(predictions).to_csv(f"{features_root}/{model_name}_predictions_c_{c}.txt") accuracy = np.mean((labels_val == predictions).astype(float)) * 100. print(f"C={c}, Accuracy = {accuracy:.3f}")