Spaces:

LeMaterial
/

materials_explorer

Running

App Files Community

msiron committed on Dec 9, 2024

Commit

1e6e599

1 Parent(s): ce5365e

fix splits load all

Browse files

Files changed (1) hide show

app.py +41 -30

app.py CHANGED Viewed

@@ -5,45 +5,52 @@ import crystal_toolkit.components as ctc
 import dash
 import dash_mp_components as dmp
 import numpy as np
 import periodictable
 from crystal_toolkit.settings import SETTINGS
 from dash import dcc, html
 from dash.dependencies import Input, Output, State
 from dash_breakpoints import WindowBreakpoints
-from datasets import load_dataset
 from pymatgen.analysis.structure_analyzer import SpacegroupAnalyzer
 from pymatgen.core import Structure
 HF_TOKEN = os.environ.get("HF_TOKEN")
 top_k = 500
 # Load only the train split of the dataset
-dataset = load_dataset(
-    "LeMaterial/leMat-Bulk",
-    token=HF_TOKEN,
-    split="train",
-    columns=[
-        "lattice_vectors",
-        "species_at_sites",
-        "cartesian_site_positions",
-        "energy",
-        # "energy_corrected", # not yet available in LeMat-Bulk
-        "immutable_id",
-        "elements",
-        "functional",
-        "stress_tensor",
-        "magnetic_moments",
-        "forces",
-        # "band_gap_direct", #future release
-        # "band_gap_indirect", #future release
-        "dos_ef",
-        # "charges", #future release
-        "functional",
-        "chemical_formula_reduced",
-        "chemical_formula_descriptive",
-        "total_magnetization",
-    ],
-).select(range(1000))
 display_columns = [
     "chemical_formula_descriptive",
@@ -64,6 +71,8 @@ map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
 n_elements = len(map_periodic_table)
 # Preprocessing step to create an index for the dataset
 train_df = dataset.select_columns(["chemical_formula_descriptive"]).to_pandas()
 pattern = re.compile(r"(?P<element>[A-Z][a-z]?)(?P<count>\d*)")
@@ -367,8 +376,8 @@ def display_material(active_cell, selected_rows):
         row["cartesian_site_positions"],
         coords_are_cartesian=True,
     )
-    if row['magnetic_moments']:
-        structure.add_site_property('magmom',row['magnetic_moments'])
     sga = SpacegroupAnalyzer(structure)
@@ -379,7 +388,9 @@ def display_material(active_cell, selected_rows):
     properties = {
         "Material ID": row["immutable_id"],
         "Formula": row["chemical_formula_descriptive"],
-        "Energy per atom (eV/atom)": round(row["energy"] / len(row["species_at_sites"]), 3),
         # "Band Gap (eV)": row["band_gap_direct"] or row["band_gap_indirect"], #future release
         "Total Magnetization (μB)": row["total_magnetization"],
         "Density (g/cm^3)": round(structure.density, 3),

 import dash
 import dash_mp_components as dmp
 import numpy as np
+import pandas as pd
 import periodictable
 from crystal_toolkit.settings import SETTINGS
 from dash import dcc, html
 from dash.dependencies import Input, Output, State
 from dash_breakpoints import WindowBreakpoints
+from datasets import concatenate_datasets, load_dataset
 from pymatgen.analysis.structure_analyzer import SpacegroupAnalyzer
 from pymatgen.core import Structure
 HF_TOKEN = os.environ.get("HF_TOKEN")
 top_k = 500
+splits = ["compatible_pbe", "compatible_pbesol", "compatible_scan", "non_compatible"]
 # Load each split listed in `splits`; they are concatenated into one dataset below
+datasets = []
+for split in splits:
+    dataset = load_dataset(
+        "LeMaterial/leMat-Bulk",
+        token=HF_TOKEN,
+        split=split,
+        columns=[
+            "lattice_vectors",
+            "species_at_sites",
+            "cartesian_site_positions",
+            "energy",
+            # "energy_corrected", # not yet available in LeMat-Bulk
+            "immutable_id",
+            "elements",
+            "functional",
+            "stress_tensor",
+            "magnetic_moments",
+            "forces",
+            # "band_gap_direct", #future release
+            # "band_gap_indirect", #future release
+            "dos_ef",
+            # "charges", #future release
+            "functional",
+            "chemical_formula_reduced",
+            "chemical_formula_descriptive",
+            "total_magnetization",
+        ],
+    )
+    datasets.append(dataset)
 display_columns = [
     "chemical_formula_descriptive",
 n_elements = len(map_periodic_table)
 # Preprocessing step to create an index for the dataset
+# df = pd.concat([x.to_pandas() for x in datasets])
+dataset = concatenate_datasets(datasets)
 train_df = dataset.select_columns(["chemical_formula_descriptive"]).to_pandas()
 pattern = re.compile(r"(?P<element>[A-Z][a-z]?)(?P<count>\d*)")
         row["cartesian_site_positions"],
         coords_are_cartesian=True,
     )
+    if row["magnetic_moments"]:
+        structure.add_site_property("magmom", row["magnetic_moments"])
     sga = SpacegroupAnalyzer(structure)
     properties = {
         "Material ID": row["immutable_id"],
         "Formula": row["chemical_formula_descriptive"],
+        "Energy per atom (eV/atom)": round(
+            row["energy"] / len(row["species_at_sites"]), 3
+        ),
         # "Band Gap (eV)": row["band_gap_direct"] or row["band_gap_indirect"], #future release
         "Total Magnetization (μB)": row["total_magnetization"],
         "Density (g/cm^3)": round(structure.density, 3),