Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import statsmodels.formula.api as smf | |
import statsmodels.api as sm | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
from scipy.optimize import minimize | |
import plotly.express as px | |
from scipy.stats import t, f | |
import gradio as gr | |
import io | |
import os | |
from zipfile import ZipFile | |
import warnings | |
# Suppress specific warnings | |
warnings.filterwarnings('ignore', category=UserWarning) | |
warnings.filterwarnings('ignore', category=RuntimeWarning) | |
class RSM_BoxBehnken: | |
def __init__(self, data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels): | |
""" | |
Initialize the Response Surface Methodology Box-Behnken Design class | |
Parameters: | |
----------- | |
data : pandas.DataFrame | |
Experimental design data | |
x1_name, x2_name, x3_name : str | |
Names of independent variables | |
y_name : str | |
Name of dependent variable | |
x1_levels, x2_levels, x3_levels : list | |
Levels of each independent variable | |
""" | |
self.data = data.copy() | |
self.model = None | |
self.model_simplified = None | |
self.optimized_results = None | |
self.optimal_levels = None | |
# Variable names | |
self.x1_name = x1_name | |
self.x2_name = x2_name | |
self.x3_name = x3_name | |
self.y_name = y_name | |
# Original levels of variables | |
self.x1_levels = x1_levels | |
self.x2_levels = x2_levels | |
self.x3_levels = x3_levels | |
def _get_levels(self, variable_name): | |
""" | |
Get levels for a specific variable | |
Parameters: | |
----------- | |
variable_name : str | |
Name of the variable | |
Returns: | |
-------- | |
list | |
Levels of the variable | |
""" | |
level_map = { | |
self.x1_name: self.x1_levels, | |
self.x2_name: self.x2_levels, | |
self.x3_name: self.x3_levels | |
} | |
if variable_name not in level_map: | |
raise ValueError(f"Unknown variable: {variable_name}") | |
return level_map[variable_name] | |
def fit_model(self, simplified=False): | |
""" | |
Fit the response surface model | |
Parameters: | |
----------- | |
simplified : bool, optional | |
Whether to fit a simplified model, by default False | |
Returns: | |
-------- | |
tuple | |
Fitted model and Pareto chart | |
""" | |
if simplified: | |
formula = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \ | |
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2)' | |
self.model_simplified = smf.ols(formula, data=self.data).fit() | |
print("\nSimplified Model:") | |
print(self.model_simplified.summary()) | |
return self.model_simplified, self.pareto_chart(self.model_simplified, "Pareto - Simplified Model") | |
else: | |
formula = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \ | |
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2) + ' \ | |
f'{self.x1_name}:{self.x2_name} + {self.x1_name}:{self.x3_name} + {self.x2_name}:{self.x3_name}' | |
self.model = smf.ols(formula, data=self.data).fit() | |
print("Full Model:") | |
print(self.model.summary()) | |
return self.model, self.pareto_chart(self.model, "Pareto - Full Model") | |
def optimize(self, method='Nelder-Mead'): | |
""" | |
Optimize the response surface model | |
Parameters: | |
----------- | |
method : str, optional | |
Optimization method, by default 'Nelder-Mead' | |
Returns: | |
-------- | |
pandas.DataFrame | |
Optimization results table | |
""" | |
if self.model_simplified is None: | |
raise ValueError("Fit the simplified model first.") | |
def objective_function(x): | |
"""Objective function for optimization""" | |
return -self.model_simplified.predict(pd.DataFrame({ | |
self.x1_name: [x[0]], | |
self.x2_name: [x[1]], | |
self.x3_name: [x[2]] | |
})) | |
bounds = [(-1, 1), (-1, 1), (-1, 1)] | |
x0 = [0, 0, 0] | |
self.optimized_results = minimize( | |
objective_function, | |
x0, | |
method=method, | |
bounds=bounds | |
) | |
self.optimal_levels = self.optimized_results.x | |
# Convert to natural levels | |
optimal_levels_natural = [ | |
round(self.coded_to_natural(self.optimal_levels[i], var), 3) | |
for i, var in enumerate([self.x1_name, self.x2_name, self.x3_name]) | |
] | |
optimization_table = pd.DataFrame({ | |
'Variable': [self.x1_name, self.x2_name, self.x3_name], | |
'Optimal Level (Natural)': optimal_levels_natural, | |
'Optimal Level (Coded)': [round(x, 3) for x in self.optimal_levels] | |
}) | |
return optimization_table | |
def coded_to_natural(self, coded_value, variable_name): | |
""" | |
Convert coded value to natural level | |
Parameters: | |
----------- | |
coded_value : float | |
Coded value of the variable | |
variable_name : str | |
Name of the variable | |
Returns: | |
-------- | |
float | |
Natural level of the variable | |
""" | |
levels = self._get_levels(variable_name) | |
return levels[0] + (coded_value + 1) * (levels[-1] - levels[0]) / 2 | |
def natural_to_coded(self, natural_value, variable_name): | |
""" | |
Convert natural level to coded value | |
Parameters: | |
----------- | |
natural_value : float | |
Natural level of the variable | |
variable_name : str | |
Name of the variable | |
Returns: | |
-------- | |
float | |
Coded value of the variable | |
""" | |
levels = self._get_levels(variable_name) | |
return -1 + 2 * (natural_value - levels[0]) / (levels[-1] - levels[0]) | |
def pareto_chart(self, model, title): | |
""" | |
Create Pareto chart of standardized effects | |
Parameters: | |
----------- | |
model : statsmodels.regression.linear_model.RegressionResultsWrapper | |
Fitted regression model | |
title : str | |
Title of the Pareto chart | |
Returns: | |
-------- | |
plotly.graph_objects.Figure | |
Pareto chart | |
""" | |
tvalues = model.tvalues[1:] | |
abs_tvalues = np.abs(tvalues) | |
sorted_idx = np.argsort(abs_tvalues)[::-1] | |
sorted_tvalues = abs_tvalues[sorted_idx] | |
sorted_names = tvalues.index[sorted_idx] | |
alpha = 0.05 | |
dof = model.df_resid | |
t_critical = t.ppf(1 - alpha / 2, dof) | |
fig = px.bar( | |
x=sorted_tvalues, | |
y=sorted_names, | |
orientation='h', | |
labels={'x': 'Standardized Effect', 'y': 'Term'}, | |
title=title | |
) | |
fig.update_yaxes(autorange="reversed") | |
fig.add_vline(x=t_critical, line_dash="dot", | |
annotation_text=f"Critical t = {t_critical:.2f}", | |
annotation_position="bottom right") | |
return fig | |
def generate_prediction_table(self): | |
""" | |
Generate prediction table with predicted and residual values | |
Returns: | |
-------- | |
pandas.DataFrame | |
Prediction table | |
""" | |
if self.model_simplified is None: | |
raise ValueError("Fit the simplified model first.") | |
predictions = self.model_simplified.predict(self.data) | |
residuals = self.data[self.y_name] - predictions | |
prediction_table = self.data.copy() | |
prediction_table['Predicted'] = predictions.round(3) | |
prediction_table['Residual'] = residuals.round(3) | |
return prediction_table[[self.y_name, 'Predicted', 'Residual']] | |
def calculate_contribution_percentage(self): | |
""" | |
Calculate percentage contribution of model terms | |
Returns: | |
-------- | |
pandas.DataFrame | |
Contribution percentage table | |
""" | |
if self.model_simplified is None: | |
raise ValueError("Fit the simplified model first.") | |
anova_table = sm.stats.anova_lm(self.model_simplified, typ=2) | |
ss_total = anova_table['sum_sq'].sum() | |
contribution_table = [] | |
for index, row in anova_table.iterrows(): | |
if index != 'Residual': | |
factor_name = index.replace('I(', '').replace('**2)', '^2') | |
ss_factor = row['sum_sq'] | |
contribution_percentage = (ss_factor / ss_total) * 100 | |
contribution_table.append({ | |
'Factor': factor_name, | |
'Sum of Squares': round(ss_factor, 3), | |
'% Contribution': round(contribution_percentage, 3) | |
}) | |
return pd.DataFrame(contribution_table) | |
def calculate_detailed_anova(self): | |
""" | |
Perform detailed ANOVA analysis | |
Returns: | |
-------- | |
pandas.DataFrame | |
Detailed ANOVA table | |
""" | |
if self.model_simplified is None: | |
raise ValueError("Fit the simplified model first.") | |
# Preparar datos para ANOVA detallado | |
ss_total = np.sum((self.data[self.y_name] - self.data[self.y_name].mean())**2) | |
df_total = len(self.data) - 1 | |
# ANOVA para modelo reducido | |
formula_reduced = f'{self.y_name} ~ {self.x1_name} + {self.x2_name} + {self.x3_name} + ' \ | |
f'I({self.x1_name}**2) + I({self.x2_name}**2) + I({self.x3_name}**2)' | |
model_reduced = smf.ols(formula_reduced, data=self.data).fit() | |
anova_reduced = sm.stats.anova_lm(model_reduced, typ=2) | |
# Calcular componentes de variación | |
ss_regression = anova_reduced['sum_sq'][:-1].sum() | |
df_regression = len(anova_reduced) - 1 | |
ss_residual = self.model_simplified.ssr | |
df_residual = self.model_simplified.df_resid | |
# Error puro | |
replicas = self.data[self.data.duplicated(subset=[self.x1_name, self.x2_name, self.x3_name], keep=False)] | |
ss_pure_error = replicas.groupby([self.x1_name, self.x2_name, self.x3_name])[self.y_name].var().sum() | |
df_pure_error = len(replicas) - len(replicas.groupby([self.x1_name, self.x2_name, self.x3_name])) | |
# Falta de ajuste | |
ss_lack_of_fit = ss_residual - ss_pure_error | |
df_lack_of_fit = df_residual - df_pure_error | |
# Calcular cuadrados medios y estadísticos F | |
ms_regression = ss_regression / df_regression | |
ms_residual = ss_residual / df_residual | |
ms_lack_of_fit = ss_lack_of_fit / df_lack_of_fit | |
ms_pure_error = ss_pure_error / df_pure_error | |
f_lack_of_fit = ms_lack_of_fit / ms_pure_error | |
p_lack_of_fit = 1 - f.cdf(f_lack_of_fit, df_lack_of_fit, df_pure_error) | |
# Crear tabla de ANOVA detallada | |
detailed_anova_table = pd.DataFrame({ | |
'Source of Variation': ['Regression', 'Residual', 'Lack of Fit', 'Pure Error', 'Total'], | |
'Sum of Squares': [ | |
round(ss_regression, 3), | |
round(ss_residual, 3), | |
round(ss_lack_of_fit, 3), | |
round(ss_pure_error, 3), | |
round(ss_total, 3) | |
], | |
'Degrees of Freedom': [df_regression, df_residual, df_lack_of_fit, df_pure_error, df_total], | |
'Mean Square': [ | |
round(ms_regression, 3), | |
round(ms_residual, 3), | |
round(ms_lack_of_fit, 3), | |
round(ms_pure_error, 3), | |
np.nan | |
], | |
'F': [np.nan, np.nan, round(f_lack_of_fit, 3), np.nan, np.nan], | |
'p-value': [np.nan, np.nan, round(p_lack_of_fit, 3), np.nan, np.nan] | |
}) | |
return detailed_anova_table | |
# --- Funciones para la interfaz de Gradio --- | |
def load_data(x1_name, x2_name, x3_name, y_name, x1_levels_str, x2_levels_str, x3_levels_str, data_str): | |
try: | |
x1_levels = [float(x.strip()) for x in x1_levels_str.split(',')] | |
x2_levels = [float(x.strip()) for x in x2_levels_str.split(',')] | |
x3_levels = [float(x.strip()) for x in x3_levels_str.split(',')] | |
data_list = [row.split(',') for row in data_str.strip().split('\n')] | |
column_names = ['Exp.', x1_name, x2_name, x3_name, y_name] | |
data = pd.DataFrame(data_list, columns=column_names) | |
data = data.apply(pd.to_numeric, errors='coerce') | |
if not all(col in data.columns for col in column_names): | |
raise ValueError("El formato de los datos no es correcto.") | |
global rsm | |
rsm = RSM_BoxBehnken(data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels) | |
return data, x1_name, x2_name, x3_name, y_name, x1_levels, x2_levels, x3_levels, gr.update(visible=True) | |
except Exception as e: | |
return None, "", "", "", "", [], [], [], gr.update(visible=False), f"Error: {e}" | |
def fit_and_optimize_model(): | |
if 'rsm' not in globals(): | |
return None, None, None, None, None, None, "Error: Carga los datos primero." | |
model_completo, pareto_completo = rsm.fit_model() | |
model_simplificado, pareto_simplificado = rsm.fit_simplified_model() | |
optimization_table = rsm.optimize() | |
equation = rsm.get_simplified_equation() | |
prediction_table = rsm.generate_prediction_table() | |
contribution_table = rsm.calculate_contribution_percentage() | |
anova_table = rsm.calculate_detailed_anova() | |
equation_formatted = equation.replace(" + ", "<br>+ ").replace(" ** ", "^").replace("*", " × ") | |
equation_formatted = f"### Ecuación del Modelo Simplificado:<br>{equation_formatted}" | |
return model_completo.summary().as_html(), pareto_completo, model_simplificado.summary().as_html(), pareto_simplificado, equation_formatted, optimization_table, prediction_table, contribution_table, anova_table | |
def generate_rsm_plot(fixed_variable, fixed_level): | |
if 'rsm' not in globals(): | |
return None, "Error: Carga los datos primero." | |
# Generar todas las gráficas | |
all_figs = rsm.generate_all_plots() | |
# Crear una lista de figuras para la salida | |
plot_outputs = [] | |
for fig in all_figs: | |
# Convertir la figura a una imagen en formato PNG | |
img_bytes = fig.to_image(format="png") | |
plot_outputs.append(img_bytes) | |
# Retornar la lista de imágenes | |
return plot_outputs | |
def download_excel(): | |
if 'rsm' not in globals(): | |
return None, "Error: Carga los datos y ajusta el modelo primero." | |
output = io.BytesIO() | |
with pd.ExcelWriter(output, engine='xlsxwriter') as writer: | |
rsm.data.to_excel(writer, sheet_name='Datos', index=False) | |
rsm.generate_prediction_table().to_excel(writer, sheet_name='Predicciones', index=False) | |
rsm.optimize().to_excel(writer, sheet_name='Optimizacion', index=False) | |
rsm.calculate_contribution_percentage().to_excel(writer, sheet_name='Contribucion', index=False) | |
rsm.calculate_detailed_anova().to_excel(writer, sheet_name='ANOVA', index=False) | |
output.seek(0) | |
return gr.File.update(value=output, visible=True, filename="resultados_rsm.xlsx") | |
def download_images(): | |
if 'rsm' not in globals(): | |
return None, "Error: Carga los datos y ajusta el modelo primero." | |
# Crear un directorio temporal para guardar las imágenes | |
temp_dir = "temp_images" | |
os.makedirs(temp_dir, exist_ok=True) | |
# Generar todas las gráficas y guardarlas como imágenes PNG | |
all_figs = rsm.generate_all_plots() | |
for i, fig in enumerate(all_figs): | |
img_path = os.path.join(temp_dir, f"plot_{i}.png") | |
fig.write_image(img_path) | |
# Comprimir las imágenes en un archivo ZIP | |
zip_buffer = io.BytesIO() | |
with ZipFile(zip_buffer, "w") as zip_file: | |
for filename in os.listdir(temp_dir): | |
file_path = os.path.join(temp_dir, filename) | |
zip_file.write(file_path, arcname=filename) | |
# Eliminar el directorio temporal | |
for filename in os.listdir(temp_dir): | |
file_path = os.path.join(temp_dir, filename) | |
os.remove(file_path) | |
os.rmdir(temp_dir) | |
zip_buffer.seek(0) | |
return gr.File.update(value=zip_buffer, visible=True, filename="graficos_rsm.zip") | |
# --- Crear la interfaz de Gradio --- | |
with gr.Blocks() as demo: | |
gr.Markdown("# Optimización de la producción de AIA usando RSM Box-Behnken") | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown("## Configuración del Diseño") | |
x1_name_input = gr.Textbox(label="Nombre de la Variable X1 (ej. Glucosa)", value="Glucosa") | |
x2_name_input = gr.Textbox(label="Nombre de la Variable X2 (ej. Extracto de Levadura)", value="Extracto_de_Levadura") | |
x3_name_input = gr.Textbox(label="Nombre de la Variable X3 (ej. Triptófano)", value="Triptofano") | |
y_name_input = gr.Textbox(label="Nombre de la Variable Dependiente (ej. AIA (ppm))", value="AIA_ppm") | |
x1_levels_input = gr.Textbox(label="Niveles de X1 (separados por comas)", value="1, 3.5, 5.5") | |
x2_levels_input = gr.Textbox(label="Niveles de X2 (separados por comas)", value="0.03, 0.2, 0.3") | |
x3_levels_input = gr.Textbox(label="Niveles de X3 (separados por comas)", value="0.4, 0.65, 0.9") | |
data_input = gr.Textbox(label="Datos del Experimento (formato CSV)", lines=5, value="""1,-1,-1,0,166.594 | |
2,1,-1,0,177.557 | |
3,-1,1,0,127.261 | |
4,1,1,0,147.573 | |
5,-1,0,-1,188.883 | |
6,1,0,-1,224.527 | |
7,-1,0,1,190.238 | |
8,1,0,1,226.483 | |
9,0,-1,-1,195.550 | |
10,0,1,-1,149.493 | |
11,0,-1,1,187.683 | |
12,0,1,1,148.621 | |
13,0,0,0,278.951 | |
14,0,0,0,297.238 | |
15,0,0,0,280.896""") | |
load_button = gr.Button("Cargar Datos") | |
with gr.Column(): | |
gr.Markdown("## Datos Cargados") | |
data_output = gr.Dataframe(label="Tabla de Datos") | |
# Hacer que la sección de análisis sea visible solo después de cargar los datos | |
with gr.Row(visible=False) as analysis_row: | |
with gr.Column(): | |
fit_button = gr.Button("Ajustar Modelo y Optimizar") | |
download_excel_button = gr.Button("Descargar Tablas en Excel") | |
download_images_button = gr.Button("Descargar Gráficos en ZIP") | |
gr.Markdown("**Modelo Completo**") | |
model_completo_output = gr.HTML() | |
pareto_completo_output = gr.Plot() | |
gr.Markdown("**Modelo Simplificado**") | |
model_simplificado_output = gr.HTML() | |
pareto_simplificado_output = gr.Plot() | |
equation_output = gr.HTML() | |
optimization_table_output = gr.Dataframe(label="Tabla de Optimización") | |
prediction_table_output = gr.Dataframe(label="Tabla de Predicciones") | |
contribution_table_output = gr.Dataframe(label="Tabla de % de Contribución") | |
anova_table_output = gr.Dataframe(label="Tabla ANOVA Detallada") | |
with gr.Column(): | |
gr.Markdown("## Generar Gráficos de Superficie de Respuesta") | |
fixed_variable_input = gr.Dropdown(label="Variable Fija", choices=["Glucosa", "Extracto_de_Levadura", "Triptofano"], value="Glucosa") | |
fixed_level_input = gr.Slider(label="Nivel de Variable Fija", minimum=0, maximum=1, step=0.01, value=0.5) | |
plot_button = gr.Button("Generar Gráfico") | |
rsm_plot_output = gr.Gallery(label="Gráficos RSM", columns=3, preview=True, height="auto") | |
load_button.click( | |
load_data, | |
inputs=[x1_name_input, x2_name_input, x3_name_input, y_name_input, x1_levels_input, x2_levels_input, x3_levels_input, data_input], | |
outputs=[data_output, x1_name_input, x2_name_input, x3_name_input, y_name_input, x1_levels_input, x2_levels_input, x3_levels_input, analysis_row] | |
) | |
fit_button.click(fit_and_optimize_model, outputs=[model_completo_output, pareto_completo_output, model_simplificado_output, pareto_simplificado_output, equation_output, optimization_table_output, prediction_table_output, contribution_table_output, anova_table_output]) | |
plot_button.click(generate_rsm_plot, inputs=[fixed_variable_input, fixed_level_input], outputs=[rsm_plot_output]) | |
download_excel_button.click(download_excel, outputs=[gr.File()]) | |
download_images_button.click(download_images, outputs=[gr.File()]) | |
# Ejemplo de uso | |
gr.Markdown("## Ejemplo de uso") | |
gr.Markdown("1. Introduce los nombres de las variables y sus niveles en las cajas de texto correspondientes.") | |
gr.Markdown("2. Copia y pega los datos del experimento en la caja de texto 'Datos del Experimento'.") | |
gr.Markdown("3. Haz clic en 'Cargar Datos' para cargar los datos en la tabla.") | |
gr.Markdown("4. Haz clic en 'Ajustar Modelo y Optimizar' para ajustar el modelo y encontrar los niveles óptimos de los factores.") | |
gr.Markdown("5. Selecciona una variable fija y su nivel en los controles deslizantes.") | |
gr.Markdown("6. Haz clic en 'Generar Gráfico' para generar un gráfico de superficie de respuesta.") | |
gr.Markdown("7. Haz clic en 'Descargar Tablas en Excel' para obtener un archivo Excel con todas las tablas generadas.") | |
gr.Markdown("8. Haz clic en 'Descargar Gráficos en ZIP' para obtener un archivo ZIP con todos los gráficos generados.") | |
demo.launch() |