diff --git a/code/analisis_resultados.py b/code/analisis_resultados.py
new file mode 100644
index 0000000000000000000000000000000000000000..d73e1833375dbc5b6b3403307eac23466fad8415
--- /dev/null
+++ b/code/analisis_resultados.py
@@ -0,0 +1,100 @@
+import os
+import pickle
+import time
+
+import numpy as np
+import pandas as pd
+import tqdm
+
+import load_saved_models as lsm
+
+
+DATA_PATH = os.path.abspath(os.path.join(__file__, '../../results/regression'))
+
+
+def get_bests(var_names=('Mn(kg·mol)-1', 'MwMn', 'conversion')):
+    bests = {}
+    for var_name in var_names:
+        bests[var_name] = {}
+        means = pd.read_excel(os.path.join(DATA_PATH, f'{var_name}_mean.xlsx'), index_col=0)
+        # Columns come in pairs; every other one holds a model's mean score
+        for column in means.columns[0::2]:
+            results = means[column]
+            indexes = np.array(list(means.index))
+            # Descending sort: the three highest mean scores are kept as the best
+            order = np.argsort(results.values)[::-1]
+            bests[var_name][column.replace('(mean)', '')] = list(indexes[order[:3]])
+
+    return bests
+
+
+def save_bests_excel(var_name, data):
+    # data maps each model name to {f'{preprocessing}_{var_name}': [yhat, mae, mape]},
+    # e.g. {'LR': {...}, 'SVR': {...}, 'RF': {...}}
+    print(data)
+    excel_data = {'Preprocessing': [f"{l}{k}".replace(var_name, '') for k in data for l in data[k]],
+                  'MAE': [data[k][l][1] for k in data for l in data[k]],
+                  'Model': [k for k in data for _ in data[k]]}
+
+    df = pd.DataFrame(excel_data)
+    df.set_index('Preprocessing', inplace=True)
+    df.to_excel(f'{var_name}_bests.xlsx')
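+
+
+# Illustrative call for save_bests_excel (the values and preprocessing names here are
+# hypothetical); the nested dict mirrors bests_dict as built in the __main__ block below:
+#
+#   save_bests_excel('MwMn', {'LR': {'RAW_PCA_MwMn': [1.82, 0.05, 2.3]},
+#                             'SVR': {...},
+#                             'RF': {...}})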
+
+
+if __name__ == "__main__":
+
+    var_names = ['Mn (kg·mol)-1', 'MwMn', 'conversion']
+    dict_model = {}
+    bests_dict = {}
+    seeds = [645, 4578, 72, 2365, 90345, 24, 1859, 1334, 2078, 2446, 7409, 6995, 2041, 449, 9475]
+
+    all_bests = get_bests()
+
+    # For every dependent variable, evaluate the best models of each kind,
+    # averaging predictions and errors over all seeds
+    for var in var_names:
+        bests = all_bests[var.replace(' ', '')]
+        bests_dict[var] = {}
+        for best_key in bests:
+            bests_dict[var][best_key] = {}
+            for name in bests[best_key]:
+                print(var, best_key, name)
+                yhat_val = []
+                maes_val = []
+                mapes_val = []
+                model_varname = f'{name}_{var}'
+                model_type = 'linear' if best_key == 'LR' else ('forest' if best_key == 'RF' else 'svr')
+
+                for seed in tqdm.tqdm(seeds):
+                    data, ensemble = lsm.load_ensemble(f'{name}_{var}', f'./results/models/{seed}/{name}_{var}_{model_type}.pkl', seed)
+                    y_hat = ensemble.predict_ensemble(data[0])
+                    mae, mape = ensemble.compute_mae_mape_ensemble(data[0], data[1])
+                    yhat_val.append(y_hat)
+                    maes_val.append(mae)
+                    mapes_val.append(mape)
+
+                bests_dict[var][best_key][model_varname] = [np.mean(yhat_val), np.mean(maes_val), np.mean(mapes_val)]
+
+        save_bests_excel(var, bests_dict[var])
+
+    pickle.dump(bests_dict, open('all_bests.pkl', 'wb'))
+
+    # Evaluate the best RF models again, this time also timing ensemble prediction
+    best_rf = [all_bests[v.replace(' ', '')]['RF'] for v in var_names]
+
+    for var, best_model in zip(var_names, best_rf):
+        for name in best_model:
+            yhat_val = []
+            maes_val = []
+            mapes_val = []
+            model_varname = f'{name}_{var}'
+            time_t = []
+
+            for seed in tqdm.tqdm(seeds):
+                data, ensemble = lsm.load_ensemble(f'{name}_{var}', f'./results/models/{seed}/{name}_{var}_forest.pkl', seed)
+                start_time = time.time()
+                y_hat = ensemble.predict_ensemble(data[0])
+                end_time = time.time()
+                time_t.append(end_time - start_time)
+                mae, mape = ensemble.compute_mae_mape(data[0], data[1])
+                yhat_val.append(y_hat)
+                maes_val.append(mae)
+                mapes_val.append(mape)
+
+            dict_model[model_varname] = [np.mean(yhat_val, axis=0), np.mean(maes_val, axis=0), np.mean(mapes_val, axis=0), np.mean(time_t)]
+
+        pickle.dump([dict_model], open(f'resolution_{var}.pkl', 'wb'))
\ No newline at end of file
diff --git a/code/feature_selection.py b/code/feature_selection.py
index 376b0ceef55eba80a82f38aefcfb1d73e9e055b8..6041277dbb100432dc6dde8ed10da4dc8c498746 100644
--- a/code/feature_selection.py
+++ b/code/feature_selection.py
@@ -1,13 +1,18 @@
 import numpy as np
 import pandas as pd
+import pickle as pkl
 import seaborn as sns
 import matplotlib.pyplot as plt
 
 # Machine learning
 import xgboost as xgb
 from sklearn.tree import DecisionTreeRegressor
+from sklearn.preprocessing import StandardScaler
 from sklearn.ensemble import RandomForestRegressor
-from sklearn.feature_selection import SelectKBest, f_regression, RFE
+from sklearn.feature_selection import SelectKBest, f_regression, mutual_info_regression, RFE
+
+# User imports
+from prebio_df import read_imputed
 
 
 DELETE_PERC = 90
@@ -15,14 +20,14 @@ DELETE_PERC = 90
 
 # ELBOW METHOD
 def find_stable_index(metric_values: np.array, tolerance: float)-> float:
     """
-    Finds the index where a metric stabilizes in an array of values.
+    Find the index at which a metric stabilizes in an array of values.
 
-    Parameters:
-    - metric_values: A list or array of metric values.
-    - tolerance: The maximum allowed difference between successive values.
+    Parameters:
+    - metric_values: An ordered list or array of metric values.
+    - tolerance: The maximum allowed absolute difference between successive values.
 
-    Returns:
-    - The index where the metric stabilizes or None if not found.
+    Returns:
+    - The index at which the metric stabilizes, or None if it never does.
     """
     for i in range(1, len(metric_values)):
         if abs(metric_values[i] - metric_values[i - 1]) <= tolerance:
@@ -145,4 +150,43 @@ def random_forest_selector(x: pd.DataFrame, y: pd.DataFrame, tolerance=0.01, **k
         plt.show()
 
     return results
-    
\ No newline at end of file
+
+
+def print_distribution(dataframe, name, delete_indexes):
+    outlier_df = dataframe.copy(deep=True)
+    outlier_df['outlier'] = [i in delete_indexes for i in range(outlier_df.shape[0])]
+    sns.displot(data=outlier_df, x=name, hue='outlier', palette="blend:#7AB,#E06B53", edgecolor="white", linewidth=0)
+    sns.despine()
+    plt.tight_layout()
+    plt.savefig(f"./results/feature_extraction/{name.replace('/', '_')}_outliers.png", transparent=False)
+
+
+def main():
+    dependent_vars = ['Mn (kg·mol)-1', 'Mw/Mn', 'conversion']
+    reduced_path = './data/reduced_df.pickle'
+
+    df = read_imputed().select_dtypes('number')
+    reduced_dfs = {}
+
+    for name in dependent_vars:
+        print(f"--- Processing {name.capitalize()} ---")
+        # Features are the numeric columns minus all dependent variables;
+        # the target is the current dependent variable
+        x = df.drop(columns=dependent_vars)
+        y = df[name]
+
+        results = q1_feature_selection(x, y, DELETE_PERC, stats=name)
+        # Uncomment to use RFE or XGBoost instead
+        #results = rfe_selection(x, y, DELETE_PERC, stats=name)
+        #results = xgboost_selection(x, y, DELETE_PERC)
+        result = random_forest_selector(x, y)
+
+        #print(list(results.index))
+        #
+        #x = x[list(results.index)]
+        #
+        ## Save
+        #x.reset_index(inplace=True, drop=True)
+        #y.reset_index(inplace=True, drop=True)
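+
+        # A hedged sketch of the save step the commented lines above build toward:
+        # collect the reduced split per variable (assuming results.index holds the
+        # selected feature names), matching the Dict[str, Tuple] shape that
+        # load_reduced_df() in prebio_df.py returns. Left disabled like the block above:
+        #reduced_dfs[name] = (x[list(results.index)], y)
+
+    # After the loop, the reduced splits could be pickled to reduced_path:
+    #with open(reduced_path, 'wb') as f:
+    #    pkl.dump(reduced_dfs, f)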
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/code/load_saved_models.py b/code/load_saved_models.py
index 77dac6aaf4fa5658acd516c8d34b6cde801fbeda..f80c6e02c700ffc7f20d1a4a77d2999858073056 100644
--- a/code/load_saved_models.py
+++ b/code/load_saved_models.py
@@ -81,6 +81,5 @@ def load_ensemble(data_name: str, model_data_path: str, seed: int) -> Tuple[pd.D
     """
    data_all = pickle.load(open(f'data/all_processed_{seed}.pkl','rb'))
    ensemble = Ensemble(pickle.load(open(model_data_path, 'rb')))
-    #print(data_name)
-    #print(model_data_path)
-    return data_all[data_name], ensemble
+
+    return data_all[data_name], ensemble
\ No newline at end of file
diff --git a/code/missing_imputation.py b/code/missing_imputation.py
new file mode 100644
index 0000000000000000000000000000000000000000..7429ec685ffc17fa47ad7be1c8bf2c06a683c2f9
--- /dev/null
+++ b/code/missing_imputation.py
@@ -0,0 +1,88 @@
+import click
+import numpy as np
+import pandas as pd
+from sklearn.impute import KNNImputer
+
+from prebio_df import read_prebio
+from nan_analysis import missing_values_table
+
+
+def impute_missing(target: pd.Series, dataset: pd.DataFrame) -> pd.DataFrame:
+    """Impute missing values in the dataset using KNN imputation.
+
+    Args:
+        target (pd.Series): Target row with missing values to be imputed.
+        dataset (pd.DataFrame): Dataset providing the reference rows for imputation.
+
+    Returns:
+        pd.DataFrame: Dataset, including the target row, with missing values imputed.
+    """
+    # Keep the row's original label so the caller can align the result via df70.update()
+    dataset.loc[target.name] = target
+    imputer = KNNImputer()
+    res = imputer.fit_transform(dataset)
+
+    return pd.DataFrame(res, columns=dataset.columns, index=dataset.index)
+
+
+def check_complete_samples(df: pd.DataFrame) -> tuple:
+    """Find all samples with 0% missing values.
+
+    Args:
+        df (pd.DataFrame): Input dataframe
+
+    Returns:
+        tuple: A tuple containing the indexes of samples with 0% missing values (empty if none),
+        an array of NaN counts for each row, and the unique NaN counts.
+    """
+    nans = np.array([r.isnull().sum() for _, r in df.iterrows()])
+    indexes = np.where(nans == 0)[0]
+    print(f'> The number of instances with 0% missing values is {len(indexes)}!')
+    unique, counts = np.unique(nans, return_counts=True)
+    print(dict(zip(unique, counts)))
+
+    return indexes, nans, unique
+
+
+@click.command()
+@click.option('--ds', default='../data/polycarbonates_entregable.xlsx', help='Path to prebio2 dataset (.xlsx)')
+def main(ds):
+    """Preprocess the dataset by imputing missing values and storing the result.
+
+    Args:
+        ds (str): Path to the prebio2 dataset (.xlsx).
+    """
+    df = read_prebio(ds)
+    missing_table = missing_values_table(df)
+
+    # Drop columns with more than 70% of their data missing
+    more_than_70_labels = missing_table[missing_table['% of Total Values'] > 70].index
+    df70 = df.drop(columns=more_than_70_labels)
+    missing_values_table(df70, title="------ Columns dropped ------")
+
+    # Get all the samples with 0% missing values
+    indexes, nans, unique = check_complete_samples(df70)
+    full_df = df70.iloc[indexes]
+
+    # To simplify imputing new data we drop the categorical columns (none of them contain NaNs)
+    full_df = full_df.drop(columns=['Epoxide', 'SMILES', 'Catalyst'])
+
+    # Assert that the reference set really has no missing values
+    assert missing_values_table(full_df).empty, "This shouldn't be happening. Your full_df has empty values!"
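+
+    # Each newly imputed row is appended to full_df, so rows with more gaps are
+    # imputed against an ever larger reference set (the KNNImputer is refit on
+    # that grown set at every call).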
+    # Impute missing data sequentially, in order of increasing NaN count
+    for min_nan in np.sort(unique)[1:]:  # Skip the 0-NaN bucket: those samples are already in full_df
+        for index in np.where(nans == min_nan)[0]:
+            full_df = impute_missing(df70.iloc[index], full_df)
+
+    # update() aligns on the row labels preserved by impute_missing
+    df70.update(full_df)
+    missing_values_table(df70, title='------ Imputed DataFrame ------')
+
+    path = '../data/imputed_polycarbonates.xlsx'
+    print(f'The new dataset will be stored in {path}')
+    df70.to_excel(path)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/code/nan_analysis.py b/code/nan_analysis.py
index bc4d58020d25f5780b9b19374c506efc213b090c..05ba89d7d971fd51918d73ff24130e3669d826ed 100644
--- a/code/nan_analysis.py
+++ b/code/nan_analysis.py
@@ -1,6 +1,8 @@
+import click
 import numpy as np
 import pandas as pd
 
+from prebio_df import read_prebio
 
 def missing_values_table(df: pd.DataFrame, title:str=None) -> pd.DataFrame:
     """Generate a table of missing values in the dataframe.
@@ -61,3 +63,36 @@ def get_nans_per_row(df) -> pd.DataFrame:
         new_data.loc[len(new_data)] = new_row
 
     return new_data
+
+
+def generate_images(df):
+    import seaborn as sns
+    import seaborn_image as snsi
+    import matplotlib.pyplot as plt
+
+    # Render the boolean NaN mask of the whole dataframe as an image
+    res = df.isnull()
+    snsi.imgplot(res, gray=True)
+    plt.tight_layout()
+    plt.savefig('NaN.png')
+    plt.close()
+
+    # Bar plot of the number of missing values per column
+    res = missing_values_table(df)
+    res_len = res.shape[0]
+    res.reset_index(inplace=True)
+    res = res.rename(columns={'index': f'Feature ID ({res_len} columns)'})
+    res = res.drop(columns=['% of Total Values'])
+
+    sns.barplot(res, x=f'Feature ID ({res_len} columns)', y='Missing Values')
+    sns.despine()
+    plt.tick_params(
+        axis='x',          # changes apply to the x-axis
+        which='both',      # both major and minor ticks are affected
+        bottom=False,      # ticks along the bottom edge are off
+        top=False,         # ticks along the top edge are off
+        labelbottom=False  # tick labels along the bottom edge are off
+    )
+
+    plt.tight_layout()
+    plt.savefig('NaNPerColumn.png', transparent=True)
+    plt.close()
\ No newline at end of file
diff --git a/code/prebio_df.py b/code/prebio_df.py
index 3035477424d39e310e5ce90326f50da344f736a1..80ddbe0e4b89ff4e1d5731b9b73a48a72cdb3c6d 100644
--- a/code/prebio_df.py
+++ b/code/prebio_df.py
@@ -65,4 +65,4 @@ def load_split_df(path: str = './data/imputed_polycarbonates.xlsx') -> tuple:
 
 def load_reduced_df() -> Dict[str, Tuple[pd.DataFrame, pd.DataFrame]]:
     with open('./data/reduced_df.pickle', 'rb') as f:
-        return pkl.load(f)
\ No newline at end of file
+        return pkl.load(f)
diff --git a/code/preprocessing.py b/code/preprocessing.py
index e5d0020dd3fcfec994f59efb0bdbbdd57bf2fc9a..c04788d77bab9a3003de17c44d831f4c143f3177 100644
--- a/code/preprocessing.py
+++ b/code/preprocessing.py
@@ -1,6 +1,7 @@
 import click
 import numpy as np
 import pandas as pd
+from scipy import stats
 from sklearn.impute import KNNImputer
 
 from prebio_df import read_prebio
diff --git a/code/results_manager.py b/code/results_manager.py
index 23873a5335060117faaff91c4eb2b46b4f439a4b..6582703eb99998578756b0bfe8feee95c366caf7 100644
--- a/code/results_manager.py
+++ b/code/results_manager.py
@@ -7,7 +7,7 @@
 class DataProvider:
 
     def __init__(self) -> None:
-        self.data = {} # Format: Name: Dict({var_name: [x, y]})
+        self.data = {}  # Format: {name: {var_name: [x, y]}}
 
     @property
     def ids(self):