diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..681fee37b151c1f64161ee0c53781ddf68cb603f --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*.pyc +*.pickle +*.h5 +*.hf5 +*.txt +*.TXT +summary/ +__pycache__/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..bd04de7abc60896f6d45e41c1a41784a5bde3392 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Aritz + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 03f9c67a6a58af0e1e100e84b47b51da1a492b2e..4eff32c798514182dbf9cefe2f67ce0092cc3527 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,96 @@ -# A-MFEA-RL +# Citing A-MFEA-RL +> Aritz D. Martinez, Javier Del Ser, Eneko Osaba and Francisco Herrera, Adaptive Multi-factorial Evolutionary Optimization for Multi-task Reinforcement Learning, 2020. 
+# A-MFEA-RL: Adaptive Multi-factorial Evolutionary Optimization for Multi-task Reinforcement Learning +>(ABSTRACT) Evolutionary Computation has largely exhibited its potential to replace conventional learning algorithms in a manifold of Machine Learning tasks, especially those related to unsupervised (clustering) and supervised learning. It has not been until lately when the computational efficiency of evolutionary solvers has been put in prospective for training Reinforcement Learning (RL) models. However, most studies framed in this context so far have considered environments and tasks conceived in isolation, without any exchange of knowledge among related tasks. In this manuscript we present A-MFEA-RL, an adaptive version of the well-known MFEA algorithm whose search and inheritance operators are tailored for multitask RL environments. Specifically, our A-MFEA-RL approach includes crossover and inheritance mechanisms for refining the exchange of genetic material that rely on the multi-layered structure of modern Deep Learning based RL models. In order to assess the performance of the proposed evolutionary multitasking approach, we design an extensive experimental setup comprising different multitask RL environments of varying levels of complexity, comparing them to those furnished by alternative non-evolutionary multitask RL approaches. As concluded from the discussion of the obtained results, A-MFEA-RL not only achieves competitive success rates over the tasks being simultaneously solved, but also fosters the exchange of knowledge among tasks that could be intuitively expected to keep a degree of synergistic relationship. + +In the framework, a reformulation of the well-known MFEA/MFEA-II algorithms is introduced. The algorithm is designed so that Multifactorial Optimization can be applied to train neural networks, taking advantage of inter-task similarities by mimicking the traditional **Model-based Transfer Learning** procedure. 
The adaptation is carried out by means of three crucial points: + +1. **Design of the unified space towards favoring model-based Transfer Learning**: specifically, aspects such as the neural network architecture, the number of neurons of each layer, and the presence of shared layers among models evolved for each task are taken into account. +2. **Adapted crossover operator**: the crossover operator must support the previous aspects by preventing neural models from exchanging irrelevant information. +3. **Layer-based Transfer Learning**: unlike in traditional means to implement Transfer Learning, the number of layers to be transferred between models evolved for different tasks is autonomously decided by A-MFEA-RL during the search. + +The code works on top of [Metaworld](https://github.com/rlworkgroup/metaworld). The experimentation carried out considers three scenarios: *TOY*, *MT-10/MT-10-R* and *MT-50/MT-50-R* (results are included in the [Results](#results) section). *R* denotes randomly initialized episodes, as in the next image: + +<h3>MT-10-R results +<img src="/uploads/16013a3d9ec2050cca0e0616189bea87/out.gif" width="80%" /> </h3> + +# Running the experimentation + +It is recommended to use the conda environment provided with the code (*mujoco36.yml*) for ease: +```bash +conda env create -f mujoco36.yml +conda activate mujoco36 +``` + +A-MFEA-RL depends on Metaworld and MuJoCo (license required). To install Metaworld please follow the instructions in the [Metaworld repository](https://github.com/rlworkgroup/metaworld) or run: + +```bash +pip install git+https://github.com/rlworkgroup/metaworld.git@master#egg=metaworld +``` + +The experimentation can be replicated by running `RUN_ALL.sh`. In order to run experiments independently: + +``` +python3 exp.py -exp INT -t INT -p STR +``` + +* `-exp`: Integer. 0 = TOY, 1 = MT-10/MT-10-R, 2 = MT-50/MT-50-R. +* `-t`: Integer. Number of threads used by Ray. +* `-p`: STRING. Name of the folder under `summary` where results are saved. 
+ +# Results +| | | MT-10 | | | MT-10-R | | | MT-50 | | | MT-50-R | | +|-|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| +| **Environment name (complexity)** | **A** | **B** | **C** | **A** | **B** | **C** | **A** | **B** | **C** | **A** | **B** | **C** | +| assembly (H) | - | - | - | - | - | - | 0 | 0 | 0 | 0 | 0 | 0 | +| basketball (H) | - | - | - | - | - | - | 0 | 0 | 0 | 22 | 33 | 0 | +| bin-picking (H) | - | - | - | - | - | - | 0 | 0 | 0 | 0 | 0 | 11 | +| box-close (H) | - | - | - | - | - | - | 44 | 44 | 0 | 22 | 33 | 0 | +| button-press-topdown (M) | 100 | 100 | 100 | 100 | 89 | 91 | 100 | 100 | 100 | 100 | 100 | 97 | +| button-press-topdown-wall (H) | - | - | - | - | - | - | 67 | 78 | 100 | 67 | 100 | 100 | +| button-press (M) | - | - | - | - | - | - | 44 | 67 | 100 | 44 | 55 | 100 | +| button-press-wall (H) | - | - | - | - | - | - | 100 | 100 | 100 | 100 | 100 | 98 | +| coffee-button (H) | - | - | - | - | - | - | 44 | 78 | 100 | 56 | 89 | 100 | +| coffee-pull (M) | - | - | - | - | - | - | 78 | 100 | 0 | 100 | 100 | 70 | +| coffee-push (M) | - | - | - | - | - | - | 78 | 89 | 100 | 89 | 89 | 40 | +| dial-turn (H) | - | - | - | - | - | - | 100 | 100 | 100 | 100 | 100 | 99 | +| disassemble (H) | - | - | - | - | - | - | 0 | 0 | 0 | 0 | 0 | 0 | +| door-close (H) | - | - | - | - | - | - | 78 | 56 | 100 | 78 | 55 | 100 | +| door-lock (H) | - | - | - | - | - | - | 89 | 100 | 100 | 89 | 89 | 100 | +| door-open (H) | 100 | 33 | 100 | 100 | 100 | 100 | 78 | 67 | 100 | 67 | 67 | 100 | +| door-unlock (M) | - | - | - | - | - | - | 78 | 89 | 100 | 89 | 100 | 100 | +| drawer-close (H) | 100 | 100 | 100 | 100 | 100 | 100 | 79 | 89 | 100 | 67 | 78 | 100 | +| drawer-open (H) | 0 | 33 | 100 | 33 | 0 | 99 | 22 | 33 | 100 | 22 | 44 | 98 | +| faucet-close (M) | - | - | - | - | - | - | 100 | 67 | 100 | 78 | 44 | 81 | +| faucet-open (M) | - | - | - | - | - | - | 89 | 89 | 100 | 89 | 67 | 91 | +| hammer (H) | - | - | - | - | - | - | 33 | 56 | 100 | 11 | 67 | 100 | +| hand-insert 
(M) | - | - | - | - | - | - | 100 | 100 | 100 | 100 | 100 | 100 | +| handle-press-side (H) | - | - | - | - | - | - | 0 | 11 | 100 | 100 | 33 | 40 | +| handle-press (H) | - | - | - | - | - | - | 89 | 78 | 60 | 100 | 78 | 35 | +| handle-pull-side (H) | - | - | - | - | - | - | 56 | 67 | 0 | 56 | 89 | 0 | +| handle-pull (H) | - | - | - | - | - | - | 89 | 100 | 0 | 78 | 100 | 0 | +| lever-pull (M) | - | - | - | - | - | - | 0 | 0 | 0 | 0 | 0 | 0 | +| peg-insert-side (H) | 67 | 33 | 0 | 56 | 56 | 0 | 0 | 22 | 0 | 44 | 33 | 0 | +| peg-unplug-side (H) | - | - | - | - | - | - | 100 | 100 | 0 | 100 | 100 | 0 | +| pick-out-of-hole (H) | - | - | - | - | - | - | 0 | 0 | 0 | 0 | 0 | 0 | +| pick-place (H) | 66 | 100 | 0 | 0 | 0 | 0 | 44 | 11 | 0 | 33 | 11 | 0 | +| pick-place-wall (H) | - | - | - | - | - | - | 44 | 33 | 0 | 33 | 0 | 10 | +| plate-slide-back-side (M) | - | - | - | - | - | - | 100 | 89 | 40 | 78 | 89 | 45 | +| plate-slide-back (M) | - | - | - | - | - | - | 67 | 89 | 100 | 89 | 100 | 58 | +| plate-slide-side (M) | - | - | - | - | - | - | 100 | 89 | 100 | 55 | 100 | 100 | +| plate-slide (M) | - | - | - | - | - | - | 33 | 100 | 100 | 78 | 78 | 77 | +| push-back (E) | - | - | - | - | - | - | 89 | 100 | 0 | 89 | 100 | 71 | +| push (E) | 100 | 100 | 100 | 78 | 67 | 59 | 44 | 89 | 100 | 78 | 33 | 47 | +| push-wall (M) | - | - | - | - | - | - | 56 | 33 | 100 | 55 | 44 | 47 | +| reach (E) | 100 | 100 | 100 | 100 | 100 | 91 | 100 | 100 | 100 | 100 | 100 | 98 | +| reach-wall (E) | - | - | - | - | - | - | 100 | 100 | 100 | 100 | 100 | 98 | +| shelf-place (H) | - | - | - | - | - | - | 0 | 0 | 0 | 44 | 55 | 0 | +| soccer (E) | - | - | - | - | - | - | 67 | 78 | 0 | 55 | 33 | 48 | +| stick-pull (H) | - | - | - | - | - | - | 11 | 33 | 0 | 11 | 44 | 79 | +| stick-push (H) | - | - | - | - | - | - | 0 | 0 | 0 | 11 | 0 | 100 | +| sweep-into (E) | - | - | - | - | - | - | 100 | 78 | 100 | 67 | 89 | 80 | +| sweep (E) | - | - | - | - | - | - | 100 | 89 | 100 | 100 | 67 | 74 | +| window-close 
(H) | 33 | 33 | 100 | 100 | 78 | 100 | 67 | 44 | 100 | 89 | 44 | 100 | +| window-open (H) | 67 | 100 | 100 | 78 | 89 | 99 | 11 | 67 | 100 | 44 | 78 | 93 | +| **Average success rate** | 73.3 | 73.2 | **80.0** | **74.5** | 67.9 | 73.9 | 57.3 | **62.0** | 60.0 | 61.5 | **62.1** | 59.7 | diff --git a/RUN_ALL.sh b/RUN_ALL.sh new file mode 100755 index 0000000000000000000000000000000000000000..601a9768a0e11b10a9a44a3b56db1b00dd832abf --- /dev/null +++ b/RUN_ALL.sh @@ -0,0 +1,3 @@ +python3 exp.py -exp 0 +python3 exp.py -exp 1 +python3 exp.py -exp 2 diff --git a/analyzer/Analyzer.py b/analyzer/Analyzer.py new file mode 100644 index 0000000000000000000000000000000000000000..032a9afdbb7cf43752fa210191c215bf09743c90 --- /dev/null +++ b/analyzer/Analyzer.py @@ -0,0 +1,75 @@ +# -*- coding:utf-8 -*- +import numpy as np + + +class Layer(): + def __init__(self, ltype, lkeras_config, w_size, + w_shape, b_size, b_shape, trainable): + self.type = ltype + self.keras_config = lkeras_config + self.w_size = w_size + self.b_size = b_size + self.w_shape = w_shape + self.b_shape = b_shape + self.trainable = trainable + if self.trainable: + self.he_limit = np.sqrt(6/w_shape[0]) + + def __str__(self): + retstr = f"\t | Type: {self.type}\n" + retstr += f"\t | Trainable: {self.trainable}\n" + retstr += f"\t | Size: Weights={self.w_size}, Bias={self.b_size}\n" + retstr += f"\t | Shapes: Weights={self.w_shape}, Bias={self.b_shape}\n" + try: + w_i = self.w_index + b_i = self.b_index + retstr += f"\t | Indexes: Weights={w_i}, Bias={b_i}\n" + except AttributeError: + pass + retstr += "\t |__\n" + return retstr + + def AddIndexes(self, weights, bias): pass + + +class ModelInfo(): + def __init__(self, model): + self.model = model + self.D = model.count_params() + self.num_layers = 0 + self.mask = [] + self.shapes = [] + + def getInfos(self): + infos = [] + for i in range(self.num_layers): + layer_info = getattr(self, f"layer{i}") + infos.append(layer_info) + return infos + + def __str__(self): + retstr = 
"---------- MODEL INFO ----------\n\n" + retstr += f"\t* The model has {self.num_layers} layers:\n" + for i in range(self.num_layers): + layer_info = getattr(self, f"layer{i}") + retstr += f"\n\t* Layer{i}:\n" + retstr += str(layer_info) + return retstr + + def AddLayerInfo(self, layer, trainable): + pass + + +class Analyzer(): + def __init__(self): + self.models_info = [] + + def __str__(self): + retstr = "" + for m in self.models_info: + retstr += str(m) + retstr += "\n" + return retstr + + def analyze(self, model): pass + def unified_space(self, layers_to_share=None): pass diff --git a/analyzer/__init__.py b/analyzer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/analyzer/kerpy.py b/analyzer/kerpy.py new file mode 100644 index 0000000000000000000000000000000000000000..61776638ad3cc4c3a4f678d2641dfd186e976741 --- /dev/null +++ b/analyzer/kerpy.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +import numpy as np + + +# ------------------------------------------------------------ +# Operations with models +# ------------------------------------------------------------ +def get_flattened_weights(model): + weights = model.get_weights() + np_array = np.array([]) + for i in range(len(weights)): # np.shape() rejects ragged lists + np_array = np.append(np_array, weights[i]) + return np_array + + +def unravel(weights, layer_shapes): + reshaped_weights = [] + for layer_shape in layer_shapes: + if isinstance(layer_shape, list): + layer_weights = [] + for shape in layer_shape: + to_r, weights = np.split(weights, [np.nanprod(shape)]) + layer_weights.append(np.reshape(to_r, shape)) + reshaped_weights.append(layer_weights) + else: + split = np.nanprod(layer_shape) + to_r, weights = np.split(weights, [split]) + reshaped_weights.append(np.reshape(to_r, layer_shape)) + return reshaped_weights + + +def set_weights(model, weights, mask, shapes): + weights = unravel(weights, shapes) + for i, l in enumerate(model.layers): + if 
mask[i] == 1: + l.set_weights(weights.pop(0)) diff --git a/compressed_MT10.pbz2 b/compressed_MT10.pbz2 new file mode 100644 index 0000000000000000000000000000000000000000..3f69abe72203322ac27791f0eb6cbd387c92d0cf Binary files /dev/null and b/compressed_MT10.pbz2 differ diff --git a/compressed_TOY.pbz2 b/compressed_TOY.pbz2 new file mode 100644 index 0000000000000000000000000000000000000000..4fe0c9c3d142c4024cf432a58a2914bf57cacbcd Binary files /dev/null and b/compressed_TOY.pbz2 differ diff --git a/exp.py b/exp.py new file mode 100644 index 0000000000000000000000000000000000000000..7e5a13577acaf72fe3e403250b76291ac534fa3d --- /dev/null +++ b/exp.py @@ -0,0 +1,74 @@ +#! /usr/bin/env python3 +# -*- coding: utf-8 -*- +import os +import ray +import shutil +import signal +import argparse +from mfea import mfea +from utils.callback import callback_function as cb +from utils.utils import load_compressed_pickle as lp +from pickle import dump as pdump + + +def end_exp(): + shutil.rmtree('/tmp/amfearl/', ignore_errors=True) + ray.shutdown() + + +def handler(signum, frame): + end_exp() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-p', dest='path', type=str, default="summary") + parser.add_argument('-t', dest='threads', type=int, default=None) + parser.add_argument('-exp', dest='experiment', type=int, default=0) + args = parser.parse_args() + filename = args.path + + signal.signal(signal.SIGINT, handler) + + # Prepare temporary folder + try: + os.mkdir('/tmp/amfearl/') + except FileExistsError: + shutil.rmtree('/tmp/amfearl/') + os.mkdir('/tmp/amfearl/') + + envs = [] + + if args.experiment == 0: + # TOY EXPERIMENT + print('Loading compressed_TOY.pbz2, this may take a few seconds...') + data = lp('./compressed_TOY.pbz2') + tasks = data['tasks'] + dim = data['unified_dimensions'] + filename = "TOY" + + if args.experiment == 1: + # MT-10 + print('Loading compressed_MT10.pbz2, this may take a few seconds...') + data = 
lp('./compressed_MT10.pbz2') + tasks = data['tasks'] + dim = data['unified_dimensions'] + filename = "MT10" + + if args.experiment == 2: + # MT-50 + print('Loading compressed_MT50.pbz2, this may take a few seconds...') + data = lp('./compressed_MT50.pbz2') + tasks = data['tasks'] + dim = data['unified_dimensions'] + filename = "MT50" + + for i in range(len(tasks)): + pdump(tasks[i], open('/tmp/amfearl/'+str(i)+'.pickle', 'wb')) + + mfea.mfea(tasks, rmp=0.8, gen=1000, pop=10, episodes=10, + D_multitask=dim, callback=cb, + f_name=filename, num_threads=args.threads) + + # Cleaning + end_exp() diff --git a/mfea/__init__.py b/mfea/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/mfea/individual.py b/mfea/individual.py new file mode 100644 index 0000000000000000000000000000000000000000..22c1e878538140fca35645eb13793a9372ccfd17 --- /dev/null +++ b/mfea/individual.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- +import numpy as np +import ray +import pickle + +@ray.remote +class PIndividual(object): + def __init__(self, D_multitask, n_tasks): + self.dim = D_multitask + self.tasks = None + self.no_of_tasks = n_tasks + self.rnvec = np.random.uniform(size=D_multitask) + self.candidate = None + self.skf = None # skill factor + + def evaluate(self, p_il, method): + assert self.skf is not None, "Error: Skill factor is None" + if self.skf is None: + raise ValueError("skill factor not set") + else: + task = self.tasks # check set_individual + objective, candidate, sr = task.mujoco_env.fitness(self.rnvec) + funcCount = 1 + self.objective = objective + self.candidate = candidate + self.success_rate = sr + ret_dict = {'skf': self.skf, 'candidate': candidate, + 'objective':objective, 'success_rate': sr, + 'rnvec':self.rnvec} + return ret_dict + + def set_individual(self, rnvec, candidate, skf): + self.rnvec = rnvec + self.candidate = candidate + if skf != self.skf: + env = 
pickle.load(open('/tmp/amfearl/'+str(skf)+'.pickle','rb')) + self.tasks = env + del env + self.skf = skf + return 1 + + def get_individual(self): + return self.rnvec, self.candidate, self.skf + + +class Individual(object): + def __init__(self, D_multitask, tasks): + self.dim = D_multitask + self.tasks = tasks + self.no_of_tasks = len(tasks) + self.rnvec = np.random.uniform(size=D_multitask) + self.candidate = None + self.scalar_fitness = None + self.skf = None + self.objective = None + self.mut_cross = (False, False) + self.parent_fitness = None + self.parents_skfactor = None + self.layers_updated = None + self.success_rate = None + + def update_fitness(self, res): + assert res['skf'] == self.skf, "Error: skill factor not matching" + self.candidate = res['candidate'] + self.objective = res['objective'] + self.success_rate = res['success_rate'] + + def clear(self): + self.parent_fitness = None + self.parents_skfactor = None + self.mut_cross = (False, False) + self.candidate = None + + def get_individual(self): + return self.rnvec, self.candidate, self.skf + + def set_individual(self, rnvec, candidate, skf): + self.rnvec = rnvec + self.candidate = candidate + self.skf = skf + + def evaluate(self, p_il, method): + assert self.skf is not None, "Error: Skill factor is None" + if self.skf is None: + raise ValueError("skill factor not set") + else: + task = self.tasks[self.skf] + objective, candidate, sr = task.mujoco_env.fitness(self.rnvec) + funcCount = 1 + self.objective = objective + self.candidate = candidate + self.success_rate = sr + ret_dict = {'skf': self.skf, 'candidate': candidate, + 'objective':objective, 'success_rate': sr, + 'rnvec':self.rnvec} + return ret_dict diff --git a/mfea/mfea.py b/mfea/mfea.py new file mode 100644 index 0000000000000000000000000000000000000000..0d4b9ca9b090305137416a9e1b36d7c7e63f3c61 --- /dev/null +++ b/mfea/mfea.py @@ -0,0 +1,384 @@ +# -*- coding:utf-8 -*- +import numpy as np +from .individual import Individual, PIndividual +from 
.operators import layer_wise_crossover, mutate, RouletteWheelSelection + +import ray +import time +import tqdm + +# ------------------ +# RAY INITIALIZATION +# ------------------ +ray.init() +print(f"Available resources: {ray.available_resources()}") + + +# ------------------ +# METHODS +# ------------------ +def update_remotes(population, remote_pop): + assert len(population) == len(remote_pop), "Different lengths" + remotes = [] + for i, ind in enumerate(remote_pop): + rnvec, candidate, skf = population[i].get_individual() + r = ind.set_individual.remote(rnvec, candidate, skf) + remotes.append(r) + exit_status = ray.get(remotes) + assert all(exit_status), "Error when updating remotes info" + + +def evaluate_parallel(candidates, remotes, p_il, method): + evals = len(candidates) + maxV = len(candidates) + results = np.full((evals,), None) + index = 0 + b_format = '{desc} |{bar}| [elapsed: {elapsed}]' + _desc = f"Evaluating 0/{maxV} : 0%" + tq = tqdm.tqdm(total=maxV, bar_format=b_format, desc=_desc) + while evals > 0: + eval_amount = min(len(remotes), evals) + pop_eval = candidates[index: index + eval_amount] + # Update remotes + update_remotes(pop_eval, remotes[:eval_amount]) + # Evaluate candidates in parallel + rmt = remotes[:eval_amount] + partials = [ind.evaluate.remote(p_il, method) for ind in rmt] + evaluated_pop = ray.get(partials) + results[index:index+eval_amount] = evaluated_pop + # Update counts + evals -= eval_amount + index += eval_amount + tq.update(eval_amount) + tq.desc = "Evaluating " + f"({index}/{maxV}): " + tq.desc += f"{round(index/maxV*100,3)}%" + tq.close() + return results + + +def rnd(x): + return np.random.random() <= x + + +def isnan(x): + return np.isnan(x) + + +# ---------- +# A-MFEA-RL +# ---------- +def mfea(tasks, pop=100, gen=1000, selproc='elitist', + rmp=0.3, p_il=0.0, episodes=100, method='L-BFGS-B', + D_multitask=None, callback=None, f_name=None, + num_threads=None): + ''' + :param tasks: List of Task type, can not be empty + 
:param pop: Integer, population size + :param gen: Integer, generation + :param selproc: String, 'elitist', 'roulette wheel' or customized + :param rmp: Float, between 0 and 1 + :param p_il: Float, between 0 and 1 + :param episodes: Integer, episodes to test in the fitness function + :param method: String, details in scipy.optimize.minimize + :param D_multitask: Integer, size of the Unified Space + :param callback: Function, function to call on each generation end + :param f_name: String, name of the folder in ./summary + :param num_threads: Int, number of parallely evaluated candidates + :return: Dict() + ''' + + # Population MUST be an EVEN number and tasks at LEAST 1 + pop = pop + 1 if pop % 2 != 0 else pop + no_of_tasks = len(tasks) + assert no_of_tasks >= 1 and pop % 2 == 0 + print('Valid configuration detected ==> (STARTING)') + + # Set the number of episodes to test the environments + for i in range(no_of_tasks): + tasks[i].mujoco_env.episodes = episodes + + # Matrix containing best individual per task + bestobj = np.empty(shape=(gen, no_of_tasks)) + + # (MFEA) Start MFEA variables + pop = pop * no_of_tasks + _shape = (2 * pop, no_of_tasks) + factorial_costs = np.full(shape=_shape, fill_value=np.inf) + factorial_ranks = np.empty(shape=_shape) + + # (MFEA) Initialize the local and remote populations + max_threads = min(int(ray.available_resources()['CPU']), pop) + if num_threads is not None: + max_threads = min(num_threads, max_threads) + print(f"Creating {pop} individuals and {max_threads} threads... 
", end='') + population = np.asarray([Individual(D_multitask, tasks) for _ in range(2 * pop)]) + remote_pop = [PIndividual.remote(D_multitask, no_of_tasks) for _ in range(max_threads)] + print("Completed!") + + # Assign skill factor to each individual in population + for i, individual in enumerate(population[:pop]): + individual.skf = i % no_of_tasks + + # Algorithm data + mu, mum = 2, 5 + rmp = float(rmp) + crossover_matrix = np.full((len(tasks), len(tasks)), 0.0) + effective_crossovers = np.full((len(tasks), len(tasks)), 0.0) + mutation_matrix = np.full((len(tasks),), 0.0) + effective_mutations = np.full((len(tasks),), 0.0) + best_tmp = np.full(shape=no_of_tasks, fill_value=np.Inf) + + # Create layer-wise rmp matrix + layer_amount = [task.num_layers for task in tasks] + no_sharable_layers = max(layer_amount) + rmp_matrix = np.full((no_of_tasks, no_of_tasks, no_sharable_layers), rmp) + + # Set rmp in tasks (i==j) = 1, np.nan if not trainable + for i in range(no_of_tasks): + rmp_matrix[i][i] = np.full((no_sharable_layers,), 1.0) + infos = tasks[i].mujoco_env.info.getInfos() + for idx, layer_info in enumerate(infos): + if not layer_info.trainable: + rmp_matrix[i, :, idx] = np.nan + rmp_matrix[i, :, len(infos):] = np.nan + for ii in range(no_of_tasks): + if ii != i: + rmp_matrix[i, ii, layer_amount[ii]:] = np.nan + + # EXTRA: Crossover and effective crossovers per layer + lcrossover_matrix = np.copy(rmp_matrix) + lcrossover_matrix[lcrossover_matrix >= 0] = 0 + leffective_crossovers = np.copy(rmp_matrix) + leffective_crossovers[lcrossover_matrix >= 0] = 0 + + # Evaluate population the first time + res = evaluate_parallel(population[:pop], remote_pop, p_il, method) + + # Update local population and factorial costs + for i, res in enumerate(res): + population[i].update_fitness(res) + j, factorial_cost = res['skf'], res['objective'] + factorial_costs[i, j] = factorial_cost + + for generation in range(gen): + initial_time = time.time() + count = pop + 
factorial_costs[pop:, :] = np.inf + inorder = np.random.permutation(pop) + + for i in range(int(pop/2)): + # Get parents and offspring + p1 = population[inorder[i]] + p2 = population[inorder[i + int(pop/2)]] + c1 = population[count] + c2 = population[count+1] + c1.clear() + c2.clear() + count += 2 + + # Crossover ---- + rmp_row = rmp_matrix[p1.skf][p2.skf] + + if p1.skf == p2.skf: + # INTRA-task crossover if same skill factor + cross_layers = [False if isnan(i) else True for i in rmp_row] + else: + # else INTER-task crossover + cross_layers = [False if isnan(i) else rnd(i) for i in rmp_row] + + # Crossover factors + u = np.random.uniform(size=D_multitask) + cf = np.empty(shape=D_multitask) + cf[u <= 0.5] = np.power((2 * u[u <= 0.5]), (1 / (mu + 1))) + cf[u > 0.5] = np.power((2 * (1 - u[u > 0.5])), (-1 / (mu + 1))) + + if any(cross_layers): + # Required info + infop1 = tasks[p1.skf].mujoco_env + infop2 = tasks[p2.skf].mujoco_env + + # cross and get offspring + c1.rnvec = layer_wise_crossover(p1.rnvec, p2.rnvec, + cf, infop1, infop2, + cross_layers) + c2.rnvec = layer_wise_crossover(p2.rnvec, p1.rnvec, + cf, infop2, infop1, + cross_layers) + c1.layers_updated = cross_layers + c2.layers_updated = cross_layers + + # Update children skill factor + sf1 = 1 + np.round(np.random.uniform()) + sf2 = 1 + np.round(np.random.uniform()) + if sf1 == 1: + c1.mut_cross = (False, True) + c1.skf = p1.skf + c1.parents_skfactor = (p1.skf, p2.skf) + c1.parent_fitness = p1.objective + else: + c1.mut_cross = (False, True) + c1.skf = p2.skf + c1.parents_skfactor = (p2.skf, p1.skf) + c1.parent_fitness = p2.objective + + if sf2 == 1: + c2.mut_cross = (False, True) + c2.skf = p1.skf + c2.parents_skfactor = (p1.skf, p2.skf) + c2.parent_fitness = p1.objective + else: + c2.mut_cross = (False, True) + c2.skf = p2.skf + c2.parents_skfactor = (p2.skf, p1.skf) + c2.parent_fitness = p2.objective + + else: + c1.rnvec = mutate(c1.rnvec, D_multitask, mum) + c2.rnvec = mutate(c2.rnvec, D_multitask, mum) 
+ + # Always get parents skill factor + c1.skf = p1.skf + c1.parents_skfactor = (p1.skf, p1.skf) + c1.parent_fitness = p1.objective + c1.mut_cross = (True, False) + + c2.skf = p2.skf + c2.parents_skfactor = (p2.skf, p2.skf) + c2.parent_fitness = p2.objective + c2.mut_cross = (True, False) + + # Evaluate the offspring + res = evaluate_parallel(population[pop:], remote_pop, p_il, method) + + # Update offspring values + for i, individual in enumerate(population[pop:]): + individual.update_fitness(res[i]) + j, factorial_cost = individual.skf, individual.objective + factorial_costs[pop+i, j] = factorial_cost + individual = population[pop+i] + + # Update crossover matrix + if individual.mut_cross[1]: + a, b = individual.parents_skfactor + mask = np.array(individual.layers_updated) + mask = np.where(mask==True) + crossover_matrix[a][b] += 1 + lcrossover_matrix[a][b][individual.layers_updated] += 1 + + # If child is better + if individual.objective < individual.parent_fitness: + effective_crossovers[a][b] += 1 + leffective_crossovers[a][b][individual.layers_updated] += 1 + if a != b: + new_rmp = rmp_matrix[a][b] + new_rmp[mask] += 0.1 + new_rmp = np.clip(new_rmp, 0.15, 0.95) + rmp_matrix[a][b] = new_rmp + + # If child is worse + elif a != b: + new_rmp = rmp_matrix[a][b] + new_rmp[mask] -= 1e-2 + new_rmp = np.clip(new_rmp, 0.15, 0.95) + rmp_matrix[a][b] = new_rmp + + # Update mutation matrix + elif individual.mut_cross[0]: + skf = individual.skf + mutation_matrix[skf] += 1 + if individual.objective < individual.parent_fitness: + effective_mutations[skf] += 1 + + for j in range(no_of_tasks): + factorial_cost_j = factorial_costs[:, j] + indices = list(range(len(factorial_cost_j))) + indices.sort(key=lambda x: factorial_cost_j[x]) + ranks = np.empty(shape=2*pop) + for i, x in enumerate(indices): + ranks[x] = i + 1 + factorial_ranks[:, j] = ranks + + for i in range(2*pop): + population[i].scalar_fitness = 1/np.min(factorial_ranks[i]) + + if selproc == 'elitist': + 
scalar_fitnesses = np.array([x.scalar_fitness for x in population]) + y = np.argsort(scalar_fitnesses)[::-1] + population = population[y] + factorial_costs = factorial_costs[y] + factorial_ranks = factorial_ranks[y] + + elif selproc == 'roulette wheel': + scalar_fitnesses = np.array([x.scalar_fitness for x in population]) + for i in range(pop): + RW = RouletteWheelSelection + population[i] = population[RW(scalar_fitnesses)] + + final_time = time.time() - initial_time + # Data collection and reporting + for j in range(no_of_tasks): + xxx = np.argmin(factorial_costs[:, j]) + if(best_tmp[j] > factorial_costs[xxx, j]): + bestobj[generation, j] = factorial_costs[xxx, j] + best_tmp[j] = factorial_costs[xxx, j] + + pop_sf = np.array([p.skf for p in population[:pop]]) + fc = np.array(factorial_costs) + pop_std = [] + pop_mean = [] + for i in range(len(tasks)): + index = np.where(pop_sf == i) + mean = np.mean(fc[:, i][index]) + std = np.std(fc[:, i][index]) + pop_mean.append(mean) + pop_std.append(std) + + # Num of individuals per task + popskf = np.full((len(tasks), ), 0) + for i in range(pop): + popskf[population[i].skf] += 1 + + # Data + pop_info = {'crossover_matrix': crossover_matrix, + 'effective_crossovers': effective_crossovers, + 'layer_crossover_matrix': lcrossover_matrix, + 'layer_effective_crossovers': leffective_crossovers, + 'mutation_matrix': mutation_matrix, + 'effective_mutations': effective_mutations, + 'rmp_matrix': rmp_matrix, + 'num_tasks': no_of_tasks, + 'configuration': {'pop_size': pop, + 'generations': gen, + 'selection_process': selproc, + 'rmp': rmp, + 'dimension': D_multitask}} + + for j in range(no_of_tasks): + nnn = np.argsort(factorial_costs[:, j]) + pole_c = list(map(lambda x: x.candidate, population[nnn[:2]])) + pole_f = list(map(lambda x: x.objective, population[nnn[:2]])) + pole_s = list(map(lambda x: x.success_rate, population[nnn[:2]])) + # The two best solutions (candidate, fitness, success rate) + pole = list(zip(pole_c, pole_f, 
# -*- coding:utf-8 -*-
import numpy as np


# CROSSOVER - Specialized for DNNs ---
def layer_wise_crossover(p1, p2, cf, info, info2, layers):
    """Cross two genotypes layer by layer, restricted to shared layers.

    Args:
        p1: Genotype of the first parent (1-D array). The child starts as a
            copy of it, so genes outside the crossed segments come from p1.
        p2: Genotype of the second parent, indexed with the same offsets.
        cf: Per-gene crossover factors, indexed like ``p1``/``p2``.
        info: Object whose ``.info`` exposes ``getInfos()`` (per-layer
            descriptors with ``trainable``, ``w_index``, ``w_size``,
            ``b_index``, ``b_size``) and ``num_layers``.
        info2: Object whose ``.info.num_layers`` gives the second model's
            depth; only the first ``min(depth1, depth2)`` layers are crossed.
        layers: Per-layer flags; a layer is crossed only if its flag is
            truthy and the layer is trainable.

    Returns:
        A new array; ``p1`` itself is not modified.
    """
    layer_infos = info.info.getInfos()
    sharable_layers = min(info.info.num_layers, info2.info.num_layers)
    res = np.array(p1)
    # A distinct loop name avoids shadowing the ``info`` parameter.
    for i, layer_info in enumerate(layer_infos[:sharable_layers]):
        if layer_info.trainable and layers[i]:
            segments = ((layer_info.w_index, layer_info.w_size),
                        (layer_info.b_index, layer_info.b_size))
            # Weights first, then biases; both use the same operator.
            for start, size in segments:
                end = start + size
                res[start:end] = crossover(p1[start:end], p2[start:end],
                                           cf[start:end])
    return res


# MUTATOR - polynomial mutation ---
def mutate(p, dim, mum):
    """Return a mutated copy of ``p``.

    Each of the first ``dim`` genes is mutated independently with
    probability ``1 / dim``. ``mum`` controls the exponent ``1 / (1 + mum)``
    applied to the random draw (presumably the polynomial-mutation
    distribution index; larger values give smaller perturbations).

    The random draws are made gene by gene in index order, so results are
    reproducible under a fixed NumPy seed.
    """
    p_tmp = np.copy(p)
    for i in range(dim):
        if np.random.uniform() < (1 / dim):
            u = np.random.uniform()
            if u <= 0.5:
                # delta in [-1, 0]: shrink the gene towards 0.
                delta = (2 * u) ** (1 / (1 + mum)) - 1
                p_tmp[i] = p[i] + delta * p[i]
            else:
                # delta in [0, 1]: push the gene towards 1.
                delta = 1 - (2 * (1 - u)) ** (1 / (1 + mum))
                p_tmp[i] = p[i] + delta * (1 - p[i])
    return p_tmp


# Find random candidate of same skill factor
def find_relative(population, skill_factor, N):
    """Pick a random individual among the first ``N`` with ``skf == skill_factor``.

    Raises:
        ValueError: if none of the first ``N`` individuals has that skill
            factor.
    """
    pop_sf = np.array([p.skf for p in population[:N]])
    candidates = np.where(pop_sf == skill_factor)[0]
    if candidates.size == 0:
        # np.random.choice would also raise ValueError here, but with a
        # message that does not mention the skill factor.
        raise ValueError(
            f"no individual with skill factor {skill_factor!r} "
            f"among the first {N}")
    index = np.random.choice(candidates)
    return population[index]


# Simulated Binary Crossover
def crossover(p1, p2, cf):
    """Blend two genotypes and clamp the child to ``[0, 1]``.

    ``p1`` and ``p2`` are genotypes (ndarray, shape ``(dim,)``); ``cf`` is
    the crossover factor, broadcast against them.
    """
    child = 0.5 * ((1 + cf) * p1 + (1 - cf) * p2)
    return np.clip(child, 0, 1)


# Selector ---
def RouletteWheelSelection(fitness):
    """Return an index drawn with probability proportional to ``fitness``.

    Args:
        fitness: Non-empty sequence of weights (expected to be
            non-negative).

    Returns:
        The selected index as an ``int``.

    Raises:
        ValueError: if ``fitness`` is empty.
    """
    length = len(fitness)
    if length == 0:
        raise ValueError("cannot select from an empty fitness sequence")
    if length == 1:
        return 0
    accumulator = 0
    sumFits = np.sum(fitness)
    rndPoints = np.random.uniform(low=0, high=sumFits)
    for index, val in enumerate(fitness):
        accumulator += val
        if accumulator >= rndPoints:
            return index
    # np.sum and the sequential accumulation can differ by a rounding error,
    # so the loop may finish just below rndPoints; fall back to the last
    # index instead of implicitly returning None.
    return length - 1
CreateBigModel} + self.name = name + env, task, model_type = get_env(name) + self.env = env + self.metaworld_task = task + self.model_type = model_type + self.model = model_types[model_type](self.env) + self.lb = lb + self.ub = ub + self.info = None + self.episodes = 100 + + def setInfo(self, info): + self.info = info + self.D = info.D + + def test(self, num_episodes, max_steps=None, candidate=None, render=False, + decode=False, frame_skip=1): + if candidate is not None: + if decode: + candidate = self._extract_candidate(candidate) + candidate = self._decode_candidate(candidate) + self._set_weights(candidate) + + if max_steps is None: + self.max_steps = self.env.max_path_length + else: + self.max_steps = max_steps + rewards = [] + win_times = np.full((num_episodes,), 0) + for episode in range(num_episodes): + episode_reward = 0 + self.env.set_task(self.metaworld_task) + obs = deepcopy(self.env.reset()) + for step in range(self.max_steps): + action = self.model.predict(np.array([obs]))[0] + obs, r, done, info = self.env.step(action) + obs = deepcopy(obs) + episode_reward += r + if bool(info['success']): + try: + _tr = self.env.target_reward + except AttributeError: + _tr = 0 + episode_reward += (self.max_steps-step) * (_tr * 0.75) + win_times[episode] = 1 + break + rewards.append(episode_reward) + success_rate = sum(win_times) / num_episodes + # print(f"Rew: {sum(rewards)/len(rewards)} / sr: {success_rate}") + return rewards, success_rate + + def _extract_candidate(self, candidate): + new_candidate = [] + for i, l in enumerate(self.model.layers): + l_info = getattr(self.info, f"layer{i}") + if(l_info.trainable): + b = candidate[l_info.b_index: l_info.b_index + l_info.b_size] + w = candidate[l_info.w_index: l_info.w_index + l_info.w_size] + b = candidate[l_info.b_index: l_info.b_index + l_info.b_size] + new_candidate += list(w) + new_candidate += list(b) + return new_candidate + + def _decode_candidate(self, candidate): + candidate = np.array(candidate) + return 
def _build_mlp(env, hidden_units):
    """Build, print and return an MLP policy network for ``env``.

    The network has a 256-unit relu input layer sized from
    ``env.observation_space``, one relu layer per entry of ``hidden_units``,
    and a linear output layer with one unit per action dimension of
    ``env.action_space``. ``MLP`` is the class imported from ``.myNet`` at
    the top of this module.
    """
    nb_actions = env.action_space.shape[0]
    newnet = MLP()
    newnet.AddLayer(256, input_units=env.observation_space.shape[0],
                    activation='relu')
    for units in hidden_units:
        newnet.AddLayer(units, activation='relu')
    newnet.AddLayer(nb_actions, activation='linear')
    newnet.summary()
    return newnet


def CreateModel(env):
    """Small architecture: two 128-unit hidden layers after the input layer."""
    return _build_mlp(env, (128, 128))


def CreateMediumModel(env):
    """Medium architecture: three 128-unit hidden layers after the input layer."""
    return _build_mlp(env, (128, 128, 128))


def CreateBigModel(env):
    """Big architecture: four 128-unit hidden layers after the input layer."""
    return _build_mlp(env, (128, 128, 128, 128))
def get_env(name):
    """Instantiate the environment ``name`` with its task and model size.

    Returns:
        A tuple ``(env, task, model_type)``: a fresh instance of
        ``ALL_ENV_DICT[name]``, the first task produced by ``_make_tasks``
        for it, and the architecture label from ``ENV_ARQ``.
    """
    # The v1 reach/push/pick-place families need an explicit task type.
    task_types = {
        'reach-v1': 'reach',
        'reach-wall-v1': 'reach',
        'push-v1': 'push',
        'push-wall-v1': 'push',
        'pick-place-v1': 'pick_place',
        'pick-place-wall-v1': 'pick_place',
    }
    env_kwargs = {'task_id': 0}
    if name in task_types:
        env_kwargs['task_type'] = task_types[name]

    spec = {name: {'args': [], 'kwargs': env_kwargs}}
    made_tasks = _make_tasks(ALL_ENV_DICT, spec, FLAG)
    task = made_tasks[0]
    env_kwargs.pop('task_id')

    env = ALL_ENV_DICT[name]()
    model_type = ENV_ARQ[name]
    return env, task, model_type
'door-close-v1': ARQ_TYPE[2], + 'reach-wall-v1': ARQ_TYPE[0], + 'pick-place-wall-v1': ARQ_TYPE[2], + 'push-wall-v1': ARQ_TYPE[1], + 'button-press-v1': ARQ_TYPE[1], + 'button-press-topdown-wall-v1': ARQ_TYPE[2], + 'button-press-wall-v1': ARQ_TYPE[2], + 'peg-unplug-side-v1': ARQ_TYPE[2], + 'disassemble-v1': ARQ_TYPE[2], + 'hammer-v1': ARQ_TYPE[2], + 'plate-slide-v1': ARQ_TYPE[1], + 'plate-slide-side-v1': ARQ_TYPE[1], + 'plate-slide-back-v1': ARQ_TYPE[1], + 'plate-slide-back-side-v1': ARQ_TYPE[1], + 'handle-press-v1': ARQ_TYPE[2], + 'handle-pull-v1': ARQ_TYPE[2], + 'handle-press-side-v1': ARQ_TYPE[2], + 'handle-pull-side-v1': ARQ_TYPE[2], + 'stick-push-v1': ARQ_TYPE[2], + 'stick-pull-v1': ARQ_TYPE[2], + 'basketball-v1': ARQ_TYPE[2], + 'soccer-v1': ARQ_TYPE[0], + 'faucet-open-v1': ARQ_TYPE[1], + 'faucet-close-v1': ARQ_TYPE[1], + 'coffee-push-v1': ARQ_TYPE[1], + 'coffee-pull-v1': ARQ_TYPE[1], + 'coffee-button-v1': ARQ_TYPE[2], + 'sweep-v1': ARQ_TYPE[0], + 'sweep-into-v1': ARQ_TYPE[0], + 'pick-out-of-hole-v1': ARQ_TYPE[2], + 'assembly-v1': ARQ_TYPE[2], + 'shelf-place-v1': ARQ_TYPE[2], + 'push-back-v1': ARQ_TYPE[0], + 'lever-pull-v1': ARQ_TYPE[1], + 'dial-turn-v1': ARQ_TYPE[2], + 'bin-picking-v1': ARQ_TYPE[2], + 'box-close-v1': ARQ_TYPE[2], + 'hand-insert-v1': ARQ_TYPE[1], + 'door-lock-v1': ARQ_TYPE[2], + 'door-unlock-v1': ARQ_TYPE[1], + 'assembly-v2': ARQ_TYPE[2], + 'basketball-v2': ARQ_TYPE[2], + 'bin-picking-v2': ARQ_TYPE[2], + 'box-close-v2': ARQ_TYPE[2], + 'button-press-topdown-v2': ARQ_TYPE[1], + 'button-press-topdown-wall-v2': ARQ_TYPE[2], + 'button-press-v2': ARQ_TYPE[1], + 'button-press-wall-v2': ARQ_TYPE[2], + 'coffee-button-v2': ARQ_TYPE[2], + 'coffee-pull-v2': ARQ_TYPE[1], + 'coffee-push-v2': ARQ_TYPE[1], + 'dial-turn-v2': ARQ_TYPE[2], + 'disassemble-v2': ARQ_TYPE[2], + 'door-close-v2': ARQ_TYPE[2], + 'door-lock-v2': ARQ_TYPE[1], + 'door-open-v2': ARQ_TYPE[2], + 'door-unlock-v2': ARQ_TYPE[1], + 'hand-insert-v2': ARQ_TYPE[1], + 'drawer-close-v2': ARQ_TYPE[1], + 
'drawer-open-v2': ARQ_TYPE[1], + 'faucet-open-v2': ARQ_TYPE[1], + 'faucet-close-v2': ARQ_TYPE[1], + 'hammer-v2': ARQ_TYPE[2], + 'handle-press-side-v2': ARQ_TYPE[1], + 'handle-press-v2': ARQ_TYPE[1], + 'handle-pull-side-v2': ARQ_TYPE[2], + 'handle-pull-v2': ARQ_TYPE[2], + 'lever-pull-v2': ARQ_TYPE[2], + 'peg-insert-side-v2': ARQ_TYPE[2], + 'pick-place-wall-v2': ARQ_TYPE[2], + 'pick-out-of-hole-v2': ARQ_TYPE[2], + 'reach-v2': ARQ_TYPE[0], + 'push-back-v2': ARQ_TYPE[0], + 'push-v2': ARQ_TYPE[0], + 'pick-place-v2': ARQ_TYPE[2], + 'plate-slide-v2': ARQ_TYPE[1], + 'plate-slide-side-v2': ARQ_TYPE[1], + 'plate-slide-back-v2': ARQ_TYPE[1], + 'plate-slide-back-side-v2': ARQ_TYPE[1], + 'peg-unplug-side-v2': ARQ_TYPE[2], + 'soccer-v2': ARQ_TYPE[0], + 'stick-push-v2': ARQ_TYPE[2], + 'stick-pull-v2': ARQ_TYPE[2], + 'push-wall-v2': ARQ_TYPE[1], + 'reach-wall-v2': ARQ_TYPE[0], + 'shelf-place-v2': ARQ_TYPE[2], + 'sweep-into-v2': ARQ_TYPE[1], + 'sweep-v2': ARQ_TYPE[0], + 'window-open-v2': ARQ_TYPE[2], + 'window-close-v2': ARQ_TYPE[2] +} diff --git a/mujoco/myNet.py b/mujoco/myNet.py new file mode 100644 index 0000000000000000000000000000000000000000..7d9b2c917ecf82d61b5e2ebf6e64fd68ea34ba89 --- /dev/null +++ b/mujoco/myNet.py @@ -0,0 +1,87 @@ +import numpy as np + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# ACTIVATION FUNCTIONS +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +def linear(X): + return X + + +def relu(X): + return np.maximum(X, np.zeros(X.shape)) + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# LAYERS +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +class Layer(object): + def __init__(self, input_units, output_units, activation='linear'): + activations = {'relu': relu, 'linear': linear} + self.input_units = input_units + self.output_units = output_units + self.w = np.random.uniform(-1, 1, (input_units, output_units)) + self.b = np.random.uniform(-1, 1, (output_units,)) + self.activation = activations[activation] + self.trainable_params = np.prod(self.w.shape) + np.prod(self.b.shape) + + def forward(self, input_data): + return 
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# MULTI-LAYER PERCEPTRON IMPLEMENTATION
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
class MLP(object):
    """Feed-forward network made of dense ``Layer`` objects applied in order."""

    def __init__(self):
        # Start with an empty list (not None) so summary(), predict() and
        # count_params() work on a model that has no layers yet.
        self.layers = []

    def AddLayer(self, output_units, input_units=None, activation='linear'):
        """Append a dense layer.

        Args:
            output_units: Width of the new layer.
            input_units: Width of its input. May be omitted for every layer
                but the first, in which case the previous layer's
                ``output_units`` is used.
            activation: Activation name understood by ``Layer``.

        Raises:
            ValueError: if ``input_units`` is omitted for the first layer.
        """
        if input_units is None:
            if not self.layers:
                raise ValueError("input_units is required for the first layer")
            input_units = self.layers[-1].output_units
        self.layers.append(Layer(input_units, output_units, activation))

    def get_weights(self):
        """Return ``[w0, b0, w1, b1, ...]``, the layout ``set_weights`` reads."""
        weights = []
        for layer in self.layers:
            weights.extend((layer.w, layer.b))
        return weights

    def set_weights(self, weights):
        """Assign weights from a flat ``[w0, b0, w1, b1, ...]`` list.

        Each array's shape is asserted against the current layer before it
        is assigned; layers before a failing one keep their new values.
        """
        i = 0
        for layer in self.layers:
            new_w_shape = weights[i].shape
            new_b_shape = weights[i + 1].shape
            w_shape = layer.w.shape
            b_shape = layer.b.shape
            assert new_w_shape == w_shape, f"ErrorW: shape {w_shape} != {new_w_shape}"
            assert new_b_shape == b_shape, f"ErrorB shape {b_shape} != {new_b_shape}"
            layer.w = weights[i]
            layer.b = weights[i + 1]
            i += 2

    def count_params(self):
        """Return the total number of trainable parameters as an ``int``."""
        return int(sum(layer.trainable_params for layer in self.layers))

    def summary(self):
        """Print one line per layer followed by the total parameter count."""
        print("===========================")
        print(" MODEL SUMMARY")
        print("===========================")
        for i, layer in enumerate(self.layers):
            msg = f"Dense-{i} => shape(in,out): ({layer.input_units},{layer.output_units}),"
            msg += f"trainable_params: {format(layer.trainable_params)}"
            print(msg)
            print("~"*len(msg))
        print("Total params: " + "{:,}".format(self.count_params()))

    def predict(self, data):
        """Run ``data`` through every layer in order and return the output."""
        res = data
        for layer in self.layers:
            res = layer.forward(res)
        return res
FIRST_CALL = True


def callback_function(pop_info, fname, generation):
    """Print a per-task summary and pickle ``pop_info`` for this generation.

    On the first call in the process, ``summary/<fname>/`` is wiped and
    recreated, and the tasks' ``env`` entries are additionally saved to
    ``summary/<fname>/envs.pickle``. Every call writes
    ``summary/<fname>/<generation>.pickle``.

    Args:
        pop_info: Dict with ``'rmp_matrix'``,
            ``'layer_effective_crossovers'``, ``'num_tasks'`` and one
            ``'task<i>'`` entry per task holding ``name``, ``mean``,
            ``std``, ``num_ind``, ``env`` and ``pole`` (a list whose first
            item is ``(candidate, fitness, success_rate)`` of the best
            individual).
        fname: Name of the run; used as the output sub-directory.
        generation: Generation index; used as the pickle file name.
    """
    global FIRST_CALL
    first_call = FIRST_CALL
    if first_call:
        from shutil import rmtree
        from os import makedirs

        try:
            rmtree(f"summary/{fname}/")
        except FileNotFoundError:
            pass
        # makedirs also creates the parent "summary/" directory, which is
        # git-ignored and therefore missing on a fresh checkout.
        makedirs(f"summary/{fname}")

    print("\nRMP Matrix:")
    print(pop_info['rmp_matrix'])
    print("Effectives")
    print(pop_info['layer_effective_crossovers'])

    print('\n ~~ TASKS INFO ~~')
    envs = []
    means = []
    stds = []
    bests = []
    srates = []
    names = []
    pops = []
    for i in range(pop_info['num_tasks']):
        task = pop_info['task'+str(i)]
        best_entry = task['pole'][0]
        means.append(round(task['mean'], 3))
        stds.append(round(task['std'], 3))
        bests.append(round(best_entry[1], 3))
        srates.append(round(best_entry[2], 6))
        names.append(task['name'])
        pops.append(task['num_ind'])

        # Environments are only collected once; later calls skip them.
        if first_call:
            envs.append(task['env'])

    path = f"summary/{fname}/{str(generation)}.pickle"
    path_envs = f"summary/{fname}/envs.pickle"

    # PRINT
    colms = ['Name', 'Pop Amount', 'Best', 'Success Rate', 'Mean', 'Std']
    df = pd.DataFrame(list(zip(names, pops, bests, srates, means, stds)),
                      columns=colms)
    print(df.to_string(justify='center'))
    with open(path, 'wb') as f:
        pk.dump(pop_info, f)

    # Helps saving space on next calls
    if first_call:
        with open(path_envs, 'wb') as f:
            pk.dump(envs, f)

    FIRST_CALL = False
= bz2.BZ2File(path, 'rb') + data = cPickle.load(data) + return data