Add 1D environments
root committed Jun 15, 2024
1 parent bb2688b commit 805fa5e
Showing 19 changed files with 283,150 additions and 38 deletions.
5 changes: 4 additions & 1 deletion hydrogym/__init__.py
@@ -1,2 +1,5 @@
-from . import distributed, firedrake
+from . import distributed, firedrake, torch_env
 from .core import CallbackBase, FlowEnv, PDEBase, TransientSolver
+from .core_1DEnvs import OneDimEnv, PDESolverBase1D
+
+from .torch_env import Kuramoto_Sivashinsky, Burgers # isort:skip
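The new exports are exercised by the test block at the bottom of core_1DEnvs.py later in this commit. For orientation, a minimal usage sketch along those lines (assuming the ks_init.tensor restart file and the torch_env module added by this commit are available; this snippet is not part of the diff):

# Illustrative sketch, mirroring the __main__ block in core_1DEnvs.py.
import torch
from hydrogym import OneDimEnv, Kuramoto_Sivashinsky

config = {
    "flow": Kuramoto_Sivashinsky,
    "flow_config": {"dt": 0.001, "restart": "ks_init.tensor", "device": "cpu"},
    "max_steps": 100,
}
env = OneDimEnv(config)
obs, info = env.reset()
action = torch.zeros(env.action_space.shape)
obs, reward, done, info = env.step(action)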
14 changes: 11 additions & 3 deletions hydrogym/core.py
@@ -1,8 +1,8 @@
 import abc
 from typing import Any, Callable, Iterable, Tuple, TypeVar, Union

-# import gym
-import gymnasium as gym
+import gym
+# import gymnasium as gym
 import numpy as np
 from numpy.typing import ArrayLike

@@ -48,6 +48,7 @@ class PDEBase(metaclass=abc.ABCMeta):

     def __init__(self, **config):
         self.mesh = self.load_mesh(name=config.get("mesh", self.DEFAULT_MESH))
+        self.reward_lambda = config.get("reward_lambda", 0.0)
         self.initialize_state()

         self.reset()
@@ -187,6 +188,7 @@ def advance_time(self, dt: float, act: list[float] = None) -> list[float]:
         Returns:
             Iterable[ArrayLike]: Updated actuator state
         """
+
         if act is None:
             act = self.control_state
         self.t += dt
@@ -363,7 +365,6 @@ def reset(self, t=0.0):
 class FlowEnv(gym.Env):

     def __init__(self, env_config: dict):
-
         self.flow: PDEBase = env_config.get("flow")(
             **env_config.get("flow_config", {}))

@@ -438,10 +439,14 @@ def step(
         Returns:
             Tuple[ObsType, float, bool, dict]: obs, reward, done, info
         """
+        action = action * self.flow.CONTROL_SCALING
+
         if self.num_sim_substeps_per_actuation is not None and self.num_sim_substeps_per_actuation > 1:
             self.action = action
             self.flow = self.solver.solve_multistep(num_substeps=self.num_sim_substeps_per_actuation, callbacks=[self.rewardLogCallback], controller=self.constant_action_controller, start_iteration_value=self.iter)
             if self.reward_aggreation_rule == "mean":
+                # print('flow_array', self.flow.reward_array,flush=True)
+                # print('mean flow_array', np.mean(self.flow.reward_array, axis=0),flush=True)
                 averaged_objective_values = np.mean(self.flow.reward_array, axis=0)
             elif self.reward_aggreation_rule == "sum":
                 averaged_objective_values = np.sum(self.flow.reward_array, axis=0)
@@ -509,3 +514,6 @@ def render(self, mode="human", **kwargs):
     def close(self):
         for cb in self.callbacks:
             cb.close()
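Two behavioural changes are visible in the step() hunk above: the incoming action is rescaled by the flow's new CONTROL_SCALING constant before actuation, and when several solver substeps are run per actuation the objective values logged by rewardLogCallback are reduced with either a "mean" or a "sum" rule. A minimal sketch of that reduction with hypothetical logged values (not part of the diff):

import numpy as np

reward_array = np.array([[1.2], [1.0], [0.8]])  # hypothetical log, one row per substep
mean_values = np.mean(reward_array, axis=0)     # "mean" rule -> array([1.])
sum_values = np.sum(reward_array, axis=0)       # "sum" rule  -> array([3.])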



194 changes: 194 additions & 0 deletions hydrogym/core_1DEnvs.py
@@ -0,0 +1,194 @@
import abc
from typing import Any, Callable, Iterable, Tuple, TypeVar, Union

import gym
# import gymnasium as gym
import numpy as np
import torch
from numpy.typing import ArrayLike

class OneDimEnv(gym.Env):

    def __init__(self, env_config: dict):
        self.backend = env_config.get("backend", {"torch"})


        self.solver: PDESolverBase1D = env_config.get("flow")(
            **env_config.get("flow_config", {}))

        self.set_seed(env_config.get("seed"))

        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.solver.num_outputs,),
            dtype=float,
        )

        self.action_space = gym.spaces.Box(
            low=self.solver.MAX_CONTROL_LOW,
            high=self.solver.MAX_CONTROL_UP,
            shape=(self.solver.num_inputs,),
            dtype=float,
        )

        self.max_steps = env_config.get("max_steps", 1e6)
        self.dt = env_config.get("flow_config", {}).get("dt", None)
        assert self.dt is not None, f"Error: Solver timestep dt ({self.dt}) must not be None"

    def set_seed(self, seed):
        np.random.seed(seed)
        self.solver.set_seed(seed)

    def constant_action_controller(self, t, y):
        return self.action

    def step(
        self, action: Iterable[ArrayLike] = None
    ) -> Tuple[ArrayLike, float, bool, dict]:
        """Advance the state of the environment. See gym.Env documentation
        Args:
            action (Iterable[ActType], optional): Control inputs. Defaults to None.
        Returns:
            Tuple[ObsType, float, bool, dict]: obs, reward, done, info
        """

        self.solver.step(control=action)
        reward = self.get_reward()
        obs = self.solver.get_observations()
        if reward == -torch.inf:
            done = True
        else:
            done = self.check_complete()
        info = {}

        self.solver.iter += 1

        return obs, reward, done, info

    def get_reward(self):
        return -self.solver.evaluate_objective()

    def check_complete(self):
        return False if self.solver.iter < self.max_steps else True

    def reset(self) -> Union[ArrayLike, Tuple[ArrayLike, dict]]:

        self.solver.reset()
        info = {}

        return self.solver.get_observations(), info

    def render(self, mode="human", **kwargs):
        self.solver.render(mode=mode, **kwargs)


class PDESolverBase1D(metaclass=abc.ABCMeta):
    """
    Basic configuration of the 1D PDE Solver
    """

    MAX_CONTROL = np.inf
    DEFAULT_DT = np.inf

    def __init__(self, **config):


        self.initialize_state()

        self.reset()

        if config.get("restart"):
            self.load_checkpoint(config["restart"][0])

    @property
    @abc.abstractmethod
    def num_inputs(self) -> int:
        """Length of the control vector (number of actuators)"""
        pass

    @property
    @abc.abstractmethod
    def num_outputs(self) -> int:
        """Number of scalar observed variables"""
        pass

    def reset(self):
        """Reset the PDE to an initial state"""
        pass

    @abc.abstractmethod
    def get_observations(self) -> Iterable[ArrayLike]:
        """Return the set of measurements/observations"""
        pass

    @abc.abstractmethod
    def render(self, **kwargs):
        """Plot the current PDE state (called by `gym.Env`)"""
        pass

    @abc.abstractmethod
    def evaluate_objective(self) -> ArrayLike:
        """Return the objective function to be minimized"""
        pass

if __name__ == '__main__':
    # torch.set_default_tensor_type('torch.cuda.FloatTensor')
    from tqdm import tqdm
    import torch

    from hydrogym.torch_env import Kuramoto_Sivashinsky  # used in the config below

    torch.manual_seed(0)
    np.random.seed(0)

    config = {
        "flow": Kuramoto_Sivashinsky,
        "flow_config": {
            "dt": 0.001,
            "restart": 'ks_init.tensor',
            "num_sim_substeps_per_actuation": 250,
            "device": 'cpu'
        },
        "max_steps": 100,
    }

    # ks_init = torch.load('/net/beegfs-hpc/work/lagemannk/container/hydrogym_dev2/home/firedrake/hydrogym_rllib_multiEnv/hydrogym/hydrogym/torch_env/ks_init.tensor')
    env = OneDimEnv(config)

    action_space = env.action_space
    observation_space = env.observation_space

    env.reset()

    action = torch.zeros(action_space.shape)

    results = []
    for _ in tqdm(range(2000)):
        obs, reward, done, info = env.step(action)
        results.append(obs.cpu().numpy())
        if done:
            print('environment should be reset')
            env.reset()

    results = np.stack(results)
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    plt.figure(figsize=(16,8))
    plt.imshow(np.transpose(results), cmap='bwr', vmin=-2.0, vmax=2.0)
    plt.xlabel('step')
    plt.ylabel('observation')
    plt.colorbar()
    plt.show()
    plt.savefig('/net/beegfs-hpc/work/lagemannk/container/workspace_christian_ext/logs/result.png', bbox_inches='tight')
    plt.close()

    env.reset()
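PDESolverBase1D only pins down the interface; the concrete Kuramoto_Sivashinsky and Burgers solvers live in hydrogym/torch_env, added by this commit but not expanded here. As a rough illustration of what OneDimEnv expects from a solver (step, set_seed, an iter counter, MAX_CONTROL_LOW/MAX_CONTROL_UP bounds, plus the abstract methods above), here is a toy subclass; it is not taken from the commit:

# Toy solver, illustrative only: a 1D field relaxing toward the control input.
import numpy as np
from hydrogym.core_1DEnvs import OneDimEnv, PDESolverBase1D

class ToyRelaxation(PDESolverBase1D):
    # Control bounds read by OneDimEnv when it builds the action space.
    MAX_CONTROL_LOW = -1.0
    MAX_CONTROL_UP = 1.0

    def __init__(self, dt=0.01, **config):
        self.dt = dt
        super().__init__(**config)

    def initialize_state(self):
        self.u = np.zeros(32)  # state on 32 grid points
        self.iter = 0          # step counter, incremented by OneDimEnv

    def reset(self):
        self.u[:] = 0.0
        self.iter = 0

    def set_seed(self, seed):
        self.rng = np.random.default_rng(seed)

    @property
    def num_inputs(self) -> int:
        return 1

    @property
    def num_outputs(self) -> int:
        return self.u.size

    def step(self, control=None):
        forcing = 0.0 if control is None else float(np.asarray(control).ravel()[0])
        self.u += self.dt * (forcing - self.u)  # relax toward the control value

    def get_observations(self):
        return self.u.copy()

    def render(self, mode="human", **kwargs):
        print(self.u)

    def evaluate_objective(self):
        return float(np.sum(self.u**2))  # "energy" to be minimized

env = OneDimEnv({"flow": ToyRelaxation, "flow_config": {"dt": 0.01}, "max_steps": 10})
obs, info = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())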





22 changes: 14 additions & 8 deletions hydrogym/firedrake/envs/cavity/flow.py
@@ -17,14 +17,19 @@ class Cavity(FlowConfig):
     X, Y = np.meshgrid(xp, yp)
     DEFAULT_VEL_PROBES = [(x, y) for x, y in zip(X.ravel(), Y.ravel())]

-    # Pressure probes (spaced equally around the cylinder)
-    xp = np.linspace(0.25, 0.95, 4)
-    yp = np.linspace(-0.075, 0.075, 3)
+    # # Pressure probes (spaced equally around the cylinder)
+    # xp = np.linspace(0.25, 0.95, 4)
+    # yp = np.linspace(-0.075, 0.075, 3)
+    # X, Y = np.meshgrid(xp, yp)
+    # DEFAULT_PRES_PROBES = [(x, y) for x, y in zip(X.ravel(), Y.ravel())]
+    # DEFAULT_PRES_PROBES.extend([(0.4833333, -0.15), (0.4833333, -0.225),
+    #                             (0.7166666, -0.15), (0.7166666, -0.225)])
+    # DEFAULT_PRES_PROBES.extend([(1.05, 0.005), (1.2, 0.005), (1.35, 0.005), (1.5, 0.005)])
+
+    xp = np.linspace(0.1, 0.5, 4)
+    yp = np.linspace(-0.025, 0.025, 3)
     X, Y = np.meshgrid(xp, yp)
     DEFAULT_PRES_PROBES = [(x, y) for x, y in zip(X.ravel(), Y.ravel())]
-    DEFAULT_PRES_PROBES.extend([(0.4833333, -0.15), (0.4833333, -0.225),
-                                (0.7166666, -0.15), (0.7166666, -0.225)])
-    DEFAULT_PRES_PROBES.extend([(1.05, 0.005), (1.2, 0.005), (1.35, 0.005), (1.5, 0.005)])

     DEFAULT_VORT_PROBES = DEFAULT_VEL_PROBES

@@ -36,7 +41,8 @@ class Cavity(FlowConfig):
     FUNCTIONS = ("q", "qB"
                  ) # This flow needs a base flow to compute fluctuation KE

-    MAX_CONTROL = 0.1
+    MAX_CONTROL = 1.0
+    CONTROL_SCALING = 0.1
     TAU = 0.075 # Time constant for controller damping (0.01*instability frequency)

     # Domain labels
@@ -131,7 +137,7 @@ def evaluate_objective(self, q=None, qB=None, averaged_objective_values=None, re
             uB = qB.subfunctions[0]
             KE = 0.5 * fd.assemble(fd.inner(u - uB, u - uB) * fd.dx)
         else:
-            KE = averaged_objective_values
+            KE = averaged_objective_values[0]
         return KE

     def render(self, mode="human", clim=None, levels=None, cmap="RdBu", **kwargs):
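Besides the probe layout and the MAX_CONTROL/CONTROL_SCALING split, the only behavioural change in evaluate_objective is the [0] index: with the multistep reward logging added in core.py, the aggregated objective arrives as a length-one array rather than a scalar, so the cavity's single fluctuation-KE value is unpacked. A small illustration with hypothetical logged values (not from the commit); the control authority itself appears unchanged, assuming the action space is still bounded by MAX_CONTROL, since 1.0 * CONTROL_SCALING = 0.1 matches the old bound:

import numpy as np

ke_per_substep = np.array([[0.12], [0.10], [0.11]])          # hypothetical KE log
averaged_objective_values = np.mean(ke_per_substep, axis=0)  # -> array([0.11])
KE = averaged_objective_values[0]                            # scalar objective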
10 changes: 6 additions & 4 deletions hydrogym/firedrake/envs/cylinder/flow.py
@@ -56,8 +56,8 @@ class CylinderBase(FlowConfig):

     # MAX_CONTROL = 0.5 * np.pi
     # TAU = 0.556 # Time constant for controller damping (0.1*vortex shedding period)
-    TAU = 0.278 # Time constant for controller damping (0.05*vortex shedding period)
-    # TAU = 0.0556 # Time constant for controller damping (0.01*vortex shedding period)
+    # TAU = 0.278 # Time constant for controller damping (0.05*vortex shedding period)
+    TAU = 0.0556 # Time constant for controller damping (0.01*vortex shedding period)

     # Domain labels
     FLUID = 1
@@ -232,7 +232,8 @@ def render(self, mode="human", clim=None, levels=None, cmap="RdBu", **kwargs):


 class RotaryCylinder(CylinderBase):
-    MAX_CONTROL = 4.0
+    MAX_CONTROL = 1.0
+    CONTROL_SCALING = 5.0
     DEFAULT_DT = 1e-2

     @property
@@ -245,7 +246,8 @@ def cyl_velocity_field(self):


 class Cylinder(CylinderBase):
-    MAX_CONTROL = 0.15
+    MAX_CONTROL = 0.1
+    CONTROL_SCALING = 1.0
     DEFAULT_DT = 1e-2

     @property
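Both cylinder variants get the same MAX_CONTROL/CONTROL_SCALING split. Assuming the Gym action space is still bounded by MAX_CONTROL and the scaling is applied in FlowEnv.step as shown in the core.py hunk above, the effective physical control bounds work out as follows (a rough check, not from the commit):

# Effective physical bound = MAX_CONTROL * CONTROL_SCALING (assumed relationship).
rotary_bound = 1.0 * 5.0    # RotaryCylinder: was 4.0, now +/-5.0
cylinder_bound = 0.1 * 1.0  # Cylinder: was 0.15, now +/-0.1
print(rotary_bound, cylinder_bound)  # 5.0 0.1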
14 changes: 9 additions & 5 deletions hydrogym/firedrake/envs/pinball/flow.py
@@ -16,8 +16,8 @@ class Pinball(FlowConfig):
     rad = 0.5

     # Velocity probes
-    xp = np.linspace(rad * 1.5 * 1.866 + 1.5, rad * 1.5 * 1.866 + 4.5, 4)
-    yp = np.linspace(-(1.5 * rad + 0.75), (1.5 * rad + 0.75), 4)
+    xp = np.linspace(3, 9, 6)
+    yp = np.linspace(-1.25, 1.25, 5)
     X, Y = np.meshgrid(xp, yp)
     DEFAULT_VEL_PROBES = [(x, y) for x, y in zip(X.ravel(), Y.ravel())]

@@ -58,8 +58,10 @@ class Pinball(FlowConfig):
     x0 = [0.0, rad * 1.5 * 1.866, rad * 1.5 * 1.866]
     y0 = [0.0, 1.5 * rad, -1.5 * rad]

-    MAX_CONTROL = 4.0 #0.5 * np.pi
-    TAU = 1.0 # TODO: Tune this based on vortex shedding period
+    MAX_CONTROL = 1.0 #0.5 * np.pi
+    CONTROL_SCALING = 5.0
+    # TAU = 0.5 # TODO: Tune this based on vortex shedding period
+    TAU = 0.04 # Time constant for controller damping (0.01*vortex shedding period)

     MESH_DIR = os.path.abspath(f"{__file__}/..")

@@ -152,7 +154,9 @@ def evaluate_objective(self, q: fd.Function = None, averaged_objective_values=No
             return [*CL, *CD]
         else:
             CL, CD = averaged_objective_values[:3], averaged_objective_values[3:]
-            return sum(np.square(CD))
+            # print('pinball lambda', self.reward_lambda, CD, sum(np.square(CD)), flush=True)
+            return sum(np.square(CD)) + self.reward_lambda * sum(np.square(CL))
+            # return -(1.5 - (sum(CD) + self.reward_lambda * np.abs(sum(CL))))

     def render(self, mode="human", clim=None, levels=None, cmap="RdBu", **kwargs):
         if clim is None:
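The pinball objective now couples drag and lift through the reward_lambda weight introduced in PDEBase (default 0.0, which reproduces the previous sum-of-squared-drag objective unless reward_lambda is set in the flow config). A sketch of the returned quantity, not the committed implementation:

# Illustrative only: aggregated values are ordered [CL1, CL2, CL3, CD1, CD2, CD3].
import numpy as np

def pinball_objective(averaged_objective_values, reward_lambda=0.0):
    CL = averaged_objective_values[:3]  # lift coefficients of the three cylinders
    CD = averaged_objective_values[3:]  # drag coefficients
    return np.sum(np.square(CD)) + reward_lambda * np.sum(np.square(CL))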
