Commit

matthmey committed Nov 28, 2019
1 parent 24aacae commit d85d21d
Showing 5 changed files with 61 additions and 9 deletions.
24 changes: 24 additions & 0 deletions SLURM.txt
@@ -0,0 +1,24 @@
#!/bin/bash
#
#SBATCH --mail-type=ALL # mail configuration: NONE, BEGIN, END, FAIL, REQUEUE, ALL
#SBATCH --output=/itet-stor/matthmey/net_scratch/logs/log%j.log # where to store the output ( %j is the JOBID )
#SBATCH --cpus-per-task=1 # use 1 CPU
#SBATCH --gres=gpu:1 # use 1 GPU
#SBATCH --mem=32G # use 32GB
#SBATCH --account=tik # we are TIK!

#eval "$(pyenv init -)"
#eval "$(pyenv virtualenv-init -)"
source activate permafrost
#
echo Running on host: `hostname`
echo In directory: `pwd`
echo Starting on: `date`
echo SLURM_JOB_ID: $SLURM_JOB_ID
#
# binary to execute
cd /home/matthmey/data/projects/stuett/frontends/permafrostanalytics/
python -u ideas/machine_learning/classification.py -p /home/perma/permasense_vault/datasets/permafrost_hackathon/ -l --classifier seismic
echo finished at: `date`
exit 0;
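
A batch file like this is typically submitted with SLURM's standard commands (a usage sketch, assuming the script sits at the repository root as added here):

```
sbatch SLURM.txt   # submit the job; SLURM prints the assigned JOBID
squeue -u $USER    # check whether the job is pending or running
```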

8 changes: 7 additions & 1 deletion ideas/annotation/README.md
@@ -6,15 +6,21 @@ From the repository's base directory, execute
```
python ideas/annotation/images.py
```
In the console you will see a web address. Click on it or enter it into your browser.

The data will be loaded from Azure. You have the option to either load sparse (in time) high-resolution images or low resolution images with a higher capture frequency. (This has been done to minimize transfer costs; all images are available in high-resolution upon request)
The data will be loaded from Azure. You can either load sparse (in time) high-resolution images or low-resolution images with a higher capture frequency (this has been done to minimize transfer costs; all images are available in high resolution by adding the argument `-hq` when invoking the script).
You can also download the data (6.7 GB) yourself and use it locally (again executed from the base directory):

```
python utils/download_files.py -f timelapse_images_fast.zip
python ideas/annotation/images.py --local
```
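
To stream the sparse, full-resolution images from Azure instead, pass the `-hq` flag introduced in this commit (usage sketch):

```
python ideas/annotation/images.py -hq
```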

## Annotations

## Data collection
The tool stores your labels both in the cloud and locally. Locally, one ID is used per browser session (if you close and reopen the browser, you cannot see your old labels). At regular intervals we will merge all labels (with the script `utils/merge_annotations.py`) and upload them to the data folder on Azure. The image labeling tool will automatically download the annotations made by all participants, so you can fine-tune them, add further labels, and of course use all of them.

## What next?
There are many tools available for image labeling, but what if we want to label time series data? How do we even label time series data? Can we simply draw on the plot of a time series, or are there characteristics which are hidden and can only be revealed with another view of the data?
A way to plot annotations over time is presented in [timeseries.py](./timeseries.py).
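
As a minimal sketch of that idea (hypothetical labels, plain matplotlib; the actual approach is the one implemented in `timeseries.py`):

```
import numpy as np
import matplotlib.pyplot as plt

# hypothetical trace and labelled intervals (start [s], end [s], class)
t = np.linspace(0, 60, 6000)
trace = np.sin(2 * np.pi * 0.5 * t) + 0.3 * np.random.randn(t.size)
annotations = [(10, 18, "mountaineer"), (35, 41, "other noise")]
colors = {"mountaineer": "tab:orange", "other noise": "tab:blue"}

fig, ax = plt.subplots()
ax.plot(t, trace, linewidth=0.5, color="black")
for start, end, label in annotations:
    # shade each annotated span on top of the time series
    ax.axvspan(start, end, alpha=0.3, color=colors[label], label=label)
ax.set_xlabel("time [s]")
ax.legend()
plt.show()
```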
12 changes: 7 additions & 5 deletions ideas/annotation/images.py
@@ -66,15 +66,15 @@
    help="The path to the folder containing the permafrost hackathon data",
)
parser.add_argument("-l", "--local", action="store_true", help="Only use local files and not data from Azure")
parser.add_argument("-f", "--high_res", action="store_true", help="Use the high resolution images (timelapse_images).")
parser.add_argument("-hq", "--high_quality", action="store_true", help="Use the high resolution images (timelapse_images).")
args = parser.parse_args()

data_path = Path(args.path)

if args.high_res:
if args.high_quality:
    prefix = "timelapse_images"
else:
    prefix = "timelapse_images_fast"
    prefix = "timelapse_images_536"

if not args.local:
from stuett.global_config import get_setting, setting_exists
@@ -186,7 +186,7 @@
}
bb_label_reverse_mapping = {v: k for k, v in bb_label_mapping.items()}
img_shape = (4288, 2848, 3)
if args.high_res:
if args.high_quality:
    img_downsampling = 2
else:
    img_downsampling = 1
@@ -261,13 +261,15 @@ def serve_layout():
),
html.Div(
[
dcc.Markdown("Class names for bounding boxes:"),
dcc.Dropdown(
id="bb_label_dropdown",
options=[
{"label": bb_label_mapping[m], "value": m} for m in bb_label_mapping.keys()
],
value="#1f77b4",
),
dcc.Markdown("Class names for per image Labels:"),
dcc.Dropdown(
id="static_label_dropdown",
options=[
@@ -604,4 +606,4 @@ def update_output(date, session_id, user_id):


if __name__ == "__main__":
app.run_server(debug=True)
app.run_server(debug=False)
20 changes: 20 additions & 0 deletions ideas/machine_learning/README.md
@@ -0,0 +1,20 @@
# Idea: Classifier for Images and Seismic Data


## Quickstart
To train the image classifier, execute from the repository's base directory:
```
python ideas/machine_learning/classification.py --classifier image
```

or, for the seismic classifier:
```
python ideas/machine_learning/classification.py --classifier seismic
```
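
The script also accepts the data-path and local-mode flags used in `SLURM.txt` above; a hedged example (the local path shown is a hypothetical placeholder):

```
# -p: path to a local copy of the hackathon data, -l: use local files only
python ideas/machine_learning/classification.py -p /path/to/permafrost_hackathon/ -l --classifier seismic

# or submit the same command as a SLURM job via the batch file added in this commit
sbatch SLURM.txt
```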



## What next?
* We need other labels! Noise sources other than mountaineers influence our system, and we would like to reduce the influence of as many of them as possible.
* More fine-grained predictions, bounding boxes, region-based methods: a lot more can be done.
* Correlation of images with other sources, such as weather or temperature: the snow/temperature relation and much more.
6 changes: 3 additions & 3 deletions ideas/machine_learning/datasets.py
@@ -185,9 +185,9 @@ def __getitem__(self, idx):
# self.shape = data.shape
# elif data.shape != self.shape:
# warnings.warn(f"Inconsistency in the data for item {indexers['time']}, its shape {data.shape} does not match shape {self.shape}")
# padded_data = torch.zeros(self.shape)
# pad = data.shape - self.shape
# padded_data = torch.nn.functional.pad(data,pad)
# # padded_data = torch.zeros(self.shape)
# # pad = data.shape - self.shape
# # padded_data = torch.nn.functional.pad(data,pad)

return data, target
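
The disabled lines above sketch zero-padding items whose shape does not match the expected one. A self-contained version of that idea (a hypothetical helper, not part of the repository) could look like this:

```
import torch
import torch.nn.functional as F

def pad_to_shape(data: torch.Tensor, target_shape):
    """Zero-pad `data` at the end of each dimension so it matches `target_shape`."""
    pad = []
    # F.pad expects (left, right) pairs starting with the *last* dimension,
    # so build the padding specification in reverse dimension order.
    for current, target in zip(reversed(data.shape), reversed(target_shape)):
        pad.extend([0, max(target - current, 0)])
    return F.pad(data, pad)
```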

