upload files

bang123-box · Nov 26, 2024 · 120802a · 120802a
1 parent ce372cc
commit 120802a
Show file tree

Hide file tree

Showing 76 changed files with 4,042 additions and 21 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,21 +1,21 @@
-MIT License
-
-Copyright (c) 2024 bang123-box
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+MIT License
+
+Copyright (c) 2024 bang123-box
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/configs/charset/36_lowercase.yaml b/configs/charset/36_lowercase.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+model:
+  charset_train: "0123456789abcdefghijklmnopqrstuvwxyz"  # 36 symbols: digits 0-9, then lowercase a-z
diff --git a/configs/charset/62_mixed-case.yaml b/configs/charset/62_mixed-case.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+model:
+  charset_train: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"  # 62 symbols: digits, lowercase a-z, uppercase A-Z
diff --git a/configs/charset/94_full.yaml b/configs/charset/94_full.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+model:
+  charset_train: "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"  # 94 symbols: the 62 alphanumerics plus 32 punctuation marks (\" and \\ are YAML escapes for one character each)
diff --git a/configs/dataset/real.yaml b/configs/dataset/real.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+data:
+  train_dir: real
diff --git a/configs/dataset/synth.yaml b/configs/dataset/synth.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+data:
+  train_dir: synth
diff --git a/configs/dataset/union14m.yaml b/configs/dataset/union14m.yaml
@@ -0,0 +1,3 @@
+# @package _global_
+data:
+  train_dir: Union14M
diff --git a/configs/main.yaml b/configs/main.yaml
@@ -0,0 +1,51 @@
+defaults:
+  - _self_
+  - model: cfe
+  - charset: 36_lowercase  # options: 36_lowercase, 62_mixed-case, 94_full
+  - dataset: synth
+
+model:
+  _convert_: all
+  img_size: [32, 128]  # [ height, width ]
+  max_label_length: 25
+  # The ordering in charset_train matters. It determines the token IDs assigned to each character.
+  charset_train: ???
+  # For charset_test, ordering doesn't matter.
+  charset_test: "0123456789abcdefghijklmnopqrstuvwxyz"
+  batch_size: 384
+  weight_decay: 0.0
+  warmup_pct: 0.075  # equivalent to 1.5 epochs of warm-up when trainer.max_epochs is 20 (0.075 * 20)
+
+data:
+  _target_: strhub.data.module.SceneTextDataModule
+  root_dir: /home/zbb/data  # machine-specific absolute path; override for your environment
+  train_dir: ???  # mandatory value; set by the selected dataset config (real, synth, union14m)
+  batch_size: ${model.batch_size}
+  img_size: ${model.img_size}
+  charset_train: ${model.charset_train}
+  charset_test: ${model.charset_test}
+  max_label_length: ${model.max_label_length}
+  remove_whitespace: true
+  normalize_unicode: true
+  augment: true
+  num_workers: 12
+
+trainer:
+  _target_: pytorch_lightning.Trainer
+  _convert_: all
+  val_check_interval: 2000  # integer value; per the Lightning docs this is a count of training batches between validation runs
+  max_epochs: 20
+  gradient_clip_val: 20
+  accelerator: gpu
+  devices: 4  # hard-coded device count; override on machines with a different number of GPUs
+
+ckpt_path: null
+pretrained: null
+
+hydra:
+  output_subdir: config
+  run:
+    dir: ./output/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
+  sweep:
+    dir: multirun/${model.name}/${now:%Y-%m-%d}_${now:%H-%M-%S}
+    subdir: ${hydra.job.override_dirname}
diff --git a/configs/model/cfe.yaml b/configs/model/cfe.yaml
@@ -0,0 +1,45 @@
+name: cfe
+_target_: strhub.models.cfe.system.CFE
+
+# Architecture
+num_control_points: 20
+enc_mlp_ratio: 4
+window_size: [[7, 11], [7, 11], [7, 11]]
+merge_types: 'Conv'
+local_type: 'r2'
+prenorm: false
+tps: false
+use_pe: true
+cclossexist: true
+cc_weights: 0.2
+fpn_layers: [0, 1, 2]
+dec_mlp_ratio: 4
+dec_depth: 1
+
+# base (the active variant; the small and tiny variants below are commented out)
+embed_dim: [128, 256, 384]
+enc_num_heads: [4, 8, 12]
+depth: [3, 6, 9]
+mixer_types: ['Local', 8, 'Global', 10]
+decoder_dim: 256
+dec_num_heads: 8
+
+# small (trailing numbers are preserved from the original notes)
+# embed_dim: [96, 192, 256]  # 64,128,256
+# enc_num_heads: [3, 6, 8]  # 2,4,8
+# depth: [3, 6, 6]  # 3,6,3
+# mixer_types: ['Local', 8, 'Global', 7]
+# decoder_dim: 192
+# dec_num_heads: 6
+
+# tiny
+# embed_dim: [64, 128, 256]
+# enc_num_heads: [2, 4, 8]
+# depth: [3, 6, 3]
+# mixer_types: ['Local', 6, 'Global', 6]
+# decoder_dim: 128
+# dec_num_heads: 4
+
+# Training
+lr: 5.0e-4  # written with a decimal point so YAML 1.1 loaders also parse it as a float
+dropout: 0.1
diff --git a/requirements/bench.in b/requirements/bench.in
@@ -0,0 +1,4 @@
+-c ${CONSTRAINTS}
+
+hydra-core >=1.2.0
+fvcore >=0.1.5.post20220512
diff --git a/requirements/bench.txt b/requirements/bench.txt
@@ -0,0 +1,17 @@
+antlr4-python3-runtime==4.9.3
+fvcore==0.1.5.post20221221
+hydra-core==1.3.2
+importlib-resources==5.12.0
+iopath==0.1.10
+numpy==1.24.3
+omegaconf==2.3.0
+packaging==23.1
+pillow==9.5.0
+portalocker==2.7.0
+pyyaml==6.0
+tabulate==0.9.0
+termcolor==2.3.0
+tqdm==4.65.0
+typing-extensions==4.6.2
+yacs==0.1.8
+zipp==3.15.0