add sam2clip

HarborYuan · Jan 7, 2024 · 5394c78 · 5394c78
1 parent 647a1bd
commit 5394c78
Show file tree

Hide file tree

Showing 134 changed files with 10,615 additions and 2 deletions.
diff --git a/.gitignore b/.gitignore
@@ -157,4 +157,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
diff --git a/README.md b/README.md
@@ -1 +1 @@
-# ovsam
+# Open-Vocabulary SAM
diff --git a/ext/meta/sam_meta.py b/ext/meta/sam_meta.py
@@ -0,0 +1,41 @@
+meta_dict = {  # per-variant settings keyed by 'vit_h' / 'vit_l' / 'vit_b'; presumably fed to the SAM model builder - confirm against callers
+    'vit_h': dict(
+        encoder_embed_dim=1280,
+        encoder_depth=32,
+        encoder_num_heads=16,
+        encoder_global_attn_indexes=[7, 15, 23, 31],  # four indexes, 8 apart; the last equals encoder_depth - 1
+        # common: the four values below are identical in every entry of this dict
+        prompt_embed_dim=256,
+        image_size=1024,
+        vit_patch_size=16,
+        image_embedding_size=64  # numerically image_size // vit_patch_size (1024 // 16)
+    ),
+    'vit_l': dict(
+        encoder_embed_dim=1024,
+        encoder_depth=24,
+        encoder_num_heads=16,
+        encoder_global_attn_indexes=[5, 11, 17, 23],  # four indexes, 6 apart; the last equals encoder_depth - 1
+        # common: the four values below are identical in every entry of this dict
+        prompt_embed_dim=256,
+        image_size=1024,
+        vit_patch_size=16,
+        image_embedding_size=64  # numerically image_size // vit_patch_size (1024 // 16)
+    ),
+    'vit_b': dict(
+        encoder_embed_dim=768,
+        encoder_depth=12,
+        encoder_num_heads=12,
+        encoder_global_attn_indexes=[2, 5, 8, 11],  # four indexes, 3 apart; the last equals encoder_depth - 1
+        # common: the four values below are identical in every entry of this dict
+        prompt_embed_dim=256,
+        image_size=1024,
+        vit_patch_size=16,
+        image_embedding_size=64  # numerically image_size // vit_patch_size (1024 // 16)
+    )
+}
+
+checkpoint_dict = {  # .pth checkpoint URL per variant; uses the same three keys as meta_dict
+    'vit_h': 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth',
+    'vit_l': 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth',
+    'vit_b': 'https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth',
+}
diff --git a/ext/open_clip/__init__.py b/ext/open_clip/__init__.py
@@ -0,0 +1,15 @@
+from .coca_model import CoCa
+from .constants import OPENAI_DATASET_MEAN, OPENAI_DATASET_STD
+from .factory import create_model, create_model_and_transforms, create_model_from_pretrained, get_tokenizer, create_loss
+from .factory import list_models, add_model_config, get_model_config, load_checkpoint
+from .loss import ClipLoss, DistillClipLoss, CoCaLoss
+from .model import CLIP, CustomTextCLIP, CLIPTextCfg, CLIPVisionCfg, \
+    convert_weights_to_lp, convert_weights_to_fp16, trace_model, get_cast_dtype, get_input_dtype
+from .openai import load_openai_model, list_openai_models
+from .pretrained import list_pretrained, list_pretrained_models_by_tag, list_pretrained_tags_by_model, \
+    get_pretrained_url, download_pretrained_from_url, is_pretrained_cfg, get_pretrained_cfg, download_pretrained
+from .push_to_hf_hub import push_pretrained_to_hf_hub, push_to_hf_hub
+from .tokenizer import SimpleTokenizer, tokenize, decode
+from .transform import image_transform, AugmentationCfg
+from .zero_shot_classifier import build_zero_shot_classifier, build_zero_shot_classifier_legacy
+from .zero_shot_metadata import OPENAI_IMAGENET_TEMPLATES, SIMPLE_IMAGENET_TEMPLATES, IMAGENET_CLASSNAMES
diff --git a/ext/open_clip/bpe_simple_vocab_16e6.txt.gz b/ext/open_clip/bpe_simple_vocab_16e6.txt.gz