diff --git a/lavis/tasks/vqa.py b/lavis/tasks/vqa.py
index 78bc050c8..3c6211801 100644
--- a/lavis/tasks/vqa.py
+++ b/lavis/tasks/vqa.py
@@ -209,9 +209,9 @@ def _report_metrics(self, result_file, split):
             gt_ans = res["gt_ans"]
             pred = res["pred_ans"]
 
-            if self.inference_method == "generate":
-                pred = vqa_tool.processPunctuation(pred)
-                pred = vqa_tool.processDigitArticle(pred)
+            # if self.inference_method == "generate":
+            pred = vqa_tool.processPunctuation(pred)
+            pred = vqa_tool.processDigitArticle(pred)
 
             vqa_acc = 1 if pred == gt_ans else 0
 
diff --git a/projects/blip-diffusion/README.md b/projects/blip-diffusion/README.md
new file mode 100644
index 000000000..9663220ed
--- /dev/null
+++ b/projects/blip-diffusion/README.md
@@ -0,0 +1,8 @@
+## BLIP-Diffusion: Pre-trained Subject Representation for Controllable Text-to-Image Generation and Editing
+[Paper](https://arxiv.org/abs/2305.14720), [Demo Site](https://dxli94.github.io/BLIP-Diffusion-website/), [Video](https://youtu.be/Wf09s4JnDb0)
+
+This repo will host the official implementation of BLIP-Diffusion, a text-to-image diffusion model with built-in support for multimodal subject-and-text conditioning. BLIP-Diffusion enables zero-shot subject-driven generation and efficient fine-tuning for customized subjects with up to 20x speedup. In addition, BLIP-Diffusion can be flexibly combined with ControlNet and prompt-to-prompt to enable novel subject-driven generation and editing applications.
+
+
+
+Implementations are coming soon.
diff --git a/projects/blip-diffusion/teaser-website.png b/projects/blip-diffusion/teaser-website.png
new file mode 100644
index 000000000..8c9889625
Binary files /dev/null and b/projects/blip-diffusion/teaser-website.png differ
diff --git a/requirements.txt b/requirements.txt
index 247190d57..e4790e2aa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 contexttimer
-decord
+decord; platform_system != "Darwin"
+eva-decord; platform_system == "Darwin"
 einops>=0.4.1
 fairscale==0.4.4
 ftfy