
Commit bfab911
use colossal for tp
cached npy
dujiangsu committed May 20, 2022
1 parent e796d7f commit bfab911
Showing 53 changed files with 76 additions and 2,540 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -25,6 +25,7 @@ For Bert, Google reports a [super-large Bert with 481B parameters](https://mlcom
 ### Installation
 ``` bash
 $ git clone https://github.com/hpcaitech/ColossalAI-Inference.git
+$ pip install -r requirements.txt
 $ pip install .
 ```

5 changes: 3 additions & 2 deletions energon/cli/service.py
@@ -2,7 +2,7 @@
 import torch
 import inspect
 import energon.server as server
-from multiprocessing import Process
+import multiprocessing as mp

 from energon.context import Config

@@ -53,8 +53,9 @@ def launches(model_class=None,
     worker_rank = 1  # start from 1

     process_list = []
+    mp.set_start_method('spawn')
     for i in range(num_worker):
-        p = Process(target=server.launch_worker,
+        p = mp.Process(target=server.launch_worker,
                     args=(host, port, tp_init_size, pp_init_size, "nccl", 1024, True, worker_rank + i, worker_rank + i,
                           server_host, worker_port + i, log_level))
         p.start()
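Note: the switch to `mp.set_start_method('spawn')` matters because workers created with the default fork start method cannot safely re-initialize CUDA. A minimal sketch of the pattern (not this repository's code; the `worker` function and process count are placeholders):

```python
import multiprocessing as mp

import torch


def worker(rank: int):
    # Each spawned worker starts a fresh interpreter, so CUDA can be
    # initialized here without inheriting state from the parent process.
    if torch.cuda.is_available():
        torch.cuda.set_device(rank % torch.cuda.device_count())
    print(f"worker {rank} ready")


if __name__ == "__main__":
    mp.set_start_method("spawn")  # must be called once, before any Process is created
    procs = [mp.Process(target=worker, args=(i,)) for i in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
```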
4 changes: 2 additions & 2 deletions energon/communication/collective.py
@@ -6,8 +6,8 @@
 from torch.distributed import ReduceOp
 from torch import Tensor

-from energon.context import ParallelMode
-from energon.core import global_context as gpc
+from colossalai.core import global_context as gpc
+from colossalai.context import ParallelMode
 from energon.utils import get_current_device


4 changes: 2 additions & 2 deletions energon/communication/p2p.py
@@ -5,8 +5,8 @@
 import torch
 import torch.distributed as dist

-from energon.context.parallel_mode import ParallelMode
-from energon.core import global_context as gpc
+from colossalai.core import global_context as gpc
+from colossalai.context import ParallelMode
 from energon.utils import get_current_device
 from functools import reduce
 import operator
4 changes: 2 additions & 2 deletions energon/communication/ring.py
@@ -3,8 +3,8 @@

 import torch

-from energon.context.parallel_mode import ParallelMode
-from energon.core import global_context as gpc
+from colossalai.core import global_context as gpc
+from colossalai.context import ParallelMode
 from energon.utils import get_current_device, synchronize


4 changes: 2 additions & 2 deletions energon/communication/utils.py
@@ -1,8 +1,8 @@
 import torch
 import torch.distributed as dist

-from energon.context.parallel_mode import ParallelMode
-from energon.core import global_context as gpc
+from colossalai.core import global_context as gpc
+from colossalai.context import ParallelMode
 from energon.utils import get_current_device


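Note: all four communication modules now take their parallel state from colossalai rather than energon's own context. A hedged sketch of how a helper might consume the colossalai global context (assuming colossalai has already been launched and a tensor-parallel group exists; `all_reduce_tp` is an illustrative name, not a function from this repository):

```python
import torch
import torch.distributed as dist

from colossalai.core import global_context as gpc
from colossalai.context import ParallelMode


def all_reduce_tp(tensor: torch.Tensor) -> torch.Tensor:
    # Reduce in place across the tensor-parallel group managed by colossalai.
    if gpc.get_world_size(ParallelMode.TENSOR) > 1:
        dist.all_reduce(tensor, group=gpc.get_group(ParallelMode.TENSOR))
    return tensor
```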
4 changes: 0 additions & 4 deletions energon/context/__init__.py
@@ -1,5 +1 @@
 from .config import Config, ConfigException
-from .parallel_context import ParallelContext
-from .parallel_mode import ParallelMode
-from .process_group_initializer import *
-from .random import *
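Note: after this change `energon.context` keeps only the config utilities; the parallel context and modes are expected to come from colossalai, as the communication modules above now do. A sketch of the resulting import surface (assumed, based on the diffs in this commit):

```python
from energon.context import Config, ConfigException  # still exported here
from colossalai.core import global_context as gpc    # parallel state now lives in colossalai
from colossalai.context import ParallelMode
```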