Skip to content

Commit 7c041ab

Browse files
authored
Refactor system architecture (#82)
1 parent 8917782 commit 7c041ab

40 files changed

+195
-447
lines changed
File renamed without changes.
File renamed without changes.

cacheflow/master/scheduler.py renamed to cacheflow/core/scheduler.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@
44
import time
55
from typing import Any, Dict, List, Optional, Tuple
66

7-
from cacheflow.master.block_manager import BlockSpaceManager
8-
from cacheflow.master.policy import PolicyFactory
7+
from cacheflow.core.block_manager import BlockSpaceManager
8+
from cacheflow.core.policy import PolicyFactory
99
from cacheflow.sampling_params import SamplingParams
1010
from cacheflow.sequence import Sequence
1111
from cacheflow.sequence import SequenceGroup

cacheflow/master/server.py renamed to cacheflow/core/server.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,20 +8,21 @@
88
except ImportError:
99
ray = None
1010

11+
from cacheflow.core.scheduler import Scheduler
12+
from cacheflow.frontend.simple_frontend import SimpleFrontend
1113
from cacheflow.logger import init_logger
12-
from cacheflow.master.scheduler import Scheduler
13-
from cacheflow.master.simple_frontend import SimpleFrontend
14-
from cacheflow.models import get_memory_analyzer
15-
from cacheflow.worker.controller import Controller, DeviceID
14+
from cacheflow.model_executor import get_memory_analyzer
1615
from cacheflow.sequence import SequenceGroup
1716
from cacheflow.sampling_params import SamplingParams
1817
from cacheflow.utils import get_gpu_memory, get_cpu_memory
18+
from cacheflow.worker.controller import Controller, DeviceID
1919

2020

2121
logger = init_logger(__name__)
2222

2323

2424
class Server:
25+
2526
def __init__(
2627
self,
2728
model: str,

cacheflow/http_frontend/fastapi_frontend.py renamed to cacheflow/frontend/fastapi_frontend.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,22 @@
11
import argparse
22
import asyncio
3+
import json
34
import time
45
from typing import List, Dict, Optional
5-
import json
66

7-
import ray
8-
from transformers import AutoTokenizer
97
from fastapi import FastAPI, Request
108
from fastapi.responses import StreamingResponse
9+
import ray
10+
from transformers import AutoTokenizer
1111
import uvicorn
1212

13+
from cacheflow.core.server import (Server, add_server_arguments,
14+
process_server_arguments,
15+
initialize_cluster)
1316
from cacheflow.sampling_params import SamplingParams
1417
from cacheflow.sequence import Sequence, SequenceGroup
15-
from cacheflow.master.server import (Server, add_server_arguments,
16-
process_server_arguments,
17-
initialize_cluster)
18-
from cacheflow.worker.controller import DeviceID
1918
from cacheflow.utils import Counter, get_gpu_memory, get_cpu_memory
19+
from cacheflow.worker.controller import DeviceID
2020

2121
TIMEOUT_TO_PREVENT_DEADLOCK = 1 # seconds
2222
app = FastAPI()
File renamed without changes.
Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
from cacheflow.model_executor.input_metadata import InputMetadata
2+
from cacheflow.model_executor.model_loader import get_model, get_memory_analyzer
3+
from cacheflow.model_executor.utils import set_random_seed
4+
5+
6+
__all__ = [
7+
"InputMetadata",
8+
"get_model",
9+
"get_memory_analyzer",
10+
"set_random_seed",
11+
]
File renamed without changes.
File renamed without changes.

cacheflow/models/attention.py renamed to cacheflow/model_executor/layers/attention.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from cacheflow import attention_ops
88
from cacheflow import cache_ops
99
from cacheflow import pos_encoding_ops
10-
from cacheflow.models import InputMetadata
10+
from cacheflow.model_executor.input_metadata import InputMetadata
1111

1212

1313
class GPTCacheFlowAttention(nn.Module):

0 commit comments

Comments
 (0)