1
1
import torch
2
2
3
- from vllm .multimodal .base import MultiModalInputs , NestedTensors
3
+ from vllm .multimodal .base import MultiModalKwargs , NestedTensors
4
4
5
5
6
6
def assert_nested_tensors_equal (expected : NestedTensors ,
@@ -13,40 +13,40 @@ def assert_nested_tensors_equal(expected: NestedTensors,
13
13
assert_nested_tensors_equal (expected_item , actual_item )
14
14
15
15
16
def assert_multimodal_inputs_equal(expected: MultiModalKwargs,
                                   actual: MultiModalKwargs):
    """Assert that two multimodal keyword mappings hold equal contents.

    The two mappings must expose exactly the same set of keys, and the
    value stored under each key must compare equal according to
    ``assert_nested_tensors_equal``.

    Args:
        expected: The reference mapping.
        actual: The mapping under test.

    Raises:
        AssertionError: If the key sets differ, or if the nested-tensor
            comparison fails for any key.
    """
    # Compare key sets first so a missing or extra key fails with a clear
    # assertion rather than a KeyError in the loop below.
    assert set(expected.keys()) == set(actual.keys())
    for key in expected:
        assert_nested_tensors_equal(expected[key], actual[key])
21
21
22
22
23
23
def test_multimodal_input_batch_single_tensor():
    """Batching one input that holds a single tensor.

    The expected result is the same tensor with a new leading dimension
    added (``t.unsqueeze(0)``) under the ``"image"`` key.
    """
    t = torch.rand([1, 2])
    result = MultiModalKwargs.batch([{"image": t}])
    assert_multimodal_inputs_equal(result, {"image": t.unsqueeze(0)})
27
27
28
28
29
29
def test_multimodal_input_batch_multiple_tensors():
    """Batching several inputs whose tensors all share one shape.

    Three ``[1, 1, 2]`` tensors are batched; the expected result is a
    single tensor equal to ``torch.stack([a, b, c])``.
    """
    a = torch.rand([1, 1, 2])
    b = torch.rand([1, 1, 2])
    c = torch.rand([1, 1, 2])
    result = MultiModalKwargs.batch([{"image": a}, {"image": b}, {"image": c}])
    assert_multimodal_inputs_equal(result, {"image": torch.stack([a, b, c])})
35
35
36
36
37
37
def test_multimodal_input_batch_multiple_heterogeneous_tensors():
    """Batching several inputs whose tensors differ in shape.

    The three tensors differ in their second dimension (2, 3 and 4), so
    they cannot be stacked; the expected result keeps them as the plain
    list ``[a, b, c]``.
    """
    a = torch.rand([1, 2, 2])
    b = torch.rand([1, 3, 2])
    c = torch.rand([1, 4, 2])
    result = MultiModalKwargs.batch([{"image": a}, {"image": b}, {"image": c}])
    assert_multimodal_inputs_equal(result, {"image": [a, b, c]})
43
43
44
44
45
45
def test_multimodal_input_batch_nested_tensors ():
46
46
a = torch .rand ([2 , 3 ])
47
47
b = torch .rand ([2 , 3 ])
48
48
c = torch .rand ([2 , 3 ])
49
- result = MultiModalInputs .batch ([{
49
+ result = MultiModalKwargs .batch ([{
50
50
"image" : [a ]
51
51
}, {
52
52
"image" : [b ]
@@ -65,7 +65,7 @@ def test_multimodal_input_batch_heterogeneous_lists():
65
65
a = torch .rand ([1 , 2 , 3 ])
66
66
b = torch .rand ([1 , 2 , 3 ])
67
67
c = torch .rand ([1 , 2 , 3 ])
68
- result = MultiModalInputs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
68
+ result = MultiModalKwargs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
69
69
assert_multimodal_inputs_equal (
70
70
result ,
71
71
{"image" : [torch .stack ([a , b ]), c .unsqueeze (0 )]})
@@ -76,7 +76,7 @@ def test_multimodal_input_batch_multiple_batchable_lists():
76
76
b = torch .rand ([1 , 2 , 3 ])
77
77
c = torch .rand ([1 , 2 , 3 ])
78
78
d = torch .rand ([1 , 2 , 3 ])
79
- result = MultiModalInputs .batch ([{"image" : [a , b ]}, {"image" : [c , d ]}])
79
+ result = MultiModalKwargs .batch ([{"image" : [a , b ]}, {"image" : [c , d ]}])
80
80
assert_multimodal_inputs_equal (
81
81
result ,
82
82
{"image" : torch .stack ([torch .stack ([a , b ]),
@@ -88,8 +88,8 @@ def test_multimodal_input_batch_mixed_stacking_depths():
88
88
b = torch .rand ([1 , 3 , 3 ])
89
89
c = torch .rand ([1 , 4 , 3 ])
90
90
91
- result = MultiModalInputs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
91
+ result = MultiModalKwargs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
92
92
assert_multimodal_inputs_equal (result , {"image" : [[a , b ], c .unsqueeze (0 )]})
93
93
94
- result = MultiModalInputs .batch ([{"image" : [a ]}, {"image" : [b , c ]}])
94
+ result = MultiModalKwargs .batch ([{"image" : [a ]}, {"image" : [b , c ]}])
95
95
assert_multimodal_inputs_equal (result , {"image" : [a .unsqueeze (0 ), [b , c ]]})
0 commit comments