11import torch
22
3- from vllm .multimodal .base import MultiModalInputs , NestedTensors
3+ from vllm .multimodal .base import MultiModalKwargs , NestedTensors
44
55
66def assert_nested_tensors_equal (expected : NestedTensors ,
@@ -13,40 +13,40 @@ def assert_nested_tensors_equal(expected: NestedTensors,
1313 assert_nested_tensors_equal (expected_item , actual_item )
1414
1515
def assert_multimodal_inputs_equal(expected: MultiModalKwargs,
                                   actual: MultiModalKwargs):
    """Assert that two multimodal keyword mappings match key by key.

    The key sets must be identical; each pair of values stored under the
    same key is then compared with ``assert_nested_tensors_equal``.

    Args:
        expected: Mapping holding the reference values.
        actual: Mapping produced by the code under test.

    Raises:
        AssertionError: If the key sets differ, or if the nested comparison
            of any value pair fails.
    """
    # Compare the key sets first so that a missing or extra modality fails
    # as an assertion instead of surfacing as a KeyError in the loop below.
    assert set(expected.keys()) == set(actual.keys())
    for key in expected:
        assert_nested_tensors_equal(expected[key], actual[key])
2121
2222
def test_multimodal_input_batch_single_tensor():
    """Batch one item holding a single tensor.

    The expected result for the "image" key is the input tensor with one
    extra leading dimension (``t.unsqueeze(0)``).
    """
    t = torch.rand([1, 2])
    result = MultiModalKwargs.batch([{"image": t}])
    assert_multimodal_inputs_equal(result, {"image": t.unsqueeze(0)})
2727
2828
def test_multimodal_input_batch_multiple_tensors():
    """Batch three items whose "image" tensors all share one shape.

    The expected result is a single tensor equal to
    ``torch.stack([a, b, c])``.
    """
    a = torch.rand([1, 1, 2])
    b = torch.rand([1, 1, 2])
    c = torch.rand([1, 1, 2])
    result = MultiModalKwargs.batch([{"image": a}, {"image": b}, {"image": c}])
    assert_multimodal_inputs_equal(result, {"image": torch.stack([a, b, c])})
3535
3636
def test_multimodal_input_batch_multiple_heterogeneous_tensors():
    """Batch three items whose "image" tensors differ in their second dim.

    The inputs have shapes (1, 2, 2), (1, 3, 2) and (1, 4, 2); the expected
    result keeps them as the plain list ``[a, b, c]`` rather than one
    stacked tensor.
    """
    a = torch.rand([1, 2, 2])
    b = torch.rand([1, 3, 2])
    c = torch.rand([1, 4, 2])
    result = MultiModalKwargs.batch([{"image": a}, {"image": b}, {"image": c}])
    assert_multimodal_inputs_equal(result, {"image": [a, b, c]})
4343
4444
4545def test_multimodal_input_batch_nested_tensors ():
4646 a = torch .rand ([2 , 3 ])
4747 b = torch .rand ([2 , 3 ])
4848 c = torch .rand ([2 , 3 ])
49- result = MultiModalInputs .batch ([{
49+ result = MultiModalKwargs .batch ([{
5050 "image" : [a ]
5151 }, {
5252 "image" : [b ]
@@ -65,7 +65,7 @@ def test_multimodal_input_batch_heterogeneous_lists():
6565 a = torch .rand ([1 , 2 , 3 ])
6666 b = torch .rand ([1 , 2 , 3 ])
6767 c = torch .rand ([1 , 2 , 3 ])
68- result = MultiModalInputs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
68+ result = MultiModalKwargs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
6969 assert_multimodal_inputs_equal (
7070 result ,
7171 {"image" : [torch .stack ([a , b ]), c .unsqueeze (0 )]})
@@ -76,7 +76,7 @@ def test_multimodal_input_batch_multiple_batchable_lists():
7676 b = torch .rand ([1 , 2 , 3 ])
7777 c = torch .rand ([1 , 2 , 3 ])
7878 d = torch .rand ([1 , 2 , 3 ])
79- result = MultiModalInputs .batch ([{"image" : [a , b ]}, {"image" : [c , d ]}])
79+ result = MultiModalKwargs .batch ([{"image" : [a , b ]}, {"image" : [c , d ]}])
8080 assert_multimodal_inputs_equal (
8181 result ,
8282 {"image" : torch .stack ([torch .stack ([a , b ]),
@@ -88,8 +88,8 @@ def test_multimodal_input_batch_mixed_stacking_depths():
8888 b = torch .rand ([1 , 3 , 3 ])
8989 c = torch .rand ([1 , 4 , 3 ])
9090
91- result = MultiModalInputs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
91+ result = MultiModalKwargs .batch ([{"image" : [a , b ]}, {"image" : [c ]}])
9292 assert_multimodal_inputs_equal (result , {"image" : [[a , b ], c .unsqueeze (0 )]})
9393
94- result = MultiModalInputs .batch ([{"image" : [a ]}, {"image" : [b , c ]}])
94+ result = MultiModalKwargs .batch ([{"image" : [a ]}, {"image" : [b , c ]}])
9595 assert_multimodal_inputs_equal (result , {"image" : [a .unsqueeze (0 ), [b , c ]]})
0 commit comments