33 lines
1.1 KiB
Python
33 lines
1.1 KiB
Python
import torch
|
|
from transformers import PreTrainedModel
|
|
|
|
from ..utils import torch_gc
|
|
|
|
|
|
class CPUTextEncoderWrapper(PreTrainedModel):
|
|
def __init__(self, text_encoder, torch_dtype):
|
|
super().__init__(text_encoder.config)
|
|
self.config = text_encoder.config
|
|
# cpu not support float16
|
|
self.text_encoder = text_encoder.to(torch.device("cpu"), non_blocking=True)
|
|
self.text_encoder = self.text_encoder.to(torch.float32, non_blocking=True)
|
|
self.torch_dtype = torch_dtype
|
|
del text_encoder
|
|
torch_gc()
|
|
|
|
def __call__(self, x, **kwargs):
|
|
input_device = x.device
|
|
original_output = self.text_encoder(x.to(self.text_encoder.device), **kwargs)
|
|
for k, v in original_output.items():
|
|
if isinstance(v, tuple):
|
|
original_output[k] = [
|
|
v[i].to(input_device).to(self.torch_dtype) for i in range(len(v))
|
|
]
|
|
else:
|
|
original_output[k] = v.to(input_device).to(self.torch_dtype)
|
|
return original_output
|
|
|
|
@property
|
|
def dtype(self):
|
|
return self.torch_dtype
|