# Implementing SOTA CV, NLP, Speech and Multimodal Models with PyTorch
## Requirements

- PyTorch
- Lightning
## Installation

```bash
git clone https://github.com/lemonmindyes/MiniDream.git
cd MiniDream
pip install -r requirements.txt
```

To train a model, run the script that matches its name (see the example after the list):

```bash
python train_mini_(model name).py
```

## Models

- BPE
- BYOL
- GPT
- MAE
- MobileNetV1 | MobileNetV2 | MobileNetV3
- MoCo
- ResNet
- SimCLR
- SimSiam
- U-Net
- VIT
- VQ-VAE
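
For instance, assuming the script naming follows the pattern above, the ResNet implementation would be trained with:

```bash
python train_mini_resnet.py
```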
## Usage

### BPE

```python
from mini_bpe import Config, BPE
config = Config()
config.vocab_size = 1000
bpe = BPE(config)
text = 'very long text'
# train
bpe.train(text)
# save
bpe.save('bpe')
# load
bpe.load('bpe')
# encode
ids = bpe.encode(text)
# decode
text = bpe.decode(ids)
```
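
A quick sanity check is that decoding inverts encoding; this sketch assumes the trained vocabulary fully covers the sample text:

```python
# Round-trip check: decode(encode(x)) should return x exactly
# when every byte sequence in x is covered by the learned merges.
assert bpe.decode(bpe.encode(text)) == text
```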
### BYOL

```python
import torch
from mini_byol import Config, BYOL
config = Config()
config.dim = 256
model = BYOL(config)
img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
qq, qk, zqt, zkt = model(img_q, img_k) # [2, 256]
loss1 = 2 - 2 * (qq * zkt).sum(dim=-1)
loss2 = 2 - 2 * (qk * zqt).sum(dim=-1)
loss = (loss1 + loss2).mean()
```
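
The `2 - 2 * (q * z).sum(-1)` form equals a shifted negative cosine similarity, but only if both vectors are L2-normalized. A hedged equivalent using `torch.nn.functional`, which normalizes internally and is therefore safe even if the model outputs are not unit-length:

```python
import torch.nn.functional as F

# 2 - 2 * cos(q, z); cosine_similarity L2-normalizes both inputs itself.
loss1 = 2 - 2 * F.cosine_similarity(qq, zkt, dim=-1)
loss2 = 2 - 2 * F.cosine_similarity(qk, zqt, dim=-1)
loss = (loss1 + loss2).mean()
```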
### MobileNet

```python
import torch
from mini_mobilenet import Config, MobileNetV1, MobileNetV2
config = Config()
config.num_class = 1000
config.alpha = 1.0  # width multiplier (alpha in the MobileNet paper)
config.rou = 1.0  # resolution multiplier (rho in the paper; spelled 'rou' in this config)
model_v1 = MobileNetV1(config)
model_v2 = MobileNetV2(config)
img = torch.randn(2, 3, 224, 224)
out1 = model_v1(img) # [2, num_class]
out2 = model_v2(img)  # [2, num_class]
```
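
Assuming `alpha` scales channel widths as in the MobileNet paper, a slimmer variant can be built by lowering it; compute cost shrinks roughly quadratically with `alpha`:

```python
config.alpha = 0.5  # hypothetical slimmer variant, ~1/4 the multiply-adds of alpha=1.0
small_v1 = MobileNetV1(config)
out = small_v1(img)  # still [2, num_class]; only internal channel widths change
```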
### MoCo

```python
import torch
from mini_moco import Config, MoCo
config = Config()
config.K = 65536  # size of the negative-key queue
model = MoCo(config)
img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
logits, labels = model(img_q, img_k) # logits [2, 1 + K], labels [2]
print(logits.shape, labels.shape)
```
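
The `(logits, labels)` pair is shaped for the InfoNCE objective, which reduces to ordinary cross-entropy over the `1 + K` similarity scores; presumably the training step is just:

```python
import torch.nn.functional as F

# InfoNCE: the positive key occupies a fixed column, so cross-entropy
# over the (1 + K) logits against the returned labels is the full loss.
loss = F.cross_entropy(logits, labels)
```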
### ResNet

```python
import torch
from mini_resnet import Config, ResNet
config = Config()
# resnet18
config.n_layer = [2, 2, 2, 2]
config.resnet_name = 'resnet18'
config.num_class = 1000
resnet18 = ResNet(config)
# resnet34
config.n_layer = [3, 4, 6, 3]
config.resnet_name = 'resnet34'
resnet34 = ResNet(config)
# resnet50
config.n_layer = [3, 4, 6, 3]
config.resnet_name = 'resnet50'
resnet50 = ResNet(config)
# resnet101
config.n_layer = [3, 4, 23, 3]
config.resnet_name = 'resnet101'
resnet101 = ResNet(config)
# resnet152
config.n_layer = [3, 8, 36, 3]
config.resnet_name = 'resnet152'
resnet152 = ResNet(config)
img = torch.randn(1, 3, 224, 224)
out = resnet18(img)  # out [1, num_class]
```
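
A quick way to sanity-check the five configurations is to compare parameter counts against the published ones (roughly 11.7M for ResNet-18 and 25.6M for ResNet-50 at 1000 classes):

```python
for name, net in [('resnet18', resnet18), ('resnet34', resnet34),
                  ('resnet50', resnet50), ('resnet101', resnet101),
                  ('resnet152', resnet152)]:
    n_params = sum(p.numel() for p in net.parameters())  # total parameter count
    print(f'{name}: {n_params / 1e6:.1f}M parameters')
```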
### SimCLR

```python
import torch
from mini_simclr import Config, NTXentLoss, SimCLR
config = Config()
config.dim = 128
model = SimCLR(config)
loss_fn = NTXentLoss(config)
img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
q, k = model(img_q, img_k) # q [2, dim], k [2, dim]
loss = loss_fn(q, k)
```
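
NT-Xent treats the two augmented views of each image as a positive pair and every other embedding in the batch as a negative. `NTXentLoss` above is the repo's own implementation; this is a self-contained sketch of the same idea, with `temperature` as an assumed parameter:

```python
import torch
import torch.nn.functional as F

def nt_xent(q, k, temperature=0.5):
    n = q.size(0)
    z = F.normalize(torch.cat([q, k], dim=0), dim=-1)  # [2N, dim], unit length
    sim = z @ z.t() / temperature                      # pairwise cosine similarities
    mask = torch.eye(2 * n, dtype=torch.bool, device=sim.device)
    sim = sim.masked_fill(mask, float('-inf'))         # drop self-similarity
    # The positive for row i is its other view: i + n for the first half, i - n after.
    targets = torch.cat([torch.arange(n, 2 * n), torch.arange(0, n)]).to(sim.device)
    return F.cross_entropy(sim, targets)
```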
### SimSiam

```python
import torch
from mini_simsiam import Config, NegCosineLoss, SimSiam
config = Config()
config.dim = 128
model = SimSiam(config)
loss_fn = NegCosineLoss(config)
img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
z1, z2, p1, p2 = model(img_q, img_k) # [2, 128]
loss1 = loss_fn(p1, z2)
loss2 = loss_fn(p2, z1)
loss = (loss1 + loss2) / 2
```
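
SimSiam avoids representation collapse only because the target branch receives no gradient; `NegCosineLoss` presumably applies the stop-gradient internally. A hedged sketch of that loss:

```python
import torch.nn.functional as F

def neg_cosine(p, z):
    # Negative cosine similarity with stop-gradient on the target,
    # as in the SimSiam paper; without z.detach() training collapses.
    return -F.cosine_similarity(p, z.detach(), dim=-1).mean()
```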
### U-Net

```python
import torch
from mini_unet import Config, Unet
from mini_unet.utils import show
config = Config()
config.num_class = 61
model = Unet(config)
img = torch.randn(2, 3, 256, 256)
mask = torch.randint(0, config.num_class, (2, 256, 256))  # integer class label per pixel
out = model(img) # out [2, 256, 256]
show(img[0], mask[0])  # visualize the image with its segmentation mask
```
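
For training, per-pixel cross-entropy is the usual objective. A self-contained sketch that assumes the network produces raw class logits of shape `[B, num_class, H, W]` before any argmax (the comment above shows a post-argmax shape, so check what `Unet` actually returns):

```python
import torch
import torch.nn.functional as F

logits = torch.randn(2, 61, 256, 256)         # hypothetical raw per-class logits
target = torch.randint(0, 61, (2, 256, 256))  # integer label map
loss = F.cross_entropy(logits, target)        # averaged over every pixel
pred = logits.argmax(dim=1)                   # [2, 256, 256] predicted mask
```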
### ViT

```python
import torch
from mini_vit import Config, VIT
config = Config()
config.num_class = 1000
model = VIT(config)
img = torch.randn(1, 3, 224, 224)
out = model(img)  # out [1, num_class]
```
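
The head returns raw logits over `num_class` classes; softmax and top-k turn them into readable predictions:

```python
probs = out.softmax(dim=-1)   # [1, num_class] class probabilities
top5 = probs.topk(5, dim=-1)  # scores and indices of the 5 most likely classes
print(top5.indices, top5.values)
```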
### VQ-VAE

```python
import torch
from mini_vqvae import Config, VQVAE
config = Config()
model = VQVAE(config)
img = torch.randn(1, 3, 224, 224)
embedding_loss, x_hat, perplexity = model(img)  # x_hat [1, 3, 224, 224]
```
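
The codebook/commitment term comes back separately from the reconstruction, so the full training loss combines both; a sketch following the VQ-VAE paper, assuming `embedding_loss` already includes the commitment weighting:

```python
import torch.nn.functional as F

recon_loss = F.mse_loss(x_hat, img)  # pixel-space reconstruction term
loss = recon_loss + embedding_loss   # total VQ-VAE objective
# perplexity tracks codebook usage: higher means more codes are in play
```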