
MiniDream

Implementing SOTA CV, NLP, speech, and multimodal models with PyTorch: a place to explore and run popular and interesting SOTA models across computer vision, NLP, audio, and multimodal tasks.

🌟 Features

  • PyTorch
  • Lightning

πŸ“¦ Installation

git clone https://github.com/lemonmindyes/MiniDream.git
cd MiniDream
pip install -r requirements.txt

πŸš€ Usage

python train_mini_<model_name>.py # e.g. python train_mini_resnet.py

πŸ“š Model

BPE

from mini_bpe import Config, BPE
config = Config()
config.vocab_size = 1000
bpe = BPE(config)
text = 'very long text'
# train
bpe.train(text)
# save
bpe.save('bpe')
# load
bpe.load('bpe')
# encode
ids = bpe.encode(text)
# decode
text = bpe.decode(ids)
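
Assuming encode/decode round-trip losslessly (the usual property of a byte-level BPE), a quick sanity check:

# round-trip sanity check (assumes the tokenizer is lossless)
assert bpe.decode(bpe.encode(text)) == text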

BYOL

import torch
from mini_byol import Config, BYOL

config = Config()
config.dim = 256
model = BYOL(config)
img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
qq, qk, zqt, zkt = model(img_q, img_k) # [2, 256]
loss1 = 2 - 2 * (qq * zkt).sum(dim=-1)
loss2 = 2 - 2 * (qk * zqt).sum(dim=-1)
loss = (loss1 + loss2).mean()
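
For unit vectors this 2 - 2 * cosine form equals the mean squared error between the two embeddings, since ||q - z||^2 = 2 - 2 q.z when ||q|| = ||z|| = 1. A minimal check, assuming the outputs are (or are first made) L2-normalized:

import torch.nn.functional as F

q = F.normalize(qq, dim=-1)
z = F.normalize(zkt, dim=-1)
mse_form = (q - z).pow(2).sum(dim=-1)   # ||q - z||^2
cos_form = 2 - 2 * (q * z).sum(dim=-1)  # 2 - 2 * cos(q, z)
assert torch.allclose(mse_form, cos_form, atol=1e-6)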

MobileNet

import torch
from mini_mobilenet import Config, MobileNetV1, MobileNetV2
config = Config()
config.num_class = 1000
config.alpha = 1.0 # width multiplier
config.rou = 1.0 # resolution multiplier (rho)
model_v1 = MobileNetV1(config)
model_v2 = MobileNetV2(config)

img = torch.randn(2, 3, 224, 224)
out1 = model_v1(img) # [2, num_class]
out2 = model_v2(img) # [2, num_class]
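
The width multiplier scales every layer's channel count, so a smaller alpha should yield a proportionally smaller network. A quick comparison, assuming Config takes the same fields as above:

# a thinner variant via the width multiplier (hypothetical alpha = 0.5 config)
thin_config = Config()
thin_config.num_class = 1000
thin_config.alpha = 0.5
thin_config.rou = 1.0
thin_v1 = MobileNetV1(thin_config)
print(sum(p.numel() for p in model_v1.parameters()),
      sum(p.numel() for p in thin_v1.parameters()))  # full vs. thin parameter counts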

MoCo

import torch
from mini_moco import Config, MoCo

config = Config()
config.K = 65536 # queue size: number of negative keys
model = MoCo(config)
img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
logits, labels = model(img_q, img_k) # logits [2, 1 + K], labels [2]
print(logits.shape, labels.shape)
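
The logits/labels pair is shaped for cross-entropy: the positive key sits at index 0 and the K queued negatives fill the rest, which gives the InfoNCE objective directly (a sketch; this assumes any temperature scaling already happens inside the model):

import torch.nn.functional as F

# InfoNCE: classify the positive key (class 0) against K negatives
loss = F.cross_entropy(logits, labels)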

ResNet

import torch
from mini_resnet import Config, ResNet

config = Config()
# resnet18
config.n_layer = [2, 2, 2, 2]
config.resnet_name = 'resnet18'
config.num_class = 1000
resnet18 = ResNet(config)

# resnet34
config.n_layer = [3, 4, 6, 3]
config.resnet_name = 'resnet34'
resnet34 = ResNet(config)

# resnet50
config.n_layer = [3, 4, 6, 3]
config.resnet_name = 'resnet50'
resnet50 = ResNet(config)

# resnet101
config.n_layer = [3, 4, 23, 3]
config.resnet_name = 'resnet101'
resnet101 = ResNet(config)

# resnet152
config.n_layer = [3, 8, 36, 3]
config.resnet_name = 'resnet152'
resnet152 = ResNet(config)

img = torch.randn(1, 3, 224, 224)
out = resnet18(img) # out [1, num_class]
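
Note that resnet34 and resnet50 share the [3, 4, 6, 3] stage layout; the difference is the block type (basic vs. bottleneck), which resnet_name presumably selects. Comparing parameter counts makes this visible:

# parameter counts should roughly track the standard torchvision variants
for name, m in [('resnet18', resnet18), ('resnet34', resnet34),
                ('resnet50', resnet50), ('resnet101', resnet101),
                ('resnet152', resnet152)]:
    print(name, sum(p.numel() for p in m.parameters()))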

SimCLR

import torch
from mini_simclr import Config, NTXentLoss, SimCLR

config = Config()
config.dim = 128
model = SimCLR(config)
loss_fn = NTXentLoss(config)

img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
q, k = model(img_q, img_k)  # q [2, dim], k [2, dim]
loss = loss_fn(q, k)
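
For reference, NT-Xent treats the 2N projections as one batch and contrasts each view's positive pair against the other 2N - 2 samples. A minimal standalone sketch of the same objective (the temperature tau is a hypothetical parameter here; the repo's NTXentLoss may differ in details):

import torch
import torch.nn.functional as F

def nt_xent(q, k, tau=0.5):
    # q, k: [N, dim] projections of two augmented views
    z = F.normalize(torch.cat([q, k], dim=0), dim=-1)  # [2N, dim]
    sim = z @ z.t() / tau                              # [2N, 2N] similarities
    sim.fill_diagonal_(float('-inf'))                  # mask self-similarity
    n = q.size(0)
    # row i (view 1) pairs with row i + n (view 2), and vice versa
    targets = torch.cat([torch.arange(n) + n, torch.arange(n)])
    return F.cross_entropy(sim, targets)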

SimSiam

import torch
from mini_simsiam import Config, NegCosineLoss, SimSiam

config = Config()
config.dim = 128
model = SimSiam(config)
loss_fn = NegCosineLoss(config)

img_q = torch.randn(2, 3, 224, 224)
img_k = torch.randn(2, 3, 224, 224)
z1, z2, p1, p2 = model(img_q, img_k) # [2, 128]
loss1 = loss_fn(p1, z2)
loss2 = loss_fn(p2, z1)
loss = (loss1 + loss2) / 2
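
NegCosineLoss is presumably the negative cosine similarity with a stop-gradient on the target branch, which is what prevents collapse in SimSiam. A minimal sketch of that form (an assumption about this repo's loss; the stop-gradient may instead live inside the model):

import torch.nn.functional as F

def neg_cosine(p, z):
    # stop-gradient: the target z is treated as a constant
    return -F.cosine_similarity(p, z.detach(), dim=-1).mean()

# loss = (neg_cosine(p1, z2) + neg_cosine(p2, z1)) / 2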

U-Net

import torch
from mini_unet import Config, Unet
from mini_unet.utils import show

config = Config()
config.num_class = 61
model = Unet(config)

img = torch.randn(2, 3, 256, 256)
mask = torch.randint(0, config.num_class, (2, 256, 256)) # dummy class-index mask
out = model(img) # out [2, 256, 256]
show(img[0], mask[0]) # visualize segmentation img

ViT

import torch
from mini_vit import Config, VIT

config = Config()
config.num_class = 1000
model = VIT(config)

img = torch.randn(1, 3, 224, 224)
out = model(img) # out [1, num_class]
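
Under the common ViT setup (an assumption here; the patch size may be configurable), a 224x224 input is split into (224 / 16)^2 = 196 patches, plus one [CLS] token:

patch = 16                            # assumed default patch size
num_patches = (224 // patch) ** 2     # 196 patch tokens
print(num_patches, num_patches + 1)   # 197 tokens including [CLS]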

VQ-VAE

import torch
from mini_vqvae import Config, VQVAE

config = Config()
model = VQVAE(config)

img = torch.randn(1, 3, 224, 224)
embedding_loss, x_hat, perplexity = model(img) # x_hat [1, 3, 224, 224]
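
A typical VQ-VAE training objective adds the returned codebook/commitment term to a reconstruction loss (a sketch of the standard formulation, not necessarily this repo's training script):

import torch.nn.functional as F

recon_loss = F.mse_loss(x_hat, img)  # reconstruction term
loss = recon_loss + embedding_loss   # plus codebook/commitment term
# perplexity tracks codebook usage; higher means more codes are in use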
