
modified_wrn's People

Contributors

bob48523


Forkers

congmonkey

modified_wrn's Issues

parallel

# -*- coding: utf-8 -*-

# License: BSD
# Author: Sasank Chilamkurthy

from __future__ import print_function, division

import argparse
import sys

from models import *
import config_parallel as cf
from preprocess import *

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.backends.cudnn as cudnn

import torchvision
from torchvision import datasets, models, transforms

import matplotlib.pyplot as plt
import time
import copy
import os
import setproctitle
import numpy as np
import random

os.environ["CUDA_VISIBLE_DEVICES"] = "4"

parser = argparse.ArgumentParser(description='PyTorch VGG Training')
parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
parser.add_argument('--save')
parser.add_argument('--testOnly', '-t', action='store_true', help='Test mode with the saved model')
parser.add_argument('--check', '-c', action='store_true', help='Check the saved model')

args = parser.parse_args()
args.save = args.save or './parallelb.base'
if not os.path.exists(args.save):
    os.makedirs(args.save)  # checkpoints and CSV logs are written here
setproctitle.setproctitle(args.save)

use_cuda = torch.cuda.is_available()
best_acc = 0 # best test accuracy
start_epoch = cf.start_epoch # start from epoch 0 or last checkpoint epoch
epochs = cf.num_epochs
######################################################################

# Load Data

data_dir = {
    'cifar': '/ssd/RookieProject/CIFAR100/',
    'tiny': '/ssd/RookieProject/TinyImageNet',
    'vgg': '/ssd/RookieProject/VGGFlowers',
}

# note: despite the name, these augmented transforms are applied to the
# *training* sets below; val_transforms is used for validation
test_transforms = {
    x: transforms.Compose([
        transforms.RandomSizedCrop(64),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[x], cf.std[x])
    ])
    for x in ['cifar', 'tiny', 'vgg']
}

val_transforms = {
    x: transforms.Compose([
        transforms.Scale(72),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[x], cf.std[x])
    ])
    for x in ['cifar', 'tiny', 'vgg']
}

trainset = {
    'cifar': TxtFolder(os.path.join(data_dir['cifar']), 'list_train.txt', test_transforms['cifar']),
    'tiny': datasets.ImageFolder(os.path.join(data_dir['tiny'], 'train'), test_transforms['tiny']),
    'vgg': datasets.ImageFolder(os.path.join(data_dir['vgg'], 'train'), test_transforms['vgg']),
}

valset = {
    'cifar': TxtFolder(os.path.join(data_dir['cifar']), 'list_val.txt', val_transforms['cifar']),
    'tiny': datasets.ImageFolder(os.path.join(data_dir['tiny'], 'val'), val_transforms['tiny']),
    'vgg': datasets.ImageFolder(os.path.join(data_dir['vgg'], 'val'), val_transforms['vgg']),
}

trainloader = {
    x: torch.utils.data.DataLoader(trainset[x], batch_size=cf.batch_size[x],
                                   shuffle=True, num_workers=4)
    for x in ['cifar', 'tiny', 'vgg']
}

valloader = {
    x: torch.utils.data.DataLoader(valset[x], batch_size=cf.batch_size[x],
                                   shuffle=False, num_workers=4)
    for x in ['cifar', 'tiny', 'vgg']
}

use_gpu = torch.cuda.is_available()  # alias of use_cuda above

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
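
# Usage sketch (illustrative, not part of the original script): update() takes a
# batch statistic and the batch size, and .avg tracks the running sample-weighted mean:
#   meter = AverageMeter()
#   meter.update(0.75, n=32)   # first batch: accuracy 0.75 over 32 samples
#   meter.update(0.50, n=16)   # second batch: accuracy 0.50 over 16 samples
#   meter.avg == (0.75*32 + 0.50*16) / 48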

if (args.check):
    print('\n[Check Phase] : Model setup')
    checkpoint = torch.load(os.path.join(args.save, 'cifar.t7'))
    net = checkpoint['net']
    if use_gpu:
        model_ft = net.cuda()
        model_ft = torch.nn.DataParallel(net, device_ids=[0])
        cudnn.benchmark = True
    CheckF = open(os.path.join(args.save, 'weight.txt'), 'w')
    CheckF.write('{}\n'.format(net))

    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            CheckF.write('{}\n{}\n'.format(m, m.weight.data))
        elif isinstance(m, nn.Linear):
            CheckF.write('{}\n{}\n'.format(m, m.weight.data))

    CheckF.close()
    sys.exit(0)

######################################################################

# Training the model

if (args.testOnly):
    print('\n[Test Phase] : Model setup')
    checkpoint = torch.load(os.path.join(args.save, 'cifar.t7'))
    net = checkpoint['net']

    classes = {'cifar': 100,
               'tiny': 1000,
               'vgg': 102}
    if use_cuda:
        net.cuda()
        # net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
        # cudnn.benchmark = True
    net.eval()
    trainW = {x: open(os.path.join(args.save, 'class_' + x + '.csv'), 'w') for x in ['cifar','tiny','vgg']}

    for data_name in ['cifar','tiny','vgg']:
        correct = 0
        total = 0

        # per-class accuracy bookkeeping
        class_correct = list(0. for i in range(classes[data_name]))
        class_total = list(0. for i in range(classes[data_name]))
        for batch_idx, (inputs, targets) in enumerate(valloader[data_name]):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            target = targets
            inputs, targets = Variable(inputs, volatile=True), Variable(targets)
            cifaro, tinyo, vggo = net(inputs, inputs, inputs)

            if data_name == 'cifar':
                outputs = cifaro
            elif data_name == 'tiny':
                outputs = tinyo
            elif data_name == 'vgg':
                outputs = vggo

            _, predicted = torch.max(outputs.data, 1)
            c = (predicted == target).squeeze()
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            for i in range(len(targets)):
                label = target[i]
                class_correct[label] += c[i]
                class_total[label] += 1

        for i in range(classes[data_name]):
            trainW[data_name].write('{},{},{},{}\n'.format(i, class_correct[i], class_total[i], class_correct[i]/class_total[i]))
            trainW[data_name].flush()
        trainW[data_name].close()

        acc = 100.*correct/total
        print("| Test " + data_name + " Result\tAcc@1: %.2f%%" % (acc))

    sys.exit(0)

def train(net, criterion, epoch, trainF):
    print('\nEpoch: %d' % epoch)
    net.train()
    top1 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    top5 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    losses = {x: AverageMeter() for x in ['cifar','tiny','vgg']}

    total = 0
    # per-task loss weights
    k = {
        'cifar': 1,
        'tiny': 0.5,
        'vgg': 0.5,
    }

    N = len(trainloader['cifar'])
    idx = [i for i in range(N)]
    optimizer = optim.SGD(net.parameters(), lr=cf.adjust_rate(epoch), momentum=cf.momentum, weight_decay=1e-4)
    dataiter = {x: iter(trainloader[x]) for x in ['cifar','tiny','vgg']}
    for batch_idx in idx:

        # draw one batch from each of the three datasets in lockstep
        inputs = {}
        targets = {}
        for x in ['cifar','tiny','vgg']:
            inputs[x], targets[x] = next(dataiter[x])
            if use_cuda:
                inputs[x], targets[x] = inputs[x].cuda(), targets[x].cuda()

        optimizer.zero_grad()
        target = {x: targets[x] for x in ['cifar','tiny','vgg']}
        inputs = {x: Variable(inputs[x]) for x in ['cifar','tiny','vgg']}
        targets = {x: Variable(targets[x]) for x in ['cifar','tiny','vgg']}

        cifaro, tinyo, vggo = net(inputs['cifar'], inputs['tiny'], inputs['vgg'])
        outputs = {
            'cifar': cifaro,
            'tiny': tinyo,
            'vgg': vggo,
        }

        # weighted sum of the three task losses
        loss = k['cifar']*criterion(outputs['cifar'], targets['cifar']) +\
               k['tiny']*criterion(outputs['tiny'], targets['tiny']) +\
               k['vgg']*criterion(outputs['vgg'], targets['vgg'])

        loss.backward()
        optimizer.step()

        for x in ['cifar','tiny','vgg']:
            _, predicted = torch.max(outputs[x].data, 1)
            total += targets[x].size(0)
            prec1, prec5 = accuracy(outputs[x].data, target[x], topk=(1, 5))
            top1[x].update(prec1[0], inputs[x].size(0))
            top5[x].update(prec5[0], inputs[x].size(0))
            # the combined multi-task loss is logged for each task
            losses[x].update(loss.data[0], inputs[x].size(0))

            print('Train Epoch: [{}/{} ({:.0f}%)]\t'.format(batch_idx, N, 100. * batch_idx / N))
            print(x + ' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
                  .format(top1=top1[x], top5=top5[x]))

    for x in ['cifar','tiny','vgg']:
        trainF[x].write('{},{},{},{}\n'.format(epoch, losses[x].avg, top1[x].avg, top5[x].avg))
        trainF[x].flush()

def test(net, epoch, testF):
    global best_acc
    net.eval()
    top1 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    top5 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    losses = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    # weights for combining the three per-task error rates
    k = {
        'cifar': 0.33,
        'tiny': 0.33,
        'vgg': (1-0.66),
    }
    err_total = 0
    for data_name in ['cifar','tiny','vgg']:
        test_loss = 0
        incorrect = 0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(valloader[data_name]):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            target = targets
            inputs, targets = Variable(inputs, volatile=True), Variable(targets)
            cifaro, tinyo, vggo = net(inputs, inputs, inputs)

            if data_name == 'cifar':
                outputs = cifaro
            elif data_name == 'tiny':
                outputs = tinyo
            elif data_name == 'vgg':
                outputs = vggo

            loss = criterion(outputs, targets)

            test_loss += loss.data[0]
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
            incorrect += predicted.ne(targets.data).cpu().sum()
            prec1, prec5 = accuracy(outputs.data, target, topk=(1, 5))
            top1[data_name].update(prec1[0], inputs.size(0))
            top5[data_name].update(prec5[0], inputs.size(0))

        nTotal = len(valloader[data_name].dataset)
        err = 100.*incorrect/nTotal
        test_loss /= len(valloader[data_name])
        err_total += k[data_name]*err

        print('\nTest set: Average loss: {:.4f}, Error: {}/{} ({:.0f}%)\n'.format(
            test_loss, incorrect, nTotal, err))
        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
              .format(top1=top1[data_name], top5=top5[data_name]))
        testF[data_name].write('{},{},{},{}\n'.format(epoch, test_loss, top1[data_name].avg, top5[data_name].avg))
        testF[data_name].flush()

    return err_total


######################################################################
# Top-k accuracy helper

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
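
# Worked example (illustrative): with output = [[0.1, 0.7, 0.2]] and target = [2],
# the top-1 prediction is class 1 (wrong) while class 2 falls inside the top 2,
# so accuracy(output, target, topk=(1, 2)) returns [0.0, 100.0].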

######################################################################

# Model

if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load(os.path.join(args.save, 'cifar.t7'))
    model_ft = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
else:
    print('==> Building new model..')
    # heads sized to match the classes dict above: 100 / 1000 / 102
    model_ft = Resnet_sen_share(PreActBlock, [2,2,2,2], 100, 1000, 102)
    #net = ResNet34()
print('Number of params: {}'.format(
    sum([p.data.nelement() for p in model_ft.parameters()])))

if use_gpu:
    model_ft = model_ft.cuda()
    model_ft = torch.nn.DataParallel(model_ft, device_ids=[0])
    cudnn.benchmark = True

criterion = nn.CrossEntropyLoss()

######################################################################

# Train and evaluate

if args.resume:
    trainF = {x: open(os.path.join(args.save, 'train_' + x + '.csv'), 'a')
              for x in ['cifar', 'tiny', 'vgg']}
    testF = {x: open(os.path.join(args.save, 'test_' + x + '.csv'), 'a')
             for x in ['cifar', 'tiny', 'vgg']}
else:
    trainF = {x: open(os.path.join(args.save, 'train_' + x + '.csv'), 'w')
              for x in ['cifar', 'tiny', 'vgg']}
    testF = {x: open(os.path.join(args.save, 'test_' + x + '.csv'), 'w')
             for x in ['cifar', 'tiny', 'vgg']}

elapsed_time = 0
for epoch in range(start_epoch, start_epoch+epochs):
    start_time = time.time()
    train(model_ft, criterion, epoch, trainF)
    err = test(model_ft, epoch, testF)
    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

    # Save checkpoint: keep the latest model, and the best model so far.
    state = {
        'net': model_ft.module if use_cuda else model_ft,
        'acc': 100-err,
        'epoch': epoch,
    }
    print(100-err)
    print(best_acc)
    torch.save(state, os.path.join(args.save, 'cifar_latest.t7'))
    if (100-err) > best_acc:
        torch.save(state, os.path.join(args.save, 'cifar.t7'))
        best_acc = 100-err

for x in ['cifar', 'tiny', 'vgg']:
    trainF[x].close()
    testF[x].close()
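
# Plausible invocations given the flags above (script name and paths illustrative):
#   python parallel.py --save ./parallelb.base        # train from scratch
#   python parallel.py --save ./parallelb.base -r     # resume from cifar.t7
#   python parallel.py --save ./parallelb.base -t     # evaluate the saved model
#   python parallel.py --save ./parallelb.base -c     # dump weights to weight.txt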

error

https://discuss.pytorch.org/t/how-to-perform-finetuning-in-pytorch/419/7

ignored_params = list(map(id, model.fc.parameters()))
base_params = filter(lambda p: id(p) not in ignored_params,
                     model.parameters())

optimizer = torch.optim.SGD([
    {'params': base_params},
    {'params': model.fc.parameters(), 'lr': opt.lr}
], lr=opt.lr*0.1, momentum=0.9)

TypeError: optimizer can only optimize Variables, but one of the params is int
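 
The usual cause of this error is handing the optimizer something that is not a parameter: for example passing the ignored_params list of id() integers into a param group, or letting the filter iterator be consumed before the optimizer reads it. A minimal sketch of a construction that avoids both pitfalls, assuming a model with a final fc layer (the helper name and lr value are illustrative):

import torch

def make_finetune_optimizer(model, lr=0.01):
    # ids are used only for membership tests; they are never handed to the optimizer
    fc_param_ids = set(id(p) for p in model.fc.parameters())
    # materialize as a list so the iterable cannot be silently exhausted
    base_params = [p for p in model.parameters() if id(p) not in fc_param_ids]
    return torch.optim.SGD([
        {'params': base_params},                      # pretrained backbone: reduced lr
        {'params': model.fc.parameters(), 'lr': lr},  # new head: full lr
    ], lr=lr * 0.1, momentum=0.9)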

A second error, raised while generating the Caffe network below: the test "if stride!=1 or bottom.shape[1] != num_filter*widen_factor:" fails with AttributeError: 'Top' object has no attribute 'shape', because Caffe NetSpec layer outputs (Tops) carry no shape information at graph-construction time.
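
Since NetSpec Tops expose no shape, one workaround is to thread the channel count through the builder by hand; the make_resnext script later on this page instead flags each stage's first block with a first argument. A sketch of the channel-tracking variant, reusing the shortcut and preactbottleneck helpers from the script below (the extra in_channels parameter and the tuple return are illustrative):

from caffe import layers as L

def add_layer(bottom, in_channels, num_filter, stride):
    widen_factor = 4
    out_channels = num_filter * widen_factor
    # explicit bookkeeping replaces the unavailable bottom.shape[1] test
    if stride != 1 or in_channels != out_channels:
        x = shortcut(bottom, nout=out_channels, stride=stride)
    else:
        x = bottom

    conv = preactbottleneck(bottom, ks=1, nout=num_filter, stride=1, pad=0)
    conv = preactbottleneck(conv, ks=3, nout=num_filter, stride=stride, pad=1, groups=16)
    conv = preactbottleneck(conv, ks=1, nout=out_channels, stride=1, pad=0, groups=16)

    out = L.Eltwise(x, conv)
    return out, out_channels  # the caller carries the channel count forward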

# -*- coding: utf-8 -*-

"""
Created on Sat Aug 19 15:19:32 2017

@author: chenjiaxu
"""

from __future__ import print_function

import math
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe
#model = L.Eltwise(model, conv1,operation = 'SUM')
def bn_relu_conv(bottom, ks, nout, stride, pad, dropout):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    if dropout > 0:
        conv = L.Dropout(conv, dropout_ratio=dropout)
    return conv

def preactbottleneck(bottom, ks, nout, stride, pad, groups=1):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride, group=groups,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def shortcut(bottom, nout, stride):
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def add_layer(bottom, num_filter, stride):
    widen_factor = 4

    # BUG: NetSpec Tops carry no .shape attribute, so this test raises the
    # AttributeError quoted above; see the channel-tracking sketch earlier,
    # or the first-flag fix in the make_resnext script further down
    if stride != 1 or bottom.shape[1] != num_filter*widen_factor:
        x = shortcut(bottom, nout=num_filter*widen_factor, stride=stride)
    else:
        x = bottom

    conv = preactbottleneck(bottom, ks=1, nout=num_filter, stride=1, pad=0, groups=1)
    conv = preactbottleneck(conv, ks=3, nout=num_filter, stride=stride, pad=1, groups=16)
    conv = preactbottleneck(conv, ks=1, nout=num_filter*widen_factor, stride=1, pad=0, groups=16)

    out = L.Eltwise(x, conv)
    return out

def transition(bottom, num_filter, dropout):
    conv = bn_relu_conv(bottom, ks=1, nout=num_filter, stride=1, pad=0, dropout=dropout)
    pooling = L.Pooling(conv, pool=P.Pooling.AVE, kernel_size=2, stride=2)
    return pooling

# change the lines below to experiment with different settings
# depth -- number of residual blocks per stage
# width -- base channel count per stage
def resnetx(data_file, mode='train', batch_size=64, depth=[3,4,6,3], width=[32,64,128,256]):
    data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(mean_file="/data1/zuotongchun/cifar100_mean/cifar100_train_all_mean.binaryproto"))

    nchannels = 32
    model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels,
                          pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])

    # four stages; the first block of each later stage downsamples with stride 2
    strides = [1] + [1]*(depth[0]-1)
    for stride in strides:
        model = add_layer(model, width[0], stride)

    strides = [2] + [1]*(depth[1]-1)
    for stride in strides:
        model = add_layer(model, width[1], stride)

    strides = [2] + [1]*(depth[2]-1)
    for stride in strides:
        model = add_layer(model, width[2], stride)

    strides = [2] + [1]*(depth[3]-1)
    for stride in strides:
        model = add_layer(model, width[3], stride)

    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)

def make_net():
    with open('train-resnetx-BC-50.prototxt', 'w') as f:
        # change the path to your data; if it's not in LMDB format, also change the L.Data line in resnetx()
        print(str(resnetx('/data1/zuotongchun/cifar10_train_all_lmdb', batch_size=64)), file=f)

    with open('test-resnetx-BC-50.prototxt', 'w') as f:
        print(str(resnetx('/data1/zuotongchun/cifar10_test_lmdb', batch_size=50)), file=f)

def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train-resnetx-BC-50.prototxt'
    # note: this evaluates on the training prototxt; point it at
    # 'test-resnetx-BC-50.prototxt' to score the held-out set
    s.test_net.append('train-resnetx-BC-50.prototxt')
    s.test_interval = 200
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 100

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-1

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # add by sdh
    s.device_id = 6  # 2 or 3
    s.snapshot = 1000
    s.snapshot_prefix = 'snapshot/densenet-BC-169'

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
        f.write('\n')

if __name__ == '__main__':
    make_net()
    make_solver()

make_wrn_inception

from __future__ import print_function

import math
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe

def bn_relu_conv(bottom, ks, nout, stride, pad, dropout):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    if dropout > 0:
        conv = L.Dropout(conv, dropout_ratio=dropout)
    return conv

def inception(bottom, stride, nout):
    outplane1 = nout // 2
    outplane2 = nout - outplane1

    # pad=1 on the 3x3 convs keeps spatial size, so the two branches can be
    # concatenated and Eltwise-added to the shortcut (the original pad=0 would
    # shrink branch1 by 4 pixels and branch2 by 2, and the Concat would fail)
    conv1 = bn_relu_conv(bottom, ks=1, nout=64, stride=stride, pad=0, dropout=0)
    conv1 = bn_relu_conv(conv1, ks=3, nout=outplane1, stride=1, pad=1, dropout=0)
    branch1 = bn_relu_conv(conv1, ks=3, nout=outplane1, stride=1, pad=1, dropout=0)

    conv2 = bn_relu_conv(bottom, ks=1, nout=48, stride=stride, pad=0, dropout=0)
    branch2 = bn_relu_conv(conv2, ks=3, nout=outplane2, stride=1, pad=1, dropout=0)

    concate = L.Concat(branch1, branch2, axis=1)

    return concate

def shortcut(bottom, nout, stride):
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def add_layer(bottom, num_filter, stride, first):
    if stride != 1 or first == 1:
        x = shortcut(bottom, nout=num_filter, stride=stride)
    else:
        x = bottom

    conv = inception(bottom, stride=stride, nout=num_filter)

    out = L.Eltwise(x, conv)
    return out

def transition(bottom, num_filter, dropout):
    conv = bn_relu_conv(bottom, ks=1, nout=num_filter, stride=1, pad=0, dropout=dropout)
    pooling = L.Pooling(conv, pool=P.Pooling.AVE, kernel_size=2, stride=2)
    return pooling

# change the lines below to experiment with different settings
# depth -- number of residual blocks per stage
# widen_factor -- channel multiplier k of the wide residual network
# dropout -- set to 0 to disable dropout, a non-zero value to set the dropout rate
def wrnnet(data_file, mode='train', batch_size=64, depth=[3,4,6,3], widen_factor=4, dropout=0.3):
    data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(mean_file="/data1/zuotongchun/cifar100_mean/cifar100_train_all_mean.binaryproto"))

    nchannels = 64
    k = widen_factor
    width = [64*k, 128*k, 256*k]
    model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels,
                          pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)

    # three stages; first forces a projection shortcut on each stage's first block
    strides = [1] + [1]*(depth[0]-1)
    first = 1
    for stride in strides:
        model = add_layer(model, width[0], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[1]-1)
    for stride in strides:
        model = add_layer(model, width[1], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[2]-1)
    for stride in strides:
        model = add_layer(model, width[2], stride, first)
        first = 0

    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)

def make_net():
    with open('train-wrn-inception-250.prototxt', 'w') as f:
        # change the path to your data; if it's not in LMDB format, also change the L.Data line in wrnnet()
        print(str(wrnnet('/data1/zuotongchun/cifar100_train_all_lmdb', batch_size=64)), file=f)

    with open('test-wrn-inception-250.prototxt', 'w') as f:
        print(str(wrnnet('/data1/zuotongchun/cifar100_test_lmdb', batch_size=50)), file=f)

def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train-wrn-inception-250.prototxt'
    # note: this evaluates on the training prototxt; point it at
    # 'test-wrn-inception-250.prototxt' to score the held-out set
    s.test_net.append('train-wrn-inception-250.prototxt')
    s.test_interval = 200
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 100

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-1

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # add by sdh
    s.device_id = 6  # 2 or 3
    s.snapshot = 1000
    s.snapshot_prefix = 'snapshot/densenet-BC-169'

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
        f.write('\n')

if __name__ == '__main__':
    make_net()
    make_solver()

?

import torch
import torch.nn as nn
import torch.nn.functional as F

FLOPS = 0  # module-level multiply-accumulate counter, updated in forward()

class PreActBottleneck_p(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    widen_factor = 4

    def __init__(self, in_planes, planes, cardinality, stride=1):
        super(PreActBottleneck_p, self).__init__()

        self.inplanes = in_planes
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)

        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)

        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*self.widen_factor, kernel_size=1, bias=False)

        self.bn4 = nn.BatchNorm2d(planes*self.widen_factor)

        if stride != 1:
            self.shortcut = nn.Sequential(
                nn.AvgPool2d(kernel_size=2, stride=2)
            )

    def forward(self, x):
        global FLOPS
        out = self.bn1(x)
        shortcut = self.shortcut(x) if hasattr(self, 'shortcut') else x

        out = self.conv1(out)
        FLOPS += self.conv1.in_channels*self.conv1.out_channels*self.conv1.kernel_size[0]*self.conv1.kernel_size[1]*out.size(2)*out.size(3)

        out = self.conv2(F.relu(self.bn2(out)))
        temp = self.conv2.in_channels*self.conv2.out_channels*self.conv2.kernel_size[0]*self.conv2.kernel_size[1]*out.size(2)*out.size(3)
        FLOPS += temp // self.conv2.groups

        out = self.conv3(F.relu(self.bn3(out)))
        FLOPS += self.conv3.in_channels*self.conv3.out_channels*self.conv3.kernel_size[0]*self.conv3.kernel_size[1]*out.size(2)*out.size(3)

        out = self.bn4(out)
        # residual add on the first inplanes channels; the extra channels pass through
        out = torch.cat([shortcut[:,:self.inplanes,:,:]+out[:,:self.inplanes,:,:], out[:,self.inplanes:,:,:]], 1)
        return out
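
The forward pass above accumulates multiply-accumulate counts into the module-level FLOPS counter as a side effect of running data through the block. A minimal driver sketch, assuming the imports and FLOPS global added above and a CIFAR-sized input (all values illustrative):

from torch.autograd import Variable

# exercise one block and read back the accumulated op count
block = PreActBottleneck_p(in_planes=64, planes=64, cardinality=16, stride=1)
x = Variable(torch.randn(1, 64, 32, 32))
out = block(x)
print('output size:', out.size())       # (1, 256, 32, 32) with widen_factor=4
print('accumulated FLOPS:', FLOPS)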

make_resnext

# -*- coding: utf-8 -*-

"""
Created on Sat Aug 19 15:19:32 2017

@author: chenjiaxu
"""

from __future__ import print_function

import math
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe
#model = L.Eltwise(model, conv1,operation = 'SUM')
def bn_relu_conv(bottom, ks, nout, stride, pad, dropout):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    if dropout > 0:
        conv = L.Dropout(conv, dropout_ratio=dropout)
    return conv

def preactbottleneck(bottom, ks, nout, stride, pad, groups=1):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride, group=groups,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def shortcut(bottom, nout, stride):
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def add_layer(bottom, num_filter, stride, first):
    widen_factor = 4

    # the first flag replaces the bottom.shape[1] test that NetSpec Tops cannot support
    if stride != 1 or first == 1:
        x = shortcut(bottom, nout=num_filter*widen_factor, stride=stride)
    else:
        x = bottom

    conv = preactbottleneck(bottom, ks=1, nout=num_filter, stride=1, pad=0, groups=1)
    conv = preactbottleneck(conv, ks=3, nout=num_filter, stride=stride, pad=1, groups=16)
    conv = preactbottleneck(conv, ks=1, nout=num_filter*widen_factor, stride=1, pad=0, groups=16)

    out = L.Eltwise(x, conv)
    return out

def transition(bottom, num_filter, dropout):
    conv = bn_relu_conv(bottom, ks=1, nout=num_filter, stride=1, pad=0, dropout=dropout)
    pooling = L.Pooling(conv, pool=P.Pooling.AVE, kernel_size=2, stride=2)
    return pooling

# change the lines below to experiment with different settings
# depth -- number of residual blocks per stage
# width -- base channel count per stage
def resnetx(data_file, mode='train', batch_size=64, depth=[3,4,6,3], width=[32,64,128,256]):
    data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(mean_file="/data1/zuotongchun/cifar100_mean/cifar100_train_all_mean.binaryproto"))

    nchannels = 32
    model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels,
                          pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])

    # four stages; first forces a projection shortcut on each stage's first block
    strides = [1] + [1]*(depth[0]-1)
    first = 1
    for stride in strides:
        model = add_layer(model, width[0], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[1]-1)
    for stride in strides:
        model = add_layer(model, width[1], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[2]-1)
    for stride in strides:
        model = add_layer(model, width[2], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[3]-1)
    for stride in strides:
        model = add_layer(model, width[3], stride, first)
        first = 0

    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)

def make_net():
    with open('train-resnetx-BC-50.prototxt', 'w') as f:
        # change the path to your data; if it's not in LMDB format, also change the L.Data line in resnetx()
        print(str(resnetx('/data1/zuotongchun/cifar10_train_all_lmdb', batch_size=64)), file=f)

    with open('test-resnetx-BC-50.prototxt', 'w') as f:
        print(str(resnetx('/data1/zuotongchun/cifar10_test_lmdb', batch_size=50)), file=f)

def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train-resnetx-BC-50.prototxt'
    # note: this evaluates on the training prototxt; point it at
    # 'test-resnetx-BC-50.prototxt' to score the held-out set
    s.test_net.append('train-resnetx-BC-50.prototxt')
    s.test_interval = 200
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 100

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-1

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # add by sdh
    s.device_id = 6  # 2 or 3
    s.snapshot = 1000
    s.snapshot_prefix = 'snapshot/densenet-BC-169'

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
        f.write('\n')

if __name__ == '__main__':
    make_net()
    make_solver()
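
For context, a hedged sketch of how these generator scripts are typically driven from Python, assuming a working pycaffe installation (an alternative to the caffe train -solver solver.prototxt command line):

import caffe

caffe.set_mode_gpu()
make_net()                                    # writes the train/test prototxts
make_solver()                                 # writes solver.prototxt
solver = caffe.get_solver('solver.prototxt')
solver.solve()                                # trains to max_iter, snapshotting periodically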
