
modified_wrn's People

Contributors

bob48523


Forkers

congmonkey

modified_wrn's Issues

parallel

# -*- coding: utf-8 -*-

# License: BSD
# Author: Sasank Chilamkurthy

from __future__ import print_function, division

import argparse
import sys

from models import *
import config_parallel as cf
from preprocess import *

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.backends.cudnn as cudnn

import torchvision
from torchvision import datasets, models, transforms

import matplotlib.pyplot as plt
import time
import copy
import os
import setproctitle
import numpy as np
import random

os.environ["CUDA_VISIBLE_DEVICES"] = "4"

parser = argparse.ArgumentParser(description='PyTorch VGG Training')
parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
parser.add_argument('--save')
parser.add_argument('--testOnly', '-t', action='store_true', help='Test mode with the saved model')
parser.add_argument('--check', '-c', action='store_true', help='Check the saved model')

args = parser.parse_args()
args.save = args.save or './parallelb.base'
if not os.path.exists(args.save):
    os.makedirs(args.save)  # checkpoints and CSV logs are written here
setproctitle.setproctitle(args.save)

use_cuda = torch.cuda.is_available()
best_acc = 0 # best test accuracy
start_epoch = cf.start_epoch # start from epoch 0 or last checkpoint epoch
epochs = cf.num_epochs
######################################################################

# Load Data

data_dir = {
    'cifar': '/ssd/RookieProject/CIFAR100/',
    'tiny': '/ssd/RookieProject/TinyImageNet',
    'vgg': '/ssd/RookieProject/VGGFlowers',
}

# note: despite the name, these augmented transforms are applied to the
# *training* sets below; val_transforms is used for validation
test_transforms = {
    x: transforms.Compose([
        transforms.RandomSizedCrop(64),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[x], cf.std[x])
    ])
    for x in ['cifar', 'tiny', 'vgg']
}

val_transforms = {
    x: transforms.Compose([
        transforms.Scale(72),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[x], cf.std[x])
    ])
    for x in ['cifar', 'tiny', 'vgg']
}

trainset = {
    'cifar': TxtFolder(os.path.join(data_dir['cifar']), 'list_train.txt', test_transforms['cifar']),
    'tiny': datasets.ImageFolder(os.path.join(data_dir['tiny'], 'train'), test_transforms['tiny']),
    'vgg': datasets.ImageFolder(os.path.join(data_dir['vgg'], 'train'), test_transforms['vgg']),
}

valset = {
    'cifar': TxtFolder(os.path.join(data_dir['cifar']), 'list_val.txt', val_transforms['cifar']),
    'tiny': datasets.ImageFolder(os.path.join(data_dir['tiny'], 'val'), val_transforms['tiny']),
    'vgg': datasets.ImageFolder(os.path.join(data_dir['vgg'], 'val'), val_transforms['vgg']),
}

trainloader = {
    x: torch.utils.data.DataLoader(trainset[x], batch_size=cf.batch_size[x],
                                   shuffle=True, num_workers=4)
    for x in ['cifar', 'tiny', 'vgg']
}

valloader = {
    x: torch.utils.data.DataLoader(valset[x], batch_size=cf.batch_size[x],
                                   shuffle=False, num_workers=4)
    for x in ['cifar', 'tiny', 'vgg']
}

use_gpu = torch.cuda.is_available()  # alias of use_cuda above

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
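
# Usage sketch (illustrative, not part of the original script): update() takes a
# batch statistic and the batch size, and .avg tracks the running sample-weighted mean:
#   meter = AverageMeter()
#   meter.update(0.75, n=32)   # first batch: accuracy 0.75 over 32 samples
#   meter.update(0.50, n=16)   # second batch: accuracy 0.50 over 16 samples
#   meter.avg == (0.75*32 + 0.50*16) / 48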

if (args.check):
    print('\n[Check Phase] : Model setup')
    checkpoint = torch.load(os.path.join(args.save, 'cifar.t7'))
    net = checkpoint['net']
    if use_gpu:
        model_ft = net.cuda()
        model_ft = torch.nn.DataParallel(net, device_ids=[0])
        cudnn.benchmark = True
    CheckF = open(os.path.join(args.save, 'weight.txt'), 'w')
    CheckF.write('{}\n'.format(net))

    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            CheckF.write('{}\n{}\n'.format(m, m.weight.data))
        elif isinstance(m, nn.Linear):
            CheckF.write('{}\n{}\n'.format(m, m.weight.data))

    CheckF.close()
    sys.exit(0)

######################################################################

# Training the model

if (args.testOnly):
    print('\n[Test Phase] : Model setup')
    checkpoint = torch.load(os.path.join(args.save, 'cifar.t7'))
    net = checkpoint['net']

    classes = {'cifar': 100,
               'tiny': 1000,
               'vgg': 102}
    if use_cuda:
        net.cuda()
        # net = torch.nn.DataParallel(net, device_ids=range(torch.cuda.device_count()))
        # cudnn.benchmark = True
    net.eval()
    trainW = {x: open(os.path.join(args.save, 'class_' + x + '.csv'), 'w') for x in ['cifar','tiny','vgg']}

    for data_name in ['cifar','tiny','vgg']:
        correct = 0
        total = 0

        # per-class accuracy bookkeeping
        class_correct = list(0. for i in range(classes[data_name]))
        class_total = list(0. for i in range(classes[data_name]))
        for batch_idx, (inputs, targets) in enumerate(valloader[data_name]):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            target = targets
            inputs, targets = Variable(inputs, volatile=True), Variable(targets)
            cifaro, tinyo, vggo = net(inputs, inputs, inputs)

            if data_name == 'cifar':
                outputs = cifaro
            elif data_name == 'tiny':
                outputs = tinyo
            elif data_name == 'vgg':
                outputs = vggo

            _, predicted = torch.max(outputs.data, 1)
            c = (predicted == target).squeeze()
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()

            for i in range(len(targets)):
                label = target[i]
                class_correct[label] += c[i]
                class_total[label] += 1

        for i in range(classes[data_name]):
            trainW[data_name].write('{},{},{},{}\n'.format(i, class_correct[i], class_total[i], class_correct[i]/class_total[i]))
            trainW[data_name].flush()
        trainW[data_name].close()

        acc = 100.*correct/total
        print("| Test " + data_name + " Result\tAcc@1: %.2f%%" % (acc))

    sys.exit(0)

def train(net, criterion, epoch, trainF):
    print('\nEpoch: %d' % epoch)
    net.train()
    top1 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    top5 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    losses = {x: AverageMeter() for x in ['cifar','tiny','vgg']}

    total = 0
    # per-task loss weights
    k = {
        'cifar': 1,
        'tiny': 0.5,
        'vgg': 0.5,
    }

    N = len(trainloader['cifar'])
    idx = [i for i in range(N)]
    optimizer = optim.SGD(net.parameters(), lr=cf.adjust_rate(epoch), momentum=cf.momentum, weight_decay=1e-4)
    dataiter = {x: iter(trainloader[x]) for x in ['cifar','tiny','vgg']}
    for batch_idx in idx:

        # draw one batch from each of the three datasets in lockstep
        inputs = {}
        targets = {}
        for x in ['cifar','tiny','vgg']:
            inputs[x], targets[x] = next(dataiter[x])
            if use_cuda:
                inputs[x], targets[x] = inputs[x].cuda(), targets[x].cuda()

        optimizer.zero_grad()
        target = {x: targets[x] for x in ['cifar','tiny','vgg']}
        inputs = {x: Variable(inputs[x]) for x in ['cifar','tiny','vgg']}
        targets = {x: Variable(targets[x]) for x in ['cifar','tiny','vgg']}

        cifaro, tinyo, vggo = net(inputs['cifar'], inputs['tiny'], inputs['vgg'])
        outputs = {
            'cifar': cifaro,
            'tiny': tinyo,
            'vgg': vggo,
        }

        # weighted sum of the three task losses
        loss = k['cifar']*criterion(outputs['cifar'], targets['cifar']) +\
               k['tiny']*criterion(outputs['tiny'], targets['tiny']) +\
               k['vgg']*criterion(outputs['vgg'], targets['vgg'])

        loss.backward()
        optimizer.step()

        for x in ['cifar','tiny','vgg']:
            _, predicted = torch.max(outputs[x].data, 1)
            total += targets[x].size(0)
            prec1, prec5 = accuracy(outputs[x].data, target[x], topk=(1, 5))
            top1[x].update(prec1[0], inputs[x].size(0))
            top5[x].update(prec5[0], inputs[x].size(0))
            # the combined multi-task loss is logged for each task
            losses[x].update(loss.data[0], inputs[x].size(0))

            print('Train Epoch: [{}/{} ({:.0f}%)]\t'.format(batch_idx, N, 100. * batch_idx / N))
            print(x + ' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
                  .format(top1=top1[x], top5=top5[x]))

    for x in ['cifar','tiny','vgg']:
        trainF[x].write('{},{},{},{}\n'.format(epoch, losses[x].avg, top1[x].avg, top5[x].avg))
        trainF[x].flush()

def test(net, epoch, testF):
    global best_acc
    net.eval()
    top1 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    top5 = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    losses = {x: AverageMeter() for x in ['cifar','tiny','vgg']}
    # weights for combining the three per-task error rates
    k = {
        'cifar': 0.33,
        'tiny': 0.33,
        'vgg': (1-0.66),
    }
    err_total = 0
    for data_name in ['cifar','tiny','vgg']:
        test_loss = 0
        incorrect = 0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(valloader[data_name]):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            target = targets
            inputs, targets = Variable(inputs, volatile=True), Variable(targets)
            cifaro, tinyo, vggo = net(inputs, inputs, inputs)

            if data_name == 'cifar':
                outputs = cifaro
            elif data_name == 'tiny':
                outputs = tinyo
            elif data_name == 'vgg':
                outputs = vggo

            loss = criterion(outputs, targets)

            test_loss += loss.data[0]
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
            incorrect += predicted.ne(targets.data).cpu().sum()
            prec1, prec5 = accuracy(outputs.data, target, topk=(1, 5))
            top1[data_name].update(prec1[0], inputs.size(0))
            top5[data_name].update(prec5[0], inputs.size(0))

        nTotal = len(valloader[data_name].dataset)
        err = 100.*incorrect/nTotal
        test_loss /= len(valloader[data_name])
        err_total += k[data_name]*err

        print('\nTest set: Average loss: {:.4f}, Error: {}/{} ({:.0f}%)\n'.format(
            test_loss, incorrect, nTotal, err))
        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'
              .format(top1=top1[data_name], top5=top5[data_name]))
        testF[data_name].write('{},{},{},{}\n'.format(epoch, test_loss, top1[data_name].avg, top5[data_name].avg))
        testF[data_name].flush()

    return err_total


######################################################################
# Top-k accuracy helper

def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
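
# Worked example (illustrative): with output = [[0.1, 0.7, 0.2]] and target = [2],
# the top-1 prediction is class 1 (wrong) while class 2 falls inside the top 2,
# so accuracy(output, target, topk=(1, 2)) returns [0.0, 100.0].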

######################################################################

# Model

if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    checkpoint = torch.load(os.path.join(args.save, 'cifar.t7'))
    model_ft = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
else:
    print('==> Building new model..')
    # heads sized to match the classes dict above: 100 / 1000 / 102
    model_ft = Resnet_sen_share(PreActBlock, [2,2,2,2], 100, 1000, 102)
    #net = ResNet34()
print('Number of params: {}'.format(
    sum([p.data.nelement() for p in model_ft.parameters()])))

if use_gpu:
    model_ft = model_ft.cuda()
    model_ft = torch.nn.DataParallel(model_ft, device_ids=[0])
    cudnn.benchmark = True

criterion = nn.CrossEntropyLoss()

######################################################################

# Train and evaluate

if args.resume:
    trainF = {x: open(os.path.join(args.save, 'train_' + x + '.csv'), 'a')
              for x in ['cifar', 'tiny', 'vgg']}
    testF = {x: open(os.path.join(args.save, 'test_' + x + '.csv'), 'a')
             for x in ['cifar', 'tiny', 'vgg']}
else:
    trainF = {x: open(os.path.join(args.save, 'train_' + x + '.csv'), 'w')
              for x in ['cifar', 'tiny', 'vgg']}
    testF = {x: open(os.path.join(args.save, 'test_' + x + '.csv'), 'w')
             for x in ['cifar', 'tiny', 'vgg']}

elapsed_time = 0
for epoch in range(start_epoch, start_epoch+epochs):
    start_time = time.time()
    train(model_ft, criterion, epoch, trainF)
    err = test(model_ft, epoch, testF)
    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

    # Save checkpoint: keep the latest model, and the best model so far.
    state = {
        'net': model_ft.module if use_cuda else model_ft,
        'acc': 100-err,
        'epoch': epoch,
    }
    print(100-err)
    print(best_acc)
    torch.save(state, os.path.join(args.save, 'cifar_latest.t7'))
    if (100-err) > best_acc:
        torch.save(state, os.path.join(args.save, 'cifar.t7'))
        best_acc = 100-err

for x in ['cifar', 'tiny', 'vgg']:
    trainF[x].close()
    testF[x].close()
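
# Plausible invocations given the flags above (script name and paths illustrative):
#   python parallel.py --save ./parallelb.base        # train from scratch
#   python parallel.py --save ./parallelb.base -r     # resume from cifar.t7
#   python parallel.py --save ./parallelb.base -t     # evaluate the saved model
#   python parallel.py --save ./parallelb.base -c     # dump weights to weight.txt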

error

https://discuss.pytorch.org/t/how-to-perform-finetuning-in-pytorch/419/7

ignored_params = list(map(id, model.fc.parameters()))
base_params = filter(lambda p: id(p) not in ignored_params,
                     model.parameters())

optimizer = torch.optim.SGD([
    {'params': base_params},
    {'params': model.fc.parameters(), 'lr': opt.lr}
], lr=opt.lr*0.1, momentum=0.9)

TypeError: optimizer can only optimize Variables, but one of the params is int
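 
The usual cause of this error is handing the optimizer something that is not a parameter: for example passing the ignored_params list of id() integers into a param group, or letting the filter iterator be consumed before the optimizer reads it. A minimal sketch of a construction that avoids both pitfalls, assuming a model with a final fc layer (the helper name and lr value are illustrative):

import torch

def make_finetune_optimizer(model, lr=0.01):
    # ids are used only for membership tests; they are never handed to the optimizer
    fc_param_ids = set(id(p) for p in model.fc.parameters())
    # materialize as a list so the iterable cannot be silently exhausted
    base_params = [p for p in model.parameters() if id(p) not in fc_param_ids]
    return torch.optim.SGD([
        {'params': base_params},                      # pretrained backbone: reduced lr
        {'params': model.fc.parameters(), 'lr': lr},  # new head: full lr
    ], lr=lr * 0.1, momentum=0.9)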

A second error, raised while generating the Caffe network below: the test "if stride!=1 or bottom.shape[1] != num_filter*widen_factor:" fails with AttributeError: 'Top' object has no attribute 'shape', because Caffe NetSpec layer outputs (Tops) carry no shape information at graph-construction time.
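
Since NetSpec Tops expose no shape, one workaround is to thread the channel count through the builder by hand; the make_resnext script later on this page instead flags each stage's first block with a first argument. A sketch of the channel-tracking variant, reusing the shortcut and preactbottleneck helpers from the script below (the extra in_channels parameter and the tuple return are illustrative):

from caffe import layers as L

def add_layer(bottom, in_channels, num_filter, stride):
    widen_factor = 4
    out_channels = num_filter * widen_factor
    # explicit bookkeeping replaces the unavailable bottom.shape[1] test
    if stride != 1 or in_channels != out_channels:
        x = shortcut(bottom, nout=out_channels, stride=stride)
    else:
        x = bottom

    conv = preactbottleneck(bottom, ks=1, nout=num_filter, stride=1, pad=0)
    conv = preactbottleneck(conv, ks=3, nout=num_filter, stride=stride, pad=1, groups=16)
    conv = preactbottleneck(conv, ks=1, nout=out_channels, stride=1, pad=0, groups=16)

    out = L.Eltwise(x, conv)
    return out, out_channels  # the caller carries the channel count forward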

# -*- coding: utf-8 -*-

"""
Created on Sat Aug 19 15:19:32 2017

@author: chenjiaxu
"""

from __future__ import print_function

import math
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe
#model = L.Eltwise(model, conv1,operation = 'SUM')
def bn_relu_conv(bottom, ks, nout, stride, pad, dropout):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    if dropout > 0:
        conv = L.Dropout(conv, dropout_ratio=dropout)
    return conv

def preactbottleneck(bottom, ks, nout, stride, pad, groups=1):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride, group=groups,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def shortcut(bottom, nout, stride):
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def add_layer(bottom, num_filter, stride):
    widen_factor = 4

    # BUG: NetSpec Tops carry no .shape attribute, so this test raises the
    # AttributeError quoted above; see the channel-tracking sketch earlier,
    # or the first-flag fix in the make_resnext script further down
    if stride != 1 or bottom.shape[1] != num_filter*widen_factor:
        x = shortcut(bottom, nout=num_filter*widen_factor, stride=stride)
    else:
        x = bottom

    conv = preactbottleneck(bottom, ks=1, nout=num_filter, stride=1, pad=0, groups=1)
    conv = preactbottleneck(conv, ks=3, nout=num_filter, stride=stride, pad=1, groups=16)
    conv = preactbottleneck(conv, ks=1, nout=num_filter*widen_factor, stride=1, pad=0, groups=16)

    out = L.Eltwise(x, conv)
    return out

def transition(bottom, num_filter, dropout):
    conv = bn_relu_conv(bottom, ks=1, nout=num_filter, stride=1, pad=0, dropout=dropout)
    pooling = L.Pooling(conv, pool=P.Pooling.AVE, kernel_size=2, stride=2)
    return pooling

# change the lines below to experiment with different settings
# depth -- number of residual blocks per stage
# width -- base channel count per stage
def resnetx(data_file, mode='train', batch_size=64, depth=[3,4,6,3], width=[32,64,128,256]):
    data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(mean_file="/data1/zuotongchun/cifar100_mean/cifar100_train_all_mean.binaryproto"))

    nchannels = 32
    model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels,
                          pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])

    # four stages; the first block of each later stage downsamples with stride 2
    strides = [1] + [1]*(depth[0]-1)
    for stride in strides:
        model = add_layer(model, width[0], stride)

    strides = [2] + [1]*(depth[1]-1)
    for stride in strides:
        model = add_layer(model, width[1], stride)

    strides = [2] + [1]*(depth[2]-1)
    for stride in strides:
        model = add_layer(model, width[2], stride)

    strides = [2] + [1]*(depth[3]-1)
    for stride in strides:
        model = add_layer(model, width[3], stride)

    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)

def make_net():
    with open('train-resnetx-BC-50.prototxt', 'w') as f:
        # change the path to your data; if it's not in LMDB format, also change the L.Data line in resnetx()
        print(str(resnetx('/data1/zuotongchun/cifar10_train_all_lmdb', batch_size=64)), file=f)

    with open('test-resnetx-BC-50.prototxt', 'w') as f:
        print(str(resnetx('/data1/zuotongchun/cifar10_test_lmdb', batch_size=50)), file=f)

def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train-resnetx-BC-50.prototxt'
    # note: this evaluates on the training prototxt; point it at
    # 'test-resnetx-BC-50.prototxt' to score the held-out set
    s.test_net.append('train-resnetx-BC-50.prototxt')
    s.test_interval = 200
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 100

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-1

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # add by sdh
    s.device_id = 6  # 2 or 3
    s.snapshot = 1000
    s.snapshot_prefix = 'snapshot/densenet-BC-169'

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
        f.write('\n')

if __name__ == '__main__':
    make_net()
    make_solver()

make_wrn_inception

from __future__ import print_function

import math
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe

def bn_relu_conv(bottom, ks, nout, stride, pad, dropout):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    if dropout > 0:
        conv = L.Dropout(conv, dropout_ratio=dropout)
    return conv

def inception(bottom, stride, nout):
    outplane1 = nout // 2
    outplane2 = nout - outplane1

    # pad=1 on the 3x3 convs keeps spatial size, so the two branches can be
    # concatenated and Eltwise-added to the shortcut (the original pad=0 would
    # shrink branch1 by 4 pixels and branch2 by 2, and the Concat would fail)
    conv1 = bn_relu_conv(bottom, ks=1, nout=64, stride=stride, pad=0, dropout=0)
    conv1 = bn_relu_conv(conv1, ks=3, nout=outplane1, stride=1, pad=1, dropout=0)
    branch1 = bn_relu_conv(conv1, ks=3, nout=outplane1, stride=1, pad=1, dropout=0)

    conv2 = bn_relu_conv(bottom, ks=1, nout=48, stride=stride, pad=0, dropout=0)
    branch2 = bn_relu_conv(conv2, ks=3, nout=outplane2, stride=1, pad=1, dropout=0)

    concate = L.Concat(branch1, branch2, axis=1)

    return concate

def shortcut(bottom, nout, stride):
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def add_layer(bottom, num_filter, stride, first):
    if stride != 1 or first == 1:
        x = shortcut(bottom, nout=num_filter, stride=stride)
    else:
        x = bottom

    conv = inception(bottom, stride=stride, nout=num_filter)

    out = L.Eltwise(x, conv)
    return out

def transition(bottom, num_filter, dropout):
    conv = bn_relu_conv(bottom, ks=1, nout=num_filter, stride=1, pad=0, dropout=dropout)
    pooling = L.Pooling(conv, pool=P.Pooling.AVE, kernel_size=2, stride=2)
    return pooling

# change the lines below to experiment with different settings
# depth -- number of residual blocks per stage
# widen_factor -- channel multiplier k of the wide residual network
# dropout -- set to 0 to disable dropout, a non-zero value to set the dropout rate
def wrnnet(data_file, mode='train', batch_size=64, depth=[3,4,6,3], widen_factor=4, dropout=0.3):
    data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(mean_file="/data1/zuotongchun/cifar100_mean/cifar100_train_all_mean.binaryproto"))

    nchannels = 64
    k = widen_factor
    width = [64*k, 128*k, 256*k]
    model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels,
                          pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    model = L.Scale(model, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    model = L.ReLU(model, in_place=True)

    # three stages; first forces a projection shortcut on each stage's first block
    strides = [1] + [1]*(depth[0]-1)
    first = 1
    for stride in strides:
        model = add_layer(model, width[0], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[1]-1)
    for stride in strides:
        model = add_layer(model, width[1], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[2]-1)
    for stride in strides:
        model = add_layer(model, width[2], stride, first)
        first = 0

    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)

def make_net():
    with open('train-wrn-inception-250.prototxt', 'w') as f:
        # change the path to your data; if it's not in LMDB format, also change the L.Data line in wrnnet()
        print(str(wrnnet('/data1/zuotongchun/cifar100_train_all_lmdb', batch_size=64)), file=f)

    with open('test-wrn-inception-250.prototxt', 'w') as f:
        print(str(wrnnet('/data1/zuotongchun/cifar100_test_lmdb', batch_size=50)), file=f)

def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train-wrn-inception-250.prototxt'
    # note: this evaluates on the training prototxt; point it at
    # 'test-wrn-inception-250.prototxt' to score the held-out set
    s.test_net.append('train-wrn-inception-250.prototxt')
    s.test_interval = 200
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 100

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-1

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # add by sdh
    s.device_id = 6  # 2 or 3
    s.snapshot = 1000
    s.snapshot_prefix = 'snapshot/densenet-BC-169'

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
        f.write('\n')

if __name__ == '__main__':
    make_net()
    make_solver()

?

import torch
import torch.nn as nn
import torch.nn.functional as F

FLOPS = 0  # module-level multiply-accumulate counter, updated in forward()

class PreActBottleneck_p(nn.Module):
    '''Pre-activation version of the original Bottleneck module.'''
    widen_factor = 4

    def __init__(self, in_planes, planes, cardinality, stride=1):
        super(PreActBottleneck_p, self).__init__()

        self.inplanes = in_planes
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)

        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)

        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes*self.widen_factor, kernel_size=1, bias=False)

        self.bn4 = nn.BatchNorm2d(planes*self.widen_factor)

        if stride != 1:
            self.shortcut = nn.Sequential(
                nn.AvgPool2d(kernel_size=2, stride=2)
            )

    def forward(self, x):
        global FLOPS
        out = self.bn1(x)
        shortcut = self.shortcut(x) if hasattr(self, 'shortcut') else x

        out = self.conv1(out)
        FLOPS += self.conv1.in_channels*self.conv1.out_channels*self.conv1.kernel_size[0]*self.conv1.kernel_size[1]*out.size(2)*out.size(3)

        out = self.conv2(F.relu(self.bn2(out)))
        temp = self.conv2.in_channels*self.conv2.out_channels*self.conv2.kernel_size[0]*self.conv2.kernel_size[1]*out.size(2)*out.size(3)
        FLOPS += temp // self.conv2.groups

        out = self.conv3(F.relu(self.bn3(out)))
        FLOPS += self.conv3.in_channels*self.conv3.out_channels*self.conv3.kernel_size[0]*self.conv3.kernel_size[1]*out.size(2)*out.size(3)

        out = self.bn4(out)
        # residual add on the first inplanes channels; the extra channels pass through
        out = torch.cat([shortcut[:,:self.inplanes,:,:]+out[:,:self.inplanes,:,:], out[:,self.inplanes:,:,:]], 1)
        return out
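
The forward pass above accumulates multiply-accumulate counts into the module-level FLOPS counter as a side effect of running data through the block. A minimal driver sketch, assuming the imports and FLOPS global added above and a CIFAR-sized input (all values illustrative):

from torch.autograd import Variable

# exercise one block and read back the accumulated op count
block = PreActBottleneck_p(in_planes=64, planes=64, cardinality=16, stride=1)
x = Variable(torch.randn(1, 64, 32, 32))
out = block(x)
print('output size:', out.size())       # (1, 256, 32, 32) with widen_factor=4
print('accumulated FLOPS:', FLOPS)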

make_resnext

# -*- coding: utf-8 -*-

"""
Created on Sat Aug 19 15:19:32 2017

@author: chenjiaxu
"""

from __future__ import print_function

import math
from caffe import layers as L, params as P, to_proto
from caffe.proto import caffe_pb2
import caffe
#model = L.Eltwise(model, conv1,operation = 'SUM')
def bn_relu_conv(bottom, ks, nout, stride, pad, dropout):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    if dropout > 0:
        conv = L.Dropout(conv, dropout_ratio=dropout)
    return conv

def preactbottleneck(bottom, ks, nout, stride, pad, groups=1):
    batch_norm = L.BatchNorm(bottom, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])
    scale = L.Scale(batch_norm, bias_term=True, in_place=True, filler=dict(value=1), bias_filler=dict(value=0))
    relu = L.ReLU(scale, in_place=True)
    conv = L.Convolution(relu, kernel_size=ks, stride=stride, group=groups,
                         num_output=nout, pad=pad, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def shortcut(bottom, nout, stride):
    conv = L.Convolution(bottom, kernel_size=1, stride=stride,
                         num_output=nout, pad=0, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))

    return conv

def add_layer(bottom, num_filter, stride, first):
    widen_factor = 4

    # the first flag replaces the bottom.shape[1] test that NetSpec Tops cannot support
    if stride != 1 or first == 1:
        x = shortcut(bottom, nout=num_filter*widen_factor, stride=stride)
    else:
        x = bottom

    conv = preactbottleneck(bottom, ks=1, nout=num_filter, stride=1, pad=0, groups=1)
    conv = preactbottleneck(conv, ks=3, nout=num_filter, stride=stride, pad=1, groups=16)
    conv = preactbottleneck(conv, ks=1, nout=num_filter*widen_factor, stride=1, pad=0, groups=16)

    out = L.Eltwise(x, conv)
    return out

def transition(bottom, num_filter, dropout):
    conv = bn_relu_conv(bottom, ks=1, nout=num_filter, stride=1, pad=0, dropout=dropout)
    pooling = L.Pooling(conv, pool=P.Pooling.AVE, kernel_size=2, stride=2)
    return pooling

# change the lines below to experiment with different settings
# depth -- number of residual blocks per stage
# width -- base channel count per stage
def resnetx(data_file, mode='train', batch_size=64, depth=[3,4,6,3], width=[32,64,128,256]):
    data, label = L.Data(source=data_file, backend=P.Data.LMDB, batch_size=batch_size, ntop=2,
                         transform_param=dict(mean_file="/data1/zuotongchun/cifar100_mean/cifar100_train_all_mean.binaryproto"))

    nchannels = 32
    model = L.Convolution(data, kernel_size=3, stride=1, num_output=nchannels,
                          pad=1, bias_term=False, weight_filler=dict(type='msra'), bias_filler=dict(type='constant'))
    model = L.BatchNorm(model, in_place=False, param=[dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)])

    # four stages; first forces a projection shortcut on each stage's first block
    strides = [1] + [1]*(depth[0]-1)
    first = 1
    for stride in strides:
        model = add_layer(model, width[0], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[1]-1)
    for stride in strides:
        model = add_layer(model, width[1], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[2]-1)
    for stride in strides:
        model = add_layer(model, width[2], stride, first)
        first = 0

    first = 1
    strides = [2] + [1]*(depth[3]-1)
    for stride in strides:
        model = add_layer(model, width[3], stride, first)
        first = 0

    model = L.Pooling(model, pool=P.Pooling.AVE, global_pooling=True)
    model = L.InnerProduct(model, num_output=10, bias_term=True, weight_filler=dict(type='xavier'), bias_filler=dict(type='constant'))
    loss = L.SoftmaxWithLoss(model, label)
    accuracy = L.Accuracy(model, label)
    return to_proto(loss, accuracy)

def make_net():
    with open('train-resnetx-BC-50.prototxt', 'w') as f:
        # change the path to your data; if it's not in LMDB format, also change the L.Data line in resnetx()
        print(str(resnetx('/data1/zuotongchun/cifar10_train_all_lmdb', batch_size=64)), file=f)

    with open('test-resnetx-BC-50.prototxt', 'w') as f:
        print(str(resnetx('/data1/zuotongchun/cifar10_test_lmdb', batch_size=50)), file=f)

def make_solver():
    s = caffe_pb2.SolverParameter()
    s.random_seed = 0xCAFFE

    s.train_net = 'train-resnetx-BC-50.prototxt'
    # note: this evaluates on the training prototxt; point it at
    # 'test-resnetx-BC-50.prototxt' to score the held-out set
    s.test_net.append('train-resnetx-BC-50.prototxt')
    s.test_interval = 200
    s.test_iter.append(200)

    s.max_iter = 230000
    s.type = 'Nesterov'
    s.display = 100

    s.base_lr = 0.1
    s.momentum = 0.9
    s.weight_decay = 1e-1

    s.lr_policy = 'multistep'
    s.gamma = 0.1
    s.stepvalue.append(int(0.5 * s.max_iter))
    s.stepvalue.append(int(0.75 * s.max_iter))
    s.solver_mode = caffe_pb2.SolverParameter.GPU

    # add by sdh
    s.device_id = 6  # 2 or 3
    s.snapshot = 1000
    s.snapshot_prefix = 'snapshot/densenet-BC-169'

    solver_path = 'solver.prototxt'
    with open(solver_path, 'w') as f:
        f.write(str(s))
        f.write('\n')

if __name__ == '__main__':
    make_net()
    make_solver()
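
For context, a hedged sketch of how these generator scripts are typically driven from Python, assuming a working pycaffe installation (an alternative to the caffe train -solver solver.prototxt command line):

import caffe

caffe.set_mode_gpu()
make_net()                                    # writes the train/test prototxts
make_solver()                                 # writes solver.prototxt
solver = caffe.get_solver('solver.prototxt')
solver.solve()                                # trains to max_iter, snapshotting periodically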
