-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
163 lines (134 loc) · 6.2 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import torch
import numpy as np
from torchvision import datasets, transforms
import os
import random
from utils import (initialize_model,
cuda_checking,
training,
visualize_training_results
)
import shutil
import os
import argparse
import mlflow
import mlflow.pytorch
from datetime import date
from inception import *
from efficient import EfficientNet
today = str(date.today())

if __name__ == '__main__':
    # mlflow.set_tracking_uri('http://127.0.0.1:5000') # set up connection
    # mlflow.set_experiment('test-experiment') # set the experiment
    # mlflow.pytorch.autolog()

    # Fix all RNG seeds (torch / numpy / stdlib) for reproducible runs.
    torch.manual_seed(10)
    np.random.seed(10)
    random.seed(10)

    parser = argparse.ArgumentParser()
    # `choices` / `required` make argparse reject bad values up front.
    # Previously an unknown --model (or a missing one) skipped both branches
    # and left model_ft / input_size / modelName / use_auxiliary undefined,
    # producing a NameError far from the real cause; likewise --pretrained
    # could leave feature_extract / use_pretrained unbound.
    parser.add_argument("--pretrained", choices=["true", "false"], default="false",
                        help="Transfer learning")
    parser.add_argument("--model", choices=["efficient", "inception"], required=True,
                        help="efficient, inception")
    args = parser.parse_args()

    DEVICE = cuda_checking()
    data_dir = os.path.join(os.getcwd(), 'Dataset')
    # One sub-directory per class under Dataset/train.
    num_classes = len(os.listdir(os.path.join(data_dir, 'train')))

    # Single boolean replaces the duplicated if/elif string-compare block.
    # (Only consumed by the commented-out initialize_model call below.)
    feature_extract = use_pretrained = (args.pretrained == "true")
    # modelName = args.model
    # model_ft, input_size = initialize_model(modelName, num_classes, feature_extract=feature_extract, use_pretrained=use_pretrained)

    if args.model == "inception":
        model_ft = Inception()
        input_size = 224
        modelName = 'Inception'
        use_auxiliary = True   # Inception trains with auxiliary classifier heads
    else:  # args.model == "efficient" — guaranteed by argparse choices
        version = "b3"
        model_ft = EfficientNet(version=version, num_classes=num_classes)
        modelName = 'EfficientNet'
        input_size = 224
        use_auxiliary = False

    # Training transforms (augmentations currently disabled).
    train_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        # transforms.RandomHorizontalFlip(),
        # transforms.RandomVerticalFlip(),
        # transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        # transforms.RandomRotation(degrees=(30, 70)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5]
        )
    ])
    # The validation transforms: resize + normalize only, no augmentation.
    valid_transform = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5]
        )
    ])

    # Load each split twice: once with the (augmenting) train transform and
    # once with the plain valid transform, then concatenate — doubles the
    # effective dataset size as a simple augmentation strategy.
    image_datasets_train = datasets.ImageFolder(root=os.path.join(data_dir, 'train'),
                                                transform=train_transform)
    image_datasets_valid = datasets.ImageFolder(root=os.path.join(data_dir, 'valid'),
                                                transform=valid_transform)
    image_datasets_original_train = datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=valid_transform)
    image_datasets_original_valid = datasets.ImageFolder(os.path.join(data_dir, 'valid'), transform=valid_transform)
    increased_dataset_train = torch.utils.data.ConcatDataset([image_datasets_train, image_datasets_original_train])
    increased_dataset_valid = torch.utils.data.ConcatDataset([image_datasets_valid, image_datasets_original_valid])

    # Send the model to GPU (or CPU, whatever cuda_checking() returned).
    model_ft = model_ft.to(DEVICE)

    # Hyper-parameters.
    num_epochs = 100
    batch_size = 32

    # Shuffle for training, deterministic order for validation.
    training_loader = torch.utils.data.DataLoader(increased_dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
    validation_loader = torch.utils.data.DataLoader(increased_dataset_valid, batch_size=batch_size, shuffle=False, num_workers=0)

    with mlflow.start_run() as run:
        mlflow.log_param('dataset', data_dir)
        mlflow.log_param('model name', modelName)
        mlflow.log_param('number of classes', num_classes)
        mlflow.log_param('Batch size', batch_size)
        mlflow.log_param('epochs', num_epochs)
        # mlflow.log_param('feature extracted', feature_extract)
        # mlflow.log_param('pre-trained', pre_trained)

        model_training_results = training(model_ft, num_epochs,
                                          training_loader, validation_loader,
                                          DEVICE, use_auxiliary=use_auxiliary)
        model_ft, train_loss_array, train_acc_array, val_loss_array, val_acc_array = model_training_results

        # Report the epoch with the lowest validation loss and its accuracy.
        min_loss = min(val_loss_array)
        min_loss_epoch = val_loss_array.index(min_loss)
        min_loss_accuracy = val_acc_array[min_loss_epoch]

        visualize_training_results(train_loss_array,
                                   val_loss_array,
                                   train_acc_array,
                                   val_acc_array,
                                   num_epochs,
                                   model_name=modelName,
                                   batch_size=batch_size)

        print("\nTraining results:")
        print("\tMin val loss {:.4f} was achieved during epoch #{}".format(min_loss, min_loss_epoch + 1))
        print("\tVal accuracy during min val loss is {:.4f}".format(min_loss_accuracy))

        # Archive the run artifacts under Result/<model name>/.
        destination = os.path.join(os.getcwd(), 'Result', modelName)
        # exist_ok=True avoids the check-then-create race of the old
        # `if not os.path.exists(...)` guard.
        os.makedirs(destination, exist_ok=True)
        # Move the checkpoint and the loss/accuracy plot produced above.
        moved_file = ['checkpoint.pt', 'loss_n_accuracy.png']
        for name in moved_file:  # index was never used — plain iteration
            moved_path = os.path.join(destination, name)
            original_path = os.path.join(os.getcwd(), name)
            shutil.move(original_path, moved_path)
        print("Moved checkpoint and Losses images")

        # NOTE(review): machine-specific absolute path, only referenced by the
        # commented-out save_model call below — consider making it a CLI arg.
        save_model = r'C:\Users\kimwa\Desktop\MMU\Computer Intellegience\Project\models'
        # mlflow.pytorch.log_model(model_ft,"models")
        # mlflow.pytorch.save_model(model_ft,save_model+today+'/')