Skip to content

Commit 572c319

Browse files
committed
models: Add a CRNN
1 parent 6df2beb commit 572c319

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed

microesc/models/crnn.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
2+
3+
"""
Inspired by the CRNN models described in

Sound Event Detection: A Tutorial
https://arxiv.org/abs/2107.05463

and

Convolutional Recurrent Neural Networks for Polyphonic Sound Event Detection
https://arxiv.org/abs/1702.06286
"""
15+
16+
# related code, https://chadrick-kwag.net/tf-keras-rnn-ctc-example/
17+
18+
def build_model(frames=128, bands=40, channels=1, n_classes=10,
                conv_size=(3,3),
                conv_block='conv',
                downsample_size=(2,2),
                n_stages=3, n_blocks_per_stage=1,
                filters=128, kernels_growth=1.0,
                fully_connected=64,
                rnn_units=32,
                temporal='bigru',
                dropout=0.5, l2=0.001, backend='detection'):
    """Build a convolutional-recurrent Keras ``Sequential`` model.

    The network is three SeparableConv2D + MaxPooling2D blocks, followed by a
    temporal stage and an optional output backend.

    Parameters
    ----------
    frames, bands, channels:
        Input shape of one example, ``(frames, bands, channels)``.
    n_classes:
        Number of units in the final Dense layer.
    conv_size:
        Kernel size of the three convolutional blocks.
    filters:
        Number of filters in each convolutional block.
    fully_connected:
        Units of the intermediate Dense layer (``'classification'`` only).
    rnn_units:
        Units per GRU direction (``'bigru'``), or filters of the strided
        temporal convolutions (``'tcn'``).
    temporal:
        ``'bigru'`` for two stacked bidirectional GRUs, or ``'tcn'`` for two
        strided (9, 1) separable convolutions.
    backend:
        ``'classification'``, ``'detection'``, or any falsy value for no
        output layers.
    conv_block, downsample_size, n_stages, n_blocks_per_stage,
    kernels_growth, dropout, l2:
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    The assembled (uncompiled) ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``temporal`` or ``backend`` is not one of the supported values.

    NOTE(review): the convolutions are created without a ``padding`` argument;
    if the Keras default shrinks each axis by ``kernel - 1``, the default
    ``bands=40`` looks too small for three blocks -- confirm before relying
    on the defaults.
    """
    # Validate the mode switches first so that bad arguments fail fast,
    # before the (slow) TensorFlow import and any layer construction.
    if temporal not in ('bigru', 'tcn'):
        raise ValueError(f"Unknown temporal parameter {temporal}")
    if backend and backend not in ('classification', 'detection'):
        raise ValueError(f"Unsupported backend '{backend}'")

    from tensorflow.keras import Sequential
    from tensorflow.keras.layers import \
        GRU, Bidirectional, MaxPooling2D, \
        Reshape, TimeDistributed, Softmax, Dense, SeparableConv2D

    model = Sequential()

    input_shape = (frames, bands, channels)

    def add_conv_block(model, downsample_size, conv_filters=filters, kernel_size=conv_size,
                       **kwargs):
        # Use the kernel_size argument (previously the enclosing conv_size
        # was used, silently ignoring any override).
        model.add(SeparableConv2D(conv_filters, kernel_size, **kwargs))
        model.add(MaxPooling2D(downsample_size))

        # TODO: add ReLu
        # TODO: BatchNorm etc?

    # Convolutional layers
    # Pool sizes are (1, k): only the second (bands) axis is downsampled,
    # the first (frames) axis is not pooled.
    add_conv_block(model, downsample_size=(1,5), input_shape=input_shape)
    add_conv_block(model, downsample_size=(1,2))
    add_conv_block(model, downsample_size=(1,2))

    # Temporal processing
    if temporal == 'bigru':
        # model.output_shape is used instead of layers[-1].output_shape,
        # which is not available on layers in newer Keras versions.
        o = model.output_shape
        # Keep the frames axis, flatten the remaining axes into features
        model.add(Reshape((o[1], -1)))
        model.add(Bidirectional(GRU(rnn_units, return_sequences=True)))
        model.add(Bidirectional(GRU(rnn_units, return_sequences=True)))
    else:
        # temporal == 'tcn' (validated above)
        # TODO: make downsampling adjustable
        model.add(SeparableConv2D(rnn_units, (9, 1), strides=(2,1)))
        model.add(SeparableConv2D(rnn_units, (9, 1), strides=(2,1)))

    # Output
    # TODO: support multiple layers
    # TODO: add Dropout
    o = model.output_shape
    if backend == 'classification':
        model.add(TimeDistributed(Dense(fully_connected, activation="linear")))
        # Was layers.Dense, but no name `layers` is imported -> NameError
        model.add(Dense(n_classes))
        model.add(Softmax())

    elif backend == 'detection':
        model.add(TimeDistributed(Dense(n_classes, activation="linear"), input_shape=(o[1], o[2])))
        model.add(Softmax())
    # a falsy backend means: no output layers

    return model
84+
85+
86+
def test_model():
    """Smoke test: build a small 'tcn' model and print its layer summary."""

    model = build_model(filters=24, bands=64, rnn_units=16, n_classes=3, temporal='tcn')

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
91+
92+
93+
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    test_model()
95+

0 commit comments

Comments
 (0)