tiny_dnn 1.0.0
A header-only, dependency-free deep learning framework in C++11
loss_function.h
/*
    Copyright (c) 2013, Taiga Nomi
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the <organization> nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "tiny_dnn/util/util.h"

namespace tiny_dnn {

// mean-squared-error loss function for regression
class mse {
public:
    static float_t f(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        float_t d = 0.0;

        for (serial_size_t i = 0; i < y.size(); ++i)
            d += (y[i] - t[i]) * (y[i] - t[i]);

        return d / y.size();
    }

    static vec_t df(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        vec_t d(t.size());
        float_t factor = float_t(2) / static_cast<float_t>(t.size());

        for (serial_size_t i = 0; i < y.size(); ++i)
            d[i] = factor * (y[i] - t[i]);

        return d;
    }
};
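// For reference, the mse class above implements
//   f(y, t)    = (1/N) * sum_i (y_i - t_i)^2
//   df_i(y, t) = (2/N) * (y_i - t_i)
// where N = y.size().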

// absolute loss function for regression
class absolute {
public:
    static float_t f(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        float_t d = float_t(0);

        for (serial_size_t i = 0; i < y.size(); ++i)
            d += std::abs(y[i] - t[i]);

        return d / y.size();
    }

    static vec_t df(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        vec_t d(t.size());
        float_t factor = float_t(1) / static_cast<float_t>(t.size());

        for (serial_size_t i = 0; i < y.size(); ++i) {
            float_t sign = y[i] - t[i];
            if (sign < 0.f)
                d[i] = -float_t(1) * factor;
            else if (sign > 0.f)
                d[i] = float_t(1) * factor;
            else
                d[i] = float_t(0);
        }

        return d;
    }
};
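// For reference, the absolute (L1) loss above implements
//   f(y, t)    = (1/N) * sum_i |y_i - t_i|
//   df_i(y, t) = sign(y_i - t_i) / N
// with the subgradient at y_i == t_i taken to be 0.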

// absolute loss with epsilon range for regression
// epsilon range [-eps, eps] with eps = 1./fraction
template<int fraction>
class absolute_eps {
public:
    static float_t f(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        float_t d = float_t(0);
        const float_t eps = float_t(1) / fraction;

        for (serial_size_t i = 0; i < y.size(); ++i) {
            float_t diff = std::abs(y[i] - t[i]);
            if (diff > eps)
                d += diff;
        }
        return d / y.size();
    }

    static vec_t df(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        vec_t d(t.size());
        const float_t factor = float_t(1) / static_cast<float_t>(t.size());
        const float_t eps = float_t(1) / fraction;

        for (serial_size_t i = 0; i < y.size(); ++i) {
            float_t sign = y[i] - t[i];
            if (sign < -eps)
                d[i] = -float_t(1) * factor;
            else if (sign > eps)
                d[i] = float_t(1) * factor;
            else
                d[i] = float_t(0);
        }
        return d;
    }
};
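// For reference, absolute_eps<fraction> above ignores residuals that fall
// inside the band [-eps, eps] with eps = 1/fraction:
//   f(y, t)    = (1/N) * sum over i with |y_i - t_i| > eps of |y_i - t_i|
//   df_i(y, t) = sign(y_i - t_i) / N  if |y_i - t_i| > eps, otherwise 0.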

// cross-entropy loss function for (multiple independent) binary classifications
class cross_entropy {
public:
    static float_t f(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        float_t d = float_t(0);

        for (serial_size_t i = 0; i < y.size(); ++i)
            d += -t[i] * std::log(y[i]) - (float_t(1) - t[i]) * std::log(float_t(1) - y[i]);

        return d;
    }

    static vec_t df(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        vec_t d(t.size());

        for (serial_size_t i = 0; i < y.size(); ++i)
            d[i] = (y[i] - t[i]) / (y[i] * (float_t(1) - y[i]));

        return d;
    }
};
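// For reference, cross_entropy above implements the summed binary
// cross-entropy and its derivative:
//   f(y, t)    = sum_i [ -t_i * log(y_i) - (1 - t_i) * log(1 - y_i) ]
//   df_i(y, t) = (y_i - t_i) / (y_i * (1 - y_i))
// Each y_i is expected to lie strictly inside (0, 1), e.g. a sigmoid output;
// values of exactly 0 or 1 make the log() terms diverge.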

// cross-entropy loss function for multi-class classification
class cross_entropy_multiclass {
public:
    static float_t f(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        float_t d = 0.0;

        for (serial_size_t i = 0; i < y.size(); ++i)
            d += -t[i] * std::log(y[i]);

        return d;
    }

    static vec_t df(const vec_t& y, const vec_t& t) {
        assert(y.size() == t.size());
        vec_t d(t.size());

        for (serial_size_t i = 0; i < y.size(); ++i)
            d[i] = -t[i] / y[i];

        return d;
    }
};
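// For reference, cross_entropy_multiclass above implements
//   f(y, t)    = sum_i -t_i * log(y_i)
//   df_i(y, t) = -t_i / y_i
// where y is expected to be a probability distribution (e.g. a softmax output)
// and t a one-hot or soft target.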

template <typename E>
vec_t gradient(const vec_t& y, const vec_t& t) {
    assert(y.size() == t.size());
    return E::df(y, t);
}

template <typename E>
std::vector<vec_t> gradient(const std::vector<vec_t>& y, const std::vector<vec_t>& t) {
    std::vector<vec_t> grads;

    assert(y.size() == t.size());

    for (serial_size_t i = 0; i < y.size(); i++)
        grads.push_back(gradient<E>(y[i], t[i]));

    return grads;
}

inline void apply_cost_if_defined(std::vector<vec_t>& sample_gradient,
                                  const std::vector<vec_t>& sample_cost) {
    if (sample_gradient.size() == sample_cost.size()) {
        // @todo consider adding parallelism
        const serial_size_t channel_count = static_cast<serial_size_t>(sample_gradient.size());
        for (size_t channel = 0; channel < channel_count; ++channel) {
            if (sample_gradient[channel].size() == sample_cost[channel].size()) {
                const size_t element_count = sample_gradient[channel].size();

                // @todo optimize? (use AVX or so)
                for (size_t element = 0; element < element_count; ++element) {
                    sample_gradient[channel][element] *= sample_cost[channel][element];
                }
            }
        }
    }
}
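// Note: apply_cost_if_defined() re-weights the gradient element-wise
// (sample_gradient[c][e] *= sample_cost[c][e]) and silently leaves a channel
// untouched when its cost vector does not match the gradient's shape.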

// gradient for a minibatch
template <typename E>
std::vector<tensor_t> gradient(const std::vector<tensor_t>& y,
                               const std::vector<tensor_t>& t,
                               const std::vector<tensor_t>& t_cost) {

    const serial_size_t sample_count  = static_cast<serial_size_t>(y.size());
    const serial_size_t channel_count = static_cast<serial_size_t>(y[0].size());

    std::vector<tensor_t> gradients(sample_count);

    CNN_UNREFERENCED_PARAMETER(channel_count);
    assert(y.size() == t.size());
    assert(t_cost.empty() || t_cost.size() == t.size());

    // @todo add parallelism
    for (serial_size_t sample = 0; sample < sample_count; ++sample) {
        assert(y[sample].size() == channel_count);
        assert(t[sample].size() == channel_count);
        assert(t_cost.empty() || t_cost[sample].empty() ||
               t_cost[sample].size() == channel_count);

        gradients[sample] = gradient<E>(y[sample], t[sample]);

        if (sample < t_cost.size()) {
            apply_cost_if_defined(gradients[sample], t_cost[sample]);
        }
    }

    return gradients;
}

} // namespace tiny_dnn
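// Illustrative usage sketch (not part of the original header): computing a
// loss value and its gradient for one prediction/target pair with the mse
// policy defined above. vec_t and float_t are the aliases from util.h.
//
//   tiny_dnn::vec_t y = {0.2f, 0.7f};  // network output
//   tiny_dnn::vec_t t = {0.0f, 1.0f};  // desired target
//   tiny_dnn::float_t loss = tiny_dnn::mse::f(y, t);
//   tiny_dnn::vec_t   grad = tiny_dnn::gradient<tiny_dnn::mse>(y, t);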