OpenShot Library | libopenshot  0.3.3
ObjectDetection.cpp
Go to the documentation of this file.
1 
10 // Copyright (c) 2008-2019 OpenShot Studios, LLC
11 //
12 // SPDX-License-Identifier: LGPL-3.0-or-later
13 
14 #include <fstream>
15 #include <iostream>
16 
18 #include "effects/Tracker.h"
19 #include "Exceptions.h"
20 #include "Timeline.h"
21 #include "objdetectdata.pb.h"
22 
23 #include <QImage>
24 #include <QPainter>
25 #include <QRectF>
26 #include <QString>
27 #include <QStringList>
28 using namespace std;
29 using namespace openshot;
30 
31 
33 ObjectDetection::ObjectDetection(std::string clipObDetectDataPath) :
34 display_box_text(1.0), display_boxes(1.0)
35 {
36  // Init effect properties
37  init_effect_details();
38 
39  // Tries to load the tracker data from protobuf
40  LoadObjDetectdData(clipObDetectDataPath);
41 
42  // Initialize the selected object index as the first object index
43  selectedObjectIndex = trackedObjects.begin()->first;
44 }
45 
46 // Default constructor
48  display_box_text(1.0), display_boxes(1.0)
49 {
50  // Init effect properties
51  init_effect_details();
52 
53  // Initialize the selected object index as the first object index
54  selectedObjectIndex = trackedObjects.begin()->first;
55 }
56 
// Init effect settings: fills in the EffectInfo metadata used by hosts/UI.
void ObjectDetection::init_effect_details()
{
	// NOTE(review): this dump shows a gap here where other EffectBase-derived
	// effects call InitEffectInfo() first — confirm against repository source.

	// Identify and describe this effect
	info.class_name = "ObjectDetection";
	info.name = "Object Detector";
	info.description = "Detect objects through the video.";
	info.has_audio = false;           // video-only effect, audio untouched
	info.has_video = true;
	info.has_tracked_object = true;   // exposes tracked objects to the host/UI
}
71 
// This method is required for all derived classes of EffectBase, and returns a
// modified openshot::Frame object. Draws bounding boxes (and optional labels)
// for every detection on this frame that passes the confidence/class filters.
std::shared_ptr<Frame> ObjectDetection::GetFrame(std::shared_ptr<Frame> frame, int64_t frame_number) {
	// Get the frame's QImage
	std::shared_ptr<QImage> frame_image = frame->GetImage();

	// Check if frame isn't NULL — nothing to draw on, return unmodified
	if(!frame_image || frame_image->isNull()) {
		return frame;
	}

	// Paint directly onto the frame's image (modified in place)
	QPainter painter(frame_image.get());
	painter.setRenderHints(QPainter::Antialiasing | QPainter::SmoothPixmapTransform);

	// Only draw when detection data exists for this exact frame number
	if (detectionsData.find(frame_number) != detectionsData.end()) {
		DetectionData detections = detectionsData[frame_number];
		for (int i = 0; i < detections.boxes.size(); i++) {
			// Skip detections below the confidence threshold, and — when a
			// class filter is configured — detections whose class name is
			// not in the filter list
			if (detections.confidences.at(i) < confidence_threshold ||
				(!display_classes.empty() &&
				 std::find(display_classes.begin(), display_classes.end(), classNames[detections.classIds.at(i)]) == display_classes.end())) {
				continue;
			}

			// Look up the tracked object this detection belongs to
			int objectId = detections.objectIds.at(i);
			auto trackedObject_it = trackedObjects.find(objectId);

			if (trackedObject_it != trackedObjects.end()) {
				std::shared_ptr<TrackedObjectBBox> trackedObject = std::static_pointer_cast<TrackedObjectBBox>(trackedObject_it->second);

				// Draw only when the object is parented to a clip, has a box
				// for this frame, and its "visible" keyframe evaluates to 1
				Clip* parentClip = (Clip*) trackedObject->ParentClip();
				if (parentClip && trackedObject->Contains(frame_number) && trackedObject->visible.GetValue(frame_number) == 1) {
					// Scale the box to pixel coordinates; (cx, cy) is the box
					// center, so offset by half the width/height for top-left
					BBox trackedBox = trackedObject->GetBox(frame_number);
					QRectF boxRect((trackedBox.cx - trackedBox.width / 2) * frame_image->width(),
								   (trackedBox.cy - trackedBox.height / 2) * frame_image->height(),
								   trackedBox.width * frame_image->width(),
								   trackedBox.height * frame_image->height());

					// Get properties of tracked object (i.e. colors, stroke width, etc...)
					std::vector<int> stroke_rgba = trackedObject->stroke.GetColorRGBA(frame_number);
					std::vector<int> bg_rgba = trackedObject->background.GetColorRGBA(frame_number);
					int stroke_width = trackedObject->stroke_width.GetValue(frame_number);
					float stroke_alpha = trackedObject->stroke_alpha.GetValue(frame_number);
					float bg_alpha = trackedObject->background_alpha.GetValue(frame_number);
					float bg_corner = trackedObject->background_corner.GetValue(frame_number);

					// Set the pen for the border (alpha keyframe scaled to 0..255)
					QPen pen(QColor(stroke_rgba[0], stroke_rgba[1], stroke_rgba[2], 255 * stroke_alpha));
					pen.setWidth(stroke_width);
					painter.setPen(pen);

					// Set the brush for the background fill of the box
					QBrush brush(QColor(bg_rgba[0], bg_rgba[1], bg_rgba[2], 255 * bg_alpha));
					painter.setBrush(brush);

					if (display_boxes.GetValue(frame_number) == 1 && trackedObject->draw_box.GetValue(frame_number) == 1) {
						// Only draw boxes if both properties are set to YES (draw all boxes, and draw box of the selected box)
						painter.drawRoundedRect(boxRect, bg_corner, bg_corner);
					}

					if(display_box_text.GetValue(frame_number) == 1) {
						// Draw text label above bounding box
						// Get the confidence and classId for the current detection
						int classId = detections.classIds.at(i);

						// Label is "<class name>:<object id>" when class names
						// are available, otherwise just the numeric object id
						QString label = QString::number(objectId);
						if (!classNames.empty()) {
							label = QString::fromStdString(classNames[classId]) + ":" + label;
						}

						// Set up the painter, font, and pen
						QFont font;
						font.setPixelSize(14);
						painter.setFont(font);

						// Calculate the size of the text
						QFontMetrics fontMetrics(font);
						QSize labelSize = fontMetrics.size(Qt::TextSingleLine, label);

						// Center the label horizontally over the box; clamp the
						// baseline so the text never lands above the image top
						double left = boxRect.center().x() - (labelSize.width() / 2.0);
						double top = std::max(static_cast<int>(boxRect.top()), labelSize.height()) - 4.0;

						// Draw the text
						painter.drawText(QPointF(left, top), label);
					}
				}
			}
		}
	}

	painter.end();

	// The frame's QImage has been modified in place, so we just return the original frame
	return frame;
}
168 
169 // Load protobuf data file
170 bool ObjectDetection::LoadObjDetectdData(std::string inputFilePath){
171  // Create tracker message
172  pb_objdetect::ObjDetect objMessage;
173 
174  // Read the existing tracker message.
175  std::fstream input(inputFilePath, std::ios::in | std::ios::binary);
176  if (!objMessage.ParseFromIstream(&input)) {
177  std::cerr << "Failed to parse protobuf message." << std::endl;
178  return false;
179  }
180 
181  // Make sure classNames, detectionsData and trackedObjects are empty
182  classNames.clear();
183  detectionsData.clear();
184  trackedObjects.clear();
185 
186  // Seed to generate same random numbers
187  std::srand(1);
188  // Get all classes names and assign a color to them
189  for(int i = 0; i < objMessage.classnames_size(); i++)
190  {
191  classNames.push_back(objMessage.classnames(i));
192  classesColor.push_back(cv::Scalar(std::rand()%205 + 50, std::rand()%205 + 50, std::rand()%205 + 50));
193  }
194 
195  // Iterate over all frames of the saved message
196  for (size_t i = 0; i < objMessage.frame_size(); i++)
197  {
198  // Create protobuf message reader
199  const pb_objdetect::Frame& pbFrameData = objMessage.frame(i);
200 
201  // Get frame Id
202  size_t id = pbFrameData.id();
203 
204  // Load bounding box data
205  const google::protobuf::RepeatedPtrField<pb_objdetect::Frame_Box > &pBox = pbFrameData.bounding_box();
206 
207  // Construct data vectors related to detections in the current frame
208  std::vector<int> classIds;
209  std::vector<float> confidences;
210  std::vector<cv::Rect_<float>> boxes;
211  std::vector<int> objectIds;
212 
213  // Iterate through the detected objects
214  for(int i = 0; i < pbFrameData.bounding_box_size(); i++)
215  {
216  // Get bounding box coordinates
217  float x = pBox.Get(i).x();
218  float y = pBox.Get(i).y();
219  float w = pBox.Get(i).w();
220  float h = pBox.Get(i).h();
221  // Get class Id (which will be assign to a class name)
222  int classId = pBox.Get(i).classid();
223  // Get prediction confidence
224  float confidence = pBox.Get(i).confidence();
225 
226  // Get the object Id
227  int objectId = pBox.Get(i).objectid();
228 
229  // Search for the object id on trackedObjects map
230  auto trackedObject = trackedObjects.find(objectId);
231  // Check if object already exists on the map
232  if (trackedObject != trackedObjects.end())
233  {
234  // Add a new BBox to it
235  trackedObject->second->AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
236  }
237  else
238  {
239  // There is no tracked object with that id, so insert a new one
240  TrackedObjectBBox trackedObj((int)classesColor[classId](0), (int)classesColor[classId](1), (int)classesColor[classId](2), (int)0);
241  trackedObj.stroke_alpha = Keyframe(1.0);
242  trackedObj.AddBox(id, x+(w/2), y+(h/2), w, h, 0.0);
243 
244  std::shared_ptr<TrackedObjectBBox> trackedObjPtr = std::make_shared<TrackedObjectBBox>(trackedObj);
245  ClipBase* parentClip = this->ParentClip();
246  trackedObjPtr->ParentClip(parentClip);
247 
248  // Create a temp ID. This ID is necessary to initialize the object_id Json list
249  // this Id will be replaced by the one created in the UI
250  trackedObjPtr->Id(std::to_string(objectId));
251  trackedObjects.insert({objectId, trackedObjPtr});
252  }
253 
254  // Create OpenCV rectangle with the bouding box info
255  cv::Rect_<float> box(x, y, w, h);
256 
257  // Push back data into vectors
258  boxes.push_back(box);
259  classIds.push_back(classId);
260  confidences.push_back(confidence);
261  objectIds.push_back(objectId);
262  }
263 
264  // Assign data to object detector map
265  detectionsData[id] = DetectionData(classIds, confidences, boxes, id, objectIds);
266  }
267 
268  // Delete all global objects allocated by libprotobuf.
269  google::protobuf::ShutdownProtobufLibrary();
270 
271  return true;
272 }
273 
274 // Get the indexes and IDs of all visible objects in the given frame
275 std::string ObjectDetection::GetVisibleObjects(int64_t frame_number) const{
276 
277  // Initialize the JSON objects
278  Json::Value root;
279  root["visible_objects_index"] = Json::Value(Json::arrayValue);
280  root["visible_objects_id"] = Json::Value(Json::arrayValue);
281  root["visible_class_names"] = Json::Value(Json::arrayValue);
282 
283  // Check if track data exists for the requested frame
284  if (detectionsData.find(frame_number) == detectionsData.end()){
285  return root.toStyledString();
286  }
287  DetectionData detections = detectionsData.at(frame_number);
288 
289  // Iterate through the tracked objects
290  for(int i = 0; i<detections.boxes.size(); i++){
291  // Does not show boxes with confidence below the threshold
292  if(detections.confidences.at(i) < confidence_threshold){
293  continue;
294  }
295 
296  // Get class name of tracked object
297  auto className = classNames[detections.classIds.at(i)];
298 
299  // If display_classes is not empty, check if className is in it
300  if (!display_classes.empty()) {
301  auto it = std::find(display_classes.begin(), display_classes.end(), className);
302  if (it == display_classes.end()) {
303  // If not in display_classes, skip this detection
304  continue;
305  }
306  root["visible_class_names"].append(className);
307  } else {
308  // include all class names
309  root["visible_class_names"].append(className);
310  }
311 
312  int objectId = detections.objectIds.at(i);
313  // Search for the object in the trackedObjects map
314  auto trackedObject = trackedObjects.find(objectId);
315 
316  // Get the tracked object JSON properties for this frame
317  Json::Value trackedObjectJSON = trackedObject->second->PropertiesJSON(frame_number);
318 
319  if (trackedObjectJSON["visible"]["value"].asBool() &&
320  trackedObject->second->ExactlyContains(frame_number)){
321  // Save the object's index and ID if it's visible in this frame
322  root["visible_objects_index"].append(trackedObject->first);
323  root["visible_objects_id"].append(trackedObject->second->Id());
324  }
325  }
326 
327  return root.toStyledString();
328 }
329 
330 // Generate JSON string of this object
331 std::string ObjectDetection::Json() const {
332 
333  // Return formatted string
334  return JsonValue().toStyledString();
335 }
336 
337 // Generate Json::Value for this object
338 Json::Value ObjectDetection::JsonValue() const {
339 
340  // Create root json object
341  Json::Value root = EffectBase::JsonValue(); // get parent properties
342  root["type"] = info.class_name;
343  root["protobuf_data_path"] = protobuf_data_path;
344  root["selected_object_index"] = selectedObjectIndex;
345  root["confidence_threshold"] = confidence_threshold;
346  root["display_box_text"] = display_box_text.JsonValue();
347  root["display_boxes"] = display_boxes.JsonValue();
348 
349  // Add tracked object's IDs to root
350  Json::Value objects;
351  for (auto const& trackedObject : trackedObjects){
352  Json::Value trackedObjectJSON = trackedObject.second->JsonValue();
353  // add object json
354  objects[trackedObject.second->Id()] = trackedObjectJSON;
355  }
356  root["objects"] = objects;
357 
358  // return JsonValue
359  return root;
360 }
361 
362 // Load JSON string into this object
363 void ObjectDetection::SetJson(const std::string value) {
364 
365  // Parse JSON string into JSON objects
366  try
367  {
368  const Json::Value root = openshot::stringToJson(value);
369  // Set all values that match
370  SetJsonValue(root);
371  }
372  catch (const std::exception& e)
373  {
374  // Error parsing JSON (or missing keys)
375  throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
376  }
377 }
378 
379 // Load Json::Value into this object
380 void ObjectDetection::SetJsonValue(const Json::Value root) {
381  // Set parent data
383 
384  // Set data from Json (if key is found)
385  if (!root["protobuf_data_path"].isNull() && protobuf_data_path.size() <= 1){
386  protobuf_data_path = root["protobuf_data_path"].asString();
387 
388  if(!LoadObjDetectdData(protobuf_data_path)){
389  throw InvalidFile("Invalid protobuf data path", "");
390  protobuf_data_path = "";
391  }
392  }
393 
394  // Set the selected object index
395  if (!root["selected_object_index"].isNull())
396  selectedObjectIndex = root["selected_object_index"].asInt();
397 
398  if (!root["confidence_threshold"].isNull())
399  confidence_threshold = root["confidence_threshold"].asFloat();
400 
401  if (!root["display_box_text"].isNull())
402  display_box_text.SetJsonValue(root["display_box_text"]);
403 
404  if (!root["display_boxes"].isNull())
405  display_boxes.SetJsonValue(root["display_boxes"]);
406 
407  if (!root["class_filter"].isNull()) {
408  class_filter = root["class_filter"].asString();
409 
410  // Convert the class_filter to a QString
411  QString qClassFilter = QString::fromStdString(root["class_filter"].asString());
412 
413  // Split the QString by commas and automatically trim each resulting string
414  QStringList classList = qClassFilter.split(',', QString::SkipEmptyParts);
415  display_classes.clear();
416 
417  // Iterate over the QStringList and add each trimmed, non-empty string
418  for (const QString &classItem : classList) {
419  QString trimmedItem = classItem.trimmed().toLower();
420  if (!trimmedItem.isEmpty()) {
421  display_classes.push_back(trimmedItem.toStdString());
422  }
423  }
424  }
425 
426  if (!root["objects"].isNull()){
427  for (auto const& trackedObject : trackedObjects){
428  std::string obj_id = std::to_string(trackedObject.first);
429  if(!root["objects"][obj_id].isNull()){
430  trackedObject.second->SetJsonValue(root["objects"][obj_id]);
431  }
432  }
433  }
434 
435  // Set the tracked object's ids
436  if (!root["objects_id"].isNull()){
437  for (auto const& trackedObject : trackedObjects){
438  Json::Value trackedObjectJSON;
439  trackedObjectJSON["box_id"] = root["objects_id"][trackedObject.first].asString();
440  trackedObject.second->SetJsonValue(trackedObjectJSON);
441  }
442  }
443 }
444 
445 // Get all properties for a specific frame
446 std::string ObjectDetection::PropertiesJSON(int64_t requested_frame) const {
447 
448  // Generate JSON properties list
449  Json::Value root = BasePropertiesJSON(requested_frame);
450 
451  Json::Value objects;
452  if(trackedObjects.count(selectedObjectIndex) != 0){
453  auto selectedObject = trackedObjects.at(selectedObjectIndex);
454  if (selectedObject){
455  Json::Value trackedObjectJSON = selectedObject->PropertiesJSON(requested_frame);
456  // add object json
457  objects[selectedObject->Id()] = trackedObjectJSON;
458  }
459  }
460  root["objects"] = objects;
461 
462  root["selected_object_index"] = add_property_json("Selected Object", selectedObjectIndex, "int", "", NULL, 0, 200, false, requested_frame);
463  root["confidence_threshold"] = add_property_json("Confidence Theshold", confidence_threshold, "float", "", NULL, 0, 1, false, requested_frame);
464  root["class_filter"] = add_property_json("Class Filter", 0.0, "string", class_filter, NULL, -1, -1, false, requested_frame);
465 
466  root["display_box_text"] = add_property_json("Draw All Text", display_box_text.GetValue(requested_frame), "int", "", &display_box_text, 0, 1, false, requested_frame);
467  root["display_box_text"]["choices"].append(add_property_choice_json("Yes", true, display_box_text.GetValue(requested_frame)));
468  root["display_box_text"]["choices"].append(add_property_choice_json("No", false, display_box_text.GetValue(requested_frame)));
469 
470  root["display_boxes"] = add_property_json("Draw All Boxes", display_boxes.GetValue(requested_frame), "int", "", &display_boxes, 0, 1, false, requested_frame);
471  root["display_boxes"]["choices"].append(add_property_choice_json("Yes", true, display_boxes.GetValue(requested_frame)));
472  root["display_boxes"]["choices"].append(add_property_choice_json("No", false, display_boxes.GetValue(requested_frame)));
473 
474  // Return formatted string
475  return root.toStyledString();
476 }
Header file for all Exception classes.
Header file for Object Detection effect class.
Header file for Timeline class.
Header file for Tracker effect class.
This abstract class is the base class, used by all clips in libopenshot.
Definition: ClipBase.h:33
Json::Value add_property_choice_json(std::string name, int value, int selected_value) const
Generate JSON choice for a property (dropdown properties)
Definition: ClipBase.cpp:132
std::string id
ID Property for all derived Clip and Effect classes.
Definition: ClipBase.h:35
Json::Value add_property_json(std::string name, float value, std::string type, std::string memo, const Keyframe *keyframe, float min_value, float max_value, bool readonly, int64_t requested_frame) const
Generate JSON for a property.
Definition: ClipBase.cpp:96
This class represents a clip (used to arrange readers on the timeline)
Definition: Clip.h:89
virtual Json::Value JsonValue() const
Generate Json::Value for this object.
Definition: EffectBase.cpp:79
openshot::ClipBase * ParentClip()
Parent clip object of this effect (which can be unparented and NULL)
Definition: EffectBase.cpp:201
Json::Value BasePropertiesJSON(int64_t requested_frame) const
Generate JSON object of base properties (recommended to be used by all effects)
Definition: EffectBase.cpp:179
virtual void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
Definition: EffectBase.cpp:115
EffectInfoStruct info
Information about the current effect.
Definition: EffectBase.h:69
std::map< int, std::shared_ptr< openshot::TrackedObjectBase > > trackedObjects
Map of Tracked Object's by their indices (used by Effects that track objects on clips)
Definition: EffectBase.h:66
Exception for files that can not be found or opened.
Definition: Exceptions.h:188
Exception for invalid JSON.
Definition: Exceptions.h:218
A Keyframe is a collection of Point instances, which is used to vary a number or property over time.
Definition: KeyFrame.h:53
void SetJsonValue(const Json::Value root)
Load Json::Value into this object.
Definition: KeyFrame.cpp:372
double GetValue(int64_t index) const
Get the value at a specific index.
Definition: KeyFrame.cpp:258
Json::Value JsonValue() const
Generate Json::Value for this object.
Definition: KeyFrame.cpp:339
Json::Value JsonValue() const override
Generate Json::Value for this object.
int selectedObjectIndex
Index of the Tracked Object that was selected to modify its properties.
std::shared_ptr< Frame > GetFrame(std::shared_ptr< Frame > frame, int64_t frame_number) override
This method is required for all derived classes of EffectBase, and returns a modified openshot::Frame...
ObjectDetection()
Default constructor.
bool LoadObjDetectdData(std::string inputFilePath)
Load protobuf data file.
std::string GetVisibleObjects(int64_t frame_number) const override
Get the indexes and IDs of all visible objects in the given frame.
std::string Json() const override
Generate JSON string of this object.
std::string PropertiesJSON(int64_t requested_frame) const override
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
void SetJson(const std::string value) override
Load JSON string into this object.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Definition: ReaderBase.cpp:245
This class contains the properties of a tracked object and functions to manipulate it.
void AddBox(int64_t _frame_num, float _cx, float _cy, float _width, float _height, float _angle) override
Add a BBox to the BoxVec map.
Keyframe stroke_alpha
Stroke box opacity.
This namespace is the default namespace for all code in the openshot library.
Definition: Compressor.h:29
const Json::Value stringToJson(const std::string value)
Definition: Json.cpp:16
std::vector< cv::Rect_< float > > boxes
std::vector< float > confidences
std::vector< int > classIds
std::vector< int > objectIds
This struct holds the information of a bounding-box.
float cy
y-coordinate of the bounding box center
float height
bounding box height
float cx
x-coordinate of the bounding box center
float width
bounding box width
bool has_video
Determines if this effect manipulates the image of a frame.
Definition: EffectBase.h:40
bool has_audio
Determines if this effect manipulates the audio of a frame.
Definition: EffectBase.h:41
std::string class_name
The class name of the effect.
Definition: EffectBase.h:36
std::string name
The name of the effect.
Definition: EffectBase.h:37
std::string description
The description of this effect and what it does.
Definition: EffectBase.h:38
bool has_tracked_object
Determines if this effect track objects through the clip.
Definition: EffectBase.h:42