Commit 36500237 authored by Oksana Belyaeva's avatar Oksana Belyaeva
Browse files

init_commit

parents
#Output files
models/
training/*
DataSet/images/test/*
DataSet/images/train-0/*
DataSet/images/train-1/*
DataSet/images/*.tar.gz
DataSet/jsons/train-*.json
DataSet/csv/train-*.csv
DataSet/labeled/test/*
DataSet/labeled/train/*
pretrain_models/*
tfrecord_data/*
tmp_ckpts/
This diff is collapsed.
from matplotlib import pyplot as plt
from PIL import Image
import json
from PIL import ImageFont, ImageDraw
from glob import glob
import numpy as np
import cv2
# Colour used for each layout category, as an (R, G, B) tuple
colors = dict(
    title=(255, 0, 0),
    text=(0, 255, 0),
    figure=(0, 0, 255),
    table=(255, 255, 0),
    list=(0, 255, 255),
)
def markup(image, annotations, samples, font):
    """Draw the segmentation, bounding box and label of each annotation.

    Args:
        image: PIL image to draw on; it is modified in place.
        annotations: iterable of annotation dicts. Each one provides
            'segmentation' (a list whose first item is the polygon to fill),
            'bbox' as [x, y, width, height] and 'category_id'.
        samples: the loaded dataset dict. samples['categories'] is indexed
            with ``category_id - 1``, so the categories are assumed to be
            listed in id order starting at 1.
        font: PIL font used for the label text.

    Returns:
        The annotated image converted to a numpy array.
    """
    draw = ImageDraw.Draw(image, 'RGBA')
    categories = samples['categories']
    for annotation in annotations:
        # Resolve the category name once instead of on every use.
        name = categories[annotation['category_id'] - 1]['name']
        color = colors[name]
        bbox = annotation['bbox']
        x, y, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]

        # Draw segmentation (semi-transparent fill)
        draw.polygon(annotation['segmentation'][0], fill=color + (64,))

        # Draw bbox (opaque outline)
        draw.rectangle((x, y, x + box_w, y + box_h),
                       outline=color + (255,),
                       width=2)

        # Measure the label. ImageDraw.textsize was removed in Pillow 10, so
        # fall back to textbbox (right/bottom edge of text drawn at the
        # origin) when it is not available.
        if hasattr(draw, 'textsize'):
            w, h = draw.textsize(text=name, font=font)
        else:
            _, _, w, h = draw.textbbox((0, 0), name, font=font)

        # Boxes shorter than the label get the label placed just to their
        # right; otherwise the label sits in the box's top-left corner.
        label_x = x + box_w if box_h < h else x

        # Draw label: dark background rectangle, then white text on top
        draw.rectangle((label_x, y, label_x + w, y + h),
                       fill=(64, 64, 64, 255))
        draw.text((label_x, y),
                  text=name,
                  fill=(255, 255, 255, 255),
                  font=font)
    return np.array(image)
def draw_annotation(json_path='jsons/train.json', path_images="images/train-0/train/", path_out="labeled/train-0/"):
    """Render the annotations of every image in a dataset JSON to disk.

    Args:
        json_path: path of the annotation JSON; it must contain 'images',
            'annotations' and 'categories' entries.
        path_images: prefix prepended to each image's file name when reading.
        path_out: prefix prepended to each image's file name when writing.

    Images that cannot be processed are reported on stdout and skipped, so
    one bad file does not stop the whole run.
    """
    # Local import: this script's top-level imports do not include os.
    import os

    with open(json_path, 'r') as fp:
        samples = json.load(fp)

    # Index images by id and attach their annotations
    images = {
        image['id']: {'file_name': image['file_name'], 'annotations': []}
        for image in samples['images']
    }
    for ann in samples['annotations']:
        images[ann['image_id']]['annotations'].append(ann)

    # Visualize annotations
    font = ImageFont.truetype("DejaVuSans.ttf", 15)
    for image in images.values():
        file_name = image['file_name']
        out_file = path_out + file_name
        try:
            with Image.open(path_images + file_name) as img:
                # The RGBA draw mode and the RGB->BGR conversion below both
                # need a 3-channel RGB image, so normalise the mode first.
                image_labeled = markup(img.convert('RGB'),
                                       image['annotations'], samples, font)
            # cv2.imwrite does not create missing directories.
            out_dir = os.path.dirname(out_file)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            written = cv2.imwrite(out_file,
                                  cv2.cvtColor(image_labeled, cv2.COLOR_RGB2BGR))
            if not written:
                print('could not write %s' % out_file)
        except Exception as ex:
            # Best effort: report the problem and continue with the next image.
            print(ex)
# When run as a script, draw the annotations using the default paths.
if __name__ == "__main__":
    draw_annotation()
\ No newline at end of file
import io
import os
from collections import namedtuple, OrderedDict
from typing import Optional

import pandas as pd
import tensorflow as tf
from object_detection.utils import dataset_util
from PIL import Image
# Add more class labels as needed, make sure to start at 1
def class_text_to_int(row_label: str) -> Optional[int]:
    """Map a class name to its integer label id.

    Args:
        row_label: class name as it appears in the annotation data.

    Returns:
        The 1-based label id, or None when the name is not a known class.
    """
    # Add more class labels as needed, make sure to start at 1
    label_ids = {
        'text': 1,
        'title': 2,
        'list': 3,
        'table': 4,
        'figure': 5,
    }
    return label_ids.get(row_label)
def split(df, group):
    """Split a DataFrame into one record per distinct value of `group`.

    Args:
        df: DataFrame to partition.
        group: column name (or grouping spec) passed to ``df.groupby``.

    Returns:
        A list of ``data(filename, object)`` namedtuples, where `filename`
        is the group key and `object` is the sub-DataFrame for that key.
    """
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    # Iterate the group keys directly; zipping the keys with themselves
    # added nothing.
    return [data(key, gb.get_group(key)) for key in gb.groups]
def create_tf_example(group, path):
    """Build a tf.train.Example for one image and its bounding boxes.

    Args:
        group: record with a `filename` attribute (image file name) and an
            `object` attribute (DataFrame with the columns 'xmin', 'xmax',
            'ymin', 'ymax' and 'class', one row per box).
        path: directory that contains the image file.

    Returns:
        A tf.train.Example holding the encoded image bytes, the image size,
        and per-box coordinates divided by the image width/height together
        with the class names and ids.

    Raises:
        ValueError: if a row's class is not known to class_text_to_int.
    """
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()

    # Only the dimensions are needed, so release the image handle right away.
    with Image.open(io.BytesIO(encoded_jpg)) as image:
        width, height = image.size

    filename = group.filename.encode('utf8')
    # NOTE: the format is recorded as jpg regardless of the actual file type.
    image_format = b'jpg'

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []
    for _, row in group.object.iterrows():
        label = row['class']
        label_id = class_text_to_int(label)
        if label_id is None:
            # Fail with a clear message instead of handing None to the
            # int64 feature builder below.
            raise ValueError('Unknown class label %r in %s' % (label, group.filename))
        # Box coordinates are stored relative to the image size.
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(label.encode('utf8'))
        classes.append(label_id)

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def create_tf_record(name, writer):
    """Write one serialized example per image of dataset `name` to `writer`.

    Annotations are read from ``csv/<name>.csv`` and images are looked up
    under ``images/<name>`` inside the current working directory.
    """
    image_dir = os.path.join(os.getcwd(), 'images/' + name)
    annotations = pd.read_csv('csv/' + name + '.csv')
    for image_group in split(annotations, 'filename'):
        serialized = create_tf_example(image_group, image_dir).SerializeToString()
        writer.write(serialized)
def main(_):
    """Write the 'train-0' and 'train-1' datasets into train-01.record.

    The single positional argument is supplied by ``tf.app.run`` and ignored.
    """
    # The context manager closes the writer even if an example fails to
    # serialize, so the record file is not left with an open handle.
    with tf.python_io.TFRecordWriter('train-01.record') as writer:
        for name in ['train-0', 'train-1']:
            create_tf_record(name, writer)
            print("create %s" % name)
    print('Successfully created the train TFRecords')
    # TODO: the test-set export (test.record) was left disabled; the draft
    # referenced an undefined name `i` and needs a real dataset name before
    # it can be re-enabled.
# When run as a script, hand control to tf.app.run().
if __name__ == "__main__":
    tf.app.run()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment