Combine/merge every x images together with the title of each picture (Linux)

I have some folders containing many jpg pictures (the number depends on the folder).
I would like, for instance, to combine every 4 pictures** together with the title of each image (see the picture below).
(In case the last batch does not contain exactly 4 images, it should just hold however many are left: 3, 2 or 1.)
**Ideally I could change that number to something else, like 5, 6 or 10 (the number I choose would depend on the context), and I could also choose the number of columns (I showed 2 columns in my example below).
How can I perform this with a Linux command or any free/open-source Linux software?

As I did not find what I wanted, I created my own Python code to solve this (it's probably not the most perfect script of the century, but it works):
"""
Prints a collage according to desired number of column and rows with title of file
Instruction
1. Put all jpg picture in same folder [tested sucessfully on 12mb per pict]
2. select desired columns in NO_COL
3. select desired rowsin in NO_ROW
4. run the script which will output the collage with <cur_date>_export.png files
"""
#import libraries
import time
import os
import imageio as iio
from matplotlib import pyplot as plt
def render_collage(pict_file_name_list):
""" create one collage """
fig = plt.figure(figsize=(40, 28)) #change if needed
cnt = 1
for cur_img_name in pict_file_name_list:
img_var = iio.imread(cur_img_name)
fig.add_subplot(NO_COL, NO_ROW, cnt)
plt.imshow(img_var)
plt.axis('off')
plt.title(cur_img_name, fontsize = 30) #change if needed
cnt = cnt + 1
cur_date = time.strftime("%Y-%m-%d--%H-%M-%s")
fig.savefig(cur_date+'_export.png')
NO_COL = 3
NO_ROW = 3
NBR_IMG_COLLAGE = NO_COL * NO_ROW
img_list_name = [elem for elem in os.listdir() if 'jpg' in elem] #keep only file having .jpg
while len(img_list_name) >= 1:
sub_list = img_list_name[:NBR_IMG_COLLAGE]
render_collage(sub_list)
del img_list_name[:NBR_IMG_COLLAGE]
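As an aside, ImageMagick's montage tool can produce similar labelled grids from the shell; a minimal sketch driving it from Python, assuming ImageMagick is installed (-label "%f" captions each tile with its filename):
import glob
import subprocess

jpgs = sorted(glob.glob("*.jpg"))
per_collage = 4   # images per collage; change to 5, 6, 10, ...
cols = 2          # number of columns in the grid

for i in range(0, len(jpgs), per_collage):
    batch = jpgs[i:i + per_collage]  # the last batch may hold fewer images
    subprocess.run(["montage", "-label", "%f", *batch,
                    "-tile", "%dx" % cols,  # fixed column count, rows as needed
                    "-geometry", "+4+4",    # spacing between tiles
                    "collage_%03d.png" % (i // per_collage)],
                   check=True)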

Related

Keras custom dataset with images as label/ground truth

I'm making a model for image denoising and use ImageDataGenerator.flow_from_directory to load the dataset. It is structured in two folders, one with noisy input images and one with the corresponding clean images. I want the generator to use the images in the first folder as inputs and the other folder as "labels"/ground truth.
With the method I'm using right now, all images in both folders are treated as inputs, with the folder name as the label. I can extract the images manually by selecting specific batches and train on that, but it's inconvenient and probably wasn't intended to be used that way.
What is the proper way of doing this? There probably is a function for this, but I can't find it.
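For what it's worth, a common pattern for image-to-image tasks is to create two flow_from_directory streams with class_mode=None and the same seed, then zip them so each batch pairs a noisy input with its clean target. A minimal sketch, assuming hypothetical directory names and that each directory keeps its images inside a single subfolder (flow_from_directory scans subdirectories):
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Identical arguments and the same seed keep both streams in the same order.
noisy_gen = ImageDataGenerator(rescale=1.0 / 255).flow_from_directory(
    'data/noisy', class_mode=None, target_size=(224, 224),
    batch_size=32, seed=42)
clean_gen = ImageDataGenerator(rescale=1.0 / 255).flow_from_directory(
    'data/clean', class_mode=None, target_size=(224, 224),
    batch_size=32, seed=42)

train_gen = zip(noisy_gen, clean_gen)  # yields (input_batch, target_batch) pairs
# model.fit(train_gen, steps_per_epoch=len(noisy_gen), epochs=10)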
I had a similar problem and found it necessary to create a custom generator to feed the images into model.fit. The code (rather lengthy) is posted below.
import os
import pandas as pd
import numpy as np
import glob
import cv2
from sklearn.model_selection import train_test_split

def create_df(image_dir, label_dir, shuffle=True):
    path = image_dir + '/**/*'
    image_file_paths = glob.glob(path, recursive=True)
    path = label_dir + '/**/*'
    label_file_paths = glob.glob(path, recursive=True)
    # run a check and make sure the filenames without extensions match
    df = pd.DataFrame({'image': image_file_paths, 'label': label_file_paths}).astype(str)
    if shuffle:
        df = df.sample(frac=1.0, replace=False, weights=None, random_state=123, axis=0).reset_index(drop=True)
    return df

class jpgen():
    batch_index = 0  # tracks the number of batches generated

    def __init__(self, df, train_split=None, test_split=None):
        self.train_split = train_split  # float between 0 and 1: fraction of images to use for training
        self.test_split = test_split
        self.df = df.copy()  # create a copy of the data frame
        if self.train_split is not None:  # split the df to create a training df
            self.train_df, dummy_df = train_test_split(self.df, train_size=self.train_split, shuffle=False)
            if self.test_split is not None:  # create a test set and a validation set
                t_split = self.test_split / (1.0 - self.train_split)
                self.test_df, self.valid_df = train_test_split(dummy_df, train_size=t_split, shuffle=False)
                self.valid_gen_len = len(self.valid_df['image'].unique())  # number of samples in the valid generator
                self.valid_gen_filenames = list(self.valid_df['image'])  # list of jpg file names in the valid generator
            else:
                self.test_df = dummy_df
            self.test_gen_len = len(self.test_df['image'].unique())  # number of test samples
            self.test_gen_filenames = list(self.test_df['image'])  # list of jpg file paths in test_gen
        else:
            self.train_df = self.df
        self.tr_gen_len = len(self.train_df['image'].unique())  # number of samples in the train generator

    def flow(self, batch_size=32, image_shape=None, rescale=None, shuffle=True, subset=None):
        # flows batches of jpg images and labels to model.fit
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.shuffle = shuffle
        self.subset = subset
        self.rescale = rescale
        if self.subset == 'training' or self.train_split is None:
            op_df = self.train_df
        elif self.subset == 'test':
            op_df = self.test_df
        else:
            op_df = self.valid_df
        if self.shuffle:  # shuffle op_df, then reset the index
            op_df = op_df.sample(frac=1.0, replace=False, weights=None, random_state=123, axis=0).reset_index(drop=True)
        # op_df will be either train, test or valid depending on subset
        # develop the batch of data
        while True:
            label_batch_list = []  # holds one batch of labels
            image_batch_list = []  # holds one batch of images
            start = jpgen.batch_index * self.batch_size  # set start value of iteration
            end = start + self.batch_size  # set end value to yield one batch of data of length batch_size
            sample_count = len(op_df['image'])
            for i in range(start, end):  # iterate over one batch-size of data
                j = i % sample_count  # rolls the index back to the front when the end is reached
                path_to_image = op_df.iloc[j]['image']
                path_to_label = op_df.iloc[j]['label']
                label_image = cv2.imread(path_to_label, -1)  # read unchanged to preserve a 4th channel if present
                label_image = cv2.cvtColor(label_image, cv2.COLOR_BGR2RGB)
                image_image = cv2.imread(path_to_image)
                image_image = cv2.cvtColor(image_image, cv2.COLOR_BGR2RGB)
                label_image = cv2.resize(label_image, self.image_shape)
                image_image = cv2.resize(image_image, self.image_shape)
                if self.rescale is not None:
                    label_image = label_image * self.rescale
                    image_image = image_image * self.rescale
                label_batch_list.append(label_image)
                image_batch_list.append(image_image)
            image_array = np.array(image_batch_list)
            label_array = np.array(label_batch_list)
            jpgen.batch_index += 1
            yield (image_array, label_array)
The code below shows how to use the functions above to make generators for model.fit.
image_dir = r'C:\Temp\gen_test\images'  # directory with clean images
label_dir = r'C:\Temp\gen_test\labels'  # directory with noisy images; file names same as in the clean dir
shuffle = False  # if True, shuffles the dataframe
df = create_df(image_dir, label_dir, shuffle)  # create a dataframe with columns 'image', 'label'
# where labels are the noisy images
train_split = .8  # use 80% of files for training
test_split = .1  # use 10% for test; the validation split is automatically 1 - train_split - test_split
batch_size = 32  # set batch_size
height = 224  # set image height for generator output images and labels
width = 224  # set image width for generator output images and labels
channels = 3  # set number of channels in images
image_shape = (height, width)
rescale = 1 / 255  # value used to rescale image pixels
gen = jpgen(df, train_split=train_split, test_split=test_split)  # create instance of generator class
tr_gen_len = gen.tr_gen_len
test_gen_len = gen.test_gen_len
valid_gen_len = gen.valid_gen_len
test_filenames = gen.test_gen_filenames  # paths of the files used for testing
train_steps = tr_gen_len // batch_size  # use this value for steps_per_epoch in model.fit
valid_steps = valid_gen_len // batch_size  # use this value for validation_steps in model.fit
test_steps = test_gen_len // batch_size  # use this value for steps in model.predict
# instantiate generators
train_gen = gen.flow(batch_size=batch_size, image_shape=image_shape, rescale=rescale, shuffle=False, subset='training')
valid_gen = gen.flow(batch_size=batch_size, image_shape=image_shape, rescale=rescale, shuffle=False, subset='valid')
test_gen = gen.flow(batch_size=batch_size, image_shape=image_shape, rescale=rescale, shuffle=False, subset='test')
Build your model, then use:
history = model.fit(train_gen, epochs=epochs, steps_per_epoch=train_steps,
                    validation_data=valid_gen, validation_steps=valid_steps,
                    verbose=1, shuffle=True)  # note: shuffle is ignored when the input is a generator
predictions = model.predict(test_gen, steps=test_steps)

Python Matplotlib Add 2 Dynamic Title Components

I have 6 subplots that need 2 dynamic title components. I can code for 1, but after searching the literature I'm not sure how to change my code below to add a 2nd dynamic title component on the same line. Here is my for loop to generate the 6 subplots, with the "plt.title..." line below:
list = [0,1,2,3,4,5]
now = datetime.datetime.now()
currm = now.month
import calendar
fig, ax = plt.subplots(6)
for x in list:
    dam = DS.where(DS['time.year']==rmax.iloc[x,1]).groupby('time.month').mean()  # iterate by index of column "1", i.e. the years
    dam = dam.sel(month=3)  # current month mean 500
    dam = dam.sel(level=500)
    damc = dam.to_array()
    lats = damc['lat'].data
    lons = damc['lon'].data
    # plot data
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.coastlines(lw=1)
    damc = damc.squeeze()
    cnplot = plt.contour(lons,lats,damc,cmap='jet')
    plt.title('Mean 500mb Hgt + Phase {} 2020'.format(calendar.month_name[currm-1]))
    plt.show()
    # plt.clf()
Inside the loop, I need to add one item from the list below to the "plt.title..." line above, between the "+" and the word "Phase":
tindices = ['SOI','AO','NAO','PNA','EPO','PDO']
Thank you for any help with this!
Try accessing the tindices one by one and passing them to the title:
plt.title('Mean 500mb Hgt + {} Phase {} 2020'.format(tindices[x], calendar.month_name[currm-1]))
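Equivalently, with an f-string (Python 3.6+) the same title reads a little more directly:
plt.title(f'Mean 500mb Hgt + {tindices[x]} Phase {calendar.month_name[currm - 1]} 2020')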

What is the better logic for identifying the latest versioned files from a big list

I have 200 images in a folder, and each file may exist in several versions (for example: car_image#1, car_image#2, bike_image#2, etc.). My requirement is to build a utility to copy all the latest files from this directory to another.
My approach is:
Put the image names (without version numbers) into a list
Eliminate the duplicates from the list
Iterate through the list and identify the latest version of each unique file (I am a little unclear on this step)
Can someone suggest better ideas/algorithms to achieve this?
My approach would be:
Make a list of unique names by getting each filename up to the #, only adding unique values.
Make a dictionary with filenames as keys, and set values to be the version number, updating when it's larger than the one stored.
Go through the dictionary and produce the filenames to grab.
My go-to would be a python script but you should be able to do this in pretty much whatever language you find suitable.
Example code for getting the filename list:
import os

file_directory = os.listdir('.')  # the files to examine; adjust the path as needed

# get the filename list
myList = []
for x in file_directory:
    fname = x.split("#")[0]
    if fname not in myList:
        myList = myList + [fname]

myDict = {}
for x in myList:
    if x not in myDict:
        myDict[x] = 0

for x in file_directory:
    fname = x.split("#")[0]  # key into the dict by base name, not the full filename
    fversion = x.split("#")[-1]
    if myDict[fname] < int(fversion):
        myDict[fname] = int(fversion)

flist = []
for x in myDict:
    fname = str(x) + "#" + str(myDict[x])
    flist.append(fname)
Then flist would be a list of filenames of the most recent versions
I didn't run this or anything but hopefully it helps!
In Python 3:
>>> import random
>>> images = sorted(set(sum([['%s_image#%i' % (nm, random.randint(1, 9)) for i in range(random.randint(2, 5))] for nm in 'car bike cat dog man tree'.split()], [])))
>>> print('\n'.join(images))
bike_image#2
bike_image#3
bike_image#4
bike_image#5
car_image#2
car_image#7
cat_image#3
dog_image#2
dog_image#5
dog_image#9
man_image#1
man_image#2
man_image#4
man_image#6
man_image#7
tree_image#3
tree_image#4
>>> from collections import defaultdict
>>> image2max = defaultdict(int)
>>> for image in images:
        name, _, version = image.partition('#')
        version = int(version)
        if version > image2max[name]:
            image2max[name] = version
>>> # Max version
>>> for image in sorted(image2max):
        print('%s#%i' % (image, image2max[image]))
bike_image#5
car_image#7
cat_image#3
dog_image#9
man_image#7
tree_image#4
>>>
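Since the stated goal was a utility that copies the latest version of each file to another directory, here is a minimal sketch tying the ideas above together (the source and destination directory names are hypothetical, and files are assumed to follow the name#version pattern):
import os
import shutil
from collections import defaultdict

src_dir, dst_dir = 'images', 'latest'  # hypothetical paths
os.makedirs(dst_dir, exist_ok=True)

newest = defaultdict(int)  # base name -> highest version seen
for fname in os.listdir(src_dir):
    if '#' not in fname:
        continue  # skip files without a version suffix
    name, _, version = fname.partition('#')
    newest[name] = max(newest[name], int(version))

for name, version in newest.items():
    fname = '%s#%i' % (name, version)
    shutil.copy2(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))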

Detect duplicate videos from YouTube

For my M.Tech project, I want to know if there is any algorithm to detect duplicate videos on YouTube.
For example (here are links to two videos):
random user upload
upload by official channel
Amongst these, the second is the official video, and T-Series holds its copyright.
Is YouTube officially doing something to remove duplicate videos?
Not only videos; duplicate YouTube channels exist as well.
Sometimes the original video has fewer views than the pirated version.
So, while searching, I found this
(see page 49 of the PDF)
What I learnt from the given link:
A classifier is used to detect original vs copyright-infringing videos.
Given a query, the top k search results are first retrieved. Thereafter, three parameters are used to classify the videos:
Number of subscribers
user profile
username popularity
On the basis of these parameters, the original video is identified, as described in the link.
EDIT 1:
There are basically two different objectives:
To identify the original video with the above method
To eliminate the duplicate videos
Obviously, identifying the original video is easier than finding all the duplicate videos, so I preferred to first find the original video.
The approach I can think of so far to improve the accuracy:
We can first find the original videos with the above method
Then use the most popular publicized frames (maybe multiple) of that video to search on Google Images. This method therefore retrieves a list of duplicate videos from the Google Images search results.
After getting these duplicate videos, we can once again check frame by frame and reach a level of satisfaction (i.e. the retrieved videos were "exact" or "almost" duplicate copies of the original video)
Will this approach work?
If not, is there any better algorithm to improve upon the given method?
Please write in the comments section if I am unable to explain my approach clearly.
I will soon add some more details.
I've recently hacked together a small tool for that purpose. It's still a work in progress but usually pretty accurate. The idea is to simply compare the times between brightness maxima in the centre of the video, so it should work across different resolutions, frame rates and rotations of the video.
ffmpeg is used for decoding, imageio as a bridge to Python, numpy/scipy for the maxima computation, and some k-nearest-neighbour library (annoy, cyflann, hnsw) for comparison.
At the moment it's not polished at all, so you should know a little Python to run it, or simply copy the idea.
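The tool itself isn't listed here, but the core idea can be sketched in a few lines: sample the mean brightness of a centre crop for every frame, locate the maxima, and compare the spacing between them. A rough sketch under those assumptions (requires imageio with its ffmpeg plugin, numpy and scipy; the peak-distance parameter is an arbitrary choice):
import numpy as np
import imageio
from scipy.signal import find_peaks

def brightness_signature(video_path):
    """Return the times (seconds) between brightness maxima in the video centre."""
    reader = imageio.get_reader(video_path)
    fps = reader.get_meta_data().get('fps', 30)
    series = []
    for frame in reader:
        h, w = frame.shape[:2]
        centre = frame[h // 4:3 * h // 4, w // 4:3 * w // 4]
        series.append(centre.mean())  # mean brightness of the centre crop
    peaks, _ = find_peaks(np.asarray(series), distance=5)
    return np.diff(peaks) / fps  # inter-maxima gaps; resolution/rotation independent

# Signatures of two copies of the same video should be close even at different
# resolutions or frame rates; compare them with e.g. a nearest-neighbour index.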
I had the same problem too, so I wrote a program myself.
The problem is that I had videos of various formats and resolutions, so I needed to take a hash of each video's frames and compare them.
https://github.com/gklc811/duplicate_video_finder
You can just change the directories at the top and you are good to go.
from os import path, walk, makedirs, rename
from time import perf_counter  # time.clock was removed in Python 3.8
from imagehash import average_hash
from PIL import Image
from cv2 import VideoCapture, CAP_PROP_FRAME_COUNT, CAP_PROP_FRAME_WIDTH, CAP_PROP_FRAME_HEIGHT, CAP_PROP_FPS
from json import dump, load
from multiprocessing import Pool, cpu_count

input_vid_dir = r'C:\Users\gokul\Documents\data\\'
json_dir = r'C:\Users\gokul\Documents\db\\'
analyzed_dir = r'C:\Users\gokul\Documents\analyzed\\'
duplicate_dir = r'C:\Users\gokul\Documents\duplicate\\'

if not path.exists(json_dir):
    makedirs(json_dir)

if not path.exists(analyzed_dir):
    makedirs(analyzed_dir)

if not path.exists(duplicate_dir):
    makedirs(duplicate_dir)


def write_to_json(filename, data):
    file_full_path = json_dir + filename + ".json"
    with open(file_full_path, 'w') as file_pointer:
        dump(data, file_pointer)
    return


def video_to_json(filename):
    file_full_path = input_vid_dir + filename
    start = perf_counter()
    size = round(path.getsize(file_full_path) / 1024 / 1024, 2)
    video_pointer = VideoCapture(file_full_path)
    frame_count = int(video_pointer.get(CAP_PROP_FRAME_COUNT))
    width = int(video_pointer.get(CAP_PROP_FRAME_WIDTH))
    height = int(video_pointer.get(CAP_PROP_FRAME_HEIGHT))
    fps = int(video_pointer.get(CAP_PROP_FPS))
    success, image = video_pointer.read()
    video_hash = {}
    while success:
        frame_hash = average_hash(Image.fromarray(image))
        video_hash[str(frame_hash)] = filename
        success, image = video_pointer.read()
    stop = perf_counter()
    time_taken = stop - start
    print("Time taken for ", file_full_path, " is : ", time_taken)
    data_dict = dict()
    data_dict['size'] = size
    data_dict['time_taken'] = time_taken
    data_dict['fps'] = fps
    data_dict['height'] = height
    data_dict['width'] = width
    data_dict['frame_count'] = frame_count
    data_dict['filename'] = filename
    data_dict['video_hash'] = video_hash
    write_to_json(filename, data_dict)
    return


def multiprocess_video_to_json():
    files = next(walk(input_vid_dir))[2]
    processes = cpu_count()
    print(processes)
    pool = Pool(processes)
    start = perf_counter()
    pool.starmap_async(video_to_json, zip(files))
    pool.close()
    pool.join()
    stop = perf_counter()
    print("Time Taken : ", stop - start)


def key_with_max_val(d):
    max_value = 0
    required_key = ""
    for k in d:
        if d[k] > max_value:
            max_value = d[k]
            required_key = k
    return required_key


def duplicate_analyzer():
    files = next(walk(json_dir))[2]
    data_dict = {}
    for file in files:
        filename = json_dir + file
        with open(filename) as f:
            data = load(f)
        video_hash = data['video_hash']
        count = 0
        duplicate_file_dict = dict()
        for key in video_hash:
            count += 1
            if key in data_dict:
                if data_dict[key] in duplicate_file_dict:
                    duplicate_file_dict[data_dict[key]] = duplicate_file_dict[data_dict[key]] + 1
                else:
                    duplicate_file_dict[data_dict[key]] = 1
            else:
                data_dict[key] = video_hash[key]
        if duplicate_file_dict:
            duplicate_file = key_with_max_val(duplicate_file_dict)
            duplicate_percentage = (duplicate_file_dict[duplicate_file] / count) * 100
            if duplicate_percentage > 50:
                file = file[:-5]
                print(file, " is dup of ", duplicate_file)
                src = analyzed_dir + file
                tgt = duplicate_dir + file
                if path.exists(src):
                    rename(src, tgt)
                # else:
                #     print("File already moved")


def mv_analyzed_file():
    files = next(walk(json_dir))[2]
    for filename in files:
        filename = filename[:-5]
        src = input_vid_dir + filename
        tgt = analyzed_dir + filename
        if path.exists(src):
            rename(src, tgt)
        # else:
        #     print("File already moved")


if __name__ == '__main__':
    mv_analyzed_file()
    multiprocess_video_to_json()
    mv_analyzed_file()
    duplicate_analyzer()

How to Repeat Table Column Headings over Page Breaks in PDF output from ReportLab

I'm using ReportLab to write tables in PDF documents and am very pleased with the results (despite not having a total grasp on flowables just yet).
However, I have not been able to figure out how to make a table that spans a page break have its column headings repeated.
The code below creates a test.pdf in C:\Temp that has a heading row followed by 99 rows of data.
The heading row looks great on the first page but I would like that to repeat at the top of the second and third pages.
I'm keen to hear of any approaches that have been used to accomplish that using the SimpleDocTemplate.
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Frame, Spacer
from reportlab.lib import colors
from reportlab.lib.units import cm
from reportlab.lib.pagesizes import A3, A4, landscape, portrait
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
from reportlab.pdfgen import canvas

pdfReportPages = "C:\\Temp\\test.pdf"
doc = SimpleDocTemplate(pdfReportPages, pagesize=A4)

# container for the "Flowable" objects
elements = []
styles = getSampleStyleSheet()
styleN = styles["Normal"]

# Make heading for each column
column1Heading = Paragraph("<para align=center>COLUMN ONE HEADING</para>", styles['Normal'])
column2Heading = Paragraph("<para align=center>COLUMN TWO HEADING</para>", styles['Normal'])
row_array = [column1Heading, column2Heading]
tableHeading = [row_array]
tH = Table(tableHeading, [6 * cm, 6 * cm])  # these are the column widths for the headings on the table
tH.hAlign = 'LEFT'
tblStyle = TableStyle([('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
                       ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                       ('BOX', (0, 0), (-1, -1), 1, colors.black),
                       ('BOX', (0, 0), (0, -1), 1, colors.black)])
tblStyle.add('BACKGROUND', (0, 0), (-1, -1), colors.lightblue)
tH.setStyle(tblStyle)
elements.append(tH)

# Assemble rows of data for each column
for i in range(1, 100):
    column1Data = Paragraph("<para align=center>" + "Row " + str(i) + " Column 1 Data" + "</para>", styles['Normal'])
    column2Data = Paragraph("<para align=center>" + "Row " + str(i) + " Column 2 Data" + "</para>", styles['Normal'])
    row_array = [column1Data, column2Data]
    tableRow = [row_array]
    tR = Table(tableRow, [6 * cm, 6 * cm])
    tR.hAlign = 'LEFT'
    tR.setStyle(TableStyle([('BACKGROUND', (0, 0), (-1, -1), colors.white),
                            ('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
                            ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                            ('BOX', (0, 0), (-1, -1), 1, colors.black),
                            ('BOX', (0, 0), (0, -1), 1, colors.black)]))
    elements.append(tR)
    del tR
    elements.append(Spacer(1, 0.3 * cm))

doc.build(elements)
From the documentation (yes, I know, but it's sometimes hard to locate this stuff in the manual):
The repeatRows argument specifies the number of leading rows that
should be repeated when the Table is asked to split itself.
So when you create the table, this is one of the arguments you can pass, and it will turn the first n rows into header rows that repeat. You'll find this part of the text on page 77, but the section relating to creating a Table starts on page 76.
http://www.reportlab.com/docs/reportlab-userguide.pdf
This is the code I developed, after following Gordon's advice to reconsider using repeatRows, and it works!
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Frame, Spacer
from reportlab.lib import colors
from reportlab.lib.units import cm
from reportlab.lib.pagesizes import A3, A4, landscape, portrait
from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY
from reportlab.pdfgen import canvas

pdfReportPages = "C:\\Temp\\test.pdf"
doc = SimpleDocTemplate(pdfReportPages, pagesize=A4)

# container for the "Flowable" objects
elements = []
styles = getSampleStyleSheet()
styleN = styles["Normal"]

# Make heading for each column and start data list
column1Heading = "COLUMN ONE HEADING"
column2Heading = "COLUMN TWO HEADING"

# Assemble data for each column using a simple loop to append it into the data list
data = [[column1Heading, column2Heading]]
for i in range(1, 100):
    data.append([str(i), str(i)])

tableThatSplitsOverPages = Table(data, [6 * cm, 6 * cm], repeatRows=1)
tableThatSplitsOverPages.hAlign = 'LEFT'
tblStyle = TableStyle([('TEXTCOLOR', (0, 0), (-1, -1), colors.black),
                       ('VALIGN', (0, 0), (-1, -1), 'TOP'),
                       ('LINEBELOW', (0, 0), (-1, -1), 1, colors.black),
                       ('BOX', (0, 0), (-1, -1), 1, colors.black),
                       ('BOX', (0, 0), (0, -1), 1, colors.black)])
tblStyle.add('BACKGROUND', (0, 0), (1, 0), colors.lightblue)
tblStyle.add('BACKGROUND', (0, 1), (-1, -1), colors.white)
tableThatSplitsOverPages.setStyle(tblStyle)
elements.append(tableThatSplitsOverPages)

doc.build(elements)
Use repeatRows=1 when you create the Table...
from reportlab.platypus import Table
Table(data,repeatRows=1)
I always like to have something you can cut & paste into a .py file to run and test. So here it is...
import os
import pandas as pd
import numpy as np
import reportlab.platypus
import reportlab.lib.styles
from reportlab.lib import colors
from reportlab.lib.units import mm
from reportlab.lib.pagesizes import letter, landscape

reportoutputfilepath = os.path.join('.\\test.pdf')

pdf_file = reportlab.platypus.SimpleDocTemplate(
    reportoutputfilepath,
    pagesize=landscape(letter),
    rightMargin=10,
    leftMargin=10,
    topMargin=38,
    bottomMargin=23
)

ts_tables = [
    ('ALIGN', (4, 0), (-1, -1), 'RIGHT'),
    ('LINEBELOW', (0, 0), (-1, 0), 1, colors.purple),
    ('FONT', (0, 0), (-1, 0), 'Times-Bold'),
    ('LINEABOVE', (0, -1), (-1, -1), 1, colors.purple),
    ('FONT', (0, -1), (-1, -1), 'Times-Bold'),
    ('BACKGROUND', (1, 1), (-2, -2), colors.white),
    ('TEXTCOLOR', (0, 0), (1, -1), colors.black),
    ('FONTSIZE', (0, 0), (-1, -1), 8),
]

df = pd.DataFrame(np.random.randint(0, 1000, size=(1000, 4)), columns=list('ABCD'))
lista = [df.columns.values.astype(str).tolist()] + df.values.tolist()

# Here is where you put repeatRows=1
table = reportlab.platypus.Table(lista, colWidths=(20 * mm, 20 * mm, 20 * mm, 20 * mm), repeatRows=1)
table_style = reportlab.platypus.TableStyle(ts_tables)
table.setStyle(table_style)
elements = []
elements.append(table)

# Build the PDF
pdf_file.build(elements)
print(reportoutputfilepath)
t1 = Table(lista, colWidths=220, rowHeights=20, repeatRows=1)
just type repeatRows=1
I found this solution to easily repeat the header of a table that spans two pages. If you are producing the PDF from HTML, add this line to the CSS for your table:
-fs-table-paginate: paginate;
I also found a class for FPDF which seems powerful (I don't need it at the moment, so I haven't tested it):
http://interpid.eu/fpdf-table
