import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import glob
from skimage.filters import gaussian
import skimage.feature as skfeat
from math import pi
from scipy.optimize import curve_fit
# Define the image channels
CHANNELS = ['R','G','B']
[docs]def count_files(path,file_type):
"""
To count the number of files of a defined extension (filetype on a certain folder (path)
Parameters
----------
path : string
folder name where the images are stored
file_type : string
extension of the files to count (e.g. tif, png, jpg)
Returns
-------
ImageCount : int
number of defined filetype files on the path folder
"""
ImageCount = len(glob.glob1(path,"*."+file_type))
print(path.split('\\')[-1]+' = '+str(ImageCount) + ' files')
return(ImageCount)
[docs]def get_im_data(x_frames,image_count,f_name):
"""
Load image data from a sequence of files
Parameters
----------
x_frames : int
step frames (e.g 10 to use only ten to ten images)
image_count : int
total number of files on the folder (can be obtained with count_files function)
f_name : string
file name pattern including full path where images are stored, e.g. "/folder/image-%04d"
Returns
-------
ImsR,ImsG,ImsB: array_like
data per channel of each image (ImsR -> matrix size = (W,H,image_count/x_frames))
"""
W,H,_ = plt.imread(f_name%0).shape # Measure the image size based on the first image on the folder
NT = int(image_count/x_frames)
ImsR = np.zeros((W,H,NT))
ImsG = np.zeros((W,H,NT))
ImsB = np.zeros((W,H,NT))
for i in range(0,NT):
im = plt.imread(f_name%(i*x_frames))
ImsR[:,:,i] = im[:,:,0] # Last number code the channel: 0=red, 1=green, 2=blue
ImsG[:,:,i] = im[:,:,1]
ImsB[:,:,i] = im[:,:,2]
return(ImsR,ImsG,ImsB)
# at call you can take only the channels you are interested in (e.g.):
# red,_,blue=get_im_data(xframes,imagecount) ---> this only takes the red and blue channels
[docs]def time_vector(data, x_frames, dt):
"""
Get the vector of times for the image sequence loaded
Parameters
----------
data : dictionary
dictionary with the R G B data of all images
xframes : int
step frames used on the analysis (e.g 10 means you are using one every ten to ten images)
dt : double
time step of the frames in hour units. It can be obtained from the file used to perform the timelapse.
Returns
-------
T: array_like
Time vector for the used data (hour units)
"""
_,_,LT = data[CHANNELS[0]].shape # Length of time vector
T = np.zeros((LT))
for i in range(0,LT):
T[i] = (i)*x_frames*dt
return(T)
[docs]def bg_value(x1, x2, y1, y2, data, im_count):
"""
compute the background mean value for each channel and frame based on a rectagle
defined by the user. Plot the rectangle over the image and makes plots of each channel
mean background value over time
Parameters
----------
x1,x2,x1,x2: int values
rectangle area limits: (x1,y1) = left-up corner. (x2,y2) = rigth-bottom corner
data : dictionary
R G B images data to get the background, and his names on data['Im']
im_count : int
total number of files on the folder (can be obtained with count_files function)
Returns
-------
bg: dictionary
Background mean value of each channel for every time frame
"""
X2R = x2-x1 #convert on steps because the rectangle patch definition
Y2R = y2-y1
#plot the defined area
plt.figure(figsize=(8,8))
fig = plt.gcf()
ax = fig.gca()
Im = plt.imread(data['Im']%(im_count-1))
ax.imshow(Im)
rect = matplotlib.patches.Rectangle((y1,x1), Y2R, X2R, linewidth=1, edgecolor='r', facecolor='none')
ax.add_patch(rect)
#get the mean background value at each time for each channel and plot it
BG = {}
LColors = ['r','g','b'] # each color will be for each line in the plot
count = 0
plt.figure()
for chan in CHANNELS:
BG[chan] = data[chan][x1:x2,y1:y2,:].mean(axis=(0,1))
plt.plot(BG[chan][:],LColors[count])
count += 1
plt.xlabel('Time step')
plt.ylabel('Fluorescence intensity')
return(BG)
[docs]def bg_subst(data, bg):
"""
Substract the mean background value for each channel and frame obtained with BG_Val function.
Parameters
----------
data: dictionary
R G B images data
bg : array
ackground mean value of each channel for every time frame (can be obtained with BG_Val function)
Returns
-------
Data: dictionary
R G B images data with the background substracted
"""
L = bg[CHANNELS[0]].shape[0]
S1,S2,_ = data[CHANNELS[0]].shape
for c in CHANNELS:
for i in range(0,L):
BGM = np.ones((S1,S2))
BGM = BGM*bg[c][i] #create a matrix with bg to substract it to the frame
Data = data[c][:,:,i]
Data = Data-BGM #perform the substraction
Data[Data<0] = 0 # values < 0 are not allowed --> transform it to 0
data[c][:,:,i] = Data #actualize Data
return(data)
[docs]def data_sum_time(data):
"""
Sum the data for each pixel over time
Parameters
----------
Data: dictionary
R G B images data
Returns
-------
SData: array like
Sum data over time and over channels for each pixel of the Data
"""
SData = data[CHANNELS[0]][:,:,:].sum(axis=(2))+data[CHANNELS[1]][:,:,:].sum(axis=(2))+data[CHANNELS[2]][:,:,:].sum(axis=(2))
plt.imshow(SData)
plt.colorbar()
plt.title('All channels')
return(SData)
[docs]def smooth_data(data,sigma):
"""
Apply gaussian filter to smooth each frame data
Parameters
----------
data: dictionary
4 dimensional (R,G,B, and Time) matrix with the data
sigma: double
Filter parameter (standard deviation)
Returns
-------
NSIms: dictionary
Sum over time of Smoothed data per channel (call it nsims[channel][r,c])
NSImsAll: array_like
Matrix with sum of nsims over the channels (call it nsimsAll[r,c])
SImsT: dictionary
Smoothed data per channel per frame (call it as simsT[channel][r,c,f])
"""
NSIms = {}
NSIms_All = np.zeros((data[CHANNELS[0]].shape[0],
data[CHANNELS[0]].shape[1]))
SImsT = {}
plt.figure(figsize=(17,3))
POS_VECT = [131,132,133] # figure position vector
count = 0
for c in CHANNELS:
# apply filter
Data_Sum = data[c].sum(axis=2)
SIms = gaussian(Data_Sum, sigma)
NSIms [c] = (SIms-SIms.min())/(SIms.max()-SIms.min())
NSIms_All += NSIms[c]
Maux = np.zeros((data[CHANNELS[0]].shape))
for fr in range(data[c].shape[-1]):
Maux[:,:,fr] = gaussian(data[c][:,:,fr], sigma)
SImsT[c] = Maux
# make plot of the sum over time of smoothed data per channel
plt.subplot(POS_VECT[count])
plt.imshow(NSIms[c])
plt.colorbar()
plt.title(c+' channel')
count += 1
return(NSIms,NSIms_All,SImsT)
[docs]def colony_blobs_id(data, thresh, im_name, filename='null'):
"""
Use skimage to identify the position of each colony and define the circular region
used by each of them
Parameters
----------
data: array of single channel image data
thresh:
Pixel values > thresh are included in the analysis, range (0,1)
im_name:
Name of an image on which to overlay colony positions and sizes
filename: string
filename with whom save the output image+blobs+ID
Returns
-------
A: array (Nx3)
Contains the (x,y) position and size of each blob for each of N colonies detected
"""
A = skfeat.blob_log(data, min_sigma=1.0, max_sigma=10.0, num_sigma=100,
threshold=thresh, overlap=0.8)
plt.figure(figsize=(8,8))
plt.imshow(data, cmap='gray')
#plt.hold(True)
plt.title('Sumarized Image')
for i in range(len(A)):
circle = plt.Circle((A[i,1], A[i,0]), 2*A[i,2], color='r', fill=False ,
lw=0.5)
fig = plt.gcf()
ax = fig.gca()
ax.add_artist(circle)
plt.figure(figsize=(8,8))
plt.imshow(plt.imread(im_name))
#plt.hold(True)
plt.title('Over '+ im_name)
for i in range(len(A)):
# plot the circle area identified for each colony
circle = plt.Circle((A[i,1], A[i,0]), 2*A[i,2], color='w', fill=False , lw=0.5)
fig = plt.gcf()
ax = fig.gca()
ax.add_artist(circle)
ax.axes.get_xaxis().set_visible(False)
ax.axes.get_yaxis().set_visible(False)
# attach the ID label to each colony
plt.annotate(i, xy=(A[i,1], A[i,0]), xytext=(-2, 2),
textcoords='offset points', ha='right', va='bottom',
color='white')
if filename != 'null':
plt.savefig(str(filename) + ".pdf", transparent=True)
return(A)
[docs]def obtain_rois(data,blobs):
"""
Based on the information of each identified colony, create arrays to contain
the regions of interest (ROI) around each one.
Parameters
----------
data: dictionary
R G B image data per frame
blobs: array like
Array of colony positions and sizes given by skimage in colonyBlob()
Returns
-------
all_rois:
The ROI array image data for square region around colony position of side 2*(colony size)
to call it: all_rois['channel_name'][blob_number][y,x,timepoint]
all_rois_circle:
The ROI array image data only within circle (radius = width/2), with the data outside the circle equal to zero.
The size of the array is equal to square ROIS (all_rois) size.
to call it: all_rois_circle['channel_name'][blob_number][y,x,timepoint]
nc:
Number of colonies analysed (length of returned arrays)
"""
all_rois = {}
all_rois_circle = {}
nc = len(blobs)
for char in CHANNELS:
rois = {}
rois_circle = {}
for i in range(nc):
x = blobs[i,0]
y = blobs[i,1]
r = 2*blobs[i,2] # blobs[i,2] is the std deviation of the radius
# --> r=2*std implies 95% confidence
####### this lines are to eliminate the out of image bounds error
x1 = int(round(x-r))
x2 = int(round(x+r+1)) #plus 1 because slice working
y1 = int(round(y-r))
y2 = int(round(y+r+1)) #plus 1 because slice working
if x1 < 0:
x1 = 0
if x2 >= data[char].shape[0]:
x2 = data[char].shape[0]-1
if y1 < 0:
y1 = 0
if y2 >= data[char].shape[1]:
y2 = data[char].shape[1]-1
if x2>x1 and y2>y1:
print('ROI','x1','x2','y1','y2')
print(i,x1,x2,y1,y2)
rois[i] = data[char][x1:x2,y1:y2,:]
else:
rois[i] = []
#######
xr = int((rois[i].shape[0]+1)/2)
yr = int((rois[i].shape[1]+1)/2)
rois_circle[i] = np.zeros((rois[i].shape))
for n in range(rois[i].shape[0]):
for m in range(rois[i].shape[1]):
if ((n-xr)**2+(m-yr)**2) <= (r**2):
rois_circle[i][n,m,:] = rois[i][n,m,:]
all_rois[char] = rois
all_rois_circle[char] = rois_circle
return(all_rois,all_rois_circle,nc)
# rois contains a square arund the colony
# rois_circle makes the values outside the colony boundaries equals to zero
[docs]def channels_sum(rois_data, cv):
"""
Compute the sum over the RGB channels for each image
Parameters
----------
rois_data: dictionary
RGB time-lapse image data of each ROIS, from obtain_rois()
cv: vector
contain the ID of the of colonies analysed
Returns
-------
sum_chan_rois: dictionary
Sum of channels for each time step and ROI
"""
sum_chan_rois = {}
for i in cv:
sum_chan_rois[i] = np.zeros((rois_data[CHANNELS[0]][i].shape))
for c in CHANNELS:
for i in cv:
sum_chan_rois[i] += rois_data[c][i][:,:,:]
return(sum_chan_rois)
[docs]def frame_colony_radius(rois, cv, thr, min_sig=0.5, max_sig=10, num_sig=200):
"""
Get the colony radius at each time step
Parameters
----------
rois: dictionary
ROI image data from obtain_rois()
cv: vector
contain the ID of the of colonies analysed
thr: double
Threshold for skfeat.blob_log
min_sig: double
minimum value of sigma used on skfeat.blob_log
max_sig: double
maximum value of sigma used on skfeat.blob_log
num_sig: int
number of sigma values used between min_sig and max_sig on skfeat.blob_log
Returns
-------
R: dictionary
The time series of colony size, indexed by colony id number
"""
R = {}
nt = rois[cv[0]].shape[2]
for k in cv:
R[k] = np.zeros((nt,))
for i in range(nt):
troi = rois[k][:,:,i].astype(np.float32)
if len(troi):
nt_roi = (troi-troi.min())/(troi.max()-troi.min())
AA = skfeat.blob_log(nt_roi, min_sigma=min_sig,
max_sigma=max_sig, num_sigma=num_sig,
threshold=thr, overlap=0.8)
#AA = skfeat.blob_log(nt_roi, min_sigma=0.1, max_sigma=6.0, num_sigma=150, threshold=thr, overlap=0.8)
if len(AA)>0:
R[k][i] = AA[0,2]
return(R)
[docs]def area(r, cv, T, filename='null'):
"""
Compute and plot the colonies area over time as a perfect circle (using
the input radius value) around the colony position value
Parameters
----------
r: dictionary
colony radius at each time step of the selected colony (obtained with frame_colony_radius() function)
cv: vector
colonies ID vector to plot
T: vector
the vector of real time values
filename: string
filename to save the plot generated
Returns
-------
A: dictionary
colony area at each time step of the selected colony. Call it as: A[colonyID][time step]
"""
plt.figure()
A = {}
for i in cv:
R = r[i]
A[i] = pi*R*R
plt.plot(T,A[i],'.',label='colony '+str(i))
if filename != 'null':
#plt.savefig("KymoGraph.pdf", transparent=True)
plt.savefig(str(filename)+".pdf", transparent=True)
return(A)
[docs]def f_sigma(t, a, b, c):
"""
Compute the sigmoide function value using the given input values
Parameters
----------
t: vector
independent variable ( "x axis", suposed to be time)
a: double
maximum value parameter
b: double
function parameter
c: double
delay parameter
Returns
-------
function evaluation
"""
return((a /(1+np.exp(-(t+b)*c))))
#return((a /(1+np.exp(-(t+b)*c)))+d)
[docs]def function_fit(xdata, ydata, init, end, cv, func=f_sigma,
param_bounds=([1,-np.inf,0.1],[np.inf,-1,1])):
"""
Fit a given function to given data
Parameters
----------
xdata: vector
independent variable ( "x axis", suposed to be time vector)
ydict: array like
array of dependent variable vectors
init: double
point on the time vector to init the fitting
end: double
point on the time vector to end the fitting
cv: vector
contain the ID of the colonies to analyse
func: function
function to be fitted
param_bounds: array of vectors
lower and upper bounds of each parameters
para_bounds=([lower bounds],[upper bounds])
Returns
-------
Y_fit: dictionay
contain the fitting result for each colony in the dictionary.
It is:
Y_fit[col ID][evalF z]:
evalF: vector
result vector of the fitted function:
evalF=func(xdata, optimal_parameters)
z: vector
fitted parameters
"""
Y_fit = {}
for i in cv:
z,_ = curve_fit(func, xdata[init:end], ydata[i][init:end],
bounds=param_bounds)
print(z)
evalF = func(xdata,z[0],z[1],z[2])
plt.plot(xdata, ydata[i], '.',xdata, evalF, '-')
plt.title('Colony '+str(i))
plt.show()
Y_fit[i] = evalF,z
return(Y_fit)
[docs]def croi_mean_int_frames(data, blobs, radii, cv):
"""
compute the mean intensity values for each time and channels for each CROI
(circular ROI), redefining the ROIS based on radii values
It takes the fit radius value at each time (radii), with it defines a
circular ROI, sum all the pixel values inside them and divide this value
for the number of pixel considered. --> obtain the intensity mean value
inside the colony limits on each time.
Parameters
----------
data: dictionary
RGB dictionary with the images data
blobs: array like
contains the information of identified blobs
radii: dictionary
contains the radius for each colony on each time step
cv: vector
contain the ID of the colonies to analyse
Returns
-------
all_chan_crois_mean_val: dictionary
contain the mean pixel value of each channel for each time step of each colony.
call it as: all_chan_crois_mean_val['channel_name'][blob_number][timepoint]
"""
all_chan_crois_mean_val = {}
for char in CHANNELS:
crois_mean_val = {}
for i in cv:
#x and y are the colony center pixel stored on blobs
x = blobs[i,0]
y = blobs[i,1]
CRoi_int = 0
count = 0
meanInt = np.zeros((len(radii[i])))
for j in range(len(radii[i])):
####### this lines is to eliminate the out of image bounds error
r = radii[i][j]
x1 = int(round(x-r))
x2 = int(round(x+r+1))
y1 = int(round(y-r))
y2 = int(round(y+r+1))
if x1 < 0:
x1 = 0
if x2 >= data[char].shape[0]:
x2 = data[char].shape[0]-1
if y1 < 0:
y1 = 0
if y2 >= data[char].shape[1]:
y2 = data[char].shape[1]-1
SRoi = data[char][x1:x2,y1:y2,j]
#######
xr = int((SRoi.shape[0]+1)/2)
yr = int((SRoi.shape[1]+1)/2)
for n in range(SRoi.shape[0]):
for m in range(SRoi.shape[1]):
if ((n-xr)**2+(m-yr)**2) <= (r**2):
CRoi_int += SRoi[n,m]
count += 1
if count != 0:
meanInt[j] = CRoi_int/count
crois_mean_val[i] = meanInt
all_chan_crois_mean_val[char] = crois_mean_val
return(all_chan_crois_mean_val)
[docs]def f_mu (t, b, d):
"""
compute the grwoth rate (mu) function value
Parameters
----------
t: int or vector
independent variable values (suposed to be time vector)
b: double
functon parameter
c: double
function parameter
Returns
-------
evaluated "mu" fucntion with the given parameters
"""
return((d /(np.exp(d*(t+b))+1)))
[docs]def f_linear(x, a, b):
"""
compute the linear function value with given parameters
Parameters
----------
x: int or vector
independent variable values
a: double
slope parameter
b: double
y-intercept parameter
Returns
-------
evaluated linear fucntion with the given parameters for the given x
"""
return(a * x + b)
[docs]def linear_fit(data1, data2, filename='null'):
"""
Fit linear function (f_linear) to given data, display the fited function
and make a plot of the result. You are able to save the resulting plot by
given as input the "filename" to save it.
Parameters
----------
data1: vector
independent variable ( "x axis") to be used as input of f_linear
data2: vector
"y-data values" used as reference to peform the fitting
filename: string
name of the image file if it is desired to save it.
Returns
-------
z: vector
fitted parameters
"""
z,_ = curve_fit(f_linear, data1, data2, bounds=([0,-np.inf], np.inf))
#print(z) #first component is the slope
p = np.poly1d(z)
print(np.poly1d(p))
xp = np.linspace(data1.min(), data1.max(), 2)
#plt.plot(timeC[init:end], ratio[init:end,i], '.', xp, p(xp), '-')
plt.figure()
axisMax = np.max([np.max(data1), np.max(data2)])
axisMin = np.min([np.min(data1), np.min(data2)])
plt.axis([axisMin, axisMax, axisMin, axisMax])
plt.plot(data1, data2, '.', xp, p(xp), '-')
if filename != 'null':
#plt.savefig("FluorIntRGB.pdf", transparent=True)
plt.savefig(str(filename) + ".pdf", transparent=True)
return(z)
[docs]def colony_classifier(fit, classes, chanx_dat, chany_dat):
"""
Classify chanx_dat and chany_dat (which correspond to the data serie being
classified) on the classes names given as inputs. The classification is
on the small chany_dat distance value with the value computed with the
chanx_dat and each fitted function (that are on "fit"). In other words,
accord the minimal y-coordinate distance between each dot and fitted lines.
Parameters
----------
fit: array like
each position on the array cointains the parameters of the linear
fit of each categorie. fit = [z1, z2, z3] where z is the return of
linear_fit.
classes: string array
contain the names of the defined categories (its length have to be
same long as fit)
chanx_dat: vector
data of the channel on x axis for the data to be classified
chany_dat: vector
data of the channel on y axis for the data to be classified
Returns
----------
clas: list
contain the category of each classified colony in order.
e.g. clas = ['cat3', 'cat1, 'cat1', 'cat1', 'cat2', etc ...]
clas_dict: dictionary
contain the channel value of the colonies of each category in the
corresponding dictinary class. clas_dict = ['class'][chan_xdat,
chany_dat, boolean]. The boolean vector have the length of the
total colony analyzed, and indicate (with True) which colonies
correspond to that category.
"""
CAT_NUM = len(fit) # number of categories
y = np.zeros(CAT_NUM)
d = np.zeros(CAT_NUM)
clas = np.zeros(len(chanx_dat))
clas_dict = {}
# evaluate if have same number of classes as linear fits
if CAT_NUM == len(classes):
# compute the difference between the straight lines categories and the
# colonies being classified
for i in range(len(chanx_dat)):
for j in range(CAT_NUM):
y[j] = fit[j][0]*chanx_dat[i]+fit[j][1]
d[j] = (y[j]-chany_dat[i])*(y[j]-chany_dat[i])
# find the minimal difference value
mindif = np.min(d)
# perform the classification
TOKEN = 0
count = 0
while TOKEN == 0:
if mindif == d[count]:
clas[i] = count
TOKEN = 1
count += 1
# store the data in a dictionary of categories
for n in range(len(classes)):
clas_dict[classes[n]]=[chanx_dat[clas==n],chany_dat[clas==n],clas[:]==n]
# save a list with the corresponding string category name of each element in clas
roi_clas = []
for i in range(len(clas)):
roi_clas.append(classes[int(clas[i])])
return(roi_clas, clas_dict)
else:
print('\nERROR: classes have to be same length as fits\n')
# End