%% Uhlman data processing % Import an image dataset and do some calculations from them % Todd Fallesen, Feb 3, 2022, Calm Facility, Francis Crick Institute %This is a combination of data processing and data filtering, as it was %taking wayyy too long and getting confusing. %%Calculations we want % 1.) Width of the arms from the sigma from fit % 2.) spacing between arms from the seperation of the two centers % 3.) length of the chromsome, by how many rows make up the % calculation...somehow? %Read in the data clear all %clear all variables load("Selected_data_data_struct.mat"); %Load the data tic %Start the timer %% We need to calculate the height of the chromsome. % To calculate the height of the chromosome, we are going to find two % 'border' rows, which is where the width of the chromosome binary image drops below % 70% of the average width of the chromsome binary image. This value can % be changed easily in the variable "width_thresh" % There is another variable, "red_rows" which will store all the rows in an % image where the fit was good enough to be used. The red_rows are so % named, as it was requested that we overlay a red line on the image % wherever there was a row that had a good enough fit. % This first data structure will build on the data structure built in % Uhlman_main.m, giving us the widths of each row, whether or not the data % in each row is good enough to use before filtering out later in the % script width_thresh=0.7; %cut off is 70% of the average Binary width images = {}; %initalize an empty cell set for the images border_rows = []; %initalize an empty list of border rows red_rows = []; %initalize an empty list of red_rows which comes up in the last section of the code for i=1:length(file_struct) for j=1:length(file_struct(i).FitParams) % loop of calculations for each row file_struct(i).FitParams(j).Row_number = j; % specifiy row number. This will come into play for the red_rows try if file_struct(i).FitParams(j).Fitresult.b2-file_struct(i).FitParams(j).Fitresult.b1 > 0 %Save the arms so that the left one is always first. file_struct(i).FitParams(j).width(1,1)=file_struct(i).FitParams(j).Fitresult.c1; %Save c1 as the first arm width file_struct(i).FitParams(j).width(1,2)=file_struct(i).FitParams(j).Fitresult.c2; %Save c2 as the second arm width file_struct(i).FitParams(j).ArmSep = file_struct(i).FitParams(j).Fitresult.b2-file_struct(i).FitParams(j).Fitresult.b1; %Find the seperation between arms else %this loop runs if the fitting algorithm found the right %hand side chromatid as chromatid 1. It reorders them so %it always goes left to right file_struct(i).FitParams(j).width(1,1)=file_struct(i).FitParams(j).Fitresult.c2; %Save c2 as the first arm width file_struct(i).FitParams(j).width(1,2)=file_struct(i).FitParams(j).Fitresult.c1; %Save c1 as the second arm width file_struct(i).FitParams(j).ArmSep = file_struct(i).FitParams(j).Fitresult.b1-file_struct(i).FitParams(j).Fitresult.b2; end catch file_struct(i).FitParams(j).width=0; %If there is no fit, set the width and arm seperation to 0. Can filter these out later file_struct(i).FitParams(j).ArmSep=0; end end %loop for calculations for each image file %Find the border rows, using a Todd-made function called border finder. ImBW = file_struct(i).ImBW; %read in the Binary image from the master data structure image = file_struct(i).ImData; %read in the original, rotated image from the master data structure try border_rows(i,:) = border_finder(ImBW, width_thresh); Good_Image(i) = Image_Format_Check(ImBW); catch border_rows(i,:) = [0,0]; %if border rows can't be calculated, put as zeros, so they can be filtered out later Good_Image(i) = 0; end images{i}=image; %add the image to the cell array. clear image ImBW %clear the variables. %% Add Widths, Border rows to the data structure % binary_widths = extractfield(file_struct(i).FitParams, 'Binary_Width'); %extract the binary width column so we can use it for vectorized filtering file_struct(i).mean_bw_width= mean(nonzeros(extractfield(file_struct(i).FitParams, 'Binary_Width'))); %get the mean of the width of the binary mask, not counting zeros file_struct(i).num_rows_above_threshold = numel(binary_widths(binary_widths>file_struct(i).mean_bw_width*width_thresh)); %total number of rows above the threshold, a proxy for length... file_struct(i).length_from_function = abs(border_rows(i,1)-border_rows(i,2)); %get the length from the border rows file_struct(i).border_rows = border_rows(i,:); %save the border rows to the data structure file_struct(i).Good_image = Good_Image(i); % save the good_image boolean to the image end outname="modified_with_selected_image_data"; %build a filename to keep this set of data struct_name = strcat(outname, '_data_struct.mat'); %save the datastructure out as a file save(struct_name, "file_struct"); toc %stop timer %% This section is for filtering out the data, so we're only left with results that we are confident in clearvars -except struct_name width_thresh %clear variables except the structure name from before load(struct_name); %load the structure from memory %load("modified_with_selected_image_data_data_struct.mat"); %Filter the rows of each image by some variable: % 1.) If the row has a binary width greater than a threshold % 2.) If the intensity is above some threshold (which would be the % amplitiude of the guassians) tic %restart the time amp_thresh=700/2^16; %minimum ampilitude for intensity has to be 700; amp_max=0.99; %max intensity is just wicked high, it can't be inifinity, but can stretch high %%Loop over each image for i=1:length(file_struct) disp('This is file struct number') %just show progress disp(i) binary_widths = extractfield(file_struct(i).FitParams, 'Binary_Width'); %reload list of Binary widths good_entries = (binary_widths > file_struct(i).mean_bw_width * width_thresh)'; %gives logical of values where the width is big enough amplitude1=zeros(length(binary_widths),1); %make a logical vector of zeros for the two amplitudes. We start with zeros, and if the amplitudes meet the criteria, amplitude2=zeros(length(binary_widths),1); %we switch to ones, so when we multiply through by the vector, we keep only the rows where the amplitudes meet the criteria file_struct(i).length_estimation=nnz(good_entries); %this is a wicked rough estimation of length just by how many rows we have. This should be similar to the result from the border rows for j=1:size(file_struct(i).ImData,1) %loop over every row in the image try amplitude1(j)=file_struct(i).FitParams(j).Fitresult.a1; %get the amplitude for the first chromatid fit. NOTE: This is not right/left corrected, as we're using only absolutes here. catch amplitude1(j)=0; %If there is no fit, set equal to zero end try amplitude2(j)=file_struct(i).FitParams(j).Fitresult.a2; %Get the ampiitude for the other chromatid fit. Again, not right/left corrected catch amplitude2(j)=0; %set equal to zero if there is no fit end end % For filtering, keep those rows where the amplitudes are between the % two thresholds good_amp1 = amplitude1 > amp_thresh & amplitude1 < amp_max; good_amp2 = amplitude2 > amp_thresh & amplitude2 < amp_max; %%filter out rows where the arms are way different sizes % We will create a vector (good_widths) of ones that is as long as the % number of rows in the image. For each row, we will check to see if % the wider arms is more than 2x as wide as the smaller arm. If it is, % then we set the value in the good_widths vector to 0, so when it is % multiplied through, the rows where the arms are vastly different % sizes get filtered out good_widths = ones(size(file_struct(i).FitParams, 2),1); %initialize an identity vector of the same length as the number of rows in the image. We will set any row that for j=1:size(file_struct(i).FitParams, 2) if max(file_struct(i).FitParams(j).width)*0.5 > min(file_struct(i).FitParams(j).width) %If the width of the bigger of the two arms is more than double that of the smaller arm, set the value to 0 good_widths(j) = 0; end end %% Multiply together all the filter vectors, to get one filter vector which we can multiply through the table best_rows = logical(good_entries .* good_amp1 .* good_amp2 .* good_widths); file_struct(i).Best_Entries = file_struct(i).FitParams(best_rows); %make a list of the best entries %% Now we can use these good rows to do our calculations try file_struct(i).best_rows = extractfield(file_struct(i).Best_Entries, 'Row_number'); %Extract the rows of the best row into a single vector that we can iterate over catch file_struct(i).best_rows = 1; end try for k=1:size((file_struct(i).Best_Entries),2) %iterate over the best entry rows and find the width of arm1 and arm2. These ARE right/left corrected arm1(k) = file_struct(i).Best_Entries(k).width(1,1); arm2(k) = file_struct(i).Best_Entries(k).width(1,2); arm_total = extractfield(file_struct(i).Best_Entries, 'width'); end file_struct(i).mean_arm1 = mean(arm1); %get the mean of the arm widths from the best entries for each image file_struct(i).mean_arm2 = mean(arm2); file_struct(i).mean_arms_together = mean(arm_total); file_struct(i).sem_arm1 = std(arm1)/sqrt(length(file_struct(i).Best_Entries)); %get the standard errors of the mean of the arm widths for each image file_struct(i).sem_arm2 = std(arm2)/sqrt(length(file_struct(i).Best_Entries)); file_struct(i).sem_arms_together = std(arm_total)/sqrt(length(arm_total)); catch file_struct(i).mean_arm1 = 0; %if there are no good entries for an image, just set the mean and std to 0 file_struct(i).mean_arm2 = 0; file_struct(i).mean_arms_together=0; file_struct(i).sem_arm1 = 0; file_struct(i).sem_arm2 = 0; file_struct(i).sem_arms_together=0; end clear arm1 arm2 binary_widths good_entries good_amp1 good_amp2 best_rows good_widths arm_total end %% Save the data and make some plots. outname="unfiltered_with_selected_image_data"; %Save the data that is filtered . struct_name = strcat(outname, '_data_struct.mat'); save(struct_name, "file_struct"); %check the image only has one edge boundary good_images = cell2mat(extractfield(file_struct, 'Good_image')); file_struct(~good_images)=[]; outname="image_filtered_with_selected_image_data"; %Save the data that is filtered and good. struct_name = strcat(outname, '_data_struct.mat'); save(struct_name, "file_struct"); length = extractfield(file_struct, 'length_from_function'); %Make plottable variables from length, mean of arms 1 and 2, and their standard deviations arm1 = extractfield(file_struct, 'mean_arm1'); arm2 = extractfield(file_struct, 'mean_arm2'); sem_arm1 = extractfield(file_struct, 'sem_arm1'); sem_arm2 = extractfield(file_struct, 'sem_arm2'); mean_arms_total = extractfield(file_struct, 'mean_arms_together'); sem_arms_total = extractfield(file_struct, 'sem_arms_together'); % Generate a plot of length vs arm1 and arm2. We're using fill % outliers here to remove outliers and replace them with the nearest % neighbor value. figure(1) plot(length, filloutliers(arm1, 'nearest'), 'r*') hold on plot(length, filloutliers(arm2, 'nearest'), 'bx') xlabel("Length (pixels)") ylabel("Mean width by Gaussian, pixels") legend('Arm1', 'Arm2') hold off figure(2) errorbar(length, filloutliers(arm1, 'nearest'),sem_arm1, 'r*') hold on errorbar(length, filloutliers(arm2, 'nearest'),sem_arm2, 'bx') hold off xlabel("Length (pixels)") ylabel("Mean width by Gaussian, pixels") legend('Arm1', 'Arm2') figure(3) errorbar(length, filloutliers(arm1, 'nearest'),sem_arm1, 'r*') hold on errorbar(length, filloutliers(arm2, 'nearest'),sem_arm2, 'r*') hold off xlabel("Length (pixels)") ylabel("Mean width by Gaussian, pixels") figure(4) errorbar(length, filloutliers(mean_arms_total, 'nearest'),sem_arms_total, 'b*') xlabel("Length (pixels)") ylabel("Mean width by Gaussian, pixels") toc %%make the montage, where each image is overlaid with two green rows where %%the border is, and red rows over each row that is used to calculate the %%mean images={}; for k=1:size(file_struct,2) images{k}=file_struct(k).ImData_NonNormal; end images = images'; RGB_images = {}; clear k for k=1:size(images) image=images{k}; border_rows = extractfield(file_struct(k), 'border_rows'); red_rows = extractfield(file_struct(k), 'best_rows'); RGB_image = line_drawer(image, red_rows, border_rows); RGB_images{k}=RGB_image; clear image clear border_rows end %the montage function is screwing up the colors somehow... figure(5) montage(RGB_images, 'BorderSize', [150 150]) for i =1:size(file_struct,2) %there is a way quicker way to do this, but I can't remember it and it needs to run once. export_struct(i).Filename = file_struct(i).Filename; export_struct(i).length = file_struct(i).length_from_function; export_struct(i).mean_arm1 = file_struct(i).mean_arm1; export_struct(i).mean_arm2 = file_struct(i).mean_arm2; export_struct(i).mean_arms_total = file_struct(i).mean_arms_together; export_struct(i).sem_arm1 = file_struct(i).sem_arm1; export_struct(i).sem_arm2 = file_struct(i).sem_arm2; export_struct(i).sem_arms_total = file_struct(i).sem_arms_together; export_struct(i).border_rows = file_struct(i).border_rows; export_struct(i).good_rows = file_struct(i).best_rows; end writetable(struct2table(export_struct), 'export_struct.csv');