(* Load libraries and prepare session *) << LinearAlgebra`MatrixManipulation`; << NumericalMath`TrigFit`; << Graphics`Graphics`; << Graphics`Arrow`; Off[General::"spell"]; Off[General::"spell1"]; (* Read Yeast Data *) stream = "/home/lom/tmp/doublecheck_gsvd/Yeast.txt"; matrix = ReadList[stream, Word, RecordLists -> True, NullWords -> True]; {genes, arrays} = Dimensions[matrix] - {2, 6} Clear[stream]; genenames = TakeRows[ TakeColumns[matrix, {1, 6}], {3, genes + 2}]; arraynames = TakeColumns[ TakeRows[matrix, {1, 2}], {7, arrays + 6}]; matrix = TakeColumns[ TakeRows[matrix, {3, genes + 2}], {7, arrays + 6}]; matrix = ToExpression[matrix]; sizes = Flatten[ Table[ Dimensions[ Characters[ ToString[arraynames[[2, a]] ]]], {a, 1, arrays}]]; size = Sort[sizes, OrderedQ[{#2, #1}] &][[1]]; Do[ Do[arraynames[[2, a]] = StringJoin[ToString[arraynames[[2, a]]], " "], {b, 1, size - sizes[[a]]}], {a, 1, arrays}]; (* Count and locate Null Data *) counter = Table[Dimensions[Position[matrix[[a]], Null]][[1]], {a, 1, genes}]; Clear[positions]; positions = Table[0, {a, 1, arrays + 1}] Do[ positions[[a]] = Flatten[ Position[Flatten[counter], a - 1]], {a, 1, arrays + 1}]; numbers = Flatten[ Table[ Dimensions[positions[[a]]], {a, 1, Round[arrays*0.2]}]] (* Create Display Of Gene Position Of Null Data *) framex = Table[{a, a - 1}, {a, 1, Round[arrays*0.2]}]; framey = {500, 1000, 1500, 2000, 2500, 3000}; labelx = ColumnForm[{"Number of Arrays"}, Center]; labely = ColumnForm[{"Number of Genes"}, Center]; g = BarChart[numbers, Frame -> True, Axes -> False, FrameLabel -> {labelx, labely, None, None}, FrameTicks -> {framex, framey, None, None}, GridLines -> {None, None}, PlotRange -> {{0.5, Round[arrays*0.2] + 0.5}, {0, 3000}}, DisplayFunction -> Identity]; g = FullGraphics[g]; g[[1, 2]] = g[[1, 2]] /. Text[labely, {b_, c_}, {1., 0.}] -> Text[labely, {b - 0.75, c}, {0, 0}, {0, 1}]; g[[1, 2]] = g[[1, 2]] /. Text[labelx, {b_, c_}, {0., 1.}] -> Text[labelx, {b, c - 400}, {0, 1}, {1, 0}]; g[[1, 2]] = g[[1, 2]] /. Text[a_, {b_, c_}, {0., 1.}] -> Text[a, {b, c - 200}, {0, 0}, {0, 1}]; Show[g, PlotRange -> All] (* Select Genes with no Missing Data Points *) matrix = AppendRows[Table[{counter[[a]]}, {a, 1, genes}], genenames, matrix]; matrix = Sort[matrix, OrderedQ[{#1, #2}] &]; fullgenenames = TakeColumns[ TakeRows[matrix, {1, numbers[[1]]}], {2, 7}]; fullmatrix = TakeColumns[ TakeRows[matrix, {1, numbers[[1]]}], {8, arrays + 7}]; (* Calculate SVD *) {eigenarrays, eigenexpressions, eigengenes} = SingularValues[fullmatrix]; eigenarrays = Transpose[eigenarrays]; fractions = eigenexpressions^2/Sum[eigenexpressions[[a]]^2, {a, 1, arrays}] entropy = -N[ Sum[fractions[[a]]*Log[fractions[[a]]], {a, 1, arrays}]/Log[arrays]] entropy = N[Round[100*entropy]/100] (* Create Fractions Bar Charts Displays *) limit = 0.01; Clear[gridx, framex, framey, sizes]; gridx = Table[a, {a, 0, limit, N[limit/5]}]; framex = gridx; sizes = Flatten[ Table[ Dimensions[ Characters[ ToString[framex[[a]] ]]], {a, 1, 6}]]; Do[ Do[framex[[a]] = StringJoin[ToString[framex[[a]]], " "], {b, 1, 5 - sizes[[a]]}], {a, 1, 6}]; framex = Table[{gridx[[a]], framex[[a]]}, {a, 1, 6}]; gridx = Table[{gridx[[a]], RGBColor[0, 0, 0]}, {a, 1, 6}]; framey = Table[{a + 1, arrays - a - 6}, {a, 0, 12 - 3}]; table = Table[fractions[[arrays - a]], {a, 6, arrays - 3}]; g = BarChart[table, BarOrientation -> Horizontal, PlotRange -> {{0, limit*1.0001}, {0.5, 12 - 2 + 0.5}}, AspectRatio -> 1, Axes -> False, Frame -> True, FrameTicks -> {None, framey, framex, None}, FrameLabel -> {None, None, None, None}, GridLines -> {gridx, None}, DisplayFunction -> Identity]; g = FullGraphics[g]; g[[1, 2]] = g[[1, 2]] /. Text[a_, {b_, c_}, {0., -1.}] -> Text[a, {b, c + 1.75}, {0, 0}, {0, 1}]; g1 = Show[g, AspectRatio -> 1.25, PlotRange -> All, DisplayFunction -> Identity]; g = BarChart[table, BarOrientation -> Horizontal, PlotRange -> {{0, limit*1.0001}, {0.5, 12 - 2 + 0.5}}, AspectRatio -> 1, Axes -> False, Frame -> True, FrameTicks -> {None, framey, framex, None}, FrameLabel -> {None, None, None, None}, GridLines -> {gridx, None}, DisplayFunction -> Identity]; g = FullGraphics[g]; g[[1, 2]] = g[[1, 2]] /. Text[a_, {b_, c_}, {0., -1.}] -> Text[a, {b, c + 1.75}, {0, 0}, {0, 1}]; g1 = Show[g, AspectRatio -> 1.25, PlotRange -> All, DisplayFunction -> Identity]; (* Red & Green Raster Display *) contrast = 3.5; displaying = Table[ If[contrast*eigengenes[[i, j]] > 0, If[ contrast*eigengenes[[i, j]] < 1, {contrast*eigengenes[[i, j]], 0}, {1, 0}], If[ contrast*eigengenes[[i, j]] > -1, {0, -contrast* eigengenes[[i, j]]}, {0, 1}]], {i, 1, arrays}, {j, 1, arrays}]; framex = Table[{a - 0.5, arraynames[[2, a]]}, {a, 1, arrays}]; framey = Table[{a + 1 - 0.5, arrays - a}, {a, 0, arrays - 1}]; labely = "Eigengenes"; labelx = ColumnForm[{"(a) Arrays", " ", " "}, Center]; g = Show[ Graphics[ RasterArray[ Table[ RGBColor[displaying[[i, j, 1]], displaying[[i, j, 2]], 0], {i, arrays, 1, -1}, {j, 1, arrays}]]], AspectRatio -> 1, Frame -> True, FrameTicks -> {None, framey, framex, None}, FrameLabel -> {None, labely, labelx, None}, DisplayFunction -> Identity]; g = FullGraphics[g]; g[[1, 2]] = g[[1, 2]] /. Text[labely, {b_, c_}, {1., 0.}] -> Text[labely, {b - 3, c}, {0, 0}, {0, 1}]; g[[1, 2]] = g[[1, 2]] /. Text[labelx, {b_, c_}, {0., -1.}] -> Text[labelx, {b, c + 3}, {0, -1}, {1, 0}]; g[[1, 2]] = g[[1, 2]] /. Text[a_, {b_, c_}, {0., -1.}] -> Text[a, {b, c + 2.7}, {0, 0}, {0, 1}]; g1 = Show[g, AspectRatio -> 1.05, PlotRange -> All, DisplayFunction -> Identity]; (* Create Selected Eigengenes Graph Display *) labelx = ColumnForm[{"(c) Arrays"}, Center]; labely = ColumnForm[{" ", "Expression Level"}, Center]; framex = Table[{a - 1, arraynames[[2, a]]}, {a, 1, arrays}]; framey = {-1, -0.5, 0, 0.5, 1}; coordinates = Table[{a - 1, eigengenes[[1, a]]}, {a, 1, arrays}]; points = Table[Point[coordinates[[a]]], {a, 1, arrays}]; line = Line[coordinates]; g = Show[ {Graphics[{RGBColor[1, 0, 0], PointSize[0.022], points}], Graphics[{RGBColor[1, 0, 0], line}]}, Frame -> True, FrameLabel -> {None, labely, labelx, None}, GridLines -> {None, {{0, RGBColor[0, 0, 0]}}}, FrameTicks -> {None, framey, framex, None}, PlotRange -> {-1.05, 1.05}, DisplayFunction -> Identity]; g = FullGraphics[g]; g[[1, 2]] = g[[1, 2]] /. Text[labely, {b_, c_}, {1., 0.}] -> Text[labely, {b - 5.4, c}, {0, 0}, {0, 1}]; g[[1, 2]] = g[[1, 2]] /. Text[labelx, {b_, c_}, {0., -1.}] -> Text[labelx, {b, c + 0.625}, {0, -1}, {1, 0}]; g[[1, 2]] = g[[1, 2]] /. Text[a_, {b_, c_}, {0., -1.}] -> Text[a, {b, c + 0.25}, {0, 0}, {0, 1}]; p1 = Show[g, AspectRatio -> 1.05, PlotRange -> All, DisplayFunction -> Identity]; labelx = ColumnForm[{"(c) Arrays"}, Center]; labely = ColumnForm[{" ", "Expression Level"}, Center]; framex = Table[{a - 1, arraynames[[2, a]]}, {a, 1, arrays}]; framey = {-1, -0.5, 0, 0.5, 1}; coordinates = Table[{a - 1, eigengenes[[2, a]]}, {a, 1, arrays}]; points = Table[Point[coordinates[[a]]], {a, 1, arrays}]; line = Line[coordinates]; g = Show[ {Graphics[{RGBColor[0, 0, 1], PointSize[0.022], points}], Graphics[{RGBColor[0, 0, 1], line}]}, Frame -> True, FrameLabel -> {None, labely, labelx, None}, GridLines -> {None, {{0, RGBColor[0, 0, 0]}}}, FrameTicks -> {None, framey, framex, None}, PlotRange -> {-1.05, 1.05}, DisplayFunction -> Identity]; g = FullGraphics[g]; g[[1, 2]] = g[[1, 2]] /. Text[labely, {b_, c_}, {1., 0.}] -> Text[labely, {b - 5.4, c}, {0, 0}, {0, 1}]; g[[1, 2]] = g[[1, 2]] /. Text[labelx, {b_, c_}, {0., -1.}] -> Text[labelx, {b, c + 0.625}, {0, -1}, {1, 0}]; g[[1, 2]] = g[[1, 2]] /. Text[a_, {b_, c_}, {0., -1.}] -> Text[a, {b, c + 0.25}, {0, 0}, {0, 1}]; p2 = Show[g, AspectRatio -> 1.05, PlotRange -> All, DisplayFunction -> Identity]; (* Display Selected Eigengenes *) g3 = Show[{p2, p1}, DisplayFunction -> Identity] (* Display Eigengenes, Fractions and Selected Eigengenes *) Show[GraphicsArray[{g1, g2, g3}], GraphicsSpacing -> -0.15]

%clear all previous variables clear; format compact %load the yeast data load /home/lom/tmp/doublecheck_gsvd/yeast.mat [nGenes, nExps] = size(data) %Count and locate NaN Data nNanPerExp=zeros(nExps,1); for i=0:nExps-1 nNanPerExp(i+1) = (sum(sum(isnan(data),2)==i)); end %Create and Display nummer of missing values bar(0:nExps-1 ,nNanPerExp); axis([-.5 3.5 0 3000]) xlabel('Number of Arrays') ylabel('Number of Genes') %Take the only rows without missing values idx = sum(isnan(data),2)==0; fullmatrix = data(idx,:); fullgeneNames = geneNames(idx,:); %Calculate SVD [U S V] = svd(fullmatrix, 0); S=diag(S)' fractions = S.^2/sum(S.^2); entropy = -sum(fractions.*log(fractions))/log(nExps) %plot barchart subplot(1,3,2) ; barh([1:nExps],fractions, 'r') title('(b) Expresion Fraction') axes('position', [.45 .3 .16 .62]) barh([3:12], fractions(3:12), 'r') set(gca, 'Color', [1 1 .5]) %plot rasterplot subplot(1,3,1); expNames(expNames=='_')=' '; rasterplot(V', expNames) title('(a) Arrays') colormap(redgreen) xticklabel_rotate %plot two first eigengenes subplot(1,3,3); plot(V(:,1:2), '-o'); title('(c) Arrays') ylabel('Expression Level') set(gca, 'XTick', 1:nExps, 'XTickLabel', expNames) xticklabel_rotate