Info
This question is closed. Reopen it to edit or answer.
Could anyone please help me group together the users that are at the maximum distance from one another?
2 views (last 30 days)
Show older comments
code:
% Set-up for grouping 2-D points by how FAR apart they are.
% This section defines the input points and options, computes all pairwise
% distances, and lists the point pairs from the farthest to the nearest.
clearvars;   % clears workspace variables only ('clear all' also drops breakpoints and cached functions)
clc;

% Input points: one row per point, columns are the two coordinates.
PPP = [ 1.7718 2.4035;
        1.3717 1.8971;
       -0.0575 1.2565;
        1.5720 0.5264;
        2.0790 0.5114 ];

minClusterSize = 1;   % clusters with fewer points than this are removed after grouping
method = 'point';     % 'point', 'centroid' or 'geometric median'; selects the extra distance test applied when a point joins an existing cluster
maxdist = 10;         % threshold: a pair is only considered when its distance is GREATER than this value

% Condensed vector of pairwise Euclidean distances (pdist is part of the
% Statistics and Machine Learning Toolbox). Its elements are ordered
% (2,1),(3,1),...,(n,1),(3,2),...,(n,n-1).
D = pdist(PPP);
kmax = size(PPP,1);   % number of points

% With the grouping test 'd > maxdist', a threshold that is not below the
% largest pairwise distance can never be exceeded, so no pair is ever
% grouped. Report that instead of silently producing only one-point clusters.
if ~isempty(D) && maxdist >= max(D)
    warning('clustering:thresholdTooLarge', ...
        ['maxdist (%g) is not smaller than the largest pairwise distance (%g): ', ...
         'no pair satisfies d > maxdist, so no points will be grouped. ', ...
         'Choose maxdist below %g.'], maxdist, max(D), max(D));
end

% Point indices belonging to each element of D (column 1 = point1, column 2 = point2).
% The strictly lower triangle, traversed column by column, yields exactly the
% order used by pdist.
[rowIdx, colIdx] = find(tril(true(kmax), -1));
pairs = reshape([rowIdx, colIdx], [], 2);   % reshape keeps a 0-by-2 shape when there are no pairs

% Visit pairs from the largest distance to the smallest.
[Ds, IX] = sort(D, 'descend');
pairs_sorted = pairs(IX,:);
% Greedy grouping of far-apart points.
% Pairs are visited in the order given by 'pairs_sorted' / 'Ds'. A pair is
% only considered when its distance exceeds 'maxdist'. If neither point is in
% a cluster yet, the pair starts a new cluster; if exactly one of them is, the
% other point may join that cluster; if both already are, nothing happens.
clusters = {};            % list of clusters (one cell per cluster, each a row vector of point indices)
pointsInClusters = [];    % bookkeeping, 2-by-m: row 1 = cluster number, row 2 = point index
inc = 0;                  % number of clusters created so far
for k = 1:size(pairs_sorted,1)
    pt1 = pairs_sorted(k,1);
    pt2 = pairs_sorted(k,2);
    d = Ds(k);            % distance between pt1 and pt2
    if d <= maxdist
        continue;         % pair is not far enough apart to be grouped
    end

    % Which of the two points are still unassigned?
    if isempty(pointsInClusters)
        assignedPoints = [];
    else
        assignedPoints = pointsInClusters(2,:);
    end
    pt1Free = ~ismember(pt1, assignedPoints);
    pt2Free = ~ismember(pt2, assignedPoints);

    if pt1Free && pt2Free
        % Neither point is assigned yet: the pair starts a new cluster.
        inc = inc + 1;
        currentCluster = [pt1, pt2];
        clusters = [clusters; {currentCluster}]; %#ok<AGROW>
        pointsInClusters = [pointsInClusters, ...
            [inc * ones(1, numel(currentCluster)); currentCluster]]; %#ok<AGROW>
    elseif pt1Free || pt2Free
        % Exactly one point is unassigned: it is the candidate ('newPt') for
        % the cluster that already contains the other point ('anchorPt').
        if pt1Free
            newPt = pt1;
            anchorPt = pt2;
        else
            newPt = pt2;
            anchorPt = pt1;
        end
        id = pointsInClusters(1, pointsInClusters(2,:) == anchorPt);   % cluster number of 'anchorPt'

        % Optional extra test: distance from the candidate to a representative
        % point of the cluster. Coordinates come from PPP (the original code
        % referenced an undefined matrix 'A').
        switch ['distance to ', lower(method)]
            case 'distance to centroid'
                cdist = sqrt(sum((mean(PPP(clusters{id},:),1) - PPP(newPt,:)).^2));
            case 'distance to geometric median'
                % NOTE(review): median(...,1) is the per-coordinate median, which
                % is not in general the true geometric median - confirm intent.
                cdist = sqrt(sum((median(PPP(clusters{id},:),1) - PPP(newPt,:)).^2));
            otherwise
                cdist = [];   % e.g. method 'point': no extra test
        end

        % Join when there is no extra test, or when the candidate is farther
        % than 'maxdist' from the representative point (same "greater than"
        % rule as the pair test; the original compared against an undefined
        % variable 'mindist').
        if isempty(cdist) || cdist > maxdist
            clusters{id} = [clusters{id}, newPt];
            pointsInClusters = [pointsInClusters, [id; newPt]]; %#ok<AGROW>
        end
    end
end
% Remove clusters that are smaller than 'minClusterSize'.
clusters = clusters(cellfun(@(members) numel(members) >= minClusterSize, clusters));
clearvars pointsInClusters   % bookkeeping is out of date after the filtering above and is not referenced below

% List the points that are not part of any remaining cluster.
pointsNotInClusters = 1:kmax;
pointsNotInClusters = pointsNotInClusters(~ismember(pointsNotInClusters, cell2mat(clusters')));

if minClusterSize < 2
    % One-point clusters are allowed: turn every leftover point into its own cluster.
    clusters = [clusters; num2cell(pointsNotInClusters)'];
    pointsNotInClusters = [];   % every point now belongs to a cluster
end
% Summary statistics of the clustering result, printed to standard output.
sizeOfClusters    = cellfun(@numel, clusters);    % number of points in each cluster
clusterCount      = numel(clusters);              % total number of clusters
singlepointCount  = numel(pointsNotInClusters);   % points left outside every cluster
clusterMinSize    = min(sizeOfClusters);
clusterMaxSize    = max(sizeOfClusters);
clusterMeanSize   = mean(sizeOfClusters);
clusterMedianSize = median(sizeOfClusters);

fprintf('Number of clusters: %lu\n', clusterCount);
fprintf('Size of smallest cluster: %lu\n', clusterMinSize);
fprintf('Size of largest cluster: %lu\n', clusterMaxSize);
% Optional extra lines (disabled):
% fprintf('Mean cluster size: %f\n', clusterMeanSize);
% fprintf('Median cluster size: %lu\n', clusterMedianSize);
fprintf('Number of points that are not part of any cluster: %lu\n', singlepointCount);
% Collect the coordinates of the points of every cluster:
% clustersPPP{k} holds the rows of PPP that belong to cluster k.
% (The cell array is preallocated under the name that the loop actually
% fills; the original preallocated an unused 'clustersXY' instead.)
clustersPPP = cell(clusterCount,1);
for k = 1:clusterCount
    clustersPPP{k,1} = PPP(clusters{k,1},:);
end
% Representative points of every cluster, one row per cluster:
%   clustersCentroids  - mean of the cluster's point coordinates
%   clustersGeoMedians - per-coordinate median of the cluster's point coordinates
clustersCentroids  = NaN(clusterCount,2);
clustersGeoMedians = NaN(clusterCount,2);
for k = 1:clusterCount
    clustersCentroids(k,:)  = mean(clustersPPP{k,1}, 1);
    clustersGeoMedians(k,:) = median(clustersPPP{k,1}, 1);
end
% Plot the clusters: all input points in light grey, then the points of each
% cluster in that cluster's own colour.
cc = hsv(clusterCount);               % one colour per cluster
cc = cc(randperm(clusterCount),:);    % shuffle so that consecutive clusters do not get similar-looking colours
h1 = figure('Name','Clusters');
hold on;

% Optional overlays. 'xunit'/'yunit' and 'xsd'/'ysd' are not defined anywhere
% in this script, so plotting them unconditionally stops the script with an
% undefined-variable error. Draw them only when they exist in the workspace.
% NOTE(review): presumably an outline curve and marker positions from a
% related script - confirm; the (ysd, xsd) argument order is kept as written.
if exist('xunit','var') == 1 && exist('yunit','var') == 1
    plot(xunit, yunit);
end
if exist('xsd','var') == 1 && exist('ysd','var') == 1
    plot(ysd, xsd, 'r^');
end

scatter(PPP(:,1),PPP(:,2),20,'filled','o','CData',[.8,.8,.8]);   % original points in light grey
for k = 1:clusterCount
    plot(clustersPPP{k,1}(:,1), clustersPPP{k,1}(:,2), 'o', ...
        'Color', cc(k,:), 'MarkerFaceColor', cc(k,:));
end
% Write the cluster centroids and the cluster medians to tab-separated text
% files. Each file gets a header line 'X<TAB>Y' followed by one row per cluster.
outputTable = { 'cluster_centroids.txt',         clustersCentroids; ...
                'cluster_geometric-medians.txt', clustersGeoMedians };
for fileNo = 1:size(outputTable,1)
    outputfile = outputTable{fileNo,1};

    % Header line. Opening with 'w' creates the file or discards old content.
    [fidout, openMsg] = fopen(outputfile, 'w');
    if fidout == -1
        % Fail with a clear message instead of an invalid-identifier error in fprintf.
        error('clustering:cannotOpenFile', ...
            'Cannot open "%s" for writing: %s', outputfile, openMsg);
    end
    fprintf(fidout, '%s\t%s\n', 'X', 'Y');
    fclose(fidout);

    % Data rows are appended below the header. dlmwrite is kept so that the
    % fixed-width '%015.10f' number format of the output stays unchanged.
    dlmwrite(outputfile, outputTable{fileNo,2}, '-append', ...
        'delimiter', '\t', 'precision', '%015.10f');
end
The above executes and gives me the result.
However, the users are not being grouped according to the maximum distances between them.
Could anyone please help me with this?
0 Comments
Answers (0)
See Also
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!