Info

This question is closed. Reopen it to edit or answer.

Could anyone please help me group the users so that the users separated by the maximum distances are grouped together?

2 views (last 30 days)
code:
% ---- Input data, parameters and pair table ---------------------------
% Defines the point set PPP (one row per point, columns = x,y), the
% clustering parameters, the pairwise distances D and a table 'pairs'
% that maps every entry of D back to the two point indices it belongs to.
clearvars; % clear workspace variables only (same command the script uses further down)
clc;
PPP=[1.7718 2.4035;
1.3717 1.8971;
-0.0575 1.2565;
1.5720 0.5264;
2.0790 0.5114 ]
minClusterSize = 1; % clusters with fewer members than this are discarded later
method = 'point';   % 'point', 'centroid' or 'geometric median' (see the clustering loop)
D = pdist(PPP)      % condensed distance vector: (1,2),(1,3),...,(1,n),(2,3),...
% NOTE(review): for the five points above the largest pairwise distance is
% about 2.26 (points 3 and 5), so a threshold of 10 is never exceeded by any
% pair. Confirm that this is the intended value.
maxdist =10;
kmax = size(PPP,1); % number of points
% Build the pair table in the same order as 'D'.
% The strictly lower triangle of a kmax-by-kmax matrix, traversed column by
% column, yields (2,1),(3,1),...,(kmax,1),(3,2),... which is exactly the
% order pdist uses. column 1 = point1 (larger index), column 2 = point2.
[rowIdx, colIdx] = find(tril(true(kmax), -1));
pairs = reshape([rowIdx, colIdx], [], 2); % reshape keeps a 0-by-2 table when kmax < 2
% Sort the distances from largest to smallest and reorder the pairs to match.
[Ds,IX] = sort(D,'descend')
pairs_sorted = pairs(IX,:)
% ---- Greedy pair-wise clustering -------------------------------------
% Walks the point pairs in the order of 'pairs_sorted' / 'Ds' and, for every
% pair whose distance exceeds 'maxdist':
%   * starts a new cluster when neither point is in a cluster yet;
%   * otherwise, when exactly one point is new, appends it to the cluster of
%     the other point (subject to the optional centre-distance test below);
%   * does nothing when both points already belong to clusters.
% Reads:  PPP, method, maxdist, pairs, pairs_sorted, Ds
% Writes: clusters (N-by-1 cell, each cell a row vector of point indices),
%         pointsInClusters, inc
% NOTE(review): pairs are accepted when d > maxdist (i.e. points that are
% FAR apart are grouped). If 'maxdist' is larger than every pairwise
% distance, no pair qualifies and no cluster is formed here.
clusters = {}; % list of clusters (one cell per cluster)
pointsInClusters = zeros(2,0); % row 1 = cluster number, row 2 = point index (2-by-0 so row 2 can be indexed while still empty)
inc = 0; % number of clusters created so far

% Choose once how the representative point of an existing cluster is
% computed. Any other value of 'method' (e.g. 'point') disables the test.
switch ['distance to ',lower(method)]
    case 'distance to centroid'
        clusterCentre = @(members) mean(PPP(members,:),1);
    case 'distance to geometric median'
        % NOTE(review): this is the coordinate-wise median of the members.
        clusterCentre = @(members) median(PPP(members,:),1);
    otherwise
        clusterCentre = [];
end
useCentre = isa(clusterCentre,'function_handle');

for k = 1:size(pairs,1)
    pt1 = pairs_sorted(k,1);
    pt2 = pairs_sorted(k,2);
    d = Ds(k); % distance between pt1 and pt2
    if d > maxdist
        pt1IsNew = ~ismember(pt1,pointsInClusters(2,:));
        pt2IsNew = ~ismember(pt2,pointsInClusters(2,:));
        if pt1IsNew && pt2IsNew
            % Neither point is clustered yet: open a new cluster with both.
            inc = inc + 1;
            currentCluster = [pt1,pt2];
            clusters = [ clusters ; {currentCluster} ];
            pointsInClusters = [ pointsInClusters , ...
                [ inc * ones(1,length(currentCluster)) ; currentCluster ] ];
        elseif pt1IsNew || pt2IsNew
            % Exactly one point is new; the other one identifies the cluster.
            if pt1IsNew
                newPt = pt1;
                knownPt = pt2;
            else
                newPt = pt2;
                knownPt = pt1;
            end
            id = pointsInClusters(1,pointsInClusters(2,:)==knownPt); % cluster number of the already clustered point
            if useCentre
                cdist = sqrt( sum((clusterCentre(clusters{id}) - PPP(newPt,:)).^2) ); % distance from the new point to the cluster centre
            else
                cdist = [];
            end
            % Same direction as the pair test above: accept the new point
            % when no centre test is configured or it lies farther than
            % 'maxdist' from the cluster centre.
            if isempty(cdist) || cdist > maxdist
                clusters{id} = [ clusters{id} , newPt ];
                pointsInClusters = [ pointsInClusters , [ id ; newPt ] ];
            end
        end
    end
end
% ---- Post-processing and summary --------------------------------------
% Keep only the clusters that have at least 'minClusterSize' members.
clusters = clusters(cellfun(@numel, clusters) >= minClusterSize)
clear pointsInClusters % bookkeeping array is stale after the filter above

% Determine which point indices ended up in no cluster at all.
pointsNotInClusters = (1:kmax)
pointsNotInClusters(ismember(pointsNotInClusters,cell2mat(clusters'))) = []

% With a minimum size below 2, every leftover point becomes its own
% one-point cluster, after which no point is left unclustered.
if minClusterSize < 2
    clusters = vertcat(clusters, num2cell(pointsNotInClusters'))
    pointsNotInClusters= []
end

% Summary statistics over the final cluster list.
clusterCount = numel(clusters);               % total number of clusters
sizeOfClusters = cellfun(@numel, clusters);   % member count per cluster
clusterMinSize = min(sizeOfClusters);
clusterMaxSize = max(sizeOfClusters);
clusterMeanSize = mean(sizeOfClusters);
clusterMedianSize = median(sizeOfClusters);
singlepointCount = numel(pointsNotInClusters); % points outside every cluster

% Report the summary on standard output.
fprintf('Number of clusters: %lu\n',clusterCount);
fprintf('Size of smallest cluster: %lu\n',clusterMinSize);
fprintf('Size of largest cluster: %lu\n',clusterMaxSize);
% fprintf(1,'Mean cluster size: %f\n',clusterMeanSize);
% fprintf(1,'Median cluster size: %lu\n',clusterMedianSize);
fprintf('Number of points that are not part of any cluster: %lu\n',singlepointCount);
% ---- Per-cluster coordinates, centroids and medians --------------------
% For every cluster k:
%   clustersPPP{k}          rows of PPP that belong to the cluster
%   clustersCentroids(k,:)  column-wise mean of those rows
%   clustersGeoMedians(k,:) column-wise median of those rows
% Reads: PPP, clusters, clusterCount
nCoords = size(PPP,2);                        % number of coordinates per point
clustersPPP = cell(clusterCount,1);           % preallocate the variable that is actually filled
clustersCentroids = NaN(clusterCount,nCoords);
clustersGeoMedians = NaN(clusterCount,nCoords);
for k=1:clusterCount
    clustersPPP{k,1} = PPP(clusters{k,1},:);
    % Dimension 1 is given explicitly so that a one-point cluster (a single
    % row) is reduced over rows and not over its coordinates.
    clustersCentroids(k,:) = mean(clustersPPP{k,1},1);
    clustersGeoMedians(k,:) = median(clustersPPP{k,1},1);
end
% ---- Plot the clusters -------------------------------------------------
% Draws all points in light grey and then overlays every cluster in its own
% colour. Reads: PPP, clustersPPP, clusterCount.
cc=hsv(clusterCount); % create different colour codes for every cluster
cc = cc(randperm(clusterCount),:); % randomise the colour codes so that neighbouring clusters don't have too similarly looking colours
h1 = figure('Name','Clusters');
hold on;
% 'xunit'/'yunit' and 'xsd'/'ysd' are not defined anywhere in this script;
% they are drawn only when the caller has provided them, so that the script
% does not stop with an undefined-variable error.
if exist('xunit','var') && exist('yunit','var')
    plot(xunit, yunit)
end
if exist('xsd','var') && exist('ysd','var')
    % NOTE(review): the arguments are passed as (ysd, xsd), i.e. y first.
    % Kept as in the original; confirm that the swap is intended.
    plot(ysd,xsd,'r^')
end
scatter(PPP(:,1),PPP(:,2),20,'filled','o','CData',[.8,.8,.8]); % plot the original points in light grey
for k=1:clusterCount
    plot(clustersPPP{k,1}(:,1),clustersPPP{k,1}(:,2),'o','Color',cc(k,:),'MarkerFaceColor',cc(k,:));
end
% ---- Write centroids and medians to tab-separated text files ------------
% Each file gets a header line "X<TAB>Y" followed by one row per cluster.
% Reads: clustersCentroids, clustersGeoMedians
outputTable = { 'cluster_centroids.txt',         clustersCentroids ;
                'cluster_geometric-medians.txt', clustersGeoMedians };
for iOut = 1:size(outputTable,1)
    outputfile = outputTable{iOut,1};
    % write header line ('w' creates the file or truncates an existing one)
    [fidout, openMsg] = fopen(outputfile,'w');
    if fidout < 0
        % fopen signals failure with -1; stop with the reason instead of
        % letting fprintf fail on an invalid file identifier.
        error('Could not open "%s" for writing: %s', outputfile, openMsg);
    end
    fprintf(fidout,'%s\t%s\n','X','Y');
    fclose(fidout);
    % write data below the header
    dlmwrite(outputfile,outputTable{iOut,2},'-append','delimiter','\t','precision','%015.10f');
end
The above code executes and gives me a result.
However, the users are not grouped based on the maximum distances.
Could anyone please help me with this?

Answers (0)

Tags

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!