Could anyone please help me group together the users that are at the maximum distance from each other?

Question

jaah navi on 6 Nov 2019

0
Link

Direct link to this question

https://se.mathworks.com/matlabcentral/answers/489475-could-anyone-please-help-me-to-group-the-users-which-are-being-at-a-maximum-distances-to-be-grouped

Closed: MATLAB Answer Bot on 20 Aug 2021

code:
% Setup: input points, clustering parameters, and the list of point pairs
% ordered from the largest pairwise distance to the smallest.
clearvars;   % clears variables only; 'clear all' also flushes cached functions
clc;

% One row per user/point: [x, y].
PPP = [ 1.7718    2.4035;
        1.3717    1.8971;
       -0.0575    1.2565;
        1.5720    0.5264;
        2.0790    0.5114 ];

minClusterSize = 1;    % clusters with fewer members are discarded after the clustering loop
method = 'point';      % 'centroid' and 'geometric median' enable an extra distance test in the loop;
                       % any other value (such as 'point') links on the pairwise distance alone
maxdist = 10;          % distance threshold used by the clustering loop
kmax = size(PPP,1);    % number of points

% pdist returns the pairwise distances in the order
% (2,1),(3,1),...,(kmax,1),(3,2),...,(kmax,kmax-1), i.e. the column-major
% order of the strictly lower triangle of the full distance matrix.
D = pdist(PPP);

% Enumerate the (row, column) subscripts of that lower triangle in the same
% column-major order, so that row i of 'pairs' names the two points whose
% distance is D(i). The (:) keeps 'pairs' at 0-by-2 when there are no pairs.
[ptHigh, ptLow] = find(tril(true(kmax), -1));
pairs = [ptHigh(:), ptLow(:)];   % column 1 = point1 (larger index), column 2 = point2

% Sort the pairs from farthest apart to closest.
[Ds, IX] = sort(D, 'descend');
pairs_sorted = pairs(IX,:);

clusters = {};   % list of clusters (one cell per cluster)
% Greedy clustering over the point pairs, visited from the largest distance
% to the smallest. Only pairs whose distance exceeds 'maxdist' are considered:
%   * neither point clustered yet -> the pair starts a new cluster;
%   * exactly one point clustered -> the other point may join that cluster;
%   * both points clustered       -> nothing happens.
pointsInClusters = zeros(2,0); % row 1 = cluster number, row 2 = point index (one column per clustered point)
inc = 0;                       % number of clusters created so far
for k = 1:size(pairs_sorted,1)
    pt1 = pairs_sorted(k,1);
    pt2 = pairs_sorted(k,2);
    d = Ds(k); % distance between pt1 and pt2
    if d <= maxdist
        continue % pair is not far enough apart to be linked
    end

    pt1Clustered = ismember(pt1, pointsInClusters(2,:));
    pt2Clustered = ismember(pt2, pointsInClusters(2,:));

    if ~pt1Clustered && ~pt2Clustered
        % Start a new cluster from this pair.
        inc = inc + 1;
        currentCluster = [pt1, pt2];
        clusters = [ clusters ; {currentCluster} ];
        pointsInClusters = [ pointsInClusters , ...
            [ inc * ones(1,length(currentCluster)) ; currentCluster ] ];
    elseif pt1Clustered ~= pt2Clustered
        % Exactly one point already belongs to a cluster: 'anchorPt' is that
        % point, 'newPt' is the candidate that may join the same cluster.
        if pt1Clustered
            anchorPt = pt1;
            newPt = pt2;
        else
            anchorPt = pt2;
            newPt = pt1;
        end
        id = pointsInClusters(1, pointsInClusters(2,:) == anchorPt); % cluster number of 'anchorPt'

        % Optional second criterion: distance from the candidate to a
        % representative point of the cluster.
        switch ['distance to ', lower(method)]
            case 'distance to centroid'
                cdist = sqrt( sum((mean(PPP(clusters{id},:),1) - PPP(newPt,:)).^2) );   % to the cluster centroid
            case 'distance to geometric median'
                cdist = sqrt( sum((median(PPP(clusters{id},:),1) - PPP(newPt,:)).^2) ); % to the coordinate-wise median
            otherwise
                cdist = []; % no second criterion: the pairwise distance alone decides
        end

        % Join when there is no second criterion, or when the candidate is
        % also farther than 'maxdist' from the cluster's representative point.
        if isempty(cdist) || cdist > maxdist
            clusters{id} = [ clusters{id} , newPt ];
            pointsInClusters = [ pointsInClusters , [ id ; newPt ] ];
        end
    end
end
% Keep only the clusters that have at least 'minClusterSize' members.
isLargeEnough = cellfun(@(members) numel(members) >= minClusterSize, clusters);
clusters = clusters(isLargeEnough)
clearvars pointsInClusters % the bookkeeping table is stale from this point on

% Work out which point indices did not end up in any cluster:
% start from all indices, then remove every index found in a cluster.
pointsNotInClusters = (1:kmax)
clusteredPoints = cell2mat(clusters');
pointsNotInClusters(ismember(pointsNotInClusters, clusteredPoints)) = []

% When single-member clusters are allowed, every leftover point becomes a
% cluster of its own, after which no unclustered points remain.
if minClusterSize < 2
    clusters = [ clusters ; num2cell(pointsNotInClusters(:)) ]
    pointsNotInClusters = []
end
% Summary statistics of the clustering result
sizeOfClusters = cellfun(@numel, clusters);      % member count of each cluster
clusterCount = numel(clusters);                  % total number of clusters
clusterMinSize = min(sizeOfClusters);
clusterMaxSize = max(sizeOfClusters);
clusterMeanSize = mean(sizeOfClusters);
clusterMedianSize = median(sizeOfClusters);
singlepointCount = numel(pointsNotInClusters);   % points left outside every cluster

% Report the statistics on standard output.
fprintf('Number of clusters: %lu\n', clusterCount);
fprintf('Size of smallest cluster: %lu\n', clusterMinSize);
fprintf('Size of largest cluster: %lu\n', clusterMaxSize);
% Mean and median sizes are computed above but their printing is disabled:
% fprintf('Mean cluster size: %f\n', clusterMeanSize);
% fprintf('Median cluster size: %lu\n', clusterMedianSize);
fprintf('Number of points that are not part of any cluster: %lu\n', singlepointCount);
% For every cluster, gather the coordinates of its member points and compute
% two representative points: the centroid (mean of the member coordinates)
% and the coordinate-wise median. Note that median(...,1) takes the median of
% each coordinate separately; the variable is named "geometric median" but
% this is not the true geometric median.
clustersPPP = cell(clusterCount,1);        % member coordinates, one cell per cluster
clustersCentroids = NaN(clusterCount,2);   % one [x, y] row per cluster
clustersGeoMedians = NaN(clusterCount,2);  % one [x, y] row per cluster
for k = 1:clusterCount
    memberXY = PPP(clusters{k,1},:);
    clustersPPP{k,1} = memberXY;
    clustersCentroids(k,:) = mean(memberXY,1);
    clustersGeoMedians(k,:) = median(memberXY,1);
end
% Plot the clusters
cc = hsv(clusterCount);             % one colour per cluster
cc = cc(randperm(clusterCount),:);  % shuffle so that consecutive clusters do not get similar colours
h1 = figure('Name','Clusters');
hold on;

% Optional overlays. These variables are not defined anywhere in this script,
% so they are only drawn when something else has put them in the workspace;
% plotting them unconditionally raised an undefined-variable error.
if exist('xunit','var') && exist('yunit','var')
    plot(xunit, yunit)
end
if exist('xsd','var') && exist('ysd','var')
    plot(ysd,xsd,'r^') % NOTE(review): argument order (ysd, xsd) kept as written - confirm it is intended
end

scatter(PPP(:,1),PPP(:,2),20,'filled','o','CData',[.8,.8,.8]); % all original points in light grey
for k = 1:clusterCount
    % Members of cluster k, drawn in that cluster's colour.
    plot(clustersPPP{k,1}(:,1),clustersPPP{k,1}(:,2),'o','Color',cc(k,:),'MarkerFaceColor',cc(k,:));
end
% Write the cluster centroids and the cluster medians to tab-separated text
% files. Each file gets an 'X<TAB>Y' header line followed by one row per cluster.
outputs = { 'cluster_centroids.txt',         clustersCentroids ;
            'cluster_geometric-medians.txt', clustersGeoMedians };
for n = 1:size(outputs,1)
    outputfile = outputs{n,1};

    % Header line; opening with 'w' discards any previous file content.
    [fidout, errmsg] = fopen(outputfile,'w');
    if fidout < 0
        % Fail with the file name and the system message rather than with an
        % invalid-file-identifier error from the fprintf below.
        error('Could not open "%s" for writing: %s', outputfile, errmsg);
    end
    fprintf(fidout,'%s\t%s\n','X','Y');
    fclose(fidout);

    % Data rows, appended below the header.
    dlmwrite(outputfile,outputs{n,2},'-append','delimiter','\t','precision','%015.10f');
end

Could anyone please help me group together the users that are at the maximum distance from each other?

0 Comments
Show -2 older commentsHide -2 older comments

Answers (0)

See Also

Tags

Community Treasure Hunt

Could anyone please help me group together the users that are at the maximum distance from each other?

0 Comments Show -2 older commentsHide -2 older comments

Answers (0)

See Also

Tags

Community Treasure Hunt

0 Comments
Show -2 older commentsHide -2 older comments