function agrupa, data_in, kclusters, ini=ini ;does clustering of data_in ;A. Sainz Dalda and L. Kleint ;data_in: arr[nprofiles,nwavelengths] ;kclusters: number of clusters wanted ;ini=initial guess ;mod sep 28, 2017 to return matrix with numbers cluster association in gfin dim=double(size(data_in)) kclusters=double(kclusters) print,'clusters: ',kclusters c_ini=intarr(kclusters)-1 cond=0 ctr=0d while cond eq 0 do begin ran=abs(floor(randomu(s)*dim[1])) ;index of random profile within range [0,#profiles] diff=(where(c_ini eq ran))(0) ;find if randomly chosen profile was chosen before if diff eq -1 then begin ;-1 -> new random profile c_ini[ctr]=ran ;assign index of random profile to c_ini if ctr eq kclusters-1 then cond=1 ctr++ endif endwhile kmean=float(data_in[c_ini,*]) ;kmean contains randomly selected starting profiles if keyword_set(ini) then begin ini=reform(ini) ;LK added to avoid crash if single profile in array dim_ini=size(ini) if dim_ini[0] eq 1 then begin kmean[0,*]=ini ;single profile initial guess c_ini[0] = -1 endif if dim_ini[0] eq 2 then begin kmean[0:dim_ini[1]-1,*]=ini ;multiple profiles initial guess c_ini[0:dim_ini[1]-1] = -1 endif endif d = fltarr(dim[1], kclusters) ;array (#profiles,#clusters), least sq values daux = fltarr(dim[1], kclusters) ;g = fltarr(dim[1], kclusters) ;old g = fltarr(dim[1]) ;array (#profiles), contains number of cluster gfin = fltarr(dim[1], kclusters)+1. h=0d while (where(g ne gfin))[0] ne -1 do begin gfin=g ; g=fltarr(dim[1], kclusters) ;old g=fltarr(dim[1]) ; for k=0d, kclusters-1d do begin ;for each cluster (randomly chosen profile) ; for i=0d, dim[1]-1d do begin ;for each profile ; d[i,k]=sqrt(total((data_in[i,*]-kmean[k,*])^2.)) ;"least squares" ; endfor ; endfor ;vectorize for k=0d, kclusters-1d do begin ;for each cluster (randomly chosen profile) tmp = (data_in - transpose(rebin(reform(kmean[k,*]),dim[2],dim[1])))^2. d[0,k] = sqrt(total(tmp,2)) ;use 0 for faster execution (will assign full vector) endfor for i=0d, dim[1]-1d do begin w=where(d[i,*] eq min(d[i,*])) ;look for minimum least square value ; g[i,w[0]]=1. ;g matrix has 1 if profile belongs to cluster g[i]=w ;g matrix has number of cluster endfor for k=0, kclusters-1 do begin ; w=where(g[*,k] eq 1., cuan) w=where(g eq k, cuan) ; if w[0] ne -1 then kmean[k,*]=total(float(data_in[w,*]),1)/float(cuan) ;save average profile of cluster if w[0] ne -1 then kmean[k,*]=avg(float(data_in[w,*]),0) ;save avg endfor h++ print,h endwhile gfin=g return, {gfin:gfin, kmean:kmean, d:d, c_ini:c_ini} ;gfin: matrix with 0 and 1 for cluster association ;kmean: average profile for each cluster ;d: least squares values ;c_ini: positions of randomly chosen profiles (-1 for initial guesses) end