KB: Using DBSCAN in R with precomputed distance

The file “points.txt” contains the X and Y coordinates of the points. We want to compute a custom distance between every pair of points and pass the result to DBSCAN as a distance object.

Data file – points.txt

x    y
5    8
6    7
6    5
2    4
3    4
5    4
7    4
9    4
3    3
8    2
7    5

R Script

library(dbscan)

# Read the point coordinates: a tab-separated file whose header row names
# the columns x and y.
data <- read.table("d:/temp/points.txt", sep = "\t", header = TRUE)

# Custom pairwise distance between points i and j:
#   (|x_i - x_j|^0.5 + |y_i - y_j|^0.5)^2
# outer() builds the full n x n matrix of coordinate differences in one
# step, so no index loop is needed and an empty data set simply yields a
# 0 x 0 matrix instead of failing on 1:0.
dx <- abs(outer(data$x, data$x, "-"))^0.5
dy <- abs(outer(data$y, data$y, "-"))^0.5
distance <- (dx + dy)^2

# Hand the precomputed distances to DBSCAN as a "dist" object and show the
# cluster label assigned to each point.
result <- dbscan(as.dist(distance), eps = 4, minPts = 3)
result$cluster

 

Advertisements

KB: SAS cheat sheet

Histogram

/* Histogram of variable N in the PASSENGER data set. */
PROC UNIVARIATE DATA=passenger;
    HISTOGRAM N;
RUN;

Mean, Frequency

/* Summary statistics (PROC MEANS defaults) for variable N in PASSENGER. */
PROC MEANS DATA=passenger;
    VAR N;
RUN;

/* Cross-tabulate believe by age with chi-square tests.              */
/* Each input row is weighted by n (presumably a pre-aggregated cell */
/* count -- confirm against the T1_SANTA data).                      */
PROC FREQ DATA=t1_santa;
    TABLES believe*age / CHISQ;
    WEIGHT n;
RUN;

ROC

/* Logistic regression of failure on temp, requesting the effect plot */
/* and the ROC curve (ROC points labelled by predicted probability).  */
PROC LOGISTIC DATA=challenger_data DESCENDING
              PLOTS=(EFFECT ROC(ID=prob));
    MODEL failure = temp;
RUN;

Reading tab delimited text file

/* Read the tab-delimited file crab.txt into T2_CRAB.                */
/* '09'x is the tab character; DSD treats consecutive delimiters as  */
/* missing values.                                                   */
DATA t2_crab;
    INFILE "c:\temp\crab.txt" DELIMITER='09'x DSD;
    INPUT id t wt w sa;
RUN;

Reading tab delimited text file, skipping the header row

/* Read the tab-delimited file crab.txt into T2_CRAB, starting at    */
/* record 2 so the header row is skipped. '09'x is the tab character. */
DATA t2_crab;
    INFILE "c:\temp\crab.txt" FIRSTOBS=2 DELIMITER='09'x DSD;
    INPUT id t wt w sa;
RUN;

Reading tab delimited text file, with strings longer than 8 characters

/* Read the tab-delimited file epl.txt into EPL.                        */
/* Character fields use the colon modifier with an explicit informat    */
/* (e.g. :$40.) so values longer than the default 8 characters are kept */
/* while still being read up to the next delimiter.                     */
DATA epl;
    INFILE "C:\temp\epl.txt" DELIMITER='09'x DSD;
    INPUT season_team     : $40.
          season          : $9.
          stage
          team            : $30.
          opp             : $30.
          team_season_pos
          opp_season_pos
          team_goal
          opp_goal
          net_pos
          net_goal
          home_away       : $1.
          result          : $1.
          form            : $3.;
RUN;

KB: R script sample

# The distinct coupon types that can be drawn.
coupon <- c("A", "B", "C", "D")

# Print every sequence of coupon draws that extends `coupon_set` to `n`
# draws, one sequence per line.
#
# coupon_set: string holding the coupons drawn so far.
# i:          number of draws already made.
# n:          total number of draws per sequence.
#
# Output goes to the current output connection via cat(); nothing useful
# is returned. Coupon types are taken from the global `coupon` vector.
generate_set <- function(coupon_set, i, n) {
  # `>=` rather than `==`: a call that starts with i > n would otherwise
  # recurse without ever reaching the stopping condition.
  if (i >= n) {
    cat(coupon_set, "\n", sep = "")
  } else {
    # Loop variable is not called `c`, so base::c() is not shadowed.
    for (next_coupon in coupon) {
      generate_set(paste0(coupon_set, next_coupon), i + 1, n)
    }
  }
}

# Write every length-10 coupon sequence to a text file, one per line.
out_file <- "d:/temp/q6_10.txt"
sink(out_file)
# `finally` restores normal output even if generate_set() fails; otherwise
# all later console output would keep going into the file.
invisible(tryCatch(
  generate_set("", 0, 10),
  finally = sink()
))

# Read the sequences back (single column V1) and flag those that contain
# all four coupon types at least once.
q6 <- read.table(file = out_file)
q6$win <- grepl("A", q6$V1) & grepl("B", q6$V1) &
  grepl("C", q6$V1) & grepl("D", q6$V1)

# Counts of winning / non-winning sequences, then the winning proportion.
summary(q6$win)
sum(q6$win) / nrow(q6)

KB: Simple Line Chart in R

# Plot f(x) = x^2 and f(x) = x^3 over [-10, 10] on the same axes.
x <- seq(-10, 10, 0.1)
y2 <- x^2
y3 <- x^3

# The first plot() call sets up the axes; ylim here fixes the visible
# y-range for everything drawn afterwards.
plot(
  x = x,
  y = y2, type = "l", ylim = c(-100, 100), col = "red", ylab = "f(x)"
)
# lines() draws into the existing coordinate system. ylim is not a
# graphical parameter for it (passing it only raises a warning), so it is
# omitted here.
lines(x, y3, type = "l", col = "blue")
legend(
  "bottomright",
  c("f(x)=x^2", "f(x)=x^3"),
  lty = c(1, 1),
  col = c("red", "blue")
)