lunedì 10 agosto 2015

Oracle:Linear regression through a SQL Query



I want to have a linear approximation directly in an SQL query.

To make it easy: having a generic function f, I want to approximate it with the function:


Being sure that I found the A and B which minimize the global error between f and f' (where f' is the linear approximation of f). 

Let's generate some data: 
 create table zzz_some_data as   
  SELECT    
    'X' groupid,   
    level x,    
    exp(level/10) + sin(level/10 *3.14) + dbms_random.value(0,1) y   
   FROM DUAL   
   CONNECT BY LEVEL <= 20   
 union all   
  SELECT    
    'Y' groupid,   
    level x,    
    exp(level/10) + Cos(level/10 *3.14) + dbms_random.value(0,1) y  
   FROM DUAL   
   CONNECT BY LEVEL <= 20;  

As you can see there is a random factor. At the bottom of the tutorial, you can find the exact data generated. 

Plotting these numbers the functions look like this: 



Running the following query, I can get the value of A and B for the two functions:

 WITH mean_estimates AS  
 (  SELECT GroupID  
      ,AVG(x) AS xmean  
      ,AVG(y) AS ymean  
      ,min(y) as first_seq  
      ,max(y) as last_seq  
   FROM zzz_some_data  
   GROUP BY GroupID  
 ),  
 stdev_estimates AS  
 (  SELECT pd.GroupID  
      ,CASE     
        WHEN SUM(POWER(x - xmean, 2)) = 0 THEN 1   
        when COUNT(*) = 1 then 10000000  
        ELSE SQRT(SUM(POWER(x - xmean, 2)) / (COUNT(*) - 1))   
       END AS xstdev  
      ,CASE  
        when count(*) = 1 then 10000000   
        else SQRT(SUM(POWER(y - ymean, 2)) / (COUNT(*) - 1))     
       end AS ystdev  
   FROM zzz_some_data pd  
   INNER JOIN mean_estimates pm ON pm.GroupID = pd.GroupID  
   GROUP BY pd.GroupID, pm.xmean, pm.ymean  
 ),  
 standardized_data AS          -- increases numerical stability  
 (  SELECT pd.GroupID  
      ,(x - xmean) / xstdev                  AS xstd  
      ,CASE ystdev WHEN 0 THEN 0 ELSE (y - ymean) / ystdev END AS ystd  
   FROM zzz_some_data pd  
   INNER JOIN stdev_estimates ps ON ps.GroupID = pd.GroupID  
   INNER JOIN mean_estimates pm ON pm.GroupID = pd.GroupID  
 ),  
 standardized_beta_estimates AS  
 (  SELECT GroupID  
      ,CASE WHEN SUM(xstd * xstd) = 0 THEN 0  
         ELSE SUM(xstd * ystd) / (COUNT(*) - 1) END     AS betastd  
   FROM standardized_data pd  
   GROUP BY GroupID  
 ),   
 linear_model as   
 (  
 SELECT   
    pb.GroupID,     
    ymean - xmean * betastd * ystdev / xstdev AS BETA,   
    betastd * ystdev / xstdev         AS ALPHA,   
    first_seq,   
    last_seq  
 FROM standardized_beta_estimates pb  
 INNER JOIN stdev_estimates ps ON ps.GroupID = pb.GroupID  
 INNER JOIN mean_estimates pm ON pm.GroupID = pb.GroupID  
 )  
 select   
  GroupID,   
  alpha,   
  Beta  
 from linear_model  
 order by GroupID desc;  


This query generate the following output: 

 GROUPID   ALPHA    BETA  
 ------- ---------- ----------  
 Y    ,341603277 ,27926809   
 X    ,221725267 1,66190364   



Let's see now the results: 


Here the plot of the approximation of X:


Here the plot of the approximation of Y:








 CREATE TABLE "ZZZ_SOME_DATA"  
  (  
   "GROUPID" CHAR(1 BYTE),  
   "X"    NUMBER,  
   "Y"    NUMBER  
  );  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','1','1,61932317662496064883408392432113402523');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','2','2,77694913213341470441940320202765814948');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','3','2,91090102987016066482064043800309804071');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','4','3,13678897398740391101470575963142792902');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','5','3,31605237538417176477155850638842190865');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','6','3,13667075089211953539069065645455345878');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','7','3,47488861159608290867160313703997232386');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','8','3,42251149078741711848240969510483689103');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','9','3,70603465587716541507204559098228342981');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','10','3,49314419776404379007856528151793653642');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','11','3,24172315449369167895615169724235970383');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','12','2,96881260394832309096782780931487864967');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','13','3,07513520161764733445750922412894102533');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','14','3,81479688098856311171457385615269516843');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','15','4,32475565056326473828436305142365381565');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','16','4,85233459222270714560038164481370508461');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','17','5,61989845355259006300115338181868398089');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','18','6,31700004318003808435939347557223237057');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','19','6,76689263318576069778969371993663187878');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('X','20','7,82576523994149918219695417536412769495');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','1','2,53714598427679136859871205905510695503');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','2','2,3110817502573158903868891890087776345');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','3','1,94494243937675278618005281210530984129');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','4','2,25982980116506374957923410951751088925');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','5','1,86603427812480463944739874768807857366');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','6','2,05505938632236899572636979590661767446');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','7','2,3258999982985244735180126549118494278');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','8','1,83188925742276377791489099175111599133');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','9','2,41166969667358720655946060775919042819');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','10','1,8104604598253185583746180090281519982');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','11','2,87738150798966416515834453685956594055');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','12','3,43069799388754748665140656518089681805');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','13','3,54673238603787598200769925226968142282');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','14','4,43614682007291136740320048898865794443');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','15','4,87577441430844397983546298110499857896');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','16','6,00804958270558010522626027655493241713');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','17','6,3755271699704708032463599263767041073');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','18','7,12510967938699587673395044064905861434');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','19','8,17916759972655812255130821868093121265');  
 Insert into ZZZ_SOME_DATA (GROUPID,X,Y) values ('Y','20','9,11344975307640059368329002839752407251');  
 COMMIT;  

venerdì 7 agosto 2015

Tutorial: neural network in R by examples (using the package neuralnet)

Tutorial: Create a neural network in R

Let's start with the basics. 
If you want to start learning how does it work a NN in R, start searching for some packages supporting you. 

Googling a bit you will wee that there are two main packages: 
  • Neuralnet: specific for neural networks
  • Caret: generic machine learning package, containing a lot of machine learning algorithm, supporting very well the neural networks.
In this post I will show some investigation I made on the package "Neuralnet".
In this next post I show the investigation I made on the package "Caret".

Basic AND and OR functions


If you go on the help of R, typing: 
 ?neuralnet  

You can get the complete documentation.
Going quickly to the example, you find the following code:

 AND <- c(rep(0,7),1)  
 OR <- c(0,rep(1,7))  
 binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR)  
 net <- neuralnet(AND+OR~Var1+Var2+Var3, binary.data, hidden=0,   
             rep=10, err.fct="ce", linear.output=FALSE)  


Let's have a look inside to the returned object "net":
 > typeof(net)  
 [1] "list"  

 > summary(net)  
                   Length   Class      Mode    
 call                  7    -none-     call    
 response             16    -none-     numeric   
 covariate            24    -none-     numeric   
 model.list            2    -none-     list    
 err.fct               1    -none-     function  
 act.fct               1    -none-     function  
 linear.output         1    -none-     logical   
 data                  5    data.frame list    
 net.result           10    -none-     list    
 weights              10    -none-     list    
 startweights         10    -none-     list    
 generalized.weights  10    -none-     list    
 result.matrix       110    -none-     numeric   
 

> head(net)  
 $call  
 neuralnet(formula = AND + OR ~ Var1 + Var2 + Var3, data = binary.data,   
   hidden = 0, rep = 10, err.fct = "ce", linear.output = FALSE)  
 $response  
  AND OR  
 1  0 0  
 2  0 1  
 3  0 1  
 4  0 1  
 5  0 1  
 6  0 1  
 7  0 1  
 8  1 1  
 $covariate  
    [,1] [,2] [,3]  
 [1,]  0  0  0  
 [2,]  1  0  0  
 [3,]  0  1  0  
 [4,]  1  1  0  
 [5,]  0  0  1  
 [6,]  1  0  1  
 [7,]  0  1  1  
 [8,]  1  1  1  
 $model.list  
 $model.list$response  
 [1] "AND" "OR"   
 $model.list$variables  
 [1] "Var1" "Var2" "Var3"  
 $err.fct  
 function (x, y)   
 {  
   -(y * log(x) + (1 - y) * log(1 - x))  
 }  
 <environment: 0x00000000174034c8>  
 attr(,"type")  
 [1] "ce"  
 $act.fct  
 function (x)   
 {  
   1/(1 + exp(-x))  
 }  
 <environment: 0x00000000174034c8>  
 attr(,"type")  
 [1] "logistic"  

> print(net)
10 repetitions were calculated.

           Error Reached Threshold Steps
7  0.07270325501    0.007209205002   237
2  0.08361412743    0.008216337601   210
1  0.08425544114    0.008840366736   228
4  0.08667354432    0.009692218237   226
6  0.08736308608    0.009046804494   236
10 0.08767067498    0.009009176337   240
3  0.08817255652    0.009546552076   214
8  0.09165608530    0.009676470949   221
5  0.09170859687    0.008872336865   213
9  0.09517374836    0.009993935991   223




How can I use the NN once I have trained it?

 > test <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)))  
 > predicted <- compute(net, test) #Run them through the neural network  
 > print(predicted$net.result)  
            [,1]       [,2]  
 [1,] 0.000000005510800906 0.00003437586348  
 [2,] 0.000008322603907184 0.99999887725263  
 [3,] 0.000010232856460365 0.99999418995760  
 [4,] 0.015219103082846375 1.00000000000000  
 [5,] 0.000009115152595497 0.99998044588564  
 [6,] 0.013579325087889940 1.00000000000000  
 [7,] 0.016644285280624567 1.00000000000000  
 [8,] 0.962352882489281747 1.00000000000000  
 >   



What is very cool about this package is the graphical representation:

 > plot(net)

generate this plot:


Square root function

There is a great tutorial in this page: 


Let's have a look at the code: 

 install.packages('neuralnet')  
 library("neuralnet")  

 #Going to create a neural network to perform sqare rooting  
 #Type ?neuralnet for more information on the neuralnet library  
 #Generate 50 random numbers uniformly distributed between 0 and 100  
 #And store them as a dataframe  
 traininginput <- as.data.frame(runif(50, min=0, max=100))  
 trainingoutput <- sqrt(traininginput)  
 #Column bind the data into one variable  
 trainingdata <- cbind(traininginput,trainingoutput)  
 colnames(trainingdata) <- c("Input","Output")  
 #Train the neural network  
 #Going to have 10 hidden layers  
 #Threshold is a numeric value specifying the threshold for the partial  
 #derivatives of the error function as stopping criteria.  
 net.sqrt <- neuralnet(Output~Input,trainingdata, hidden=10, threshold=0.01)  
 #Test the neural network on some training data  
 testdata <- as.data.frame((1:10)^2) #Generate some squared numbers  
 net.results <- compute(net.sqrt, testdata) #Run them through the neural network  


Let's have a look at the reuslt:
 > cleanoutput <- cbind(testdata,sqrt(testdata),  
 +           as.data.frame(net.results$net.result))  
 > colnames(cleanoutput) <- c("Input","Expected Output","Neural Net Output")  
 > print(cleanoutput)  
   Input Expected Output Neural Net Output  
 1   1                 1       1.292537723  
 2   4                 2       2.004110690  
 3   9                 3       3.004860581  
 4   16                4       4.001313250  
 5   25                5       4.996608428  
 6   36                6       6.003918278  
 7   49                7       6.997395208  
 8   64                8       7.995504751  
 9   81                9       9.009677415  
 10  100              10       9.959406366  
 >   

Having a look at the graphical representation:


Square root function with more nodes in the hidden layer

And what if I wanted to increase the number of nodes in the hidden layer?
Please consider that this is only to see how to tune the network. Use an hidden layer for the Sqrt function is totally useless, and I will show it later with the results.

Anyway, to add more layers, it is simply needed to use the value:    "hidden = c(x, y, z)"
where:
- x: number of perceptrons in the first hidden layer
- y: number of perceptrons in the second hidden layer
- z: number of perceptrons in the third hidden layer

 traininginput <- as.data.frame(runif(50, min=0, max=100))  
 trainingoutput <- sqrt(traininginput)  
 trainingdata <- cbind(traininginput,trainingoutput)  
 colnames(trainingdata) <- c("Input","Output")  
 net.sqrt <- neuralnet(Output~Input,trainingdata, hidden=c(10, 10, 10), threshold=0.01)  

Let's plot now the network:

Let's check now the results (you will see, adding these hidden layer did not improve the results at all):

 > cleanoutput <- cbind(testdata,sqrt(testdata),  
 +           as.data.frame(net.results$net.result))  
 > colnames(cleanoutput) <- c("Input","Expected Output","Neural Net Output")  
 > print(cleanoutput)  
   Input Expected Output Neural Net Output  
 1   1                1    0.591036793  
 2   4                2    2.070495859  
 3   9                3    3.000115410  
 4   16               4    3.999594071  
 5   25               5    4.997643979  
 6   36               6    6.034835331  
 7   49               7    6.997600383  
 8   64               8    7.997306744  
 9   81               9    8.999801209  
 10  100             10    9.982023499  
 >   

giovedì 18 giugno 2015

Oracle: SQL for list of numbers or list of dates



For numbers:

I want to have a query which returns the list of the dates from today, for the next 10 years:

   SELECT   
     level seq,   
     _starting_ + _delta_ * level as ddate,   
   FROM DUAL  
   CONNECT BY LEVEL <= _occurrences_  


The "parameters" needed by this query are:
  • Starting: the value where to start
  • Delta: the number of days between each occurrence of the list
  • Occurrences: How many occurrences in the list (how long is the list)

For example, given the following query:

SELECT
   level seq,
   100 + 5 * level as value
FROM DUAL
CONNECT BY LEVEL <= 10

I will have this output

     SEQ   VALUE  
 ---------- ----------  
      1    105   
      2    110   
      3    115   
      4    120   
      5    125   
      6    130   
      7    135   
      8    140   
      9    145   
     10    150   
  10 Zeilen gewählt  



For dates:

I want to have a query which returns the list of the dates from today, for the next 10 years:

   SELECT   
     level seq,   
     to_date(_starting_date_, 'yyyymmdd') + _delta_ * level as ddate,   
     to_char(to_date(_starting_date_, 'yyyymmdd') + _delta_ * level, 'yyyy-ww') as week,   
     to_char(to_date(_starting_date_, 'yyyymmdd') + _delta_ * level, 'yyyy-mm') as month  
   FROM DUAL  
   CONNECT BY LEVEL <= _occurrences_  


The "parameters" needed by this query are:
  • Starting_date: the date where to start
  • Delta: the number of days between each occurrence of the list
  • Occurrences: How many occurrences in the list (how long is the list)


For example, given the following query:
   SELECT   
     level seq,   
     to_date('20140101', 'yyyymmdd') + 7 * level as ddate,   
     to_char(to_date('20140101', 'yyyymmdd') + 7 * level, 'yyyy-ww') as week,   
     to_char(to_date('20140101', 'yyyymmdd') + 7 * level, 'yyyy-mm') as month  
   FROM DUAL  
   CONNECT BY LEVEL <= 10  


I will have this output

     SEQ DDATE  WEEK  MONTH   
 ---------- -------- ------- -------  
      1 08.01.14 2014-02 2014-01   
      2 15.01.14 2014-03 2014-01   
      3 22.01.14 2014-04 2014-01   
      4 29.01.14 2014-05 2014-01   
      5 05.02.14 2014-06 2014-02   
      6 12.02.14 2014-07 2014-02   
      7 19.02.14 2014-08 2014-02   
      8 26.02.14 2014-09 2014-02   
      9 05.03.14 2014-10 2014-03   
     10 12.03.14 2014-11 2014-03   
  10 Zeilen gewählt  






martedì 19 maggio 2015

Tutorial: Canvas in HTML5




Let's play a bit with the graphic & Canvas...
You can find the code here:
http://jsbin.com/umaDEvu/5/edit?html,output

 <!DOCTYPE HTML>  
 <html>  
   <head>  
     <style>  
       body {  
         margin: 0px;  
         padding: 0px;  
       }  
       #myCanvas {  
         border: 1px solid #9C9898;  
       }  
     </style>  
     <script>     
       var clicked = 0;  
       var loopMouth = 0;  
       var lastMouseX = 300;  
       var lastMouseY = 300;  
       var colorSetIndex = 2;  
       var colorSet = [["limegreen", "forestgreen", "darkgreen", "olive"],   
               ["mediumblue", "blue", "darkblue", "indigo"],   
               ["darkorange", "peru", "sienna", "darkgoldenrod"]];  
      function setColorSetIndex(colorIndex){  
       colorSetIndex = colorIndex;  
      }  
       function getMousePos(canvas, evt) {  
         // get canvas position  
         var obj = canvas;  
         var top = 0;  
         var left = 0;  
         while (obj && obj.tagName != 'BODY') {  
           top += obj.offsetTop;  
           left += obj.offsetLeft;  
           obj = obj.offsetParent;  
         }  
         lastMouseX = evt.clientX - left + window.pageXOffset;  
         lastMouseY = evt.clientY - top + window.pageYOffset;  
       }  
      function drawFace(context) {  
         //left ear  
         context.beginPath();  
         var grd = context.createRadialGradient(125, 130, 10, 175, 130, 100);  
         grd.addColorStop(0.01, colorSet[colorSetIndex][0]);  
         grd.addColorStop(0.5, colorSet[colorSetIndex][1]);  
         grd.addColorStop(0.7, colorSet[colorSetIndex][2]);  
         grd.addColorStop(0.95, colorSet[colorSetIndex][3]);  
         context.arc(175, 150, 75, 0, 2*Math.PI, false);  
         context.fillStyle = grd;  
         context.fill();  
         // right ear  
         context.beginPath();  
         var grd = context.createRadialGradient(450, 95, 10, 425, 130, 100);  
         grd.addColorStop(0.01, colorSet[colorSetIndex][0]);  
         grd.addColorStop(0.5, colorSet[colorSetIndex][1]);  
         grd.addColorStop(0.7, colorSet[colorSetIndex][2]);  
         grd.addColorStop(0.95, colorSet[colorSetIndex][3]);  
         context.arc(425, 125, 75, 0, 2*Math.PI, false);  
         context.fillStyle = grd;  
         context.fill();  
         //face  
         context.beginPath();  
         var grd = context.createRadialGradient(300, 140, 50, 300, 170, 175);  
         grd.addColorStop(0.01, colorSet[colorSetIndex][0]);  
         grd.addColorStop(0.5, colorSet[colorSetIndex][1]);  
         grd.addColorStop(0.7, colorSet[colorSetIndex][2]);  
         grd.addColorStop(0.95, colorSet[colorSetIndex][3]);  
         context.arc(300, 200, 150, 0, 2*Math.PI, false);  
         context.fillStyle = grd;  
         context.fill();  
      }  
      function drawEyes(context, mx, my) {  
         var cx = [300, 220, 200, 250, 380, 350, 400];  
         var cy = [100, 130, 170, 160, 130, 150, 185];  
         var ray = [ 30, 20, 10, 10, 20, 10, 10];  
         for (var i = 0; i < cx.length; i++) {  
          //compute the multiplier for the mouse position  
          var mmx = ((mx-300)/300*ray[i]);  
          var mmy = ((my-200)/200*ray[i]);  
          context.beginPath();  
          var grd = context.createRadialGradient(cx[i]+mmx, cy[i]+mmy, 1, cx[i]+mmx, cy[i]+mmy, ray[i]*3);  
          grd.addColorStop(0.01, "black");  
          grd.addColorStop(0.1, "white");  
          grd.addColorStop(0.99, "gray");  
          context.arc(cx[i], cy[i], ray[i], 0, 2*Math.PI, false);  
          context.fillStyle = grd;  
          context.fill();    
         }  
      }  
      function drawMouth(context) {  
         var grd_m = context.createRadialGradient(300, 300, 10, 300, 300, 100);  
         grd_m.addColorStop(0.1, "red");  
         grd_m.addColorStop(0.6, "black");  
       //for closing the mouth  
       var diffLeft;  
       var diffRight;   
       //loopMouth goes from 0-100  
       // from 0-50 it close the mouth  
       //from 50-100 it reopen the mouth  
       if(loopMouth < 50)  
       {  
        tempLoopMouth = loopMouth;  
        diffLeft=((380-230)/50)*(tempLoopMouth+1);  
        diffRight=((330-250)/50)*(tempLoopMouth+1);  
       }  
       else   
       {  
        tempLoopMouth = (100-loopMouth);  
        diffLeft=((380-230)/50)*(tempLoopMouth+1);  
        diffRight=((330-250)/50)*(tempLoopMouth+1);  
       }   
         context.beginPath();  
         context.moveTo(220, 230);  
         context.bezierCurveTo(220, 380 - diffLeft, 380, 330 - diffRight, 380, 250);  
         context.closePath(); // complete custom shape  
         context.fillStyle = grd_m;  
         context.fill();  
      }       
      function drawBackground(context){  
         //set the background  
         var grd = context.createRadialGradient(300, 200, 50, 300, 200, 600);  
         grd.addColorStop(0.01, "white");  
         grd.addColorStop(0.5, "black");  
         context.arc(300, 200, 600, 0, 2*Math.PI, false);  
         context.fillStyle = grd;  
         context.fill();  
      }  
      function drawAll(context){  
           drawBackground(context);  
           drawFace(context);  
           drawEyes(context, lastMouseX, lastMouseY);  
           drawMouth(context);  
      }        
      // requestAnim shim layer by Paul Irish    
      window.requestAnimFrame = (function(){    
      return window.requestAnimationFrame ||    
       window.webkitRequestAnimationFrame ||    
       window.mozRequestAnimationFrame ||    
       window.oRequestAnimationFrame ||    
       window.msRequestAnimationFrame ||    
       function(/* function */ callback, /* DOMElement */ element){    
       window.setTimeout(callback, 1000 / 60);    
       };    
      })();     
      function animationLoop(){  
       var canvas = document.getElementById("myCanvas");  
       var context = canvas.getContext("2d");  
       if(clicked == 1)  
         loopMouth++;  
       if(loopMouth==100)  
       {  
         loopMouth = 0;  
         clicked = 0;  
       }  
       drawAll(context);  
       requestAnimFrame(animationLoop);        
      }  
       window.onload = function(){  
         var canvas = document.getElementById("myCanvas");  
         var context = canvas.getContext("2d");  
         canvas.addEventListener('mousemove', function (evt) {  
           var mousePos = getMousePos(canvas, evt);  
           drawAll(context);  
         }, false);  
         canvas.addEventListener('click', function (evt) {  
           var mousePos = getMousePos(canvas, evt);  
           if(clicked == 0)   
            clicked = 1;   
         }, false);  
         drawAll(context, 300, 200);  
         // Start animation  
         animationLoop();  
       };  
     </script>  
   </head>  
   <body onmousedown="return false;">  
     <canvas id="myCanvas" width="600" height="400">  
     </canvas>  
    <button onclick="setColorSetIndex(0)"> green </button>  
    <button onclick="setColorSetIndex(1)"> blue </button>  
    <button onclick="setColorSetIndex(2)"> orange </button>  
    <div id="help">  
     Animations are:   
     <ul>  
      <li> moving mouse on the canvas = move eyes; </li>  
      <li> click on the canvas = close mouth; </li>  
      <li> click on the buttons = change monster color; </li>  
     </ul>  
    </div>  
     </body>  
 </html>  


As you can see, there is basically no HTML. It is a pure drawing made in Javascript...

venerdì 7 novembre 2014

Improvement philosophy

Next time think twice about what are you doing... are you sure that you cannot stop for a second?



sabato 1 novembre 2014

Sankey Diagram with D3JS




I received a task recently which gave me a lot of fun.
I had to create a diagram like the one above with adapted numbers for my company core business.
The idea was to have such numbers in a Power point. One of the biggest challenge was to dig in the DB in order to find the correct informations (but this is beyond the scope of this post).
One of the funniest task was to build the power point presentation! How can I build a power point reporting the information such the one in the diagram above?

I googled it and I discovered that this diagram is called "Sankey" diagram.
I tried then to find any tool who was supporting the creation of those type of diagrams.
Of course, first choice was google chart:
https://developers.google.com/chart/interactive/docs/gallery/sankey

I found that this lybrary is quite static, it does not give too much freedom. Additionally the graphic is quite poor.

I searched deeper and I fell in love for the library D3js
http://d3js.org/

This library support too many of the non conventional diagrams. No words to mention, just have a look at the examples:
https://github.com/mbostock/d3/wiki/Gallery

I must confess that it is not really straightforward, not as easy as the google chart one.
As well the possibilities are enormous.

In this post I want to have my usual copy and paste snippets.
I found a very interesting tutorial here:
http://bl.ocks.org/d3noob/5028304

I have reported here the result of my task (of course number changed):
http://marcoandolfi.eu5.org/sankey_test/viewer.html

And now the copy and paste part.
You need three things:
- The javascript library
- The json file.
- The HTML file rendering it.

The javascript library Sankey.js, I stored a copy on my website here).
Here it follow an example of possible JSON file. Very easy: links and nodes.

 {  
 "links": [  
 {"source":"1A","target":"2A","value":"10"},  
 {"source":"1A","target":"2B","value":"1" },  
 {"source":"1B","target":"2B","value":"15"},  
 {"source":"1C","target":"2A","value":"20"},  
 {"source":"1C","target":"2B","value":"14"},  
 {"source":"1C","target":"2C","value":"45"},  
 {"source":"1D","target":"2A","value":"25"},  
 {"source":"1D","target":"2C","value":"25"},  
 {"source":"2A","target":"3A","value":"9" },  
 {"source":"2A","target":"3B","value":"15"},  
 {"source":"2A","target":"3C","value":"28"},  
 {"source":"2A","target":"3D","value":"17"},  
 {"source":"2B","target":"3B","value":"15"},  
 {"source":"2B","target":"3C","value":"15"},  
 {"source":"2C","target":"4B","value":"50"},  
 {"source":"3A","target":"4A","value":"10"},  
 {"source":"3B","target":"4B","value":"20"},  
 {"source":"3B","target":"4A","value":"5" },  
 {"source":"3C","target":"4B","value":"40"}  
 ] ,  
 "nodes": [  
 {"name":"1A"},  
 {"name":"1B"},  
 {"name":"1C"},  
 {"name":"1D"},  
 {"name":"2A"},  
 {"name":"2B"},  
 {"name":"2C"},  
 {"name":"3A"},  
 {"name":"3B"},  
 {"name":"3C"},  
 {"name":"3D"},  
 {"name":"4A"},  
 {"name":"4B"}  
 ] }  


Finally the HTML page (it is needed to adapt only the bold part: Json file name or ajax call and the location of your Sankey.js).
 <!DOCTYPE html>  
 <meta charset="utf-8">  
 <title>SANKEY Experiment</title>  
 <style>  
 .node rect {  
  cursor: move;  
  fill-opacity: .9;  
  shape-rendering: crispEdges;  
 }  
 .node text {  
  pointer-events: none;  
  text-shadow: 0 1px 0 #fff;  
 }  
 .link {  
  fill: none;  
  stroke: #000;  
  stroke-opacity: .2;  
 }  
 .link:hover {  
  stroke-opacity: .5;  
 }  
 </style>  
 <body>  
 <p id="chart">  
 <script src="http://d3js.org/d3.v3.js"></script>  
 <script src="sankey.js"></script>  
 <script>  
 var units = "Widgets";  
 var margin = {top: 10, right: 10, bottom: 10, left: 10},  
   width = 1200 - margin.left - margin.right,  
   height = 740 - margin.top - margin.bottom;  
 var formatNumber = d3.format(",.0f"),  // zero decimal places  
   format = function(d) { return formatNumber(d) + " " + units; },  
   color = d3.scale.category20();  
 // append the svg canvas to the page  
 var svg = d3.select("#chart").append("svg")  
   .attr("width", width + margin.left + margin.right)  
   .attr("height", height + margin.top + margin.bottom)  
  .append("g")  
   .attr("transform",   
      "translate(" + margin.left + "," + margin.top + ")");  
 // Set the sankey diagram properties  
 var sankey = d3.sankey()  
   .nodeWidth(36)  
   .nodePadding(10)  
   .size([width, height]);  
 var path = sankey.link();  
 // load the data  
 d3.json("data.json", function(error, graph) {  
   var nodeMap = {};  
   graph.nodes.forEach(function(x) { nodeMap[x.name] = x; });  
   graph.links = graph.links.map(function(x) {  
    return {  
     source: nodeMap[x.source],  
     target: nodeMap[x.target],  
     value: x.value  
    };  
   });  
  sankey  
    .nodes(graph.nodes)  
    .links(graph.links)  
    .layout(32);  
 // add in the links  
  var link = svg.append("g").selectAll(".link")  
    .data(graph.links)  
   .enter().append("path")  
    .attr("class", "link")  
    .attr("d", path)  
    .style("stroke-width", function(d) { return Math.max(1, d.dy); })  
    .sort(function(a, b) { return b.dy - a.dy; });  
 // add the link titles  
  link.append("title")  
     .text(function(d) {  
         return d.source.name + " → " +   
         d.target.name + "\n" + format(d.value); });  
 // add in the nodes  
  var node = svg.append("g").selectAll(".node")  
    .data(graph.nodes)  
   .enter().append("g")  
    .attr("class", "node")  
    .attr("transform", function(d) {   
            return "translate(" + d.x + "," + d.y + ")"; })  
   .call(d3.behavior.drag()  
    .origin(function(d) { return d; })  
    .on("dragstart", function() {   
            this.parentNode.appendChild(this); })  
    .on("drag", dragmove));  
 // add the rectangles for the nodes  
  node.append("rect")  
    .attr("height", function(d) { return d.dy; })  
    .attr("width", sankey.nodeWidth())  
    .style("fill", function(d) {   
            return d.color = color(d.name.replace(/ .*/, "")); })  
    .style("stroke", function(d) {   
            return d3.rgb(d.color).darker(2); })  
   .append("title")  
    .text(function(d) {   
            return d.name + "\n" + format(d.value); });  
 // add in the title for the nodes  
  node.append("text")  
    .attr("x", -6)  
    .attr("y", function(d) { return d.dy / 2; })  
    .attr("dy", ".35em")  
    .attr("text-anchor", "end")  
    .attr("transform", null)  
    .text(function(d) { return d.name; })  
   .filter(function(d) { return d.x < width / 2; })  
    .attr("x", 6 + sankey.nodeWidth())  
    .attr("text-anchor", "start");  
 // the function for moving the nodes  
  function dragmove(d) {  
   d3.select(this).attr("transform",   
     "translate(" + (  
            d.x = Math.max(0, Math.min(width - d.dx, d3.event.x))  
          ) + "," + (  
           d.y = Math.max(0, Math.min(height - d.dy, d3.event.y))  
       ) + ")");  
   sankey.relayout();  
   link.attr("d", path);  
  }  
 });  
 </script>  
 </body>  
 </html>  


Most probably you want to run the first test on your machine. Therefore you need to launch your browser in a "special mode" so that it will be able to read local file (for securityy reason browser deactivate this mode). I like Chrome and therefore what follows is valid only for Chrome, but extremely similar is for the other browser.

First be sure that you have killed every process of Chrome, which does not simply mean to close the browser, but to open the task manager and kill every process.
Then create a batch file like this (or invoke from command line):

 cd "C:\Program Files (x86)\Google\Chrome\Application"  
 chrome.exe --allow-file-access-from-files  



lunedì 29 settembre 2014

Extract DB as XML



I want to have a single query which is extracting the complete structure of the database into a simple XML.


For example having:
 create table my_test   
 (  
  f11 number,   
  f12 number,   
  f13 varchar2(300),   
  CONSTRAINT my_test_pk PRIMARY KEY (f11)  
 );  
 create table my_test_2   
 (  
  f21 number,   
  f22 number,   
  f23 varchar2(300),   
  CONSTRAINT my_test_pk_2 PRIMARY KEY (f21, f22),   
   CONSTRAINT fk_column_2  
   FOREIGN KEY (f22)  
   REFERENCES my_test (f11)  
 );  
 create table my_test_3  
 (  
  f31 number,   
  f32 number,   
  f33 varchar2(300),   
  CONSTRAINT my_test_pk_3 PRIMARY KEY (f31),   
   CONSTRAINT fk_column_3  
   FOREIGN KEY (f32)  
   REFERENCES my_test (f11)  
 );  
 comment on table my_test_2 is 'this is my test_2';  
 comment on table my_test is 'this is my test';  
 comment on column my_test.f11 is 'my_test.f11';  
 comment on column my_test.f12 is 'my_test.f12';  
 comment on column my_test.f13 is 'my_test.f13';  
 comment on column my_test_2.f21 is 'my_test.f21';  
 comment on column my_test_2.f22 is 'my_test.f22';  
 comment on column my_test_2.f23 is 'my_test.f23';   


I want to have as output:
 <?xml version="1.0" encoding="UTF-8"?>  
 <schema name="MARCOA">  
   <table name="MY_TEST" pk_column_name="F11" pk_name="MY_TEST_PK" pk_owner="MARCOA">  
    <comment>this is my test</comment>  
    <column name="F11" num_nulls="" num_distinct="" type="NUMBER(22, 0)" nullable="N">  
      <comment>my_test.f11</comment>  
      <referenced_by rk_ref_column_name="F22" rk_ref_table_name="MY_TEST_2" />  
      <referenced_by rk_ref_column_name="F32" rk_ref_table_name="MY_TEST_3" />  
    </column>  
    <column name="F12" num_nulls="" num_distinct="" type="NUMBER(22, 0)" nullable="Y">  
      <comment>my_test.f12</comment>  
    </column>  
    <column name="F13" num_nulls="" num_distinct="" type="VARCHAR2(300)" nullable="Y">  
      <comment>my_test.f13</comment>  
    </column>  
   </table>  
   <table name="MY_TEST_2">  
    <comment>this is my test_2</comment>  
    <column name="F21" num_nulls="" num_distinct="" type="NUMBER(22, 0)" nullable="N">  
      <comment>my_test.f21</comment>  
    </column>  
    <column name="F22" num_nulls="" num_distinct="" type="NUMBER(22, 0)" nullable="N">  
      <comment>my_test.f22</comment>  
      <referencing fk_ref_column_name="F11" fk_ref_table_name="MY_TEST" />  
    </column>  
    <column name="F23" num_nulls="" num_distinct="" type="VARCHAR2(300)" nullable="Y">  
      <comment>my_test.f23</comment>  
    </column>  
   </table>  
   <table name="MY_TEST_3">  
    <comment />  
    <column name="F31" num_nulls="" num_distinct="" type="NUMBER(22, 0)" nullable="N">  
      <comment />  
    </column>  
    <column name="F32" num_nulls="" num_distinct="" type="NUMBER(22, 0)" nullable="Y">  
      <comment />  
      <referencing fk_ref_column_name="F11" fk_ref_table_name="MY_TEST" />  
    </column>  
    <column name="F33" num_nulls="" num_distinct="" type="VARCHAR2(300)" nullable="Y">  
      <comment />  
    </column>  
   </table>  
 </schema>  




And here is the query to do it:
 with csv_constraint as (  
   SELECT distinct owner, constraint_name, table_name, SUBSTR (SYS_CONNECT_BY_PATH (column_name , ','), 2) csv  
      FROM (SELECT   
            acc.owner, acc.constraint_name, acc.table_name, acc.column_name,   
            acc.position rn,  
            COUNT (*) OVER (partition by cc.constraint_name) cnt  
         FROM   
            all_cons_columns acc left outer join all_constraints cc  
              on (acc.owner = cc.owner and acc.table_name = cc.table_name and cc.constraint_type = 'P')  
         where (cc.owner, cc.table_name) in (select t.owner, t.table_name from all_tables t)  
         /* to be changed */ and cc.owner = 'MARCOA'  
         )  
      WHERE rn = cnt  
   START WITH rn = 1  
   CONNECT BY constraint_name = prior constraint_name and rn = PRIOR rn + 1  
 ),   
 xml_builder as(  
  select 'STARTTAG' as tagtype, '_0000' as postfix, '<?>' as xml_tag from dual union all   
  select 'CLOSETAG' as tagtype, '_9999' as postfix, '</?>' as xml_tag from dual   
 ),   
 schemas as (  
  select   
   lpad(user_id, 4, '0') as hh_s_id,   
   username  
  from all_users   
  where username = user  
 ),   
 tab_cols_cons as (  
   select   
     tt.owner,   
     tt.table_name,   
     cl.owner pk_owner,   
     cl.constraint_name pk_name,   
     cl.csv pk_column_name,   
     lpad(ss.user_id, 4, '0') as hh_s_id,   
     lpad(t_id, 4, '0') as hh_t_id  
   from (  
    select ROW_NUMBER( ) OVER (PARTITION BY owner ORDER BY table_name NULLS LAST) t_id, t.*  
    from all_tables t  
   )tt join all_users ss   
              on (tt.owner = ss.username)  
             left outer join csv_constraint cl  
              on (cl.owner = tt.owner and tt.table_name = cl.table_name)  
   /* to be changed */ where tt.owner = 'MARCOA' and tt.table_name like 'MY_TEST%'  
 ),   
 tab_cols as (  
   select   
     atc.owner,   
     atc.table_name,   
     atc.column_name,   
     atc.nullable,   
     case   
      when atc.data_type = 'NUMBER' then atc.data_type || '(' || atc.data_length || ', ' || nvl(atc.data_scale, 0) || ')'  
      when atc.data_type = 'VARCHAR2' then atc.data_type || '(' || atc.data_length || ')'  
      else atc.data_type  
     end as col_type,  
     atc.num_distinct,  
     atc.num_nulls,   
     tcc.hh_s_id,   
     tcc.hh_t_id,   
     lpad(atc.column_id + 1, 4, '0') as hh_c_id  
   from tab_cols_cons tcc join all_tab_columns atc on (tcc.owner = atc.owner and atc.table_name = tcc.table_name)  
 ),   
 refs as (  
     select distinct all_constraints.owner fk_owner, -- all_constraints.constraint_name, all_constraints.constraint_type,  
         all_constraints.constraint_name fk_name,  
         all_cons_columns.table_name fk_table_name,  
         all_cons_columns.column_name fk_column_name,  
         all_constraints.r_owner fk_ref_owner,  
         -- all_constraints.r_constraint_name fk_ref_pk_name,  
         fk_ref_table_name,  
         fk_ref_column_name  
      from ALL_CONSTRAINTS  
      join ALL_CONS_COLUMNS on ALL_CONSTRAINTS.constraint_name = ALL_CONS_COLUMNS.constraint_name  
      join (  
         select all_constraints.owner fk_owner,   
             all_constraints.constraint_name fk_name,   
             -- all_constraints.constraint_type,  
             all_cons_columns.table_name fk_ref_table_name,  
             all_cons_columns.column_name fk_ref_column_name  
          from ALL_CONSTRAINTS  
          join ALL_CONS_COLUMNS on ALL_CONSTRAINTS.constraint_name = ALL_CONS_COLUMNS.constraint_name  
          where constraint_type in ('P')  
         ) on all_constraints.r_owner = fk_owner and all_constraints.r_constraint_name = fk_name  
      where ALL_CONSTRAINTS.owner = user  
       and constraint_type in ('R')  
 ),   
 referenced_list as (  
  select fk_owner, fk_table_name rk_ref_table_name, fk_column_name rk_ref_column_name,   
   hh_s_id,   
   hh_t_id,   
   hh_c_id,   
   ROW_NUMBER( ) OVER (PARTITION BY fk_owner, fk_table_name , fk_column_name ORDER BY fk_ref_owner, fk_ref_table_name, fk_ref_column_name NULLS LAST) rd_id  
  from tab_cols tc join refs r on (tc.owner = r.fk_ref_owner and tc.table_name = r.fk_ref_table_name and tc.column_name = r.fk_ref_column_name )  
 ),   
 referencing_list as (  
  select fk_ref_owner, fk_ref_table_name, fk_ref_column_name,  
   hh_s_id,   
   hh_t_id,   
   hh_c_id  
  from tab_cols tc join refs r on (tc.owner = r.fk_owner and tc.table_name = r.fk_table_name and tc.column_name = r.fk_column_name )  
 )  
 select xml   
 from (  
   select res_query, final_id, xml   
   from (  
     select   
      'q1' as res_query,   
      case   
       when tagtype = 'STARTTAG'   
         then replace(xml_builder.xml_tag, '?', 'schema name="' || username || '"')   
         else replace(xml_builder.xml_tag, '?', 'schema')   
      end as xml,   
      hh_s_id || xml_builder.postfix as final_id  
     from schemas join xml_builder on (1=1)  
     union all   
     select   
      'q2' as res_query,   
      case   
       when tagtype = 'STARTTAG'   
         then   
           case when pk_column_name is null   
            then replace(xml_builder.xml_tag, '?', 'table name="' || table_name || '"')  
            else replace(xml_builder.xml_tag, '?', 'table name="' || table_name || '" pk_owner = "' || pk_owner || '" pk_name = "' || pk_name || '" pk_column_name = "' || pk_column_name || '"')  
           end  
         else replace(xml_builder.xml_tag, '?', 'table')   
      end as xml,   
       hh_s_id || '_' || hh_t_id || xml_builder.postfix as final_id  
     from tab_cols_cons join xml_builder on (1=1)  
     union all   
     select   
       'q3' as res_query,   
       '<comment>' || comments || '</comment>' as xml,   
       hh_s_id || '_' || hh_t_id || '_0001' as final_id  
     from tab_cols_cons t join all_tab_comments atc on (t.owner = atc.owner and t.table_name = atc.table_name)  
     union all   
     select   
      'q4' as res_query,   
      case   
       when tagtype = 'STARTTAG'   
         then replace(xml_builder.xml_tag, '?', 'column name="' || column_name || '" nullable="' || nullable || '" type="' || col_type || '" num_distinct = "' || num_distinct || '" num_nulls ="' || num_nulls || '"' )   
         else replace(xml_builder.xml_tag, '?', 'column')   
      end as xml,   
      hh_s_id || '_' || hh_t_id || '_' || hh_c_id || xml_builder.postfix as final_id  
     from  tab_cols join xml_builder on (1=1)  
     union all   
     select   
       'q5' as res_query,   
       '<comment>' || comments || '</comment>' as xml,   
       hh_s_id || '_' || hh_t_id || '_' || hh_c_id || '_0001' as final_id  
     from tab_cols tc join all_col_comments acc on (tc.owner = acc.owner and tc.table_name = acc.table_name and tc.column_name = acc.column_name)  
     union all   
     select   
       'q6' as res_query,   
       '<referencing fk_ref_table_name="' || fk_ref_table_name || '" fk_ref_column_name="' || fk_ref_column_name || '"/>' as xml,   
       hh_s_id || '_' || hh_t_id || '_' || hh_c_id || '_0002' as final_id  
     from referencing_list  
     union all   
     select   
       'q7' as res_query,   
       '<referenced_by rk_ref_table_name="' || rk_ref_table_name || '" rk_ref_column_name="' || rk_ref_column_name || '"/>' as xml,   
       hh_s_id || '_' || hh_t_id || '_' || hh_c_id || '_' || lpad(rd_id+2, 4, '0') as final_id  
     from referenced_list  
   )  
   order by 2  
 );