## http://cran.r-project.org/doc/contrib/usingR.pdf

## 1. Starting Up
## 1.1	Getting started under Windows
## 1.2	Use of an Editor Script Window

## 1.3 	A Short R Session
# Read the table stored in austpop.txt; header=TRUE tells read.table that the
# first line of the file holds the column names.
austpop <- read.table("austpop.txt", header=TRUE)
# Typing the object's name prints the whole data frame (output below).
austpop
> austpop
  Year  NSW Vic.  Qld   SA   WA Tas.  NT ACT Aust.
1 1917 1904 1409  683  440  306  193   5   3  4941
2 1927 2402 1727  873  565  392  211   4   8  6182
3 1937 2693 1853  993  589  457  233   6  11  6836
4 1947 2985 2055 1106  646  502  257  11  17  7579
5 1957 3625 2656 1413  873  688  326  21  38  9640
6 1967 4295 3274 1700 1110  879  375  62 103 11799
7 1977 5002 3837 2130 1286 1204  415 104 214 14192
8 1987 5617 4210 2675 1393 1496  449 158 265 16264
9 1997 6274 4605 3401 1480 1798  474 187 310 18532

# names() returns the column names of the data frame (output below).
names(austpop)
> names(austpop)
 [1] "Year"  "NSW"   "Vic."  "Qld"   "SA"    "WA"    "Tas."  "NT"    "ACT"  
[10] "Aust."

-----------------------------python
### austpop <- read.table("austpop.txt", header=TRUE)
file = open('austpop.txt')
header = file.readline().replace('"','').split()
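d = [(name, '
### NOTE: the rest of this Python snippet and the start of the next R example
### are missing here (the text appears to have been lost in extraction).

> attach("hills.RData")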
> summary(hills)
    distance          climb           time       
 Min.   : 2.000   Min.   : 300   Min.   : 15.95  
 1st Qu.: 4.500   1st Qu.: 725   1st Qu.: 28.00  
 Median : 6.000   Median :1000   Median : 39.75  
 Mean   : 7.529   Mean   :1815   Mean   : 57.88  
 3rd Qu.: 8.000   3rd Qu.:2200   3rd Qu.: 68.62  
 Max.   :28.000   Max.   :7500   Max.   :204.62  

> hills
                 distance climb    time
Greenmantle           2.5   650  16.083
Carnethy              6.0  2500  48.350
Craig Dunain          6.0   900  33.650
Ben Rha               7.5   800  45.600
Ben Lomond            8.0  3070  62.267
Goatfell              8.0  2866  73.217
Bens of Jura         16.0  7500 204.617
Cairnpapple           6.0   800  36.367
Scolty                5.0   800  29.750
Traprain              6.0   650  39.750
Lairig Ghru          28.0  2100 192.667
Dollar                5.0  2000  43.050
Lomonds               9.5  2200  65.000
Cairn Table           6.0   500  44.133
Eildon Two            4.5  1500  26.933
Cairngorm            10.0  3000  72.250
Seven Hills          14.0  2200  98.417
Knock Hill            3.0   350  78.650
Black Hill            4.5  1000  17.417
Creag Beag            5.5   600  32.567
Kildcon Hill          3.0   300  15.950
Meall Ant-Suidhe      3.5  1500  27.900
Half Ben Nevis        6.0  2200  47.633
Cow Hill              2.0   900  17.933
N Berwick Law         3.0   600  18.683
Creag Dubh            4.0  2000  26.217
Burnswark             6.0   800  34.433
Largo Law             5.0   950  28.567
Criffel               6.5  1750  50.500
Acmony                5.0   500  20.950
Ben Nevis            10.0  4400  85.583
Knockfarrel           6.0   600  32.383
Two Breweries        18.0  5200 170.250
Cockleroi             4.5   850  28.100
Moffat Chase         20.0  5000 159.833

-----------------------------python
###> attach("hills.RData")
###> summary(hills)
# Python counterpart of the R calls above, using the Rtools helper module.
import Rtools
# Note: the data is read from the text file 'hills.txt' rather than from
# hills.RData as in the R session.
hills = Rtools.readtable_DF('hills.txt')
# Prints min / quartiles / median / mean / max per column (output below);
# the columns appear in a different order than in R's summary above.
Rtools.summary(hills)
     climb         distance         time     
Min.   : 300     Min.   : 2       Min.   : 15.95   
1st Qu.: 725     1st Qu.: 4.5     1st Qu.: 28      
Median : 1000    Median : 6       Median : 39.75   
Mean   : 1815.3  Mean   : 7.5286  Mean   : 57.876  
3rd Qu.: 2200    3rd Qu.: 8       3rd Qu.: 68.625  
Max.   : 7500    Max.   : 28      Max.   : 204.62  

###> pairs(hills)
# Python counterpart of pairs(): presumably draws the scatterplot matrix of
# the hills columns (inferred from the helper's name -- confirm in Rtools).
Rtools.ScatterMatrix(hills)

###> options(digits=3)
###> cor(hills)
###        distance climb time
###distance 1.000 0.652 0.920
###climb    0.652 1.000 0.805
###time     0.920 0.805 1.000

# Python counterpart of cor(hills). The output below lists the columns in the
# order climb, distance, time, whereas R prints distance, climb, time.
Rtools.corrcoefDF(hills)
                  climb distance    time 
          climb        1  0.6523  0.8052
       distance   0.6523       1  0.9196
           time   0.8052  0.9196       1

#----------------------------------------------------------------

###> plot(distance ~ stretch,data=elasticband, pch=16)

# Python counterpart of the plot above; presumably the column arguments are
# given as x ('stretch') then y ('distance') -- confirm against Rtools.
Rtools.scatterDF(elasticband, 'stretch', 'distance')

###> elastic.lm <- lm(distance~stretch,data=elasticband)

# Python counterpart of lm(distance ~ stretch): simple least-squares fit.
xs = array(elasticband['stretch'])
ys = array(elasticband['distance'])
(a_s,b_s,r,tt,stderr)=linregress(xs, ys)
(4.5535714285714288,
 -63.571428571428584,
 0.79699954090878578,
 0.031860702945051264,
 0.87860816982116141)
# returned values: slope, intercept, r, two-tailed prob, stderr-of-the-estimate
# NOTE: the fifth value (0.8786) matches neither R's residual standard error
# (16.33) nor its Std. Error for stretch (1.543); both are recomputed below.

# Residuals of the fitted line (R lists the same seven residuals in its
# summary); 5 = 7 observations - 2 estimated parameters.
resid = ys - (a_s*xs + b_s)
stderr_est = sqrt(np.sum(resid**2)/5.)
16.332045624651993
# Standard error of the slope: residual standard error / sqrt(Sxx), where
# Sxx is the sum of squared deviations of stretch from its mean.
# This should reproduce R's Std. Error for stretch (1.543).
stderr_b = stderr_est / sqrt(np.sum((xs - np.mean(xs))**2))

###> lm(distance ~stretch,data=elasticband)
###Call:
###lm(formula = distance ~ stretch, data = elasticband)
###Coefficients:
###(Intercept) stretch
###-63.571 4.554
###More complete information is available by typing
###> summary(lm(distance~stretch,data=elasticband))

###Call:
###lm(formula = distance ~ stretch, data = elasticband)
###
###Residuals:
###       1        2        3        4        5        6        7
### -2.1071   0.3214 -18.0000  -1.8929  27.7857 -13.3214   7.2143
###
###Coefficients:
###            Estimate Std. Error t value Pr(>|t|)
###(Intercept)  -63.571     74.332   0.855   0.4315
###stretch        4.554      1.543   2.951   0.0319 *
###---
###Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
###
###Residual standard error: 16.33 on 5 degrees of freedom
###Multiple R-squared: 0.6352,     Adjusted R-squared: 0.5622
###F-statistic: 8.706 on 1 and 5 DF,  p-value: 0.03186

import Rtools
# Python counterpart of summary(lm(distance~stretch, ...)); presumably the
# arguments are the x column ('stretch') then the y column ('distance'), and
# summary=True requests the full report -- confirm against Rtools.
Rtools.LinRegDF(elasticband, 'stretch', 'distance', summary=True)

#----------------------------------------------------------------

###2. The following ten observations, taken during the years 1970-79, are on October snow cover for Eurasia.
###(Snow cover is in millions of square kilometers):
###year snow.cover
###1970 6.5
###1971 12.0
###1972 14.9
###1973 10.0
###1974 10.7
###1975 7.9
###1976 21.9
###1977 12.5
###1978 14.5
###1979 9.2
###i. Enter the data into R. [Section 1.3.1 showed one way to do this. To save keystrokes, enter the successive
###years as 1970:1979]
###ii. Plot snow.cover versus year.
###iii. Use the hist() command to plot a histogram of the snow cover values.
###iv. Repeat ii and iii after taking logarithms of snow cover.

# Exercise 2 in Python: October snow cover for Eurasia, 1970-79
# (millions of square kilometers).
# i. Enter the data. zip() is wrapped in list() so that array() receives a
#    sequence of (year, cover) pairs on Python 3 as well, where zip() is lazy.
years = range(1970, 1980)
cover = [6.5, 12., 14.9, 10., 10.7, 7.9, 21.9, 12.5, 14.5, 9.2]
snow = P.DataFrame(array(list(zip(years, cover))), index=range(10), columns=['year','snow_cover'])

# ii./iii. Scatter plot and histogram. scatterDF is called as
#          (frame, x column, y column), the same form used for elasticband.
Rtools.scatterDF(snow, 'year', 'snow_cover')
Rtools.histDF(snow, 'snow_cover')

# iv. Repeat after taking logarithms (this overwrites the snow_cover column).
snow['snow_cover'] = log(snow['snow_cover'])
Rtools.scatterDF(snow, 'year', 'snow_cover')
Rtools.histDF(snow, 'snow_cover')