## http://cran.r-project.org/doc/contrib/usingR.pdf ## 1. Starting Up ## 1.1 Getting started under Windows ## 1.2 Use of an Editor Script Window ## 1.3 A Short R Session austpop <- read.table("austpop.txt", header=TRUE) austpop > austpop Year NSW Vic. Qld SA WA Tas. NT ACT Aust. 1 1917 1904 1409 683 440 306 193 5 3 4941 2 1927 2402 1727 873 565 392 211 4 8 6182 3 1937 2693 1853 993 589 457 233 6 11 6836 4 1947 2985 2055 1106 646 502 257 11 17 7579 5 1957 3625 2656 1413 873 688 326 21 38 9640 6 1967 4295 3274 1700 1110 879 375 62 103 11799 7 1977 5002 3837 2130 1286 1204 415 104 214 14192 8 1987 5617 4210 2675 1393 1496 449 158 265 16264 9 1997 6274 4605 3401 1480 1798 474 187 310 18532 names(austpop) > names(austpop) [1] "Year" "NSW" "Vic." "Qld" "SA" "WA" "Tas." "NT" "ACT" [10] "Aust." -----------------------------python ### austpop <- read.table("austpop.txt", header=TRUE) file = open('austpop.txt') header = file.readline().replace('"','').split() d = [(name, 'attach("hills.RData") > summary(hills) distance climb time Min. : 2.000 Min. : 300 Min. : 15.95 1st Qu.: 4.500 1st Qu.: 725 1st Qu.: 28.00 Median : 6.000 Median :1000 Median : 39.75 Mean : 7.529 Mean :1815 Mean : 57.88 3rd Qu.: 8.000 3rd Qu.:2200 3rd Qu.: 68.62 Max. :28.000 Max. :7500 Max. 
:204.62 > hills distance climb time Greenmantle 2.5 650 16.083 Carnethy 6.0 2500 48.350 Craig Dunain 6.0 900 33.650 Ben Rha 7.5 800 45.600 Ben Lomond 8.0 3070 62.267 Goatfell 8.0 2866 73.217 Bens of Jura 16.0 7500 204.617 Cairnpapple 6.0 800 36.367 Scolty 5.0 800 29.750 Traprain 6.0 650 39.750 Lairig Ghru 28.0 2100 192.667 Dollar 5.0 2000 43.050 Lomonds 9.5 2200 65.000 Cairn Table 6.0 500 44.133 Eildon Two 4.5 1500 26.933 Cairngorm 10.0 3000 72.250 Seven Hills 14.0 2200 98.417 Knock Hill 3.0 350 78.650 Black Hill 4.5 1000 17.417 Creag Beag 5.5 600 32.567 Kildcon Hill 3.0 300 15.950 Meall Ant-Suidhe 3.5 1500 27.900 Half Ben Nevis 6.0 2200 47.633 Cow Hill 2.0 900 17.933 N Berwick Law 3.0 600 18.683 Creag Dubh 4.0 2000 26.217 Burnswark 6.0 800 34.433 Largo Law 5.0 950 28.567 Criffel 6.5 1750 50.500 Acmony 5.0 500 20.950 Ben Nevis 10.0 4400 85.583 Knockfarrel 6.0 600 32.383 Two Breweries 18.0 5200 170.250 Cockleroi 4.5 850 28.100 Moffat Chase 20.0 5000 159.833 -----------------------------python ###> attach("hills.RData") ###> summary(hills) import Rtools hills = Rtools.readtable_DF('hills.txt') Rtools.summary(hills) climb distance time Min. : 300 Min. : 2 Min. : 15.95 1st Qu.: 725 1st Qu.: 4.5 1st Qu.: 28 Median : 1000 Median : 6 Median : 39.75 Mean : 1815.3 Mean : 7.5286 Mean : 57.876 3rd Qu.: 2200 3rd Qu.: 8 3rd Qu.: 68.625 Max. : 7500 Max. : 28 Max. 
: 204.62 ###> pairs(hills) # Rtools.ScatterMatrix(hills) ###> options(digits=3) ###> cor(hills) ### distance climb time ###distance 1.000 0.652 0.920 ###climb 0.652 1.000 0.805 ###time 0.920 0.805 1.000 Rtools.corrcoefDF(hills) climb distance time climb 1 0.6523 0.8052 distance 0.6523 1 0.9196 time 0.8052 0.9196 1 #---------------------------------------------------------------- ###> plot(distance ~ stretch,data=elasticband, pch=16) Rtools.scatterDF(elasticband, 'stretch', 'distance') ###> elastic.lm <- lm(distance~stretch,data=elasticband) (a_s,b_s,r,tt,stderr)=linregress(array(elasticband['stretch']), array(elasticband['distance'])) (4.5535714285714288, -63.571428571428584, 0.79699954090878578, 0.031860702945051264, 0.87860816982116141) slope, intercept, r, two-tailed prob, stderr-of-the-estimate stderr_est = sqrt(np.sum(resid**2)/5.) 16.332045624651993 stderr_b = ###> lm(distance ~stretch,data=elasticband) ###Call: ###lm(formula = distance ~ stretch, data = elasticband) ###Coefficients: ###(Intercept) stretch ###-63.571 4.554 ###More complete information is available by typing ###> summary(lm(distance~stretch,data=elasticband)) ###Call: ###lm(formula = distance ~ stretch, data = elasticband) ### ###Residuals: ### 1 2 3 4 5 6 7 ### -2.1071 0.3214 -18.0000 -1.8929 27.7857 -13.3214 7.2143 ### ###Coefficients: ### Estimate Std. Error t value Pr(>|t|) ###(Intercept) -63.571 74.332 -0.855 0.4315 ###stretch 4.554 1.543 2.951 0.0319 * ###--- ###Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 ### ###Residual standard error: 16.33 on 5 degrees of freedom ###Multiple R-squared: 0.6352, Adjusted R-squared: 0.5622 ###F-statistic: 8.706 on 1 and 5 DF, p-value: 0.03186 import Rtools Rtools.LinRegDF(elasticband, 'stretch', 'distance', summary=True) #---------------------------------------------------------------- ###2. The following ten observations, taken during the years 1970-79, are on October snow cover for Eurasia. 
###(Snow cover is in millions of square kilometers):
###year snow.cover
###1970 6.5
###1971 12.0
###1972 14.9
###1973 10.0
###1974 10.7
###1975 7.9
###1976 21.9
###1977 12.5
###1978 14.5
###1979 9.2
###i. Enter the data into R. [Section 1.3.1 showed one way to do this. To save keystrokes, enter the successive
###years as 1970:1979]
###ii. Plot snow.cover versus year.
###iii. Use the hist() command to plot a histogram of the snow cover values.
###iv. Repeat ii and iii after taking logarithms of snow cover.

# i. Enter the data: one row per year, columns 'year' and 'snow_cover'.
# list() materialises the (year, cover) pairs before array() sees them;
# on Python 3 zip() returns an iterator, which array() cannot turn into
# a 10x2 table.
snow = P.DataFrame(
    array(list(zip(range(1970, 1980),
                   [6.5, 12., 14.9, 10., 10.7, 7.9, 21.9, 12.5, 14.5, 9.2]))),
    index=range(10),
    columns=['year', 'snow_cover'])

# ii. Snow cover versus year. scatterDF is called as (frame, x column,
# y column), the form used for the elasticband plot earlier in this file.
Rtools.scatterDF(snow, 'year', 'snow_cover')

# iii. Histogram of the snow cover values.
Rtools.histDF(snow, 'snow_cover')

# iv. Repeat ii and iii on the log scale.
# NOTE: this overwrites the 'snow_cover' column in place, so from here on
# the column holds log(snow cover), not the raw values.
snow['snow_cover'] = log(snow['snow_cover'])
Rtools.scatterDF(snow, 'year', 'snow_cover')
Rtools.histDF(snow, 'snow_cover')