Chapter 5 Input and Output

5.1 Excel 파일 다루기

5.1.1 Excel 읽기

install.packages("readxl") 을 실행해서 패키지를 설치하세요.

# One-time setup: uncomment to install readxl if it is not installed yet.
# install.packages("readxl")
# Attach readxl so read_excel() and excel_sheets() can be called unqualified.
library(readxl)

## Warning: package 'readxl' was built under R version 4.0.2

# Read the workbook with default settings; no sheet is specified here.
excel_data_ex <- read_excel("data_ex.xlsx")

5.1.2 Excel Sheet 지정하여 읽기

Excel Sheet 정보 보기

# List the names of the sheets contained in the workbook.
excel_sheets("data_ex.xlsx")

## [1] "member"   "employee" "product"

sheet 이름으로 불러오기

# Select the sheet by its name, then preview the first rows.
excel_data_ex <- read_excel(
  path = "data_ex.xlsx",
  sheet = "product"
)
head(excel_data_ex)

## # A tibble: 6 x 3
##      ID prod_name price
##   <dbl> <chr>     <dbl>
## 1     1 toy         100
## 2     2 pen         200
## 3     3 shoes       300
## 4     4 notebook   1300
## 5     5 shirts     2100
## 6     6 pants     31000

sheet 번호로 불러오기

# A numeric `sheet` selects the sheet by position; 3 is "product" in the
# sheet list printed by excel_sheets() earlier.
excel_data_ex <- read_excel("data_ex.xlsx", sheet = 3)
head(excel_data_ex)

## # A tibble: 6 x 3
##      ID prod_name price
##   <dbl> <chr>     <dbl>
## 1     1 toy         100
## 2     2 pen         200
## 3     3 shoes       300
## 4     4 notebook   1300
## 5     5 shirts     2100
## 6     6 pants     31000

5.1.3 Excel 의 다양한 읽기 형태

row 개수 지정하기

# n_max caps the number of data rows read; the header row is not counted.
read_excel(
  path = "data_ex.xlsx",
  sheet = "product",
  n_max = 3
)

## # A tibble: 3 x 3
##      ID prod_name price
##   <dbl> <chr>     <dbl>
## 1     1 toy         100
## 2     2 pen         200
## 3     3 shoes       300

셀(컬럼) 범위를 지정하여 읽기

# `range` restricts reading to a rectangle of cells; the first row of the
# rectangle (B1:C1) supplies the column names.
read_excel(
  path = "data_ex.xlsx",
  sheet = "product",
  range = "B1:C4"
)

## # A tibble: 3 x 2
##   prod_name price
##   <chr>     <dbl>
## 1 toy         100
## 2 pen         200
## 3 shoes       300

# The sheet can also be named inside the range itself, as "sheet!cells".
read_excel(
  path = "data_ex.xlsx",
  range = "product!B1:C4"
)

## # A tibble: 3 x 2
##   prod_name price
##   <chr>     <dbl>
## 1 toy         100
## 2 pen         200
## 3 shoes       300

# cell_cols() selects whole columns. No sheet is specified in this call, so
# the result (SEX/AGE) comes from a different sheet than the "product"
# examples before it.
read_excel(
  path = "data_ex.xlsx",
  range = cell_cols("B:C")
)

## # A tibble: 10 x 2
##    SEX     AGE
##    <chr> <dbl>
##  1 F        22
##  2 F        12
##  3 F        34
##  4 F        44
##  5 M        12
##  6 F        34
##  7 M        12
##  8 F        56
##  9 M        12
## 10 M         3

5.1.4 CSV, TEXT 파일 읽기

5.1.4.1 read_csv() 함수 사용

기본 읽기

# Attach the tidyverse; this brings in readr, which provides read_csv().
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0

## Warning: package 'ggplot2' was built under R version 4.0.2

## Warning: package 'tibble' was built under R version 4.0.2

## Warning: package 'tidyr' was built under R version 4.0.2

## Warning: package 'readr' was built under R version 4.0.2

## Warning: package 'dplyr' was built under R version 4.0.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

컬럼 타입 지정

# Read data_ex.txt with an explicit parser for each expected column.
# NOTE(review): the warning printed after this call says none of these names
# matched the parsed columns. read_csv() splits fields on commas, so
# data_ex.txt is presumably not comma-delimited (read.table() with its
# default separator is used on the same file further down) — confirm the
# delimiter and consider read_delim() or read_table() instead.
df3 <- read_csv(
  "data_ex.txt",
  col_types = cols(
    ID = col_double(),
    SEX = col_character(),
    AGE = col_integer(),
    AREA = col_character()
  )
)

## Warning: The following named parsers don't match the column names: ID, SEX, AGE,
## AREA

read.table 을 사용하여 텍스트 파일 읽기

# Read the text file with read.table() defaults (no `header` argument given).
ex_data <- read.table("data_ex.txt")
# View(ex_data)  # uncomment to inspect the result in the data viewer

URL로부터 데이터 읽기

# Example of reading a comma-separated file directly from a URL. It is left
# commented out here (presumably to avoid a network dependency — confirm the
# URL still resolves before enabling it).
# uciCar <- read.table(
#   "http://www.win-vector.com/dfiles/car.data.csv",
#   sep = ",",
#   header = TRUE
# )

# header = TRUE tells read.table() to use the first line as column names.
ex_data <- read.table("data_ex.txt", header = TRUE)
# View(ex_data)  # uncomment to inspect the result in the data viewer