barplot...?

## KoNLP: Korean natural-language-processing package.
## tm: English natural-language-processing (text mining) package.

# NOTE(review): machine-specific path; "steve.txt" below is resolved
# relative to it.
setwd("D:/20180730/r/20181030")

# Install only the packages that are missing, instead of reinstalling tm on
# every run (and cover every package that is loaded below).
for (pkg in c("tm", "wordcloud", "RColorBrewer")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(tm)
library(wordcloud)
library(RColorBrewer)

# Read the text file line by line; VectorSource() then treats each element
# of the vector as one document.
data1 <- readLines("steve.txt")
data1

corp1 <- VCorpus(VectorSource(data1))
corp1
inspect(corp1)

# Term-document matrix of the raw (uncleaned) corpus.
tdm <- TermDocumentMatrix(corp1)
m <- as.matrix(tdm)

# Clean-up pipeline: whitespace, case, digits, punctuation.
corp2 <- tm_map(corp1, stripWhitespace)
# A plain base function such as tolower() has to be wrapped in
# content_transformer() so that tm_map() keeps returning text documents.
corp2 <- tm_map(corp2, content_transformer(tolower))
corp2 <- tm_map(corp2, removeNumbers)
corp2 <- tm_map(corp2, removePunctuation)
corp2 <- tm_map(corp2, PlainTextDocument)

# English stop words plus a few extra words to exclude from the analysis.
sword2 <- c(stopwords("en"), "and", "but", "not")
corp2 <- tm_map(corp2, removeWords, sword2)

tdm2 <- TermDocumentMatrix(corp2)
m2 <- as.matrix(tdm2)
# Number the document columns 1..n; the count is taken from the matrix
# instead of being hard-coded to 59.
colnames(m2) <- seq_len(ncol(m2))

# Frequency per term (rows) and total kept terms per document (columns).
freq1 <- sort(rowSums(m2), decreasing = TRUE)
freq2 <- sort(colSums(m2), decreasing = TRUE)
head(freq1, 20)
head(freq2, 20)

# Terms occurring at least twice, and terms correlated with "apple" (>= 0.5).
findFreqTerms(tdm2, 2)
findAssocs(tdm2, "apple", 0.5)

# Word cloud of the terms that occur at least twice, most frequent first.
palete <- brewer.pal(9, "Set1")
wordcloud(
  names(freq1),
  freq = freq1,
  scale = c(5, 1),
  min.freq = 2,
  colors = palete,
  random.order = FALSE,
  random.color = TRUE
)

# Bar chart of the 20 most frequent terms.
barplot(
  head(freq1, 20),
  main = "tm 패키지 test",
  las = 2,
  ylim = c(0, 20),
  col = rainbow(10),
  horiz = FALSE
)

##########################################
# abline: draws reference lines inside the bar chart
# h: horizontal lines
# seq(0, 20, 1): positions, one line per unit from 0 to 20
# lty: line type
#   0: blank   1: solid    2: dashed
#   3: dotted  4: dotdash
#   5: longdash
#   6: twodash
##########################################
abline(h = seq(0, 20, 1), lty = 3)

######################################
# Reading an Excel file (.xls)
######################################

## For 64-bit R
if (!requireNamespace("XLConnect", quietly = TRUE)) {
  install.packages("XLConnect")
}

## For 32-bit R
## install.packages("RODBC") ## ???

library(XLConnect)

# create = FALSE: these workbooks are only read, so a missing file should
# fail right here instead of silently producing a new, empty workbook whose
# sheets cannot be found afterwards.
xls <- loadWorkbook("ssec1804.xls", create = FALSE)
# NOTE(review): `all` shadows the name of base::all(); the name is kept so
# any later code that refers to it keeps working.
all <- readWorksheet(xls, sheet = "1.전체")

xls <- loadWorkbook("청소년통계.xls", create = FALSE)
sheet2 <- readWorksheet(xls, sheet = "고민문제")
sheet3 <- readWorksheet(xls, sheet = "음주흡연율")

## Show the "고민문제" sheet as a grouped bar chart.
# axes = FALSE suppresses the default axes; the y axis is added by hand below.
bp2 <- barplot(
  as.matrix(sheet2),
  main = "고민문제",
  beside = TRUE,
  axes = FALSE,
  ylab = "비율%",
  xlab = "항목",
  cex.names = 1.0,
  ylim = c(0, 40),
  col = c("skyblue", "plum"),
  border = "yellow",
  las = 2,
  names.arg = c(
    "외모", "가정환경", "용돈", "성적", "직업",
    "친구", "이성교재", "흡연", "기타", "고민없음"
  )
)

# axis() places its ticks with `at`; `ylim` is not one of its arguments, so
# the tick positions 0, 10, ..., 40 are passed as `at`.
axis(2, at = seq(0, 40, 10), las = 2)

# Dashed horizontal guide lines every 5 units.
abline(h = seq(0, 40, 5), lty = 2)

# Take the names.arg labels from the column names of sheet2.
# NOTE(review): `names` shadows the name of base::names(); kept so any later
# code that refers to this variable keeps working.
names <- colnames(sheet2)

bp2 <- barplot(
  as.matrix(sheet2),
  main = "고민문제",
  beside = TRUE,
  axes = FALSE,
  ylab = "비율%",
  xlab = "항목",
  cex.names = 1.0,
  ylim = c(0, 40),
  col = c("skyblue", "plum"),
  border = "yellow",
  las = 2,
  names.arg = names
)

# Axis setup
# axis(side, ...): `side` selects where the axis is drawn
#   1: bottom, 2: left, 3: top, 4: right
# Tick positions go in `at`; `ylim` is not an axis() argument.
axis(2, at = seq(0, 40, 10), las = 2)

# Draw the drinking/smoking rates as a chart: Lee Dong-hoon (이동훈)

## Print the value labels and the legend
# NOTE(review): the original read `teen2`, which is not defined anywhere in
# view. sheet2 is the data drawn into bp2 and used for the label heights, so
# it is used for the label text as well — confirm this was the intent.
pct <- as.matrix(sheet2)

# Put each value slightly (5%) above the top of its bar.
text(
  x = bp2,
  y = as.matrix(sheet2) * 1.05,
  labels = paste(pct, "%"),
  col = "navy",
  cex = 0.7
)

legend(
  "topright",
  c("남자", "여자"),
  cex = 0.9,
  fill = c("skyblue", "plum"),
  bg = "white"
)

## KoNLP: Korean natural-language-processing package.
## tm: English natural-language-processing package.

# Set the working directory.
# NOTE(review): machine-specific path; "steve.txt" below is resolved
# relative to it.
setwd("D:/20180730/r/20181030")

# Install the tm package (tm stands for "text mining"). Only missing
# packages are installed, and every package loaded below is covered.
for (pkg in c("tm", "wordcloud", "RColorBrewer")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(tm)
library(wordcloud)     # word cloud package
library(RColorBrewer)

# steve.txt must exist in the working directory; readLines() reads it in.
# Author's note: as already noticed while working with Java, reading files
# or streams usually happens line by line, as readLines() does here.
data1 <- readLines("steve.txt")
data1

# VectorSource() turns the vector into a source of documents, and VCorpus()
# bundles them into a corpus.
# TODO(author): look further into what exactly VCorpus does.
corp1 <- VCorpus(VectorSource(data1))

# Show the corpus summary.
corp1

# Display detailed information about the corpus.
inspect(corp1)

# Build the term-document matrix, the form the analysis works on.
tdm <- TermDocumentMatrix(corp1)

# Convert it to an ordinary matrix.
m <- as.matrix(tdm)

# Remove meaningless extra whitespace.
corp2 <- tm_map(corp1, stripWhitespace)

# Convert to lower case. A plain base function such as tolower() has to be
# wrapped in content_transformer() so tm_map() keeps returning text documents.
corp2 <- tm_map(corp2, content_transformer(tolower))

# Remove digits.
corp2 <- tm_map(corp2, removeNumbers)

# Remove punctuation (periods, commas, ...).
corp2 <- tm_map(corp2, removePunctuation)

# Convert to plain text documents.
corp2 <- tm_map(corp2, PlainTextDocument)

# Stop words: words without meaning for the analysis, plus a few extra words
# that should not be analysed.
sword2 <- c(stopwords("en"), "and", "but", "not")

# Remove the words that are excluded from the analysis.
corp2 <- tm_map(corp2, removeWords, sword2)

# Term-document matrix of the cleaned corpus.
tdm2 <- TermDocumentMatrix(corp2)
m2 <- as.matrix(tdm2)

# Number the document columns 1..n; the count is taken from the matrix
# instead of being hard-coded to 59.
colnames(m2) <- seq_len(ncol(m2))

# Frequency per term (rows) and total kept terms per document (columns).
freq1 <- sort(rowSums(m2), decreasing = TRUE)
freq2 <- sort(colSums(m2), decreasing = TRUE)
head(freq1, 20)
head(freq2, 20)

# Select only the terms mentioned at least twice in tdm2.
findFreqTerms(tdm2, 2)

# Print the terms in tdm2 whose association with "apple" is at least 0.5.
findAssocs(tdm2, "apple", 0.5)

# Draw the word cloud
palete <- brewer.pal(9, "Set1")
wordcloud(
  names(freq1),
  freq = freq1,
  scale = c(5, 1),
  min.freq = 2,
  colors = palete,
  random.order = FALSE,
  random.color = TRUE
)

# Draw the bar chart
# head(freq1, 20): data to plot, only the first 20 entries of freq1
# main: title of the bar chart
# las = 2: draw the x-axis labels vertically; 1 draws them horizontally
# ylim: range of the y axis
# col: bar colours
barplot(
  head(freq1, 20),
  main = "tm 패키지 test",
  las = 2,
  ylim = c(0, 20),
  col = rainbow(10),
  horiz = FALSE
)

##########################################
# abline: draws reference lines inside the bar chart
# h: horizontal lines
# seq(0, 20, 1): positions, one line per unit from 0 to 20
# lty: line type
#   0: blank   1: solid    2: dashed
#   3: dotted  4: dotdash
#   5: longdash
#   6: twodash
##########################################
abline(h = seq(0, 20, 1), lty = 3)

######################################
# Reading an Excel file (.xls)
######################################

## For 64-bit R
if (!requireNamespace("XLConnect", quietly = TRUE)) {
  install.packages("XLConnect")
}

## For 32-bit R
## install.packages("RODBC") ## ???

library(XLConnect)

# create = FALSE: these workbooks are only read, so a missing file should
# fail right here instead of silently producing a new, empty workbook whose
# sheets cannot be found afterwards.
xls <- loadWorkbook("ssec1804.xls", create = FALSE)
# NOTE(review): `all` shadows the name of base::all(); the name is kept so
# any later code that refers to it keeps working.
all <- readWorksheet(xls, sheet = "1.전체")

xls <- loadWorkbook("청소년통계.xls", create = FALSE)
sheet2 <- readWorksheet(xls, sheet = "고민문제")
sheet3 <- readWorksheet(xls, sheet = "음주흡연율")

## Show the "고민문제" sheet as a grouped bar chart.
# axes = FALSE suppresses the default axes; the y axis is added by hand below.
bp2 <- barplot(
  as.matrix(sheet2),
  main = "고민문제",
  beside = TRUE,
  axes = FALSE,
  ylab = "비율%",
  xlab = "항목",
  cex.names = 1.0,
  ylim = c(0, 40),
  col = c("skyblue", "plum"),
  border = "yellow",
  las = 2,
  names.arg = c(
    "외모", "가정환경", "용돈", "성적", "직업",
    "친구", "이성교재", "흡연", "기타", "고민없음"
  )
)

# axis() places its ticks with `at`; `ylim` is not one of its arguments, so
# the tick positions 0, 10, ..., 40 are passed as `at`.
axis(2, at = seq(0, 40, 10), las = 2)

# Dashed horizontal guide lines every 5 units.
abline(h = seq(0, 40, 5), lty = 2)

# Take the names.arg labels from the column names of sheet2.
# NOTE(review): `names` shadows the name of base::names(); kept so any later
# code that refers to this variable keeps working.
names <- colnames(sheet2)

bp2 <- barplot(
  as.matrix(sheet2),
  main = "고민문제",
  beside = TRUE,
  axes = FALSE,
  ylab = "비율%",
  xlab = "항목",
  cex.names = 1.0,
  ylim = c(0, 40),
  col = c("skyblue", "plum"),
  border = "yellow",
  las = 2,
  names.arg = names
)

# Axis setup
# axis(side, ...): `side` selects where the axis is drawn
#   1: bottom, 2: left, 3: top, 4: right
# Tick positions go in `at`; `ylim` is not an axis() argument.
axis(2, at = seq(0, 40, 10), las = 2)

# Draw the drinking/smoking rates as a chart: Lee Dong-hoon (이동훈)

## Print the value labels and the legend
# NOTE(review): the original read `teen2`, which is not defined anywhere in
# view. sheet2 is the data drawn into bp2 and used for the label heights, so
# it is used for the label text as well — confirm this was the intent.
pct <- as.matrix(sheet2)

# Put each value slightly (5%) above the top of its bar.
text(
  x = bp2,
  y = as.matrix(sheet2) * 1.05,
  labels = paste(pct, "%"),
  col = "navy",
  cex = 0.7
)

legend(
  "topright",
  c("남자", "여자"),
  cex = 0.9,
  fill = c("skyblue", "plum"),
  bg = "white"
)

저작자표시

'일상 기술노트 > R' 카테고리의 다른 글

R 지도 (0)	2018.10.31

직딩이군

barplot...?

'일상 기술노트 > R' 카테고리의 다른 글

티스토리툴바

barplot...?

'일상 기술노트 > R' 카테고리의 다른 글

관련글

티스토리툴바