## 제주도 여행코스를 검색하여 분석후 그래프로 표시하기
#
# Make the project folder the working directory; every relative path used in
# this script is resolved against it.
# NOTE(review): this absolute path is machine-specific - adjust it before
# running the script anywhere else.
setwd("C:\\Users\\user\\Desktop\\R까기")
getwd()
library(KoNLP)
library(wordcloud)
library(RColorBrewer)
# NOTE(review): presumably selects the Sejong dictionary for KoNLP - confirm
# against the KoNLP documentation.
useSejongDic()
mergeUserDic(data.frame("신비의도로", "ncn")) # add a word to the user dictionary
mergeUserDic(data.frame("주상절리", "ncn")) # add a word to the user dictionary
# 1. Load the raw text; readLines() yields one character element per line.
txt <- readLines(con = "data/Part_1/LEVEL_1/jeju.txt")
head(txt)
typeof(txt)
# 2. Run the noun extractor over every line.
txt_nouns <- sapply(X = txt, FUN = extractNoun, USE.NAMES = FALSE)
typeof(txt_nouns)
head(txt_nouns)
# 3. Flatten the per-line results so they can be filtered in one pass.
txt_nouns_unlist <- unlist(txt_nouns)
# 4. Keep only entries that are at least two characters long.
#    which() drops NA comparisons, matching what Filter() would do.
place <- txt_nouns_unlist[which(nchar(txt_nouns_unlist) >= 2)]
typeof(place)
head(place, 60)
# 5. Remove unwanted words.
#
# Every pattern is deleted wherever it occurs inside a token (substring match,
# not whole-word match), so removing e.g. "제주" also shortens longer tokens
# that contain it.
# NOTE(review): fragments left over from such partial removals are not
# re-checked against the two-character minimum - confirm this is intended.
#
# Patterns are applied strictly in the order listed, because deleting one
# pattern changes what the following patterns can match.
stop_patterns <- c(
  # Long literals go first: they contain digits or shorter stop words
  # ("할인") and could never match once those have been stripped.
  "6월4일부터", "모바일할인쿠폰을", "모바일쿠폰을",
  "무난", "전국", "렌트카",
  "\\d+", # any remaining run of digits
  "입장료", "관광지", "대략적", "어디", "여행", "숙소", "도움", "연휴",
  "할인", "없구요", "하시", "되버려서", "가격", "질문", "일정", "예약",
  "제주", "공항", "해안", "이용", "경우", "전망", "코스", "시간", "추천",
  "일출", "드라이브", "도착", "사진", "가능", "박물관", "바다", "경유",
  "소요", "하루", "하게", "녹차", "위치", "출발", "다양", "랜드"
)
for (stop_pattern in stop_patterns) {
  place <- gsub(stop_pattern, "", place)
}
# 6. Write the cleaned words to disk, one word per line.
write(x = unlist(place), file = "jeju_2.txt")
# 7. Load the file back in as a table (typeof() reports "list").
rev <- read.table(file = "jeju_2.txt")
typeof(rev)
nrow(rev)
# 8. Tabulate how often each word occurs and keep the result in wordcount.
wordcount <- table(rev)
# Show the 30 most frequently mentioned words, most frequent first.
head(x = sort(x = wordcount, decreasing = TRUE), n = 30)
# 9. Pie chart of the 10 most frequent words.
a <- head(sort(wordcount, decreasing = TRUE), 10)
windows() # open a Windows graphics device; the plots below are drawn on it
pie(a)
savePlot("jeju_2.jpg", type = "jpg") # save the current plot
# Result: the chart is saved as jeju_2.jpg.
# 10. Redraw the pie chart with a rainbow palette.
color <- rainbow(10)
pie(a, col = color, radius = 1)
savePlot("jeju_3.jpg", type = "jpg") # save the current plot
# Result: the chart is saved as jeju_3.jpg.
# 11. Add percentage labels and punch a hole in the middle (donut chart).
# Share of each word among the ten shown, rounded to one decimal place.
pct <- round(a / sum(a) * 100, 1)
names(a)
typeof(a)
# Label = word, line break, percentage.
lab <- paste(names(a), "\n", pct, "%")
lab
pie(a, col = color, radius = 1, labels = lab, main = "제주도 추천 코스")
# Draw the next plot on top of the current one instead of on a fresh page.
par(new = TRUE)
# A smaller, borderless white pie covers the centre of the first one.
pie(a, radius = 0.6, labels = NA, border = NA, col = "white")
savePlot("jeju_4.jpg", type = "jpg") # save the current plot
# Result: the chart is saved as jeju_4.jpg.
# 12. Bar chart of the 10 most frequent words.
bar <- head(sort(wordcount, decreasing = TRUE), 10)
# space: gap between bars, ylim: y-axis range,
# cex.names: size of the bar name labels, las: axis label orientation
barplot(
  bar,
  main = "제주도 추천 코스 TOP 10", col = color, space = 0.8,
  ylim = c(0, 25), cex.names = 0.7, las = 1
)
savePlot("jeju_5.jpg", type = "jpg") # save the current plot
# Result: the chart is saved as jeju_5.jpg.
# 13. Bar chart annotated with counts and percentages.
# barplot() draws the chart and returns the bar midpoints, which are used
# below as the x positions of the text labels. The chart is drawn once only.
bp <- barplot(
  bar,
  main = "제주도 추천 코스 TOP 10", col = color, space = 0.8,
  ylim = c(0, 25), cex.names = 0.7, las = 1
)
# Share of each word among the ten shown, rounded to one decimal place.
pct_bar <- round(bar / sum(bar) * 100, 1)
pct_bar
# Percentage just above the top of each bar ...
text(
  x = bp, y = bar * 1.05,
  labels = paste("(", pct_bar, "%", ")"), col = "black", cex = 0.7
)
# ... and the raw count just below it.
text(
  x = bp, y = bar * 0.95,
  labels = paste(bar, "건"), col = "black", cex = 0.7
)
savePlot("jeju_6.jpg", type = "jpg") # save the current plot
# Result: the chart is saved as jeju_6.jpg.
# Horizontal version of the annotated bar chart. With horiz = TRUE the values
# run along the x axis, so the midpoints returned by barplot() are used as
# the y positions of the labels.
bp_h <- barplot(
  bar,
  main = "제주도 추천 코스 TOP 10", col = color, space = 0.8,
  xlim = c(0, 25), cex.names = 0.7, las = 1, horiz = TRUE
)
# Percentage to the right of each bar end ...
text(
  x = bar * 1.15, y = bp_h,
  labels = paste("(", pct_bar, "%", ")"), col = "black", cex = 0.7
)
# ... and the raw count just inside it.
text(
  x = bar * 0.9, y = bp_h,
  labels = paste(bar, "건"), col = "black", cex = 0.7
)
savePlot("jeju_7.jpg", type = "jpg") # save the current plot
# Result: the chart is saved as jeju_7.jpg.