Fetching real-time weather observations from the China Weather website (www.weather.com.cn) with R. The code is rough and may contain errors.
pcks<-c("httr", "rlist", "jsonlite","dplyr","magrittr","xml2","stringr","rio",
"readr","gsubfn","rrapply","rvest")
lapply(pcks, require, character.only = TRUE)
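# A sketch (not in the original): install any listed packages that were missing,
# then load them; new_pcks is a helper name introduced here.
new_pcks <- pcks[!pcks %in% rownames(installed.packages())]
if (length(new_pcks) > 0) {
  install.packages(new_pcks)
  lapply(new_pcks, require, character.only = TRUE)
}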
WD <- getwd() # get the current working directory
if (!is.null(WD))
  setwd(WD)
date <- Sys.Date() # system date
now <- Sys.time()  # system time
Jan1 <- strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
timestamp <- as.numeric(difftime(now, Jan1, units = "secs")) # Unix timestamp in seconds
timestamps <- round(timestamp * 1000) # rounded to a 13-digit millisecond timestamp (banker's rounding)
ts <- as.character(round(timestamp))  # timestamp in seconds, as a character string
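# For reference (a sketch, not in the original): as.numeric() on a POSIXct value
# already gives seconds since 1970-01-01 UTC, so a 13-digit millisecond stamp can
# be built directly. The original measures from the local-time 1970-01-01, so the
# two values can differ by the time-zone offset; either works as a cache-busting
# query parameter. timestamps_alt is a helper name introduced here.
timestamps_alt <- round(as.numeric(Sys.time()) * 1000)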
###------ Download / import the station IDs -------
# request headers for the station-list endpoint (defined for reference; not passed to download_html() below)
myheaders1 <- c('Host'='j.i8tq.com',
'User-Agent'='Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0',
'Accept'='*/*',
'Accept-Language'= 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
'Accept-Encoding'= 'gzip, deflate, br',
'Connection'= 'keep-alive',
'Referer'= 'http://www.weather.com.cn/',
'Sec-Fetch-Dest'= 'script',
'Sec-Fetch-Mode'= 'no-cors',
'Sec-Fetch-Site'= 'cross-site')
url <- 'https://j.i8tq.com/weather2020/search/city.js'
download_html(url, 'temp.txt') # the response is a JavaScript file (UTF-8) that wraps the city JSON
library("tcltk")
# pause and ask the user to remove the leading JavaScript line by hand
button <- tkmessageBox(title = 'Message',
                       message = 'Delete the FIRST line of temp.txt, then click OK',
                       type = 'ok')
button <- tclvalue(button)
if (button == 'ok') {
  # nothing to do here; the dialog only pauses the script until the file has been edited
}
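# An alternative sketch (not in the original): strip the leading JavaScript
# assignment from temp.txt programmatically instead of editing it by hand.
# It assumes the first line is the "var ... =" prefix and only rewrites the
# file while that prefix is still present, so it is harmless after a manual edit.
js_lines <- read_lines('temp.txt')
if (length(js_lines) > 1 && grepl('^\\s*var\\s', js_lines[1])) {
  write_lines(js_lines[-1], 'temp.txt')
}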
dat <- read_json('temp.txt') # parse the remaining JSON
city <- rrapply(dat, how = "melt") %>%         # flatten the nested province/city/station list
  filter(L4 == "AREAID") %>%
  select(Sheng = L2, Shi = L3, AreaID = value)  # province, city, station ID
city
export(city, 'City_ID.csv')
file.remove('temp.txt')
###----- Download the real-time weather data --------
tq_ID <- import("City_ID.csv", header = TRUE) # read the station list
tq_ID <- as.character(tq_ID[[3]])             # keep only column 3, the station IDs
mycookie <- 'f_city=%E5%A4%A9%E6%B4%A5%7C101030100%7C' # URL-encoded "f_city=天津|101030100|" (Tianjin, station 101030100)
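# For reference (a sketch, not in the original): the cookie value can be rebuilt
# for any default city with utils::URLencode(); enc2utf8() keeps the encoding
# consistent on non-UTF-8 locales. mycookie_check is a helper name introduced here.
mycookie_check <- paste0('f_city=', URLencode(enc2utf8('天津|101030100|'), reserved = TRUE))
identical(mycookie, mycookie_check) # should be TRUE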
myheaders <- c('Accept' ='*/*',
'Accept-Encoding'='gzip, deflate',
'Accept-Language'='zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
'Connection'='keep-alive',
'Host' ='d1.weather.com.cn',
'Referer'='http://www.weather.com.cn/',
'cookie' = mycookie)
n <- length(tq_ID)      # number of stations
tqs_inf <- data.frame() # accumulator for the results of all stations
for (i in 1:n) {
  url <- str_c('http://d1.weather.com.cn/sk_2d/', tq_ID[[i]], '.html?', '_=', timestamps)
  resp <- GET(url = url, add_headers(.headers = myheaders))
  HTML <- read_html(x = resp)
  s <- html_text2(HTML)
  s1 <- str_sub(s, 12, -1) # drop the leading JavaScript assignment (first 11 characters), keeping the {...} JSON
  tq_inf <- fromJSON(s1)
  tq_inf <- data.frame(matrix(lapply(tq_inf, as.character), nrow = 1)) # convert the list to a one-row data frame
  if (is.null(tq_inf$X23)) {
    next } # if the station returns no data, skip to the next one
  tqs_inf <- bind_rows(tqs_inf, tq_inf) # columns may differ between stations; bind_rows merges them by name
}
file_name <- paste0("tq_real_", date, ".txt")
export(tqs_inf, file = file_name)
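# A quick check (a sketch, not in the original): re-import the exported file and
# look at the first few rows.
head(import(file_name))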