博文

用R获取中国天气网实时天气信息

已有 2007 次阅读 2022-3-17 13:42 |系统分类:科研笔记

用R获取中国天气网实时天气信息。代码比较粗糙，可能有错误。

pcks<-c("httr", "rlist", "jsonlite","dplyr","magrittr","xml2","stringr","rio",
"readr","gsubfn","rrapply","rvest")
lapply(pcks, require, character.only = TRUE)

WD <- getwd() #获取工作目录
if (!is.null(WD))
setwd(WD)
date <-Sys.Date() # 获取系统日期
now <- Sys.time() # 获取系统时间
Jan1 <- strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
timestamp <- as.numeric(difftime(now, Jan1, unit="secs"))# 时间戳 unix timestamps
timestamps <- round(timestamp*1000) #取整，13位时间戳，四舍六入五成双
ts <- as.character(round(timestamp))

###------下载/导入站点ID-------
myheaders1 <- c('Host'='j.i8tq.com',
               'User-Agent'='Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0',
               'Accept'='*/*',
                'Accept-Language'= 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
               'Accept-Encoding'= 'gzip, deflate, br',
               'Connection'= 'keep-alive',
               'Referer'= 'http://www.weather.com.cn/',
               'Sec-Fetch-Dest'= 'script',
               'Sec-Fetch-Mode'= 'no-cors',
               'Sec-Fetch-Site'= 'cross-site')
url <- c('https://j.i8tq.com/weather2020/search/city.js')
download_html(url, paste('temp.txt'))# response is a js html file, UTF-8 object
library("tcltk")
button <- tkmessageBox(title='Message',
                       message='Delete the FIRST line of temp.txt
                       删除 temp.txt的第一行',
                       type='ok')
button <- tclvalue(button)
if(button == 'ok'){
#do something
}
dat <- read_json('temp.txt')
city <- rrapply(dat, how = "melt") %>%
filter(L4 == "AREAID") %>%
select(Sheng = L2, Shi = L3, AreadID = value)
city
export(city,paste('City_ID',".csv",sep=""))
file.remove('temp.txt')
###-----下载天气实时数据--------
tq_ID <-import("City_ID.csv", header = T) # read the stations
tq_ID <- tq_ID[,3] #只取第3列，编号
tq_ID <- data.frame(matrix(unlist(tq_ID), nrow=1, byrow=T))
tq_ID <- as.character(tq_ID)

mycookie <- paste('f_city=%E5%A4%A9%E6%B4%A5%7C101030100%7C')

myheaders <- c('Accept' ='*/*',
               'Accept-Encoding'='gzip, deflate',
               'Accept-Language'='zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
               'Connection'='keep-alive',
               'Host' ='d1.weather.com.cn',
               'Referer'='http://www.weather.com.cn/',
               'cookie' = mycookie)
i=1
n <- length(tq_ID)
tqs_inf <- data.frame()
for (i in 1:n){
url <- str_c('http://d1.weather.com.cn/sk_2d/',tq_ID[[i]],'.html?','_=', paste(round(timestamps)))
resp<- GET(url = url, add_headers(.headers = myheaders),encode="raw" )
HTML <- read_html(x= resp)
s <- html_text2(HTML)
s1 <- str_sub(s,12,-1) #sub string in {}
tq_inf <- fromJSON(s1)
tq_inf <- data.frame(matrix(lapply(tq_inf,as.character),nrow=1)) #将list转换为dataframe
if (is.null (tq_inf$X23)) {
    next } # 如果站点没有数值，进入下一个循环
tqs_inf <- bind_rows(tqs_inf,tq_inf) #变量的列数不等，智能合并
}
file_name <- paste("tq_real",date,".txt")
export(tqs_inf, file=file_name)

转载本文请联系原作者获取授权，同时请注明本文来自张兵科学网博客。
链接地址：https://blog.sciencenet.cn/blog-331295-1329835.html

上一篇：windows R 句法分析器里不能有多字节字符 bat 不能运行
下一篇：windows 定时任务 OnTime HOU 任务计划

收藏 IP: 202.113.99.*| 热度|

当前推荐数：0

该博文允许注册用户评论请点击登录评论 (0 个评论)

数据加载中...

返回顶部

张兵

扫一扫，分享此博文

tony8310的个人博客分享 http://blog.sciencenet.cn/u/tony8310

博文

用R获取中国天气网实时天气信息

当前推荐数：0

该博文允许注册用户评论请点击登录评论 (0 个评论)

张兵

全部作者的精选博文

全部作者的其他最新博文

全部精选博文导读

tony8310的个人博客分享 http://blog.sciencenet.cn/u/tony8310

博文

用R获取中国天气网实时天气信息

当前推荐数：0

该博文允许注册用户评论 请点击登录 评论 (0 个评论)

张兵

全部作者的精选博文

全部作者的其他最新博文

全部精选博文导读

该博文允许注册用户评论请点击登录评论 (0 个评论)