|||
背景:中药的疗效是有目共睹的;中药中的化合物种类繁多,这提示我们中药可以实现多靶标治疗。
从中药中寻找活性化合物,是新药发现的一种手段。
tcmsp网站,http://tcmspnw.com/login_clearSession
作为一个开源网站,可以提供化合物的单个下载。
据说今后也会提供下载包,批量下载的。
现在我和以前的师姐都等不及了,所以就自己写了脚本,把这些分子从网站上下载下来。
方法:downtcm.pl
需要用到files.txt
++++++++++++++++++++++++++++++++
#!/usr/bin/perl -w
use strict;
use LWP;
use HTTP::Request::Common qw ( POST GET);
#FUNCTION: to download all the molecules from the tcmsp website
#author: chenzhaoqiang 744891290@qq.com
#need input file:files.txt
#version1
# Log in to the TCMSP site and fetch one known molecule as a smoke test.
# The user agent is created with a cookie jar so that cookies set by the
# login reply are kept and sent with the later download requests.
my $browser=LWP::UserAgent->new(cookie_jar=>{});
# NOTE: the original called $browser->requests_redirectable in void context.
# That call only reads the list of redirectable methods and changes nothing,
# so the no-op statement was removed; redirect handling is unchanged.
$browser->agent("MyApp/0.1");
my $username='tcmspuser';
my $password='tcmspuser';
my $req=POST('http://tcmspnw.com/login_login',['user.userComments'=>$username,'user.userPassword'=>$password]);
my $res=$browser->request($req);
# A redirect counts as an acceptable login reply because a redirect that
# answers a POST is not followed by default. Any other failure is reported,
# but the script carries on exactly as it did before.
unless ($res->is_success || $res->is_redirect) {
    warn "login request failed: ", $res->status_line, "\n";
}
# Smoke test: download a single known molecule and dump it to STDOUT.
$req=GET('http://tcmspnw.com/download?moleculeDownload=zhx_yzhh_Molecule219');
$res=$browser->request($req);
if ($res->is_success) {
    print $res->content;
}
else {
    print $res->status_line, "\n";
}
#test success
# Batch download of molecule files.
# For every base name listed (one per line) in files.txt, request
# <base>_Molecule<id> for ids 0..999 and also the bare <base>_Molecule URL,
# and save each hit as F:/downtcm/<base><id>.mol2.
my $baseurl='http://tcmspnw.com/download?moleculeDownload=';
# The empty id (bare "_Molecule" URL) is added exactly once here. The original
# pushed it inside the read loop, so every further line of files.txt added one
# more duplicate request for the same URL.
my @ids=(0..999,"");
my $listfile="f:/chenzhaoqiang/files.txt";
open(my $list_fh,'<',$listfile) or die "cannot open $listfile: $!\n";
while(my $filebasename=<$list_fh>)
{
    chomp $filebasename;
    # A blank line would only produce meaningless "_Molecule<id>" URLs.
    next if $filebasename eq '';
    foreach my $id (@ids)
    {
        my $filename=$filebasename.$id.'.mol2';
        my $totalurl=$baseurl.$filebasename.'_Molecule'.$id;
        print $totalurl,"\n";
        my $reply=$browser->request(GET($totalurl));
        # Save only real hits: the request must have succeeded and the body
        # must not be the site's "not found" page. Previously an HTTP error
        # body without that marker was written out as a .mol2 file.
        if ($reply->is_success && $reply->content!~/Sorry\,File not found /)
        {
            my $outfile="F:/downtcm/$filename";
            open(my $out_fh,'>',$outfile) or die "cannot write $outfile: $!\n";
            print {$out_fh} $reply->content;
            # Buffered write errors only surface at close, so check it.
            close($out_fh) or die "cannot close $outfile: $!\n";
        }
        else
        {
            print $reply->status_line,"\n";
        }
    }
}
close($list_fh);
# NOTE(review): everything between =pod and =cut below is skipped by perl and
# never runs. It appears to be an earlier draft of the download loop that only
# prints the URLs for one hard-coded file class -- presumably kept for reference.
=pod
#http://tcmspnw.com/login_login
#open FH,"F:/files.txt";
my $baseurl='http://tcmspnw.com/download?moleculeDownload=';
my $fileclass='zhx_yzhh_Molecule';
my $fileid='219';
my @ids=(1..999);
foreach my $id(@ids)
{
#print "$id\n";
my $totalurl=$baseurl.$fileclass.$id;
print $totalurl,"\n";
# my $content=getstore($totalurl,$id);
# print $content,"n";
}
=cut
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-12-22 15:55
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社