qiangh的个人博客分享 http://blog.sciencenet.cn/u/qiangh

博文

用SAS整理SNP calling数据

已有 4371 次阅读 2013-8-26 15:42 |系统分类:科研笔记| calling

随着电脑配置的提高,用单机版SAS运行重测序SNPcalling结果看来完全可以。以下代码是用来将SNPcalling 数据转化为Hapmap格式的代码,以便进一步的数据分析,谢谢青云兄百忙之中帮我写下这段代码。

filename fileref pipe "dir/b/s  F:Resqdatachr1*.xlsx";

data filename;

infile fileref truncover;

input filename $char100.;

n=scan(substr(compress(filename),length('F:\Resqdata\chr1\')+1),1,'_');

m=put(input(n,8.),z8.);

run;

proc sort data=filename;by m;run;

%macro import(filename,n);

proc import datafile="&filename" out=Acc&n dbms=excel replace;run;

data Acc&n;

length rs $20 Acc&n $8;

set Acc&n;

alt_base=substr(compress(alt_base),1,1);

ref_base=substr(compress(ref_base),1,1);

rs=compress(chr_name||chr_start);

if hom_het='het' then delete;

if ref_base='-' then ref_base='I';

Acc&n=ifc(alt_base='-','I',alt_base);

keep rs chr_start  ref_base Acc&n;

rename  chr_start=position  ref_base=refAcc&n ;

run;

%mend;

data _null_;

set filename end=last;

call execute('%import('||filename||','||n||')');

if last then call symput('n',n);

run;

%macro out(n);

data out;

retain rs allele chrom position strand assembly center protlsid assaylsid panel qccode Acc1-Acc&n;

array Acc{&n} $ Acc1-Acc&n;

array refAcc{&n} $ refAcc1-refAcc&n;

merge Acc1-Acc&n;

by position;

do i=1 to &n;

 if Acc{i}^='' then do;

 allele=compress(refAcc{i}||'/'||Acc{i});

 leave;

 end;

end;

do i=1 to &n;

 if Acc{i}^='' then Acc{i}=compress(Acc{i}||Acc{i});

 else do;

    do j=1 to &n;

      if refAcc{j}^='' then do;

   Acc{i}=compress(refAcc{j}||refAcc{j});

leave;

  end;

end;

 end;

end;

chrom=1;

strand='NA';assembly='NA';center='NA';protlsid='NA';assaylsid='NA';panel='NA';qccode='NA';

drop refAcc1-refAcc&n i j;

run;

%mend;

%out(&n);




http://blog.sciencenet.cn/blog-724080-719861.html


下一篇:Linux下解压缩文件

0

该博文允许注册用户评论 请点击登录 评论 (1 个评论)

数据加载中...

Archiver|手机版|科学网 ( 京ICP备07017567号-12 )

GMT+8, 2020-8-11 10:05

Powered by ScienceNet.cn

Copyright © 2007- 中国科学报社

返回顶部