#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
    if $running_under_some_shell;
                        # this emulates #! processing on NIH machines.
                        # (remove #! line above if indigestible)

# Process any leading FOO=bar command-line switches: each one assigns the
# text after "=" to the package variable $FOO and is removed from @ARGV.
eval '$'.$1.'$2;' while $ARGV[0] =~ /^([A-Za-z_0-9]+=)(.*)/ && shift;

######################################################################
#
#  pdb2cif.pl
#  produced from pdb2cif.m4
#  version 2.3.7 07 Mar 1999
#
#  a m4 macro program which produces pdb2cif.pl, pdb2cif.awk, pdb2cif.oawk
#
#  Scripts to filter a PDB entry and produce a CIF file.
#
#  Phil Bourne (bourne@sdsc.edu)
#
#  adapted to  6 Oct 95 cifdic.m95 0.7.28
#  and later to 1 Jan 97 cif_mm.dic 0.9.01
#
#  by
#    Herbert J. Bernstein
#    Bernstein+Sons, P.O. Box 177, Bellport, NY 11713
#    phone: 1-516-286-1339, email: yaya@bernstein-plus-sons.com
#  and
#    Frances C. Bernstein
#    Bernstein+Sons, P.O. Box 177, Bellport, NY 11713
#    phone: 1-516-286-1339, email: fcb@bernstein-plus-sons.com
#
#  This work was supported in part by IUCr (for HJB), US NSF, PHS, NIH,
#  NCRR, NIGMS, NLM and DOE (for FCB prior to 1998), and a US NSF grant
#  (the grant number is missing from this copy of the header).
#  See H. Bernstein, F. Bernstein, P. E. Bourne "CIF Applications. VIII.
#  pdb2cif: Translating PDB Entries into mmCIF Format", J. Appl. Cryst.,
#  31, pp. 282-295, 1998.
#
#**************************************************************************
#  THE CONVERSION FROM PDB FORMAT TO CIF FORMAT IS COMPLEX
#                 ******* USE WITH CAUTION *******
#            COMMENTS AND SUGGESTIONS APPRECIATED
#  If you like the basic approach, thank Phil Bourne.  He did
#  the real work of creating pdb2cif.  If you have problems with
#  the adaptation to cif_mm.dic, tell yaya@bernstein-plus-sons.com
#**************************************************************************
#
#  This version available via http from:
#
#    http://www.bernstein-plus-sons.com/software/pdb2cif
#    http://www.iucr.org/iucr-top/cif/software/pdb2cif
#      and the mirror sites of the IUCr
#    http://www.sdsc.edu/pb/pdb2cif/pdb2cif
#    http://ndbserver.rutgers.edu/NDB/mmcif/software/pdb2cif
#      and the mirror sites of the NDB
#
#  See the file README for instructions on use and installation
#
#  Version History:
#    See the file CHANGES
#
######################################################################

# Every substr/index/array subscript below is written 1-based, so the
# array base has to be 1 for the rest of this file.
# NOTE: Perl 5.30 and later refuse to assign a non-zero value to $[.
$[ = 1;                 # set array base to 1

$comma = ',';
$lcaz  = 'abcdefghijklmnopqrstuvwxyz';
$UCAZ  = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

$version      = '2.3.7';
$version_date = '07 Mar 1999';

# The "@" of the contact address is kept in its own variable and spliced
# into the banner text below.
$my_at = '@';

#
# Write the identification banner that heads every converted entry.
# Each element is emitted verbatim, in order.
#
{
    my $banner_rule  = "###################################################\n";
    my $banner_blank = "# #\n";
    my @banner_text  = (
        "\n",
        $banner_rule,
        $banner_blank,
        "# Converted from PDB format to CIF format by #\n",
        sprintf("# pdb2cif version %-15s %11s #\n", $version, $version_date),
        "# by #\n",
        "# P.E. Bourne, H.J. Bernstein and F.C. Bernstein #\n",
        $banner_blank,
        "# http://www.bernstein-plus-sons.com/software #\n",
        "# /pdb2cif #\n",
        "# *** See the remarks at the end of this *** #\n",
        "# *** file for information on conversion *** #\n",
        "# *** of this entry and on the program *** #\n",
        "# *** pdb2cif *** #\n",
        "# Please report problems to: #\n",
        '# pdb2cif' . $my_at . "bernstein-plus-sons.com #\n",
        $banner_rule . "\n\n",
    );
    foreach my $banner_line (@banner_text) {
        printf '%s', $banner_line;
    }
}

#
# Set starting variables
#
# The following flag is used to produce a more complete CIF entry,
# i.e. data items are given, but with the value "?".
# If you desire only the minimum set of data items comment out the
# following one line:
# verbose = "yes"
#
# The following flag controls conversion of text fields using
# the type-setting codes used in some PDB entries
# convtext = "yes"
#
# The following flag controls conversion of author and editor
# names, "yes" to always convert according to the 1992 format
# description, "conditional" to be controlled by convtext
$auth_convtext = 'yes';
#
# uncomment the next line if convtext control of typesetting desired
# auth_convtext = "conditional"
#
# The following flag controls the distribution of entity_seq_num
# to all atom site lines, uncomment if you do _not_ want
# this distribution done, but want denser atom lists
# dense_list = "yes"
#
# The following flag controls the handling of Junior, Senior, etc.
# As of this writing, the mmCIF convention is to keep dynastic
# indicators with the last name
#
$junior_on_last = 'yes';
#
# The following flag controls the printing of TER records.
# The possible values are "yes" to print them, "no" to
# suppress them, or "comment" to print them as comments
#
$print_ter = 'comment';
#
# each of these variables may be reset from within the text
# with a line of the form
#
#    #define variable value
#
# e.g.   #define verbose yes
# or     #define convtext no
#
# In addition, to allow control of translation of author names
# the variable "name" may be defined multiple times
# with two values, in the form
#
#    #define name PDB_form name_value
#
# where the PDB_form is the form of the name expected in the PDB
# and name_value is the form to be used by this program.  All blanks
# in either form must be replaced by "_".
# For example, you can give the following
#
#    #define name E.F.MEYER_JUNIOR Meyer Junior,_E.F.
#
# If the same name is defined multiple times, only the last
# translation given will be used.  The PDB_form is not case-sensitive,
# but the name_value is.
#
# Normally, the compliance_level for a PDB dataset should be
# obtained from REMARK 4.  However, to facilitate processing
# of pseudo-pdb datasets from non-PDB sources, the compliance
# level may be set by
#
#    #define compliance_level level
#
# where 2.0 is the only meaningful case of level at this time
#
# If REMARK 4 contains a compliance level, that will apply in
# parsing from that point onwards

#
# Counters and flags that start at zero.
#   cit_flag    - 1 is the primary citation
#   dbref_flag  - count of dbref records
#   seqadv_flag - count of seqadv records
#
$aniso_flag = $atom_res_flag = $audit_flag = $cispep_flag = $cit_flag = 0;
$connect_flag = $conect_flag_2 = $dbref_flag = $end_flag = 0;
$entity_flag = $entity_seq_num_flag = $flush_ref = $foot_flag = 0;
$hydbnd_flag = $keywrd_flag = $link_flag = $modres_flag = 0;
$mtrix_flag = $nmr_flag = $origx_flag = 0;
$num_non_poly_ents = $num_poly_ents = $num_res_name = $num_res_pair = 0;
$remark_flag = $remark_header_flag = $res_flag = $res_res_flag = 0;
$scale_flag = $seqadv_flag = $sheet_flag = 0;

#
# Counters and flags that start at one (for instance, atom_flag is the
# slot the next ATOM/HETATM/TER record is stored in).
#
$atom_alt_flag = $atom_flag = $atom_flag_1 = $atom_flag_2 = 1;
$author_flag = $compnd_flag = $conect_flag = $conect_id = 1;
$entity_mon_flag = $ftnote_flag_old = $formul_flag = 1;
$helix_flag = $het_flag = $hetnam_flag = $hetsyn_flag = 1;
$jrnl_flag = $mon_flag = $nonp_flag = $revdat_flag = 1;
$seqres_flag = $sheet_flag_2 = 1;

#
# Text-valued defaults
#
$bcid          = '*';   # character to use for blank chain id
$head_PDB_code = '.';
$model_flag    = '.';
$model_flags   = 'no';
$record_number = ' ';

$compliance_level = 0.0;

#
# Heads of the entity chains: the entry keyed by a single blank points at
# the first entity id, and an empty string marks the end of a chain.
#
$ent_non_poly_point{' '} = '';
$ent_non_poly_num{' '}   = 0;
$ent_poly_point{' '}     = '';
$ent_poly_num{' '}       = 0;

#
# Clear the per-footnote flags for footnote numbers 1 through 999.
#
$X = 1;
while ($X <= 999) {
    $ftnote_flag{$X} = 0;
    ++$X;
}
#
# Remaining record counters and state flags
#
$sigatm_flag  = 0;
$siguij_flag  = 0;
$site_flag    = 1;
$sltbrg_flag  = 0;
$s_o_flag     = 0;
$ss_flag      = 1;      # tracks HELIX, TURN and SHEET
$ss_flag_2    = 1;
$ssbond_flag  = 1;
$ter_flag     = 0;
$turn_flag    = 1;
$turn_flag_2  = 1;
$tvect_flag   = 0;
$vol_flag     = 0;
$warning_flag = 0;      # count of warnings in warning list
$xlat_flag    = 0;
$xlat_save    = 0;
$flag         = 0;      # compared with '0' by the record loop before it
                        # prints the ';' that closes a free-text field
$previous_keyword  = ' ';   # keyword of the previous record
$remark_number     = 0;
$remark_number_old = 0;
$all_remarks       = 0;

#
# set up connect types
#
# None of the word lists in this section ends in a blank, so qw() yields
# exactly the fields the former split(' ', ..., 9999) calls produced.
# The counts are taken from the arrays themselves, so nothing below
# depends on the array base.
#
@connect_types = qw(. . . . hydrog hydrog saltbr hydrog hydrog saltbr);
$num_ctypes    = @connect_types;

#
# set up conversion strings for residues
#
$numl  = '0123456789';
$charl = $lcaz;
$charu = $UCAZ;
$chars = "+_*/!#\$,.;:?|{}()";
$charx = ($charl . $charu . $numl . $chars);

#
# Define date format conversion arrays
#
#   mmm2mm[month_name]   = month_ordinal  ('01' .. '12'); month names are
#                          accepted in upper case and capitalised form
#   yyyy[2_digit_year]   = 4_digit_year
#
{
    my $month_ordinal = 0;
    foreach my $month_name (qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC)) {
        my $two_digit_month = sprintf('%02d', ++$month_ordinal);
        $mmm2mm{$month_name}              = $two_digit_month;
        $mmm2mm{ucfirst(lc($month_name))} = $two_digit_month;
    }
}

# Two-digit years 70-99 are taken as 19yy and 00-69 as 20yy.
# (The earlier code added 1000 instead of 100 and produced 29yy.)
for ($yy = 0; $yy < 100; ++$yy) {
    $yyyy{$yy + 0} = ($yy < 70) ? $yy + 2000 : $yy + 1900;
}

#
# Define lists of amino acids and nucleic acids
#
@aa_list = qw(
    ABU ACD ALA ALB ALI ARG ARO ASN ASP ASX
    BAS CYS GLN GLU GLX GLY HIS HYP ILE LEU
    LYS MET PCA PHE PRO SER THR TRP TYR VAL
);
$num_aa = @aa_list;

@na_list = qw(A +A C +C G +G I +I T +T U +U);
$num_na  = @na_list;

#
# Formulae and Molecular Weights for Standard Residues from
# the 1992 PDB format Description
#   AARESES has the Amino Acids and Miscellaneous Residues
#   NARESES has the Nucleotides
#
# Each entry is  Name;Code;Formula;Mol. Wt.
#
# Corrections relative to the earlier table:
#   ASX - formula now includes O3.5 (the mean of ASP and ASN, consistent
#         with the listed weight 132.61 and with the GLX entry)
#   FOR - formic acid (C1 H2 O2) weighs 46.03, not 40.03
#
@AARESES_list = (
    'Alanine;ALA;C3 H7 N1 O2;89.09',
    'Arginine;ARG;C6 H14 N4 O2;174.20',
    'Asparagine;ASN;C4 H8 N2 O3;132.12',
    'Aspartic acid;ASP;C4 H7 N1 O4;133.10',
    'ASP/ASN ambiguous;ASX;C4 H7.5 N1.5 O3.5;132.61',
    'Cysteine;CYS;C3 H7 N1 O2 S1;121.15',
    'Glutamine;GLN;C5 H10 N2 O3;146.15',
    'Glutamic acid;GLU;C5 H9 N1 O4;147.13',
    'GLU/GLN ambiguous;GLX;C5 H9.5 N1.5 O3.5;146.64',
    'Glycine;GLY;C2 H5 N1 O2;75.07',
    'Histidine;HIS;C6 H9 N3 O2;155.16',
    'Isoleucine;ILE;C6 H13 N1 O2;131.17',
    'Leucine;LEU;C6 H13 N1 O2;131.17',
    'Lysine;LYS;C6 H14 N2 O2;146.19',
    'Methionine;MET;C5 H11 N1 O2 S1;149.21',
    'Phenylalanine;PHE;C9 H11 N1 O2;165.19',
    'Proline;PRO;C5 H9 N1 O2;115.13',
    'Serine;SER;C3 H7 N1 O3;105.09',
    'Threonine;THR;C4 H9 N1 O3;119.12',
    'Tryptophan;TRP;C11 H12 N2 O2;204.23',
    'Tyrosine;TYR;C9 H11 N1 O3;181.19',
    'Valine;VAL;C5 H11 N1 O2;117.15',
    'Undetermined;UNK;C5 H6 N1 O3;128.16',
    'Acetic Acid;ACE;C2 H4 O2;60.05',
    'Formic Acid;FOR;C1 H2 O2;46.03',
    'Water;HOH;H2 O1;18.015',
);
$num_AARESES = @AARESES_list;

# The nucleotide table used to end in a "|" separator; with a positive
# split limit that produced a trailing empty entry, which was then
# registered as a residue with an undefined id.  The entries are listed
# individually here, so no such phantom residue is created.
@NARESES_list = (
    'Adenosine; A;C10 H14 N5 O7 P1;347.22',
    'Modified Adenosine; +A;.;347.22',
    '1-Methyladenosine;1MA;C11 H16 N5 O7 P1;361.25',
    'Cytidine; C;C9 H14 N3 O8 P1;323.20',
    'Modified Cytidine; +C;.;323.20',
    '5-Methylcytidine;5MC;C10 H16 N3 O8 P1;337.23',
    "2'-O-Methylcytidine;OMC;C10 H17 N3 O8 P1;338.23",
    'Guanosine; G;C10 H14 N5 O8 P1;363.22',
    'Modified Guanosine; +G;.;363.22',
    '1-Methylguanosine;1MG;C11 H16 N5 O8 P1;377.25',
    'N2-Methylguanosine;2MG;C11 H16 N5 O8 P1;377.25',
    'N2-Dimethylguanosine;M2G;C12 H18 N5 O8 P1;391.28',
    '7-Methylguanosine;7MG;C11 H10 N5 O8 P1;377.25',
    "2'-O-Methylguanosine;OMG;C11 H16 N5 O8 P1;377.25",
    'Wybutosine; YG;C21 H26 N6 O11 P1;587.48',
    'Inosine; I;C10 H13 N4 O8 P1;348.21',
    'Modified Inosine; +I;.;348.21',
    'Thymidine; T;C10 H15 N2 O8 P1;322.21',
    'Modified Thymidine; +T;.;322.21',
    'Uridine; U;C9 H13 N2 O9 P1;324.18',
    'Modified Uridine; +U;.;324.18',
    'Dihydrouridine;H2U;C9 H15 N2 O9 P1;326.20',
    'Ribosylthymidine;5MU;C10 H16 N2 O10 P1;355.22',
    'Pseudouridine;PSU;C9 H13 N2 O9 P1;324.18',
);
$num_NARESES = @NARESES_list;

#
# Register every standard residue:
#   res_id[n]        = residue code (amino acids first, then nucleotides)
#   res_count[code]  = 0
#   res_name[code]   = full name
#   res_formul[code] = formula
#
# The record loop looks residues up by the three-character residue-name
# field of ATOM records, so each code is right-justified to three columns
# before it is used as a key; shorter keys could never match.
#
$naa = 1;
foreach my $aa_entry (@AARESES_list) {
    @naa_split = split(/;/, $aa_entry, 9999);
    $nxx       = @naa_split;
    my ($aa_name, $aa_code, $aa_formula) = @naa_split;
    $aa_code = sprintf('%3s', $aa_code);
    $res_id{$naa}         = $aa_code;
    $res_count{$aa_code}  = 0;
    $res_name{$aa_code}   = $aa_name;
    $res_formul{$aa_code} = $aa_formula;
    ++$naa;
}

$nna = 1;
foreach my $na_entry (@NARESES_list) {
    @nna_split = split(/;/, $na_entry, 9999);
    $nxx       = @nna_split;
    my ($na_name, $na_code, $na_formula) = @nna_split;
    $na_code = sprintf('%3s', $na_code);
    $res_id{$num_AARESES + $nna} = $na_code;
    $res_count{$na_code}         = 0;
    $res_name{$na_code}          = $na_name;
    $res_formul{$na_code}        = $na_formula;
    ++$nna;
}

# Number of residues outside both tables; the record loop appends them
# to res_id after the standard ones.
$num_OTRESES = 0;

#
# Element Lists to check atom types
#
@periodic_table = qw(
    . D
    H HE
    LI BE B C N O F NE
    NA MG AL SI P S CL AR
    K CA SC TI V CR MN FE CO NI CU ZN GA GE AS SE BR KR
    RB SR Y ZR NB MO TC RU RH PD AG CD IN SN SB TE I XE
    CS BA
    LA CE PR ND PM SM EU GD TB DY HO ER TM YB LU
    HF TA W RE OS IR PT AU HG TL PB BI PO AT RN
    FR RA
    AC TH PA U NP PU AM CM BK CF ES FM MD NO LR
    KU HA SG NS HS
    DB JL RF BH HN MT
);
$num_per_tab = @periodic_table;

@standard_res_elements = qw(. C H D N O P S);
$num_aa_na_el          = @standard_res_elements;

@one_letter_elements = qw(. B C H D N O F P S K V Y I W);
$num_one_let_el      = @one_letter_elements;

#
# Define special name suffixes to move away from family names.
# suffix_list holds the spellings recognised in author names and
# rep_suffix_list the replacement for the entry at the same position.
#
@suffix_list = (
    'JUNIOR', 'SENIOR', 'JR', 'SR', 'JR.', 'SR.',
    'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X', 'XI', 'XII',
);
$num_suffix = @suffix_list;

@rep_suffix_list = (
    'Junior', 'Senior', 'Junior', 'Senior', 'Junior', 'Senior',
    'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X', 'XI', 'XII',
);
$xnum_suffix = @rep_suffix_list;

@rep_suffix{@suffix_list} = @rep_suffix_list;

#
# Setup charge conversions: map the spellings of a charge field onto the
# form "<digit><sign>".
#
# No Charge
$charge{'0 '} = ' ';
$charge{'00'} = ' ';
$charge{' '}  = ' ';

# For every magnitude 1-9: "n+", "+n" and a bare "n " mean n+, while
# "n-" and "-n" mean n-.  (A stray entry used to remap "5 " to 5-.)
foreach my $magnitude (1 .. 9) {
    my $positive = $magnitude . '+';
    my $negative = $magnitude . '-';
    $charge{$positive}        = $positive;
    $charge{'+' . $magnitude} = $positive;
    $charge{$magnitude . ' '} = $positive;
    $charge{$negative}        = $negative;
    $charge{'-' . $magnitude} = $negative;
}

# Additional spellings of single and double charges
$charge{'I '} = '1+';
$charge{'i '} = '1+';
$charge{'+ '} = '1+';
$charge{'- '} = '1-';
$charge{'II'} = '2+';
$charge{'ii'} = '2+';
$charge{'++'} = '2+';
$charge{'--'} = '2-';

# End of BEGIN statement
#
#
# Flag all lines as untranslated unless proven otherwise.
# # Process #define (or #def) # # Determine whether this is a new keyword, if so and flag is set # terminate free text with a ; Also discard noise lines less than # 6 characters long, and pad other lines to 80 characters with blanks # # Ensure that the record name used is separated from following info # while (<>) { chop; # strip record separator @Fld = split(' ', $_, 9999); if ($end_flag == 0) { $xlat_save = $xlat_flag; $non_xlated{++$xlat_flag} = $_; $first_field = $Fld[1]; if ($#Fld > 1 && ($Fld[1] eq '#def' || $Fld[1] eq '#define')) { { $lx_tl = length($Fld[2]); $tx_tl = $Fld[2]; $var_name = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $var_name = ($var_name . $cx_tl); } } $var_value = ''; if ($#Fld > 2) { $lx_tl = length($Fld[3]); $tx_tl = $Fld[3]; $var_value = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $var_value = ($var_value . 
$cx_tl); } } if ($var_name eq 'verbose' && ($var_value eq 'yes' || $var_value eq 'no')) { $verbose = $var_value; $xlat_flag = $xlat_save; } else { if ($var_name eq 'convtext' && ($var_value eq 'yes' || $var_value eq 'no')) { $convtext = $var_value; $xlat_flag = $xlat_save; } else { if ($var_name eq 'auth_convtext' && ($var_value eq 'yes' || $var_value eq 'no' || $var_value eq 'conditional')) { $auth_convtext = $var_value; $xlat_flag = $xlat_save; } else { if ($var_name eq 'dense_list' && ($var_value eq 'yes' || $var_value eq 'no')) { $dense_list = $var_value; $xlat_flag = $xlat_save; } else { if ($var_name eq 'junior_on_last' && ($var_value eq 'yes' || $var_value eq 'no')) { $junior_on_last = $var_value; $xlat_flag = $xlat_save; } else { if ($var_name eq 'print_ter' && ($var_value eq 'yes' || $var_value eq 'no' || $var_value eq 'comment')) { $print_ter = $var_value; $xlat_flag = $xlat_save; } else { if ($var_name eq 'compliance_level') { $compliance_level = $var_value; $xlat_flag = $xlat_save; } } } } } } } if ($#Fld > 3 && $var_name eq 'name') { $ll = length($Fld[3]); $PDB_form = ''; for ($i = 1; $i <= $ll; ++$i) { $cc = substr($Fld[3], $i, 1); if ($cc eq '_') { #??? $cc = ' '; } $PDB_form = ($PDB_form . $cc); } $ll = length($Fld[4]); $name_value = ''; for ($i = 1; $i <= $ll; ++$i) { $cc = substr($Fld[4], $i, 1); if ($cc eq '_') { #??? $cc = ' '; } $name_value = ($name_value . $cc); } { # # apply PDB typsetting codes if any to a line # { $lx_tl = length($PDB_form); $tx_tl = $PDB_form; $lostr = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $lostr = ($lostr . $cx_tl); } } $lstr = length($lostr); $mystr = ''; $pchar = ' '; for ($qtsi = 1; $qtsi <= $lstr; ++$qtsi) { $mychar = substr($lostr, $qtsi, 1); if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.' || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') { $mychar = substr(($UCAZ . $mychar), index(($lcaz . 
$mychar), $mychar), 1); } if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/') || ($mychar eq $pchar)) { #??? $mystr = ($mystr . $mychar); } if ($pchar eq '/') { if ($mychar eq "\$" || $mychar eq '-') { $pchar = $mychar; } } else { $pchar = $mychar; } } $ret_val = $mystr; $PDB_form = $ret_val; } { $lx_tu = length($PDB_form); $tx_tu = $PDB_form; $PDB_form = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $PDB_form = ($PDB_form . $cx_tu); } } $rep_name{$PDB_form} = $name_value; $xlat_flag = $xlat_save; } } if (length($_) > 5) { if (length($_) < 80) { $_ = (($_) . substr((' ' . ' '), 1, 80 - length($_))); } if (length($first_field) > 6) { $first_field = substr($Fld[1], 1, 6); } if ($first_field ne $previous_keyword && $flag ne '0') { #??? #??? printf (("; \n\n")); $flag = '0'; $previous_keyword = $first_field; } else { $previous_keyword = $first_field; } } } else { $_ = ''; $first_field = ''; if ($previous_keyword ne '' && $flag ne '0') { #??? printf (("; \n\n")); $flag = '0'; } $previous_keyword = ''; } # # Print out any accumulated COMPND, SOURCE, TITLE or CAVEAT information if ($compnd_flag ne '1' && $first_field ne 'COMPND' && #??? 
$first_field ne 'TITLE' && $first_field ne 'CAVEAT' && $first_field ne 'SOURCE') { printf (("\n\n")); printf (("##################\n")); printf (("# #\n")); printf (("# STRUCT #\n")); printf (("# #\n")); printf (("##################\n\n")); printf (("loop_\n_struct.entry_id\n_struct.title\n")); printf " %s\n", $head_PDB_code; printf "; %s\n", $compnd{1}; for ($i = 2; $i < $compnd_flag; ++$i) { printf " %s\n", $compnd{$i}; } printf (("; \n")); $compnd_flag = 1; } #========================================================================= # Keyword ATOM or HETATM or TER # # atom pdb type [ 1- 6] = _atom_site.group_PDB # atom serial number [ 7-11] = _atom_site.id # atom type [13-14] = _atom_site.type_symbol # (first 2 characters of atom name) # atom name [13-16] = _atom_site.label_atom_id # alternate location [17] = _atom_site.label_alt_id # residue name [18-20] = _atom_site.label_comp_id # chain identifier [22] = _atom_site.label_asym_id # residue seq no. [23-26] = _atom_site.auth_seq_id # insertion code [27] = appended to residue sequence no. # x-coordinate [31-38] = _atom_site.cartn_x # y-coordinate [39-46] = _atom_site.cartn_y # z-coordinate [47-54] = _atom_site.cartn_z # occupancy [55-60] = _atom_site.occupancy # temperature factor [61-66] = _atom_site.B_iso_or_equiv # footnote number [68-70] = _atom_site.footnote_id # (February 1992 PDB format) # segment identifier [73-76] = _atom_site.auth_asym_id # (February 1996 PDB format) # element symbol [77-78] = _atom_site.type_symbol # (February 1996 PDB format) # charge on atom [79-80] = append to _atom_site.type_symbol # (February 1996 PDB format) # # Information on non_standard monomers and non-polymers derived from # HET and FORMUL records is presented here using additional information # derived from ATOM and HETATM records. # The assignment of non-standard monomers versus non-polymers # is tricky and unlikely to be correct for all entries. 
Assignment is # based on the following rules: # i) If the HET has a chain id then it must be non-standard (this # is not complete since single chains do not have an chain id # assigned. # ii)If FORMUL places assigns a HET to a component number among # the SEQRES components, the HET must be non-standard # # if ($first_field eq 'ATOM' || $first_field eq 'HETATM' || $first_field eq 'TER') { $xlat_flag = $xlat_save; # parse field and save ATOM/HETATM/TER info # Since atoms are not necessarily numbered consecutively maintain # a complete conesecutive list 1 -> atom_flag and a partial # list for use by CONECT which references the atom_number # $atom_pdb = substr(($_), 7, 5); $atom_number{$atom_flag} = substr(($_), 7, 5); $atom_point{$atom_number{$atom_flag}} = $atom_flag; $atom_name{$atom_flag} = substr(($_), 13, 4); { # # # fix up atom_name by squeezing out blanks in the middle # $temp_a_name = $atom_name{$atom_flag}; if (substr($temp_a_name, 3, 1) eq ' ') { $temp_a_name = (substr($temp_a_name, 1, 2) . substr($temp_a_name, 4, 1) . ' '); } if (substr($temp_a_name, 2, 1) eq ' ') { $temp_a_name = (' ' . substr($temp_a_name, 1, 1) . substr($temp_a_name, 3, 2)); } if ($temp_a_name eq ' ') { $temp_a_name = ' . '; } $ret_val = $temp_a_name; $temp_name = $ret_val; } $atom_name{$atom_flag} = $temp_name; $residue_name{$atom_flag} = substr(($_), 18, 3); $temp_name = substr($temp_name, 1, 2); { $lx_tu = length($temp_name); $tx_tu = $temp_name; $temp_name = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $temp_name = ($temp_name . $cx_tu); } } if (index($UCAZ, substr($temp_name, 1, 1)) == 0) { $temp_name = (' ' . 
substr($temp_name, 2, 1)); } $xtemp_name = substr($temp_name, 1, 2); $ytemp_name = substr($temp_name, 2, 1); if (substr($xtemp_name, 1, 1) eq ' ') { $xtemp_name = $ytemp_name; } $found = 'false'; if ($first_field eq 'ATOM') { ++$atom_res_flag; if ($res_count{$residue_name{$atom_flag}} eq '') { $res_count{$residue_name{$atom_flag}} = 0; $res_formul{$residue_name{$atom_flag}} = '.'; $res_name{$residue_name{$atom_flag}} = '.'; ++$num_OTRESES; $res_id{$num_AARESES + $num_NARESES + $num_OTRESES} = $residue_name{$atom_flag}; $warning_list{++$warning_flag} = ('#=# ATOM_SITE: Residue name ' . $residue_name{$atom_flag} . " not in standard residue list \n"); } ++$res_count{$residue_name{$atom_flag}}; for ($ii = 1; $ii <= $num_aa_na_el && $found eq 'false'; ++$ii) { if ($xtemp_name eq $standard_res_elements[$ii]) { #??? $found = 'true'; } } } else { if (index($xhet_formula{$residue_name{$atom_flag}}, $xtemp_name) > 0) { $found = 'true'; } else { if (index($xhet_formula{$residue_name{$atom_flag}}, (' ' . $ytemp_name . ' ')) > 0) { $found = 'true'; if ($het_conv{($residue_name{$atom_flag} . '|' . $xtemp_name . '|' . $ytemp_name)} eq '') { $warning_list{++$warning_flag} = ('#=# ATOM_SITE: Het group ' . $residue_name{$atom_flag} . '; atom type ' . $xtemp_name . ' converted to ' . $ytemp_name . "\n"); $het_conv{($residue_name{$atom_flag} . '|' . $xtemp_name . '|' . $ytemp_name)} = 'done'; } $xtemp_name = $ytemp_name; $temp_name = $ytemp_name; } else { for ($ii = 1; $ii <= $num_per_tab && $found eq 'false'; ++$ii) { if ($xtemp_name eq $periodic_table[$ii]) { #??? $found = 'true'; } } } } } if ($found eq 'false') { $temp_name = ' .'; if ($first_field eq 'ATOM') { for ($ii = 1; $ii <= $num_aa_na_el && $found eq 'false'; ++$ii) { if ($ytemp_name eq $standard_res_elements[$ii]) { #??? $found = 'true'; } } } else { for ($ii = 1; $ii <= $num_one_let_el && $found eq 'false'; ++$ii) { if ($ytemp_name eq $one_letter_elements[$ii]) { #??? 
$found = 'true'; } } } if ($found eq 'true') { $temp_name = (' ' . $ytemp_name); } $warning_list{++$warning_flag} = ('#=# ATOM_SITE: Site ' . $atom_pdb . '; unexpected atom type ' . $xtemp_name . ' converted to ' . $temp_name . "\n"); } $atom_type{$atom_flag} = substr($temp_name, 1, 2); $atom_alt_location{$atom_flag} = substr(($_), 17, 1); $chain_id{$atom_flag} = substr(($_), 22, 1); $residue_seq_number{$atom_flag} = substr(($_), 23, 5); $atom_x{$atom_flag} = substr(($_), 31, 8); $atom_y{$atom_flag} = substr(($_), 39, 8); $atom_z{$atom_flag} = substr(($_), 47, 8); $atom_occ{$atom_flag} = substr(($_), 55, 6); $B_or_U{$atom_flag} = substr(($_), 61, 6); $footnote_number{$atom_flag} = substr(($_), 68, 3); if ($compliance_level >= 2.0) { $atom_seg_id{$atom_flag} = substr(($_), 73, 4); $atom_type{$atom_flag} = substr(($_), 77, 4); } { # # # fix up atom_type (atom symbol and charge) # $temp_a_type = ($atom_type{$atom_flag} . ' '); $orig_charge = substr($temp_a_type, 3, 2); if ($orig_charge ne ' ') { if (substr($temp_a_type, 3, 1) eq ' ') { $temp_a_type = (substr($temp_a_type, 1, 2) . substr($temp_a_type, 4, 1) . ' '); $orig_charge = substr($temp_a_type, 3, 2); } $temp_charge = $charge{$orig_charge}; if ($temp_charge ne '') { $temp_a_type = (substr($temp_a_type, 1, 2) . $temp_charge); } } if (substr($temp_a_type, 2, 1) eq ' ') { $temp_a_type = (' ' . substr($temp_a_type, 1, 1) . substr($temp_a_type, 3, 2)); } if ($temp_a_type eq ' ') { $temp_a_type = ' . '; } if (substr(($temp_a_type . ' '), 3, 2) eq ' ') { $temp_a_type = substr($temp_a_type, 1, 2); } $ret_val = $temp_a_type; $temp_type = $ret_val; } $atom_type{$atom_flag} = $temp_type; if ($atom_x{$atom_flag} eq ' ') { $atom_x{$atom_flag} = ' . '; } if ($atom_y{$atom_flag} eq ' ') { $atom_y{$atom_flag} = ' . '; } if ($atom_z{$atom_flag} eq ' ') { $atom_z{$atom_flag} = ' . '; } if ($atom_occ{$atom_flag} eq ' ') { $atom_occ{$atom_flag} = ' . '; } if ($B_or_U{$atom_flag} eq ' ') { $B_or_U{$atom_flag} = ' . 
'; } $atom_model{$atom_flag} = $model_flag; # # # flag atom as ATOM or HETATM or TER # if ($first_field eq 'ATOM') { $atom_or_het{$atom_flag} = 'ATOM'; } if ($first_field eq 'HETATM') { $atom_or_het{$atom_flag} = 'HETATM'; } if ($first_field eq 'TER') { $atom_or_het{$atom_flag} = 'TER'; # # set alternate location value if blank # ; } if ($atom_alt_location{$atom_flag} eq ' ') { $atom_alt_location{$atom_flag} = '.'; # # make a list of alternative atoms # ; } if ($atom_alt_location{$atom_flag} ne '.') { $at_alt = $atom_alt_location{$atom_flag}; $atom_alt_list{$at_alt}++; } # # set footnote value if blank # if ($footnote_number{$atom_flag} eq ' ') { $footnote_number{$atom_flag} = ' . '; # # set chain_id and entity_id to bcid for ATOM records if blank # ; } if (($first_field eq 'ATOM' || $first_field eq 'TER') && $chain_id{$atom_flag} eq ' ') { $chain_id{$atom_flag} = $bcid; $entity_id{$atom_flag} = $bcid; } # # set chain_id to . for HETATM records if blank # if ($first_field eq 'HETATM' && $chain_id{$atom_flag} eq ' ') { $chain_id{$atom_flag} = '.'; if ($num_poly_ents == 1 && $entities{1} eq $bcid) { #??? 
$chain_id{$atom_flag} = $bcid; } } # # set entity_id to chain_id for ATOM and TER records # if ($first_field eq 'ATOM' && $chain_id{$atom_flag} ne ' ') { $entity_id{$atom_flag} = $chain_id{$atom_flag}; } if ($first_field eq 'TER' && $chain_id{$atom_flag} ne ' ') { $entity_id{$atom_flag} = $chain_id{$atom_flag}; } # # set _entity.id to residue_name for HETATM records # if ($first_field eq 'HETATM') { $entity_id{$atom_flag} = $residue_name{$atom_flag}; $hetatm_entity = $residue_name{$atom_flag}; $ent_non_poly_id{$hetatm_entity}++; if ($ent_non_poly_id{$hetatm_entity} == 1) { $next_non_poly_id = $ent_non_poly_point{' '}; $prev_non_poly_id = ' '; while ($next_non_poly_id ne '') { $prev_non_poly_id = $next_non_poly_id; $next_non_poly_id = $ent_non_poly_point{$prev_non_poly_id}; } $ent_non_poly_point{$prev_non_poly_id} = $hetatm_entity; $ent_non_poly_point{$hetatm_entity} = ''; ++$num_non_poly_ents; $ent_non_poly_num{$hetatm_entity} = $num_non_poly_ents; } if ($entity_seq_num{$residue_name{$atom_flag}} ne '' && $entity_seq_num{$residue_name{$atom_flag}} + 0 <= $num_poly_ents) { $entity_id{$atom_flag} = $chain_id{$atom_flag}; } } # # define _entities for polypeptide chains or DNA strands # ie these are _entity_poly. 
Done by checking for chain in chain_id # in ATOM records if ($first_field eq 'ATOM') { $atom_entity = $chain_id{$atom_flag}; $ent_poly_id{$atom_entity}++; if ($ent_poly_id{$atom_entity} == 1) { $next_poly_id = $ent_poly_point{' '}; $prev_poly_id = ' '; while ($next_poly_id ne '') { $prev_poly_id = $next_poly_id; $next_poly_id = $ent_poly_point{$prev_poly_id}; } $ent_poly_point{$prev_poly_id} = $atom_entity; $ent_poly_point{$atom_entity} = ''; ++$num_poly_ents; $ent_poly_num{$atom_entity} = $num_poly_ents; $entity_seq_num{$atom_entity} = $num_poly_ents; $entities{$num_poly_ents} = $atom_entity; } } ++$atom_flag; } #===================================================================== # Keyword ANISOU # # # atom serial number = matched via pointers to ATOM/HETATM # atom type = dropped, taken from ATOM/HETATM # atom name = dropped, taken from ATOM/HETATM # alternate location = dropped, taken from ATOM/HETATM # residue name = dropped, taken from ATOM/HETATM # chain identifier = dropped, taken from ATOM/HETATM # residue sequence no. = dropped, taken from ATOM/HETATM # insertion code = dropped, taken from ATOM/HETATM # # # # Note the different order # PDB CIF # 1. U[1][1] U[1][1] # 2. U[2][2] U[1][2] # 3. U[3][3] U[1][3] # 4. U[1][2] U[2][2] # 5. U[1][3] U[2][3] # 6. U[2][3] U[3][3] # if ($first_field eq 'ANISOU') { $xlat_flag = $xlat_save; # parse field ++$aniso_flag; $a_atom_serial_number{$aniso_flag} = substr(($_), 7, 5); $aniso_point{$a_atom_serial_number{$aniso_flag}} = $aniso_flag; $atom_U11{$aniso_flag} = substr(($_), 29, 7); $atom_U22{$aniso_flag} = substr(($_), 36, 7); $atom_U33{$aniso_flag} = substr(($_), 43, 7); $atom_U12{$aniso_flag} = substr(($_), 50, 7); $atom_U13{$aniso_flag} = substr(($_), 57, 7); $atom_U23{$aniso_flag} = substr(($_), 64, 7); } #==================================================================== # Keyword AUTHOR # # Loop over authors as "_audit_author..." 
if ($first_field eq 'AUTHOR') {
    $xlat_flag = $xlat_save;

    # parse record creating an array of authors (comma separated list
    # in columns 11-70)
    $text = substr($_, 11, 60);
    $cont = substr($_, 9, 2);
    {
        $num_auth = (@authors = split($comma, $text, 9999));
        if ($num_auth >= 1 && $authors[$num_auth] eq '' && $comma eq ' ') {
            --$num_auth;
        }
    }

    for ($i = 1; $i <= $num_auth; ++$i) {
        # Trim the name and squeeze internal runs of blanks to one blank.
        {
            $num_a_split = (@a_split = split(' ', $authors[$i], 9999));
            if ($num_a_split >= 1 && $a_split[$num_a_split] eq '' && ' ' eq ' ') {
                --$num_a_split;
            }
        }
        $authors[$i] = '';
        if ($num_a_split > 0) {
            $authors[$i] = $a_split[1];
            for ($j = 2; $j <= $num_a_split; ++$j) {
                $authors[$i] .= ' ' . $a_split[$j];
            }
        }

        if ($auth_convtext eq 'yes'
            || ($auth_convtext eq 'conditional' && $convtext eq 'yes')) {
            {
                #
                # produce a CIF-style name from a PDB name
                #
                # begin by applying typesetting codes if any
                # but always treat "-" and "'" as breaks for capitalization
                # in names
                #
                ($lostr = $authors[$i]) =~ tr/A-Z/a-z/;
                $lstr  = length($lostr);
                $mystr = '';
                $pchar = ' ';
                for ($qnsi = 1; $qnsi <= $lstr; ++$qnsi) {
                    $mychar = substr($lostr, $qnsi, 1);

                    # Capitalize the character that follows a break.
                    if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.'
                        || $pchar eq '-' || $pchar eq "'" || $pchar eq '('
                        || $pchar eq '*' || $pchar eq '/') {
                        $mychar =~ tr/a-z/A-Z/;
                    }

                    # The code characters "*", "$" and "/" are dropped
                    # unless doubled.
                    if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/')
                        || ($mychar eq $pchar)) {    #???
                        $mystr .= $mychar;
                    }

                    # After "/" only "$" or "-" replaces the remembered
                    # previous character.
                    if ($pchar eq '/') {
                        if ($mychar eq "\$" || $mychar eq '-') {
                            $pchar = $mychar;
                        }
                    }
                    else {
                        $pchar = $mychar;
                    }
                }

                #
                # See if a specific replacement was given
                #
                ($name_temp = $mystr) =~ tr/a-z/A-Z/;
                if ($rep_name{$name_temp} ne '') {
                    $mystr = $rep_name{$name_temp};
                }

                #
                # See if there is a comma in place; if so we are done
                # (index() returns 0 for "not found" under $[ = 1)
                #
                if (index($mystr, $comma) != 0) {
                    $ret_val = $mystr;
                }
                else {
                    $nam_suf = '';
                    {
                        $num_namp = (@x_namep = split(' ', $mystr, 9999));
                        if ($num_namp >= 1 && $x_namep[$num_namp] eq '' && ' ' eq ' ') {
                            --$num_namp;
                        }
                    }
                    if ($num_namp > 1) {
                        # A recognised suffix in the last word is either
                        # rewritten in place or split off and re-appended
                        # after the name has been reordered.
                        ($xtemp = $x_namep[$num_namp]) =~ tr/a-z/A-Z/;
                        if ($rep_suffix{$xtemp} ne '') {
                            if ($junior_on_last eq 'yes') {
                                $x_namep[$num_namp] = $rep_suffix{$xtemp};
                            }
                            else {
                                $nam_suf = (' ' . $rep_suffix{$xtemp});
                                --$num_namp;
                            }
                        }
                        $mystr = $x_namep[1];
                        for ($knamp = 2; $knamp <= $num_namp; ++$knamp) {
                            $mystr .= ' ' . $x_namep[$knamp];
                        }
                    }    # end if (num_namp > 1)

                    # Scan backwards for the last "." and move what follows
                    # it in front of the part up to that ".", joined by
                    # a comma and a blank.
                    $llname = length($mystr);
                    $cc = '';
                    for ($kc = $llname - 1; $kc > 1; --$kc) {
                        $cp = $cc;
                        $cc = substr($mystr, $kc, 1);
                        if ($cc eq '.') {    #???
                            if ($cp ne ' ') {
                                $mystr = (substr($mystr, $kc + 1, $llname - $kc)
                                        . $comma . ' ' . substr($mystr, 1, $kc));
                            }
                            else {
                                $mystr = (substr($mystr, $kc + 2, $llname - $kc - 1)
                                        . $comma . ' ' . substr($mystr, 1, $kc));
                            }
                            $kc = 0;    # terminate the scan
                        }
                    }
                    $mystr .= $nam_suf;
                    $ret_val = $mystr;
                }    # end if (index(mystr,comma) != 0 )
                $authors[$i] = $ret_val;
            }
        }
    }

    # Drop an empty final author.
    $is_blank = $authors[$num_auth];
    if ($is_blank eq '') {
        --$num_auth;
    }

    # The category header is written only while $author_flag is still '1'.
    if ($num_auth >= 1 && $author_flag eq '1') {    #???
        printf("\n\n\n");
        printf("####################\n");
        printf("# #\n");
        printf("# AUDIT_AUTHOR #\n");
        printf("# #\n");
        printf("####################\n\n\n");
        printf("loop_ \n");
        printf("_audit_author.name \n");
    }
    for ($i = 1; $i <= $num_auth; ++$i) {
        printf "'%s' \n", $authors[$i];
    }
    if ($num_auth > 0) {
        ++$author_flag;
    }
}

#===========================================================================
# Keyword CAVEAT
#
# In the 1995 format, a new record, CAVEAT, was added to warn of severe
# errors in an entry.
#
#   caveat_cont   [9-10]
#   caveat_id     [12-15]
#   caveat_text   [20-70]  = _struct.title
if ($first_field eq 'CAVEAT') {
    $xlat_flag = $xlat_save;
    $caveat_cont = substr($_, 9, 2);
    $caveat_id   = substr($_, 12, 4);
    $caveat_text = substr($_, 20, 51);

    # A blank continuation field marks the first CAVEAT line.  The test is
    # width-independent: the field is two columns wide, so comparing it
    # with a single blank could never match.
    if ($caveat_cont !~ /\S/) {
        $compnd{$compnd_flag++} = 'Warning of Severe Error::';
    }

    # NOTE(review): both branches assign the same one-blank prefix here.
    $bp = ' ';
    if ($caveat_cont =~ /\S/) {
        $bp = ' ';
    }
    $compnd{$compnd_flag++} = ($bp . $caveat_text);
}

#===========================================================================
# Keyword CISPEP
#
# Introduced with the February 1996 PDB format
#
#   cp_sernum           [ 8-10]
#   cp_res_name_beg     [12-14]
#   cp_chain_id_beg     [16]
#   cp_res_seq_num_beg  [18-21]
#   cp_icode_beg        [22]
#   cp_res_name_end     [26-28] = _struct_mon_prot.label_comp_id
#                                 _struct_mon_prot_cis.label_comp_id
#   cp_chain_id_end     [30]    = _struct_mon_prot.label_asym_id
#                                 _struct_mon_prot_cis.label_asym_id
#   cp_res_seq_num_end  [32-35] = _struct_mon_prot.auth_seq_id
#                                 _struct_mon_prot_cis.auth_seq_id
#   cp_icode_end        [36]      append to
#                                 _struct_mon_prot.auth_seq_id
#                                 _struct_mon_prot_cis.auth_seq_id
#   cp_modnum           [44-46] = _struct_mon_prot.label_model_id
#                                 _struct_mon_prot_cis.label_model_id
#   cp_omega            [54-59] = _struct_mon_prot.omega
if ($first_field eq 'CISPEP') {
    $xlat_flag = $xlat_save;
    $cp_res_name_end{++$cispep_flag}  = substr($_, 26, 3);
    $cp_chain_id_end{$cispep_flag}    = substr($_, 30, 1);
    $cp_res_seq_num_end{$cispep_flag} = substr($_, 32, 5);
    $cp_modnum{$cispep_flag}          = substr($_, 44, 3);
    $cp_omega{$cispep_flag}           = substr($_, 54, 6);

    # Defaults for blank fields.  Multi-column fields are tested with a
    # width-independent pattern; the one-column chain id is compared
    # directly.
    if ($cp_res_name_end{$cispep_flag} !~ /\S/) {
        $cp_res_name_end{$cispep_flag} = ' . ';
    }
    if ($cp_chain_id_end{$cispep_flag} eq ' ') {
        $cp_chain_id_end{$cispep_flag} = $bcid;
    }
    if ($cp_modnum{$cispep_flag} !~ /\S/) {
        $cp_modnum{$cispep_flag} = ' . ';
    }
    if ($cp_omega{$cispep_flag} !~ /\S/) {
        $cp_omega{$cispep_flag} = ' . ';
    }
}

#==========================================================================
# keyword CRYST1
#
#
if ($first_field eq 'CRYST1') {
    $xlat_flag = $xlat_save;
    #
    # Contains a b c alpha beta gamma SG Z
    #
    # calculate cell volume:
    #   V = a b c sqrt(1 - cos^2(alpha) - cos^2(beta) - cos^2(gamma)
    #                    + 2 cos(alpha) cos(beta) cos(gamma))
    {
        # 0.017453292519943295 is pi/180 (degrees to radians).
        $ca = cos(substr($_, 34, 7) * 0.017453292519943295);
        $cb = cos(substr($_, 41, 7) * 0.017453292519943295);
        $cc = cos(substr($_, 48, 7) * 0.017453292519943295);

        # All three squared cosines are subtracted; the previous form
        # "1 - (ca^2 - cb^2 - cc^2)" added two of them back.
        $cz = 1.0 - $ca * $ca - $cb * $cb - $cc * $cc
            + 2.0 * $ca * $cb * $cc;

        # Impossible angle sets give a negative radicand; clamp it so
        # sqrt() cannot abort the conversion.
        if ($cz < 0) {
            $cz = 0;
        }
        $vol = substr($_, 7, 9) * substr($_, 16, 9) * substr($_, 25, 9)
             * sqrt($cz);

        # NOTE(review): this test is also true for any volume below 1.01.
        if ($vol - 1 < .01) {
            $warning_list{++$warning_flag} =
                "#=# CELL: The volume is 1, may be model or NMR, read REMARKs\n";
            ++$vol_flag;
        }
    }

    # localize space group and Z
    {
        $sg = substr($_, 56, 11);
        $Z  = substr($_, 67, 4);
    }

    printf("\n");
    printf "_cell.entry_id %s\n", $head_PDB_code;
    printf "_cell.length_a %9.3f\n", substr($_, 7, 9);
    printf "_cell.length_b %9.3f\n", substr($_, 16, 9);
    printf "_cell.length_c %9.3f\n", substr($_, 25, 9);
    printf "_cell.angle_alpha %7.2f\n", substr($_, 34, 7);
    printf "_cell.angle_beta %7.2f\n", substr($_, 41, 7);
    printf "_cell.angle_gamma %7.2f\n", substr($_, 48, 7);
    printf "_cell.volume %10.1f \n", $vol;
    printf("_cell.details ? \n");
    printf "_cell.Z_PDB %3d \n\n", $Z;
    printf "_symmetry.entry_id %s \n", $head_PDB_code;
    printf "_symmetry.space_group_name_H-M '%11s' \n\n", $sg;

    if ($verbose eq 'yes') {
        printf("_cell_measurement.temp ? \n");
        printf("_cell_measurement.theta_min ? \n");
        printf("_cell_measurement.theta_max ? \n");
        printf("_cell_measurement.wavelength ? \n");
        printf("_cell_measurement.pressure ? \n");
        printf("_cell_measurement.radiation ? \n");
        printf("_cell_measurement.reflns_used ? \n\n");
        printf("loop_\n");
        printf("_cell_measurement_refln.index_h \n");
        printf("_cell_measurement_refln.index_k \n");
        printf("_cell_measurement_refln.index_l \n");
        printf("_cell_measurement_refln.theta \n");
        printf(" ? ? ? ? \n");
    }
}

#======================================================================
# Keyword COMPND
#
# This is considered a common name for the macromolecule
# in the 1992 format, and a more detailed description with
# keywords in the 1995 format.  In either case the entire
# COMPND record is added to the information used for
# _struct.title along with the information from TITLE,
# SOURCE and CAVEAT
#
#   record name        [ 1 -  6]  = "COMPND"
#   continuation flag  [ 9 - 10]  = blank for first record
#   compound           [11 - 70]  = _struct.title
#
#
if ($first_field eq 'COMPND') {
    $xlat_flag = $xlat_save;
    $compnd_contin = substr($_, 9, 2);

    # A blank continuation field marks the first COMPND line (tested
    # independently of the field width).
    if ($compnd_contin !~ /\S/) {
        $compnd{$compnd_flag++} = 'Compound::';
    }

    # NOTE(review): both branches assign the same one-blank prefix here.
    $bp = ' ';
    if ($compnd_contin =~ /\S/) {
        $bp = ' ';
    }
    $compnd{$compnd_flag} = ($bp . substr($_, 11, 60));

    # typeset information, if requested
    if ($convtext eq 'yes') {
        #
        # apply PDB typesetting codes if any to a line
        #
        ($lostr = $compnd{$compnd_flag}) =~ tr/A-Z/a-z/;
        $lstr  = length($lostr);
        $mystr = '';
        $pchar = ' ';
        for ($qtsi = 1; $qtsi <= $lstr; ++$qtsi) {
            $mychar = substr($lostr, $qtsi, 1);

            # Capitalize the character that follows a break.
            if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.'
                || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') {
                $mychar =~ tr/a-z/A-Z/;
            }

            # The code characters "*", "$" and "/" are dropped unless
            # doubled.
            if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/')
                || ($mychar eq $pchar)) {    #???
                $mystr .= $mychar;
            }

            if ($pchar eq '/') {
                if ($mychar eq "\$" || $mychar eq '-') {
                    $pchar = $mychar;
                }
            }
            else {
                $pchar = $mychar;
            }
        }
        $ret_val = $mystr;
        $compnd{$compnd_flag} = $ret_val;
    }
    ++$compnd_flag;
}

#======================================================================
# Keyword CONECT
#
#   Origin serial number   = _struct_conn.ptnr1_label_comp_id
#                          = _struct_conn.ptnr1_label_asym_id
#                          = _struct_conn.ptnr1_auth_seq_id
#                          = _struct_conn.ptnr1_label_atom_id
#                          = _struct_conn.ptnr1_label_alt_id
#   Target serial numbers  = _struct_conn.ptnr2_label_comp_id
#                          = _struct_conn.ptnr2_label_asym_id
#                          = _struct_conn.ptnr2_auth_seq_id
#                          = _struct_conn.ptnr2_label_atom_id
#                          = _struct_conn.ptnr2_label_alt_id
#   Hydrogen bond donor    = _struct_conn.conn_type_id
#   Hydrogen bond acceptor = _struct_conn.conn_type_id
#   Salt bridge excess -ve = _struct_conn.conn_type_id
#   Salt bridge excess +ve = _struct_conn.conn_type_id
#
#   _struct_conn.id = incremental number assigned to each
#                     CONECT record
#   _struct_conn.conn_type_id = matches generic _struct_conn_type.criteria
#
# all atoms at 1_555 ie no support for -ve targets
# No special details included
#
if ($first_field eq 'CONECT') {
    $xlat_flag = $xlat_save;

    # The first 61 columns of the record are saved unparsed.
    $connect_save{++$connect_flag} = substr($_, 1, 61);
    ++$conect_flag_2;
}

#===========================================================================
# Keyword DBREF
#
# In the 1995 format, a new record, DBREF, was added to provide
# "cross-reference links between PDB and the corresponding sequence
# database entries."  The citations may be to subchains specified
# by PDB sequence number and insertion code ranges.
#
#   DBREF             [1- 5]
#   dbref_idcode      [8-11]  = idcode of this entry
#   dbref_chainID     [ 13 ]  = _struct_asym.id
#                             = _struct_ref.biol_id
#   dbref_seqBegin    [15-19] = combines seqBegin and insertBegin
#                    used to obtain start point in _entity_poly_seq.num
#                    then mapped to _struct_ref_seq.seq_align_beg
#   dbref_seqEnd      [21-25] = combines seqEnd and insertEnd
#                    used to obtain start point in _entity_poly_seq.num
#                    then mapped to _struct_ref_seq.seq_align_end
#   dbref_database    [27-32] = _struct_ref.db_name
#   dbref_dbAccession [34-41] = _struct_ref.db_code
#   dbref_dbIdCode    [43-54] = add to _struct_ref.db_code
#   dbref_dbseqBeg    [56-61] = _struct_ref_seq.db_align_beg
#   dbref_dbseqEnd    [63-68] = _struct_ref_seq.db_align_end
#
# Note: as of this writing, _struct_ref_seq_dif.db_seq_num is
#       not in the mmCIF dictionary.
#
# if the database is PDB, columns 61 and 68 contain an insertion code
# for other databases, these columns are blank
if ($first_field eq 'DBREF') {
    $xlat_flag = $xlat_save;
    $dbref_chainID{++$dbref_flag} = substr($_, 13, 1);
    if ($dbref_chainID{$dbref_flag} eq ' ') {
        $dbref_chainID{$dbref_flag} = $bcid;
    }
    $dbref_seqBegin{$dbref_flag}    = substr($_, 15, 5);
    $dbref_seqEnd{$dbref_flag}      = substr($_, 21, 5);
    $dbref_database{$dbref_flag}    = substr($_, 27, 6);
    $dbref_dbAccession{$dbref_flag} = substr($_, 34, 8);
    $dbref_dbIdCode{$dbref_flag}    = substr($_, 43, 12);
    $dbref_dbseqBeg{$dbref_flag}    = substr($_, 56, 6);
    $dbref_dbseqEnd{$dbref_flag}    = substr($_, 63, 6);

    # Database name: blank-separated words joined with "_", "." if empty.
    {
        $numx = (@dblist = split(' ', $dbref_database{$dbref_flag}, 9999));
        if ($numx >= 1 && $dblist[$numx] eq '' && ' ' eq ' ') {
            --$numx;
        }
    }
    $dbref_database{$dbref_flag} = '.';
    if ($numx > 0) {
        $dbref_database{$dbref_flag} = $dblist[1];
        for ($j = 2; $j <= $numx; ++$j) {
            $dbref_database{$dbref_flag} .= '_' . $dblist[$j];
        }
    }

    # Accession code: same treatment.
    {
        $numx = (@dblist = split(' ', $dbref_dbAccession{$dbref_flag}, 9999));
        if ($numx >= 1 && $dblist[$numx] eq '' && ' ' eq ' ') {
            --$numx;
        }
    }
    $dbref_dbAccession{$dbref_flag} = '.';
    if ($numx > 0) {
        $dbref_dbAccession{$dbref_flag} = $dblist[1];
        for ($j = 2; $j <= $numx; ++$j) {
            $dbref_dbAccession{$dbref_flag} .= '_' . $dblist[$j];
        }
    }

    # Id code: same joining, but prefixed with one blank and left empty
    # when the field is blank.
    {
        $numx = (@dblist = split(' ', $dbref_dbIdCode{$dbref_flag}, 9999));
        if ($numx >= 1 && $dblist[$numx] eq '' && ' ' eq ' ') {
            --$numx;
        }
    }
    $dbref_dbIdCode{$dbref_flag} = '';
    if ($numx > 0) {
        $dbref_dbIdCode{$dbref_flag} = (' ' . $dblist[1]);
        for ($j = 2; $j <= $numx; ++$j) {
            $dbref_dbIdCode{$dbref_flag} .= '_' . $dblist[$j];
        }
    }

    # An id code that merely repeats the accession code is dropped.
    if ((' ' . $dbref_dbAccession{$dbref_flag}) eq    #???
        $dbref_dbIdCode{$dbref_flag}) {
        $dbref_dbIdCode{$dbref_flag} = '';
    }
}

#==========================================================================
# keyword END
#
# terminates processing of records, but remainder of file is read
#
#
if ($first_field eq 'END') {
    $xlat_flag = $xlat_save;
    ++$end_flag;
}

#=============================================================================
# Keyword ENDMDL
if ($first_field eq 'ENDMDL') {
    $xlat_flag = $xlat_save;
    $model_flag = '.';
}

#====================================================================
# Keyword EXPDTA
#
#   expdta   [11-70]  = _exptl.method
#
if ($first_field eq 'EXPDTA') {
    $xlat_flag = $xlat_save;

    # parse field
    $expdta   = substr($_, 11, 60);
    $nmr_flag = index($expdta, 'NMR');

    # Lower-case the method, then trim it and squeeze runs of blanks.
    ($loexpdta = $expdta) =~ tr/A-Z/a-z/;
    {
        $num_expdta = (@exp_split = split(' ', $loexpdta, 9999));
        if ($num_expdta >= 1 && $exp_split[$num_expdta] eq '' && ' ' eq ' ') {
            --$num_expdta;
        }
    }
    $loexpdta = '';
    if ($num_expdta > 0) {
        $loexpdta = $exp_split[1];
        for ($j = 2; $j <= $num_expdta; ++$j) {
            $loexpdta .= ' ' . $exp_split[$j];
        }
    }

    # Only two methods are accepted silently; anything else is reported.
    $expwarn = 'true';
    if ($loexpdta eq 'x-ray diffraction') {
        $loexpdta = 'single-crystal x-ray diffraction';
        $expwarn  = 'false';
    }
    elsif ($loexpdta eq 'theoretical model') {
        $expwarn = 'false';
    }
    if ($expwarn eq 'true') {
        $warning_list{++$warning_flag} =
            ('#=# EXPTL: Non-enumerated method: ' . $loexpdta . "\n");
    }
    printf "_exptl.entry_id %s\n", $head_PDB_code;
    printf "_exptl.method '%-s'\n", $loexpdta;
}

#======================================================================
# Keyword FORMUL - chemical formula of non-standard groups
#
#   component number  == _entity.id & _chem_comp.entity_id
#   het identifier    == _entity_name_common & _chem_comp.id
#   het_formula_mw    == ignored
#   het_formula_text  == _chem_comp.formula
#   ??                == _entity_special_details
#
# Information written in ATOM/HETATM keyword
if ($first_field eq 'FORMUL') {
    $xlat_flag = $xlat_save;

    # parse field
    $formul_het_number{$formul_flag}      = substr($_, 9, 2) + 0;
    $formul_het_site_symbol{$formul_flag} = substr($_, 13, 3);
    $formul_het_cont_flag{$formul_flag}   = substr($_, 17, 2);
    $hetatm_entity = $formul_het_site_symbol{$formul_flag};
    $entity_seq_num{$hetatm_entity} = $formul_het_number{$formul_flag} + 0;
    $formul_het_text{$formul_flag}  = substr($_, 20, 51);

    # A blank continuation field starts a new formula text; anything else
    # appends a line to the existing one.  The test is width-independent:
    # the field is two columns wide, so comparing it with a single blank
    # could never match.
    if ($formul_het_cont_flag{$formul_flag} !~ /\S/) {
        $het_formula{$hetatm_entity} =
            ("\n; " . $formul_het_text{$formul_flag});
    }
    else {
        $het_formula{$hetatm_entity} .= "\n " . $formul_het_text{$formul_flag};
    }

    # Build a letters-only copy of the formula: every character that is
    # not in $UCAZ becomes a blank and runs of blanks are squeezed.
    # (index() returns 0 for "not found" under $[ = 1.)
    $pxxc = '';
    for ($ii = 0; $ii <= 50; ++$ii) {
        $xxc = substr($_, 20 + $ii, 1);
        if (index($UCAZ, $xxc) == 0) {
            $xxc = ' ';
        }
        if ($pxxc ne ' ' || $xxc ne ' ') {
            $xhet_formula{$hetatm_entity} .= $xxc;
        }
        $pxxc = $xxc;
    }
    $xhet_formula{$hetatm_entity} .= ' ';

    # First mention of this het group: append it to the end of the
    # %ent_non_poly_point chain, numbered with the FORMUL component number.
    $ent_non_poly_id{$hetatm_entity}++;
    if ($ent_non_poly_id{$hetatm_entity} == 1) {
        $next_non_poly_id = $ent_non_poly_point{' '};
        $prev_non_poly_id = ' ';
        while ($next_non_poly_id ne '') {
            $prev_non_poly_id = $next_non_poly_id;
            $next_non_poly_id = $ent_non_poly_point{$prev_non_poly_id};
        }
        $ent_non_poly_point{$prev_non_poly_id} = $hetatm_entity;
        $ent_non_poly_point{$hetatm_entity}    = '';
        ++$num_non_poly_ents;
        $ent_non_poly_num{$hetatm_entity} = $formul_het_number{$formul_flag};
    }
    ++$formul_flag;

    # Set up to read additional entities from ATOM records
    ($entity_flag = $formul_flag - 1);
}

#=========================================================================
# keyword FTNOTE -- footnote to atoms or residues
#
#   footnote number == _atom_sites_footnote.id
#   footnote text   == _atom_sites_footnote.text
if ($first_field eq 'FTNOTE') {
    $xlat_flag = $xlat_save;
    $X = substr($_, 10, 1);

    # The loop_ header is queued while footnote 1 is still unseen.
    if ($ftnote_flag{1} eq '0') {
        $ft_save{++$foot_flag} = "\nloop_\n";
        $ft_save{++$foot_flag} = "_atom_sites_footnote.id \n";
        $ft_save{++$foot_flag} = "_atom_sites_footnote.text \n";
    }

    # NOTE(review): only column 10 is read, so a footnote number of 10 or
    # more keeps just its last digit - confirm against the initialisation
    # of %ftnote_flag before widening the field.
    $ftnote_num  = substr($_, 10, 1);
    $ftnote_text = substr($_, 12, 59);

    # A higher footnote number queues a ";" line before the new footnote.
    if ($ftnote_num > $ftnote_flag_old) {    #???
        $ft_save{++$foot_flag} = "; \n";
    }

    # First line of a footnote: id line, then text opened with ";".
    # Later lines: text only.
    if ($ftnote_flag{$ftnote_num} eq '0') {
        $ft_save{++$foot_flag} = (' ' . $ftnote_num . "\n");
        $ft_save{++$foot_flag} = ('; ' . $ftnote_text . "\n");
    }
    else {
        $ft_save{++$foot_flag} = (' ' . $ftnote_text . "\n");
    }
    ++$ftnote_flag{$ftnote_num};
    $ftnote_flag_old = $ftnote_num;
}

#====================================================================
# Keyword HEADER
#
# This is a good place to place the _struct_biol data items.  Templates
# are given but no information has been parsed excluding
# _special_details.
#
#   head_funct_class  [11-50] == _struct_biol.details
#   head_dep_date     [51-59] == _database_PDB_rev.date_original
#                                _audit.creation_date
#   head_PDB_code     [63-66] == _database_2.database_code
#                                _struct_biol.id
#                                _audit.revision_id
if ($first_field eq 'HEADER') {
    $xlat_flag = $xlat_save;
    $head_funct_class = substr($_, 11, 40);
    $head_dep_date    = substr($_, 51, 9);
    $head_PDB_code    = substr($_, 63, 4);

    # A blank id code becomes ".".  The test is width-independent: the
    # field is four columns wide, so comparing it with a single blank
    # could never match.
    if ($head_PDB_code !~ /\S/) {
        $head_PDB_code = '.';
    }
    #
    # Output the PDB code immediately as the data block name
    #
    printf "data_%4s\n\n", $head_PDB_code;
    printf "_entry.id %4s\n\n", $head_PDB_code;
    #
    # save the header id as a possible audit.revision_id
    #
    $aud_rev_id = $head_PDB_code;
}

#=======================================================================
# Keyword HELIX
#
#    8 - 10  helix_no.
#                                 == (not used)
#   12 - 14  helix_id             == _struct_conf.id
#   16 - 18  helix_res_name_beg   == _struct_conf.beg_label_comp_id
#   20       helix_chain_id_beg   == _struct_conf.beg_label_asym_id
#   22 - 26  helix_res_seq_beg    == _struct_conf.beg_auth_seq_id
#   28 - 30  helix_res_name_end   == _struct_conf.end_label_comp_id
#   32       helix_chain_id_end   == _struct_conf.end_label_asym_id
#   34 - 38  helix_res_seq_end    == _struct_conf.end_auth_seq_id
#   39 - 40  helix_class          == _struct_conf.conf_type_id
#   41 - 70  helix_comment        == _struct_conf.details
#
# note helix classes 9 and 10 as defined by the PDB do not have CIF
# definitions
#
#
if ($first_field eq 'HELIX') {
    $xlat_flag = $xlat_save;
    $helix_no{$ss_flag}           = substr($_, 8, 3);
    $helix_id{$ss_flag}           = substr($_, 12, 3);
    $helix_res_name_beg{$ss_flag} = substr($_, 16, 3);
    $helix_chain_id_beg{$ss_flag} = substr($_, 20, 1);
    $helix_res_seq_beg{$ss_flag}  = substr($_, 22, 5);
    $helix_res_name_end{$ss_flag} = substr($_, 28, 3);
    $helix_chain_id_end{$ss_flag} = substr($_, 32, 1);
    $helix_res_seq_end{$ss_flag}  = substr($_, 34, 5);
    $helix_class{$ss_flag}        = substr($_, 39, 2);
    $helix_comment{$ss_flag}      = substr($_, 41, 30);

    # A blank (or missing) comment is replaced by a description of the
    # helix class.  The blank test is width-independent: the field is 30
    # columns wide, so comparing it with a single blank could never match.
    if ($helix_comment{$ss_flag} !~ /\S/) {
        $helix_comment{$ss_flag} = ' . ';
        if ($helix_class{$ss_flag} + 0 == 1) {
            $helix_comment{$ss_flag} = 'RIGHT-HANDED ALPHA HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 2) {
            $helix_comment{$ss_flag} = 'RIGHT-HANDED OMEGA HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 3) {
            $helix_comment{$ss_flag} = 'RIGHT-HANDED PI HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 4) {
            $helix_comment{$ss_flag} = 'RIGHT-HANDED GAMMA HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 5) {
            $helix_comment{$ss_flag} = 'RIGHT-HANDED 3/10 HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 6) {
            $helix_comment{$ss_flag} = 'LEFT-HANDED ALPHA HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 7) {
            $helix_comment{$ss_flag} = 'LEFT-HANDED OMEGA HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 8) {
            $helix_comment{$ss_flag} = 'LEFT-HANDED GAMMA HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 9) {
            $helix_comment{$ss_flag} = '2/7 RIBBON/HELIX';
        }
        elsif ($helix_class{$ss_flag} + 0 == 10) {
            $helix_comment{$ss_flag} = 'POLYPROLINE';
        }
    }

    # strip blanks from id; a two-word id is joined with "_"
    {
        $num_x = (@xxx = split(' ', $helix_id{$ss_flag}, 9999));
        if ($num_x >= 1 && $xxx[$num_x] eq '' && ' ' eq ' ') {
            --$num_x;
        }
    }
    $helix_id{$ss_flag} = '';
    if ($num_x == 1) {
        $helix_id{$ss_flag} = $xxx[1];
    }
    if ($num_x == 2) {
        $helix_id{$ss_flag} = ($xxx[1] . '_' . $xxx[2]);
    }

    # provide default conditions
    if ($helix_chain_id_beg{$ss_flag} eq ' ') {
        $helix_chain_id_beg{$ss_flag} = $bcid;
    }
    if ($helix_chain_id_end{$ss_flag} eq ' ') {
        $helix_chain_id_end{$ss_flag} = $bcid;
    }

    # give real names to helix classes: the suffix is "_N" when either
    # end residue name is found in @na_list, "_P" otherwise
    $h_class_suffix = '_P';
    {
        $num_x = (@xxx = split(' ',
            ($helix_res_name_end{$ss_flag} . ' ' . $helix_res_name_beg{$ss_flag}),
            9999));
        if ($num_x >= 1 && $xxx[$num_x] eq '' && ' ' eq ' ') {
            --$num_x;
        }
    }
    foreach $i ($[ .. $#na_list) {
        if ($na_list[$i] eq $xxx[1] || $na_list[$i] eq $xxx[2]) {    #??? #???
            $h_class_suffix = '_N';
        }
    }

    # The class is matched on its numeric value, as for the default
    # comment above, so " 1", "1 " and "01" are all class 1.  Unknown or
    # blank classes are left as read.
    if ($helix_class{$ss_flag} + 0 == 1) {
        $helix_class{$ss_flag} = ('HELX_RH_AL' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 2) {
        $helix_class{$ss_flag} = ('HELX_RH_OM' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 3) {
        $helix_class{$ss_flag} = ('HELX_RH_PI' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 4) {
        $helix_class{$ss_flag} = ('HELX_RH_GA' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 5) {
        $helix_class{$ss_flag} = ('HELX_RH_3T' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 6) {
        $helix_class{$ss_flag} = ('HELX_LH_AL' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 7) {
        $helix_class{$ss_flag} = ('HELX_LH_OM' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 8) {
        $helix_class{$ss_flag} = ('HELX_LH_GA' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 9) {
        $helix_class{$ss_flag} = ('HELX_27' . $h_class_suffix);
    }
    elsif ($helix_class{$ss_flag} + 0 == 10) {
        $helix_class{$ss_flag} = ('HELX_PP' . $h_class_suffix);
    }
    ++$ss_flag;
    ++$helix_flag;
}

#===================================================================
# Keyword HET
#
#   het_site_symbol   [ 8 - 10] == to link to _entity.id from FORMUL
#   het_site_chain    [13]      == ????
#   het_site_seqNum   [14 - 17] == sequence no, or -999 if more than 15
#   het_site_iCode    [18]      == append to seq no
#   het_atoms_number  [21 - 25] == THIS IS THE NUMBER OF HETATM LINES
#                                  NOT A COUNT OF ATOMS
#                                  By careful processing of the HETATM
#                                  information, paying attention to
#                                  occupancies, this number could
#                                  be related to
#                                  _chem_comp.number_atoms_all or
#                                  _chem_comp.number_atoms_nh
#                                  but we do not attempt this
#   het_site_text     [31-70]   == _chem_comp.details
#
#
if ($first_field eq 'HET') {
    $xlat_flag = $xlat_save;

    # parse field
    $het_site_symbol{$het_flag}  = substr($_, 8, 3);
    $het_site_chain{$het_flag}   = substr($_, 13, 1);
    $het_site_residue{$het_flag} = substr($_, 14, 5);

    # The count occupies columns 21-25; reading only four columns dropped
    # the units digit of a right-justified number.
    $het_atoms_number{$het_flag} = substr($_, 21, 5);
    $het_site_text{$het_site_symbol{$het_flag}} = substr($_, 31, 40);
    if ($het_site_chain{$het_flag} eq ' ') {
        $het_site_chain{$het_flag} = '.';
    }
    ++$het_flag;
}

#===================================================================
# Keyword HETNAM
#
#   hetnam_cont    [ 9 - 10] == continuation flag
#   hetnam_symbol  [12 - 14] == to link to entity_id from FORMUL
#   hetnam_text    [16 - 70] == text of chemical name
#                               _chem_comp.name and
#                               _entity_name_com.name
#
if ($first_field eq 'HETNAM') {
    $xlat_flag = $xlat_save;

    # parse field
    $hetnam_cont{$hetnam_flag}   = substr($_, 9, 2);
    $hetnam_symbol{$hetnam_flag} = substr($_, 12, 3);
    $hetnam_text{$hetnam_flag}   = substr($_, 16, 55);

    # A blank continuation field starts the name; any other value appends
    # a further line to it.  The test is width-independent: the field is
    # two columns wide, so comparing it with a single blank could never
    # match.
    if ($hetnam_cont{$hetnam_flag} !~ /\S/) {
        $het_site_name{$hetnam_symbol{$hetnam_flag}} =
            (' ' . substr($_, 16, 55));
    }
    else {
        $het_site_name{$hetnam_symbol{$hetnam_flag}} .=
            "\n " . substr($_, 16, 55);
    }
    ++$hetnam_flag;
}

#===================================================================
# Keyword HETSYN
#
#   hetsyn_cont    [ 9 - 10] == continuation flag
#   hetsyn_symbol  [12 - 14] == to link to entity_id from FORMUL
#   hetsyn_text    [16 - 70] == text of chemical name
#                               _entity_name_com.name
#
if ($first_field eq 'HETSYN') {
    $xlat_flag = $xlat_save;

    # parse field
    $hetsyn_cont{$hetsyn_flag}   = substr($_, 9, 2);
    $hetsyn_symbol{$hetsyn_flag} = substr($_, 12, 3);
    $hetsyn_text{$hetsyn_flag}   = substr($_, 16, 55);

    # A blank continuation field starts the synonym text; any other value
    # appends to it with no separator (width-independent test, as for
    # HETNAM).
    if ($hetsyn_cont{$hetsyn_flag} !~ /\S/) {
        $het_site_syn{$hetsyn_symbol{$hetsyn_flag}} = (substr($_, 16, 55));
    }
    else {
        $het_site_syn{$hetsyn_symbol{$hetsyn_flag}} .= substr($_, 16, 55);
    }
    ++$hetsyn_flag;
}

#===========================================================================
# Keyword HYDBND
#
# Introduced with the February 1996 PDB format
#
# There is no way to define the hydrogen atom of a hydrogen bond in mmCIF
# we treat it as a partner in a hydrogen bond with role "hydrogen"
#
#
#   hb_atom_beg         [13-16] = _struct_conn.ptnr1_label_atom_id
#   hb_alt_loc_beg      [17]    = _struct_conn.ptnr1_label_alt_id
#   hb_res_name_beg     [18-20] = _struct_conn.ptnr1_label_comp_id
#   hb_chain_id_beg     [22]    = _struct_conn.ptnr1_label_asym_id
#   hb_res_seq_num_beg  [24-27] = _struct_conn.ptnr1_auth_seq_id
#   hb_icode_beg        [28]      append to
#                                 _struct_conn.ptnr1_auth_seq_id
#   hb_name_ha          [30-33] = _struct_conn.ptnr1_label_atom_id
#   hb_alt_loc_ha       [34]    = _struct_conn.ptnr1_label_alt_id
#   hb_chain_id_ha      [36]    = _struct_conn.ptnr1_label_asym_id
#   hb_res_seq_num_ha   [37-41] = _struct_conn.ptnr1_auth_seq_id
#   hb_icode_ha         [42]      append to
#                                 _struct_conn.ptnr1_auth_seq_id
#   hb_atom_end         [44-47] = _struct_conn.ptnr2_label_atom_id
#   hb_alt_loc_end      [48]    = _struct_conn.ptnr2_label_alt_id
#   hb_res_name_end     [49-51] = _struct_conn.ptnr2_label_comp_id
#   hb_chain_id_end     [53]    = _struct_conn.ptnr2_label_asym_id
#   hb_res_seq_num_end  [55-58] =
_struct_conn.ptnr2_auth_seq_id # hb_icode_end [59] append to # _struct_conn.ptnr2_auth_seq_id # hb_symop1 [60-65] = _struct_conn.ptnr1_symmetry # hb_symop2 [67-72] = _struct_conn.ptnr2_symmetry # if ($first_field eq 'HYDBND') { $xlat_flag = $xlat_save; $hb_atom_beg{++$hydbnd_flag} = substr(($_), 13, 4); $hb_alt_loc_beg{$hydbnd_flag} = substr(($_), 17, 1); $hb_res_name_beg{$hydbnd_flag} = substr(($_), 18, 3); $hb_chain_id_beg{$hydbnd_flag} = substr(($_), 22, 1); $hb_res_seq_num_beg{$hydbnd_flag} = substr(($_), 24, 5); $hb_atom_ha{$hydbnd_flag} = substr(($_), 30, 4); $hb_alt_loc_ha{$hydbnd_flag} = substr(($_), 34, 1); $hb_chain_id_ha{$hydbnd_flag} = substr(($_), 36, 1); $hb_res_seq_num_ha{$hydbnd_flag} = substr(($_), 38, 5); $hb_atom_end{$hydbnd_flag} = substr(($_), 44, 4); $hb_alt_loc_end{$hydbnd_flag} = substr(($_), 48, 1); $hb_res_name_end{$hydbnd_flag} = substr(($_), 49, 3); $hb_chain_id_end{$hydbnd_flag} = substr(($_), 53, 1); $hb_res_seq_num_end{$hydbnd_flag} = substr(($_), 55, 5); $hb_symm_1{$hydbnd_flag} = substr(($_), 60, 6); $hb_symm_2{$hydbnd_flag} = substr(($_), 67, 6); { # # # fix up atom_name by squeezing out blanks in the middle # $temp_a_name = $hb_atom_beg{$hydbnd_flag}; if (substr($temp_a_name, 3, 1) eq ' ') { $temp_a_name = (substr($temp_a_name, 1, 2) . substr($temp_a_name, 4, 1) . ' '); } if (substr($temp_a_name, 2, 1) eq ' ') { $temp_a_name = (' ' . substr($temp_a_name, 1, 1) . substr($temp_a_name, 3, 2)); } if ($temp_a_name eq ' ') { $temp_a_name = ' . '; } $ret_val = $temp_a_name; $temp_name = $ret_val; } $hb_atom_beg{$hydbnd_flag} = $temp_name; { # # # fix up atom_name by squeezing out blanks in the middle # $temp_a_name = $hb_atom_end{$hydbnd_flag}; if (substr($temp_a_name, 3, 1) eq ' ') { $temp_a_name = (substr($temp_a_name, 1, 2) . substr($temp_a_name, 4, 1) . ' '); } if (substr($temp_a_name, 2, 1) eq ' ') { $temp_a_name = (' ' . substr($temp_a_name, 1, 1) . substr($temp_a_name, 3, 2)); } if ($temp_a_name eq ' ') { $temp_a_name = ' . 
'; } $ret_val = $temp_a_name; $temp_name = $ret_val; } $hb_atom_end{$hydbnd_flag} = $temp_name; { # # # fix up atom_name by squeezing out blanks in the middle # $temp_a_name = $hb_atom_ha{$hydbnd_flag}; if (substr($temp_a_name, 3, 1) eq ' ') { $temp_a_name = (substr($temp_a_name, 1, 2) . substr($temp_a_name, 4, 1) . ' '); } if (substr($temp_a_name, 2, 1) eq ' ') { $temp_a_name = (' ' . substr($temp_a_name, 1, 1) . substr($temp_a_name, 3, 2)); } if ($temp_a_name eq ' ') { $temp_a_name = ' . '; } $ret_val = $temp_a_name; $temp_name = $ret_val; } $hb_atom_ha{$hydbnd_flag} = $temp_name; if ($hb_alt_loc_beg{$hydbnd_flag} eq ' ') { $hb_alt_loc_beg{$hydbnd_flag} = '.'; } if ($hb_alt_loc_end{$hydbnd_flag} eq ' ') { $hb_alt_loc_end{$hydbnd_flag} = '.'; } if ($hb_alt_loc_ha{$hydbnd_flag} eq ' ') { $hb_alt_loc_ha{$hydbnd_flag} = '.'; } if ($hb_res_name_beg{$hydbnd_flag} eq ' ') { $hb_res_name_beg{$hydbnd_flag} = ' . '; } if ($hb_res_name_end{$hydbnd_flag} eq ' ') { $hb_res_name_end{$hydbnd_flag} = ' . '; } if ($hb_res_seq_num_beg{$hydbnd_flag} eq ' ') { $hb_res_seq_num_beg{$hydbnd_flag} = ' . '; } if ($hb_res_seq_num_end{$hydbnd_flag} eq ' ') { $hb_res_seq_num_end{$hydbnd_flag} = ' . '; } if ($hb_res_seq_num_ha{$hydbnd_flag} eq ' ') { $hb_res_seq_num_ha{$hydbnd_flag} = ' . '; } if ($hb_chain_id_beg{$hydbnd_flag} eq ' ') { $hb_chain_id_beg{$hydbnd_flag} = $bcid; } if ($hb_chain_id_end{$hydbnd_flag} eq ' ') { $hb_chain_id_end{$hydbnd_flag} = $bcid; } if ($hb_chain_id_ha{$hydbnd_flag} eq ' ') { $hb_chain_id_ha{$hydbnd_flag} = $bcid; } if ($hb_symm_1{$hydbnd_flag} eq ' ') { $hb_symm_1{$hydbnd_flag} = ' . '; } else { $hb_symm_1{$hydbnd_flag} = (substr($hb_symm_1{$hydbnd_flag}, 1, 3) . '_' . substr($hb_symm_1{$hydbnd_flag}, 4, 3)); } if ($hb_symm_2{$hydbnd_flag} eq ' ') { $hb_symm_2{$hydbnd_flag} = ' . '; } else { $hb_symm_2{$hydbnd_flag} = (substr($hb_symm_2{$hydbnd_flag}, 1, 3) . '_' . 
substr($hb_symm_2{$hydbnd_flag}, 4, 3)); } }

#==================================================================
# Keyword JRNL
#
# As defined by the PDB, this is the primary citation that matches the
# given coordinate set.  It is written before the REMARK 2 record
#
#  "primary"             = _citation.id
#                        = _citation_author.citation_id
#  "yes"/"no"            = _citation.coordinate_linkage
#  jrnl_rec_type [13-16] =
#  jrnl_cont     [17-18] =
#
# AUTH
#  jrnl_auth     [20-70] = _citation_author.name
#
# TITL
#  jrnl_titl     [20-70] = _citation.title
#
# REF
#  jrnl_ref_jour [20-47] = _citation.journal_abbrev (this is not
#                          always abbreviated but it will do)
#  jrnl_ref_vol  [53-55] = _citation.journal_volume
#  "?"                   = _citation.journal_issue
#  jrnl_ref_page [57-61] = _citation.page_first
#  "?"                   = _citation.page_last
#  jrnl_ref_year [63-66] = _citation.year
#
# PUBL
#  jrnl_pub_pub  [20-70] = _citation.book_publisher
#
# REFN
#  jrnl_astm     [25-30] = _citation.journal_id_ASTM
#  jrnl_country  [33-34] = _citation.country
#  jrnl_isbn     [41-65] = _citation.journal_id_ISSN or
#                        = _citation.book_id_ISBN
#  "?"                   = _citation.abstract
#                        = _citation.details
#
# NOTE: all column numbers are 1-based; this file runs with $[ = 1.

# Apply the PDB typesetting codes, if any, to one line of text.
# The line is lower-cased (ASCII only), then a character is capitalised
# when it follows a blank, ',', '.', '(', '*' or '/'.  The code characters
# '*', '$' and '/' are dropped unless doubled.  After a '/' the "previous
# character" stays '/' until a '$' or '-' is seen, so everything in
# between is capitalised.  Scratch values are kept in lexicals.
sub _jrnl_typeset_line {
    my ($line) = @_;
    my %cap_after = map { $_ => 1 } (' ', ',', '.', '(', '*', '/');
    my $lower = defined($line) ? $line : '';
    $lower =~ tr/A-Z/a-z/;
    my $out  = '';
    my $prev = ' ';
    foreach my $c (split(//, $lower)) {
        my $ch = $c;
        $ch =~ tr/a-z/A-Z/ if ($cap_after{$prev});
        if (($ch ne '*' && $ch ne '$' && $ch ne '/') || $ch eq $prev) {
            $out .= $ch;
        }
        if ($prev ne '/' || $ch eq '$' || $ch eq '-') {
            $prev = $ch;
        }
    }
    return $out;
}

# Fix up a 4-column atom name by squeezing out blanks in the middle:
# a blank 3rd column is closed up (padding on the right), then a blank
# 2nd column is closed up (padding on the left).  A name with no visible
# character becomes the placeholder ' . '.
sub _link_squeeze_atom {
    my ($name) = @_;
    $name = '' unless (defined($name));
    $name =~ s/^(..) (.?).*\z/$1$2 /s;
    $name =~ s/^(.) (.{0,2}).*\z/ $1$2/s;
    $name = ' . ' if ($name !~ /\S/);
    return $name;
}

if ($first_field eq 'JRNL') {
    $xlat_flag = $xlat_save;
    $flush_refs = 1;
    $jrnl_rec_type = substr(($_), 13, 4);
    $jrnl_cont = substr(($_), 17, 2);
    $text = substr(($_), 20, 51);
    $jrnl_auth = $text;          # author/editor text is stored unconverted
    if ($convtext eq 'yes') {
        $text = _jrnl_typeset_line($text);
    }
    $jrnl_title = $text;
    $cit_flag = 1;               # the JRNL citation is always citation 1
    $primary = 'yes';
    $cit_refNum{$cit_flag} = 'primary';

    # A blank continuation field marks the first record of a sub-record
    # type.  The test is on "no visible character" rather than on a
    # blank literal of one particular width.
    my $jrnl_first = ($jrnl_cont !~ /\S/);

    if ($jrnl_rec_type eq 'TITL') {
        if ($jrnl_first) {
            $cit_title_1{$cit_flag} = $jrnl_title;
            $cit_title_2{$cit_flag} = '';
        }
        elsif ($cit_title_2{$cit_flag} eq '') {
            $cit_title_2{$cit_flag} = $jrnl_title;
        }
        else {
            $cit_title_2{$cit_flag} = ($cit_title_2{$cit_flag} . "\n " . $jrnl_title);
        }
    }
    elsif ($jrnl_rec_type eq 'AUTH') {
        if ($jrnl_first) {
            $cit_auth_1{$cit_flag} = $jrnl_auth;
            $cit_auth_2{$cit_flag} = '';
        }
        else {
            $cit_auth_2{$cit_flag} = ($cit_auth_2{$cit_flag} . $jrnl_auth);
        }
    }
    elsif ($jrnl_rec_type eq 'EDIT') {
        # Editors of the primary citation, stored the same way the
        # REMARK 1 handler stores EDIT records, so they reach the
        # _citation_editor loop instead of being dropped.
        if ($jrnl_first) {
            $cit_edit_1{$cit_flag} = $jrnl_auth;
            $cit_edit_2{$cit_flag} = '';
        }
        else {
            $cit_edit_2{$cit_flag} = ($cit_edit_2{$cit_flag} . $jrnl_auth);
        }
    }
    elsif ($jrnl_rec_type eq 'REF ') {
        if ($jrnl_first) {
            $jour_1{$cit_flag} = substr(($_), 20, 28);
            $jour_2{$cit_flag} = '';
            $volu{$cit_flag} = substr(($_), 52, 4);
            $page{$cit_flag} = substr(($_), 57, 5);
            $year{$cit_flag} = substr(($_), 63, 4);
            $jrnl_pub_pub_1{$cit_flag} = '?';
        }
        elsif ($jour_2{$cit_flag} eq '') {
            $jour_2{$cit_flag} = substr(($_), 20, 28);
        }
        else {
            $jour_2{$cit_flag} = ($jour_2{$cit_flag} . "\n " . substr(($_), 20, 28));
        }
    }
    elsif ($jrnl_rec_type eq 'PUBL') {
        if ($jrnl_first) {
            $jrnl_pub_pub{$cit_flag} = substr(($_), 20, 51);
            $jour_1{$cit_flag} = '?';
            if ($volu{$cit_flag} !~ /\S/) {
                $volu{$cit_flag} = '?';
            }
            $page{$cit_flag} = '?';
            $year{$cit_flag} = '?';
            $jrnl_pub_pub_1{$cit_flag} = substr(($_), 20, 51);
            $jrnl_pub_pub_2{$cit_flag} = '';
        }
        elsif ($jrnl_pub_pub_2{$cit_flag} eq '') {
            $jrnl_pub_pub_2{$cit_flag} = substr(($_), 20, 51);
        }
        else {
            $jrnl_pub_pub_2{$cit_flag} = ($jrnl_pub_pub_2{$cit_flag} . "\n " . substr(($_), 20, 51));
        }
    }
    elsif ($jrnl_rec_type eq 'REFN') {
        $astm{$cit_flag} = substr(($_), 25, 6);
        $country{$cit_flag} = substr(($_), 33, 2);
        $issn_isbn{$cit_flag} = substr(($_), 36, 4);
        # With no explicit ISSN/ISBN tag, treat the reference as a book
        # when a publisher was given or there is no volume -- unless the
        # reference is still "TO BE PUBLISHED".
        if ($issn_isbn{$cit_flag} !~ /\S/ && $jour_1{$cit_flag} !~ /^TO BE PUB/) {
            if ($jrnl_pub_pub_1{$cit_flag} ne '?'
                || $volu{$cit_flag} !~ /\S/
                || $volu{$cit_flag} eq '?') {
                $issn_isbn{$cit_flag} = 'ISBN';
            }
        }
        if ($issn_isbn{$cit_flag} ne 'ISBN') {
            $isbn{$cit_flag} = '?';
            $issn{$cit_flag} = substr(($_), 41, 25);
        }
        else {
            $issn{$cit_flag} = '?';
            $isbn{$cit_flag} = substr(($_), 41, 25);
        }
        $csd{$cit_flag} = substr(($_), 67, 4);
        if ($csd{$cit_flag} !~ /\S/) {
            $csd{$cit_flag} = '?';
        }
    }
}

#====================================================================
# Keyword KEYWRD
#
#  keywrd_list [11-70]  == _struct_keywords.text
#  from HEADER:
#  head_PDB_code        == _struct_keywords.entry_id

if ($first_field eq 'KEYWRD' || $first_field eq 'KEYWDS') {
    $xlat_flag = $xlat_save;
    $keywrd_list = substr(($_), 11, 60);

    # First keyword record: queue the category banner and loop header.
    if ($keywrd_flag == 0) {
        $keywrd_tail = '';
        $key_tail = '';          # the variable the code below actually reads
        $key_save{++$keywrd_flag} = "\n\n\n";
        $key_save{++$keywrd_flag} = "#############################\n";
        $key_save{++$keywrd_flag} = "# #\n";
        $key_save{++$keywrd_flag} = "# STRUCT_KEYWORDS #\n";
        $key_save{++$keywrd_flag} = "# #\n";
        $key_save{++$keywrd_flag} = "#############################\n\n";
        $key_save{++$keywrd_flag} = "loop_\n";
        $key_save{++$keywrd_flag} = "_struct_keywords.entry_id\n";
        $key_save{++$keywrd_flag} = "_struct_keywords.text\n";
    }

    # The last keyword of the previous record may continue on this one:
    # take back the row queued for it and glue it onto this record.
    if ($key_tail ne '') {
        --$keywrd_flag;
        $keywrd_list = ($key_tail . $keywrd_list);
    }

    $num_keys = (@keyslist = split($comma, $keywrd_list, 9999));
    if ($num_keys >= 1 && $keyslist[$num_keys] eq '' && $comma eq ' ') {
        --$num_keys;
    }

    for ($ii = 1; $ii <= $num_keys; ++$ii) {
        # trim the keyword and collapse internal runs of white space
        $keyslist[$ii] = join(' ', split(' ', $keyslist[$ii]));
        if ($keyslist[$ii] ne '') {
            $key_save{++$keywrd_flag} =
                sprintf("%4s '%s'\n", $head_PDB_code, $keyslist[$ii]);
        }
    }

    # remember the (normalised) last keyword for a possible continuation
    $key_tail = ($num_keys > 0) ? $keyslist[$num_keys] : '';
}

#===========================================================================
# Keyword LINK
#
# Introduced with the February 1996 PDB format
#
#  lk_atom_beg        [13-16] = _struct_conn.ptnr1_label_atom_id
#  lk_alt_loc_beg     [17]    = _struct_conn.ptnr1_label_alt_id
#  lk_res_name_beg    [18-20] = _struct_conn.ptnr1_label_comp_id
#  lk_chain_id_beg    [22]    = _struct_conn.ptnr1_label_asym_id
#  lk_res_seq_num_beg [23-26] = _struct_conn.ptnr1_auth_seq_id
#  lk_icode_beg       [27]      append to
#                               _struct_conn.ptnr1_auth_seq_id
#  lk_atom_end        [43-46] = _struct_conn.ptnr2_label_atom_id
#  lk_alt_loc_end     [47]    = _struct_conn.ptnr2_label_alt_id
#  lk_res_name_end    [48-50] = _struct_conn.ptnr2_label_comp_id
#  lk_chain_id_end    [52]    = _struct_conn.ptnr2_label_asym_id
#  lk_res_seq_num_end [53-56] = _struct_conn.ptnr2_auth_seq_id
#  lk_icode_end       [57]      append to
#                               _struct_conn.ptnr2_auth_seq_id
#  lk_symop1          [60-65] = _struct_conn.ptnr1_symmetry
#  lk_symop2          [67-72] = _struct_conn.ptnr2_symmetry
#

if ($first_field eq 'LINK') {
    $xlat_flag = $xlat_save;
    ++$link_flag;
    $lk_atom_beg{$link_flag} = substr(($_), 13, 4);
    $lk_alt_loc_beg{$link_flag} = substr(($_), 17, 1);
    $lk_res_name_beg{$link_flag} = substr(($_), 18, 3);
    $lk_chain_id_beg{$link_flag} = substr(($_), 22, 1);
    $lk_res_seq_num_beg{$link_flag} = substr(($_), 23, 5);
    $lk_atom_end{$link_flag} = substr(($_), 43, 4);
    $lk_alt_loc_end{$link_flag} = substr(($_), 47, 1);
    $lk_res_name_end{$link_flag} = substr(($_), 48, 3);
    $lk_chain_id_end{$link_flag} = substr(($_), 52, 1);
    $lk_res_seq_num_end{$link_flag} = substr(($_), 53, 5);
    $lk_symm_1{$link_flag} = substr(($_), 60, 6);
    $lk_symm_2{$link_flag} = substr(($_), 67, 6);

    # (the stored value is passed, never substr() itself: substr() in
    # sub-argument position is an lvalue on old perls)
    $lk_atom_beg{$link_flag} = _link_squeeze_atom($lk_atom_beg{$link_flag});
    $lk_atom_end{$link_flag} = _link_squeeze_atom($lk_atom_end{$link_flag});

    # Blank fields become the CIF "not applicable" token, or the
    # default chain id for chain identifiers.
    if ($lk_alt_loc_beg{$link_flag} !~ /\S/) {
        $lk_alt_loc_beg{$link_flag} = '.';
    }
    if ($lk_alt_loc_end{$link_flag} !~ /\S/) {
        $lk_alt_loc_end{$link_flag} = '.';
    }
    if ($lk_res_name_beg{$link_flag} !~ /\S/) {
        $lk_res_name_beg{$link_flag} = ' . ';
    }
    if ($lk_res_name_end{$link_flag} !~ /\S/) {
        $lk_res_name_end{$link_flag} = ' . ';
    }
    if ($lk_chain_id_beg{$link_flag} !~ /\S/) {
        $lk_chain_id_beg{$link_flag} = $bcid;
    }
    if ($lk_chain_id_end{$link_flag} !~ /\S/) {
        $lk_chain_id_end{$link_flag} = $bcid;
    }

    # Symmetry operators: 6 columns "nnnMMM" become "nnn_MMM".
    if ($lk_symm_1{$link_flag} !~ /\S/) {
        $lk_symm_1{$link_flag} = ' . ';
    }
    else {
        $lk_symm_1{$link_flag} = (substr($lk_symm_1{$link_flag}, 1, 3) . '_' .
            substr($lk_symm_1{$link_flag}, 4, 3));
    }
    if ($lk_symm_2{$link_flag} !~ /\S/) {
        $lk_symm_2{$link_flag} = ' . ';
    }
    else {
        $lk_symm_2{$link_flag} = (substr($lk_symm_2{$link_flag}, 1, 3) . '_' .
            substr($lk_symm_2{$link_flag}, 4, 3));
    }
}

#=============================================================================
# Keyword MASTER
#
# (used in END statement)

if ($first_field eq 'MASTER') {
    $xlat_flag = $xlat_save;
    # parse totals: twelve consecutive 5-column counters from column 11
    $total_remark = substr(($_), 11, 5);
    $total_ftnote = substr(($_), 16, 5);
    $total_het = substr(($_), 21, 5);
    $total_helix = substr(($_), 26, 5);
    $total_sheet = substr(($_), 31, 5);
    $total_turn = substr(($_), 36, 5);
    $total_site = substr(($_), 41, 5);
    $total_o_s_m = substr(($_), 46, 5);
    $total_a_h = substr(($_), 51, 5);
    $total_ter = substr(($_), 56, 5);
    $total_conect = substr(($_), 61, 5);
    $total_seqres = substr(($_), 66, 5);
}

#=============================================================================
# Keyword MODEL

if ($first_field eq 'MODEL') {
    $xlat_flag = $xlat_save;
    $model_flag = $Fld[2];       # second white-space separated field
    $model_flags = 'yes';
    if ($model_flag eq '') {
        $model_flag = '.';
    }
}

#===========================================================================
# Keyword MODRES
#
# In the 1995 format, a new record, MODRES, was added to provide
# "descriptions of modifications (e.g., chemical or post-
# translational) to protein and nucleic acid residues.  Included
# are mapping between residue names given in a PDB entry
# and standard residues."  We treat this record as if it were
# a SEQADV with no database specified.  To complete the necessary
# mmCIF category relationships, a dummy DBREF is created for
# each chain involved.
#
#  MODRES          [1- 6]
#  modres_idcode   [8-11]    = idcode of this entry (not used)
#  modres_resName  [13-15]   = _struct_ref_seq_dif.mon_id
#  modres_chainID  [ 17 ]    = _struct_ref.biol_id
#  modres_seq      [19-23]   = combines seqNum and insertCode
#                              used to derive
#                              _struct_ref_seq_dif.seq_num
#  "."                       = _struct_ref.db_name
#  "."
#                            = _struct_ref.db_code
#  modres_dbRes    [25-27]   = _struct_ref_seq_dif.db_mon_id
#  "."
#                            = _struct_ref_seq_dif.db_seq_num
#  modres_conflict [30-70]   = _struct_ref_seq_dif.details
#
# NOTE: all column numbers are 1-based; this file runs with $[ = 1.

if ($first_field eq 'MODRES') {
    $xlat_flag = $xlat_save;
    ++$modres_flag;
    $modres_resName{$modres_flag} = substr(($_), 13, 3);
    $modres_chainID{$modres_flag} = substr(($_), 17, 1);
    if ($modres_chainID{$modres_flag} !~ /\S/) {
        $modres_chainID{$modres_flag} = $bcid;   # blank chain: default id
    }
    $modres_seq{$modres_flag} = substr(($_), 19, 5);
    $modres_dbRes{$modres_flag} = substr(($_), 25, 3);
    $modres_conflict{$modres_flag} = substr(($_), 30, 41);

    # A MODRES record carries no database sequence number, so the entry
    # is normally still unset here; store the "." the table above
    # documents for _struct_ref_seq_dif.db_seq_num.  (The old test
    # compared the unset entry with a blank and so never fired.)
    if ($modres_dbSeq{$modres_flag} !~ /\S/) {
        $modres_dbSeq{$modres_flag} = '.';
    }

    # Prefix the details with the chain, collapse runs of white space,
    # and quote the result when it is more than one word.
    @dblist = split(' ', ('Chain ' . $modres_chainID{$modres_flag} . ': ' .
        $modres_conflict{$modres_flag}));
    $numx = scalar(@dblist);
    $modres_conflict{$modres_flag} = '.';
    if ($numx > 0) {
        $modres_conflict{$modres_flag} = join(' ', @dblist);
        if ($numx > 1) {
            $modres_conflict{$modres_flag} =
                ("'" . $modres_conflict{$modres_flag} . "'");
        }
    }
}

#============================================================================
# Keyword MTRIX
#
# Each MTRIXn record gives one row of a non-crystallographic symmetry
# operator (three matrix elements and one vector element).  Output lines
# are queued in %mat_save, counted by $mtrix_flag.

if ($first_field eq 'MTRIX1' || $first_field eq 'MTRIX2' ||
    $first_field eq 'MTRIX3') {
    $xlat_flag = $xlat_save;
    $mtrix_col1 = substr(($_), 11, 10);
    $mtrix_col2 = substr(($_), 21, 10);
    $mtrix_col3 = substr(($_), 31, 10);
    $mtrix_col4 = substr(($_), 46, 10);

    # Queue the banner and loop header before the first row.  The
    # counter is tested numerically, as the KEYWDS handler tests its
    # own counter, so an unset counter also counts as "nothing queued".
    if ($mtrix_flag == 0) {
        foreach my $hdr_line (
            "\n\n\n",
            "##############################\n",
            "# #\n",
            "# STRUCT_NCS_OPER #\n",
            "# #\n",
            "##############################\n",
            "\n",
            "# **** WARNING **** Domain information needed \n",
            "\nloop_ \n",
            "_struct_ncs_oper.id\n",
            "_struct_ncs_oper.code\n",
            "_struct_ncs_oper.matrix[1][1]\n",
            "_struct_ncs_oper.matrix[1][2]\n",
            "_struct_ncs_oper.matrix[1][3]\n",
            "_struct_ncs_oper.vector[1] \n",
            "_struct_ncs_oper.matrix[2][1]\n",
            "_struct_ncs_oper.matrix[2][2]\n",
            "_struct_ncs_oper.matrix[2][3]\n",
            "_struct_ncs_oper.vector[2] \n",
            "_struct_ncs_oper.matrix[3][1]\n",
            "_struct_ncs_oper.matrix[3][2]\n",
            "_struct_ncs_oper.matrix[3][3]\n",
            "_struct_ncs_oper.vector[3] \n"
        ) {
            $mat_save{++$mtrix_flag} = $hdr_line;
        }
        $warning_list{++$warning_flag} =
            "#=# STRUCT_NCS_OPER: Domain information needed\n";
    }

    $mtrix_id = substr(($_), 8, 3);
    $mtrix_given = substr(($_), 60, 1);

    # Column 60 is the "given" marker.  Only a visible character counts:
    # a record whose trailing blanks were stripped has no column 60 at
    # all and must not be reported as "given".
    $x_given = 'generate';
    if ($mtrix_given =~ /\S/) {
        $x_given = 'given';
    }

    # The operator id and code are emitted once, ahead of the first row.
    if ($first_field eq 'MTRIX1') {
        $mat_save{++$mtrix_flag} = sprintf("%3s %s\n", $mtrix_id, $x_given);
    }
    $mat_save{++$mtrix_flag} = ($mtrix_col1 . ' ' . $mtrix_col2 . ' ' .
        $mtrix_col3 . ' ' . $mtrix_col4 . "\n");
}

#===========================================================================
# Keyword OBSLTE: see SPRSDE, below
#
#============================================================================
# Keyword ORIGX
#
#  _database_pdb_matrix.origx[1][1] ..
[3][3] # _database_pdb_matrix.origx_vector[1] .. _3 if ($first_field eq 'ORIGX1' || $first_field eq 'ORIGX2' || $first_field eq 'ORIGX3') { $xlat_flag = $xlat_save; $origx_col1 = substr(($_), 11, 10); $origx_col2 = substr(($_), 21, 10); $origx_col3 = substr(($_), 31, 10); $origx_col4 = substr(($_), 46, 10); # print loop headers if ($origx_flag eq '0') { #??? $om_save{++$origx_flag} = "\n\n\n"; $om_save{++$origx_flag} = "\nloop_ \n"; $om_save{++$origx_flag} = "_database_pdb_matrix.entry_id\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[1][1]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[1][2]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[1][3]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx_vector[1] \n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[2][1]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[2][2]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[2][3]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx_vector[2] \n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[3][1]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[3][2]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx[3][3]\n"; $om_save{++$origx_flag} = "_database_pdb_matrix.origx_vector[3] \n"; $om_save{++$origx_flag} = (' ' . $head_PDB_code . "\n\n"); } $origx_id = substr(($_), 8, 3); $om_save{++$origx_flag} = ($origx_col1 . ' ' . $origx_col2 . ' ' . $origx_col3 . ' ' . $origx_col4 . "\n"); } #=========================================================================== # Keyword REMARK # # # print all citations from JNRL and REMARK 1 records # # First check if it is time to flush references # if ($flush_refs == 1) { $remark_number = substr(($_), 8, 3); if (($first_field eq 'REMARK' && $remark_number eq ' 2' && #??? $Fld[3] eq 'RESOLUTION.') || (($first_field ne 'REMARK') && ($first_field ne 'JRNL'))) { if ($jrnl_flag eq '1') { #??? 
printf (("\nloop_\n")); printf (("_citation.id\n")); printf (("_citation.coordinate_linkage\n")); printf (("_citation.title\n")); printf (("_citation.country\n")); printf (("_citation.journal_abbrev\n")); printf (("_citation.journal_volume\n")); printf (("_citation.journal_issue\n")); printf (("_citation.page_first\n")); printf (("_citation.year\n")); printf (("_citation.journal_id_ASTM\n")); printf (("_citation.journal_id_ISSN\n")); printf (("_citation.journal_id_CSD\n")); printf (("_citation.book_title\n")); printf (("_citation.book_publisher\n")); printf (("_citation.book_id_ISBN\n")); printf (("_citation.details\n")); ++$jrnl_flag; } $cit_decr = 0; if ($primary) { $cit_decr = 1; } for ($i = 1; $i <= $cit_flag; ++$i) { if ($i eq '1' && $primary) { #??? printf ((" \nprimary yes\n")); } else { printf " \n%3s no\n", $i - $cit_decr; if ($i - $cit_decr != $cit_refNum{$i}) { $warning_list{++$warning_flag} = sprintf("#=# CITATION: Mismatch PDB refNum %s to id %s\n", $cit_refNum{$i}, $i - $cit_decr); } } # for books # _citation.title == TITL (if present) # _citation.country == country[i] # _citation.journal_abbrev == ? # _citation.journal_volume == volu[i] # _citation.journal_issue == ? # _citation.page_first == ? # _citation.year == year[i] # _citation.journal_id_ASTM == ? # _citation.journal_id_ISSN == ? # _citation.journal_id_PDB == ? # _citation.book_title == REF (jour_x) # _citation.book_publisher == jrnl_pub_pub_x[i] # _citation.book_id_ISBN == isbn[i] # _citation.details == ? if ($jrnl_pub_pub_1{$i} ne '?' || $issn_isbn{$i} eq 'ISBN') { if ($country{$i} eq ' ') { $country{$i} = '?'; } if ($jour_1{$i} eq ' ') { $jour_1{$i} = '?'; } if ($volu{$i} eq ' ') { $volu{$i} = '?'; } if ($year{$i} eq ' ') { $year{$i} = '?'; } if ($page{$i} eq ' ') { $page{$i} = '?'; } if ($cit_title_1{$i}) { printf "; %s\n", $cit_title_1{$i}; } else { printf ((' ? ')); } if ($cit_title_2{$i}) { printf " %s\n", $cit_title_2{$i}; } if ($cit_title_1{$i}) { printf ((";\n")); } printf " %2s ? 
%3s ? %5s %4s ? ? %4s\n", $country{$i}, $volu{$i}, $page{$i}, $year{$i}, $csd{$i}; if (!$jour_2{$i}) { printf " '%28s' \n", $jour_1{$i}; } if ($jour_2{$i}) { printf "; %28s\n %s\n;\n", $jour_1{$i}, $jour_2{$i}; } if ($jrnl_pub_pub_1{$i}) { printf "; %s \n", $jrnl_pub_pub_1{$i}; } if ($jrnl_pub_pub_2{$i}) { printf " %s \n", $jrnl_pub_pub_2{$i}; } if ($jrnl_pub_pub_1{$i}) { printf ((";\n")); } printf " '%25s' ? \n", $isbn{$i}; } else { # for journals if ($cit_title_1{$i}) { printf "; %s\n", $cit_title_1{$i}; } if ($cit_title_2{$i}) { printf " %s\n", $cit_title_2{$i}; } if ($country{$i} eq ' ') { $country{$i} = '?'; } if ($volu{$i} eq ' ') { $volu{$i} = '?'; } if ($year{$i} eq ' ') { $year{$i} = '?'; } if ($page{$i} eq ' ') { $page{$i} = '?'; } if (!$jour_2{$i}) { printf ";\n %2s '%28s' %4s ? %5s %4s \n'%-15s' '%15s' %4s ? ? ? ?\n", $country{$i}, $jour_1{$i}, $volu{$i}, $page{$i}, $year{$i}, $astm{$i}, $issn{$i}, $csd{$i}; } if ($jour_2{$i}) { printf ((";\n %2s \n; %-28s\n %s\n;\n" . " %4s ? %5s %4s \n'%-15s' '%15s' %4s ? ? ? ?\n"), $country{$i}, $jour_1{$i}, $jour_2{$i}, $volu{$i}, $page{$i}, $year{$i}, $astm{$i}, $issn{$i}, $csd{$i}); } } } # Loop Editor List for ($i = 1; $i <= $cit_flag; ++$i) { if ($cit_edit_1{$i}) { printf (("\nloop_\n")); printf (("_citation_editor.citation_id\n")); printf (("_citation_editor.name\n")); last; } } for ($i = 1; $i <= $cit_flag; ++$i) { if ($cit_edit_1{$i}) { { $num_edit = (@editors = split($comma, $cit_edit_1{$i}, 9999)); if ($num_edit >= 1 && $editors[$num_edit] eq '' && $comma eq ' ') { --$num_edit; } } for ($ii = 1; $ii <= $num_edit; ++$ii) { { $num_e_split = (@e_split = split(' ', $editors[$ii], 9999)); if ($num_e_split >= 1 && $e_split[$num_e_split] eq '' && ' ' eq ' ') { --$num_e_split; } } $editors[$ii] = ''; if ($num_e_split > 0) { $editors[$ii] = $e_split[1]; for ($j = 2; $j <= $num_e_split; ++$j) { $editors[$ii] = ($editors[$ii] . ' ' . 
$e_split[$j]); } } if ($auth_convtext eq 'yes' || ($auth_convtext eq 'conditional' && $convtext eq 'yes')) { { # # produce a CIF-style name from a PDB name # # begin by applying typesetting codes if any # but always treat "-" and "'" as breaks for capitalization # in names # { $lx_tl = length($editors[$ii]); $tx_tl = $editors[$ii]; $lostr = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $lostr = ($lostr . $cx_tl); } } $lstr = length($lostr); $mystr = ''; $pchar = ' '; for ($qnsi = 1; $qnsi <= $lstr; ++$qnsi) { $mychar = substr($lostr, $qnsi, 1); if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.' || $pchar eq '-' || $pchar eq "'" || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') { $mychar = substr(($UCAZ . $mychar), index(($lcaz . $mychar), $mychar), 1); } if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/') || ($mychar eq $pchar)) { #??? $mystr = ($mystr . $mychar); } if ($pchar eq '/') { if ($mychar eq "\$" || $mychar eq '-') { $pchar = $mychar; } # end if( mychar == "$" || mychar == "-" ) ; } else { $pchar = $mychar; } # end if( pchar == "/" ) ; } # end for( qnsi=1; qnsi <= lstr; ++qnsi) # # See if a specific replacement was given # { $lx_tu = length($mystr); $tx_tu = $mystr; $name_temp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $name_temp = ($name_temp . 
$cx_tu); } } if ($rep_name{$name_temp} ne '') { $mystr = $rep_name{$name_temp}; # # See if there is a comma in place if so we are done # ; } if (index($mystr, $comma) != 0) { $ret_val = $mystr; } else { $nam_suf = ''; { $num_namp = (@x_namep = split(' ', $mystr, 9999)); if ($num_namp >= 1 && $x_namep[$num_namp] eq '' && ' ' eq ' ') { --$num_namp; } } if ($num_namp > 1) { { $lx_tu = length($x_namep[$num_namp]); $tx_tu = $x_namep[$num_namp]; $xtemp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $xtemp = ($xtemp . $cx_tu); } } if ($rep_suffix{$xtemp} ne '') { if ($junior_on_last eq 'yes') { $x_namep[$num_namp] = $rep_suffix{$xtemp}; } else { $nam_suf = (' ' . $rep_suffix{$xtemp}); --$num_namp; } } $mystr = $x_namep[1]; for ($knamp = 2; $knamp <= $num_namp; ++$knamp) { $mystr = ($mystr . ' ' . $x_namep[$knamp]); } } # end if (num_namp > 1) $llname = length($mystr); $cc = ''; for ($kc = $llname - 1; $kc > 1; --$kc) { $cp = $cc; $cc = substr($mystr, $kc, 1); if ($cc eq '.') { #??? if ($cp ne ' ') { $mystr = (substr($mystr, $kc + 1, $llname - $kc) . $comma . ' ' . substr($mystr, 1, $kc)); } else { $mystr = (substr($mystr, $kc + 2, $llname - $kc - 1) . $comma . ' ' . substr($mystr, 1, $kc)); } # if (cp != " ") $kc = 0; } # end if (cc == ".") ; } # for (kc=llname-1; kc>1; --kc) $mystr = ($mystr . $nam_suf); $ret_val = $mystr; } # end if (index(mystr,comma) != 0 ) $editors[$ii] = $ret_val; } } } if ($cit_edit_2{$i}) { ($num_edit = $num_edit - 1); } for ($j = 1; $j <= $num_edit; ++$j) { if (($primary) && $i eq '1') { #??? 
printf " primary '%s' \n", $editors[$j]; } else { printf " %3s '%s' \n", $i - $cit_decr, $editors[$j]; } } } if ($cit_edit_2{$i}) { { $num_edit = (@editors = split($comma, $cit_edit_2{$i}, 9999)); if ($num_edit >= 1 && $editors[$num_edit] eq '' && $comma eq ' ') { --$num_edit; } } for ($ii = 1; $ii <= $num_edit; ++$ii) { { $num_e_split = (@e_split = split(' ', $editors[$ii], 9999)); if ($num_e_split >= 1 && $e_split[$num_e_split] eq '' && ' ' eq ' ') { --$num_e_split; } } $editors[$ii] = ''; if ($num_e_split > 0) { $editors[$ii] = $e_split[1]; for ($j = 2; $j <= $num_e_split; ++$j) { $editors[$ii] = ($editors[$ii] . ' ' . $e_split[$j]); } } if ($auth_convtext eq 'yes' || ($auth_convtext eq 'conditional' && $convtext eq 'yes')) { { # # produce a CIF-style name from a PDB name # # begin by applying typesetting codes if any # but always treat "-" and "'" as breaks for capitalization # in names # { $lx_tl = length($editors[$ii]); $tx_tl = $editors[$ii]; $lostr = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $lostr = ($lostr . $cx_tl); } } $lstr = length($lostr); $mystr = ''; $pchar = ' '; for ($qnsi = 1; $qnsi <= $lstr; ++$qnsi) { $mychar = substr($lostr, $qnsi, 1); if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.' || $pchar eq '-' || $pchar eq "'" || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') { $mychar = substr(($UCAZ . $mychar), index(($lcaz . $mychar), $mychar), 1); } if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/') || ($mychar eq $pchar)) { #??? $mystr = ($mystr . 
$mychar); } if ($pchar eq '/') { if ($mychar eq "\$" || $mychar eq '-') { $pchar = $mychar; } # end if( mychar == "$" || mychar == "-" ) ; } else { $pchar = $mychar; } # end if( pchar == "/" ) ; } # end for( qnsi=1; qnsi <= lstr; ++qnsi) # # See if a specific replacement was given # { $lx_tu = length($mystr); $tx_tu = $mystr; $name_temp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $name_temp = ($name_temp . $cx_tu); } } if ($rep_name{$name_temp} ne '') { $mystr = $rep_name{$name_temp}; # # See if there is a comma in place if so we are done # ; } if (index($mystr, $comma) != 0) { $ret_val = $mystr; } else { $nam_suf = ''; { $num_namp = (@x_namep = split(' ', $mystr, 9999)); if ($num_namp >= 1 && $x_namep[$num_namp] eq '' && ' ' eq ' ') { --$num_namp; } } if ($num_namp > 1) { { $lx_tu = length($x_namep[$num_namp]); $tx_tu = $x_namep[$num_namp]; $xtemp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $xtemp = ($xtemp . $cx_tu); } } if ($rep_suffix{$xtemp} ne '') { if ($junior_on_last eq 'yes') { $x_namep[$num_namp] = $rep_suffix{$xtemp}; } else { $nam_suf = (' ' . $rep_suffix{$xtemp}); --$num_namp; } } $mystr = $x_namep[1]; for ($knamp = 2; $knamp <= $num_namp; ++$knamp) { $mystr = ($mystr . ' ' . $x_namep[$knamp]); } } # end if (num_namp > 1) $llname = length($mystr); $cc = ''; for ($kc = $llname - 1; $kc > 1; --$kc) { $cp = $cc; $cc = substr($mystr, $kc, 1); if ($cc eq '.') { #??? if ($cp ne ' ') { $mystr = (substr($mystr, $kc + 1, $llname - $kc) . $comma . ' ' . substr($mystr, 1, $kc)); } else { $mystr = (substr($mystr, $kc + 2, $llname - $kc - 1) . $comma . ' ' . substr($mystr, 1, $kc)); } # if (cp != " ") $kc = 0; } # end if (cc == ".") ; } # for (kc=llname-1; kc>1; --kc) $mystr = ($mystr . 
$nam_suf); $ret_val = $mystr; } # end if (index(mystr,comma) != 0 ) $editors[$ii] = $ret_val; } } } for ($j = 1; $j <= $num_edit; ++$j) { if (($primary) && $i eq '1') { #??? printf " primary '%s' \n", $editors[$j]; } else { printf " %3s '%s' \n", $i - $cit_decr, $editors[$j]; } } } } # Loop Author List for ($i = 1; $i <= $cit_flag; ++$i) { if ($cit_auth_1{$i}) { printf (("\nloop_\n")); printf (("_citation_author.citation_id\n")); printf (("_citation_author.name\n")); last; } } for ($i = 1; $i <= $cit_flag; ++$i) { if ($cit_auth_1{$i}) { { $num_auth = (@authors = split($comma, $cit_auth_1{$i}, 9999)); if ($num_auth >= 1 && $authors[$num_auth] eq '' && $comma eq ' ') { --$num_auth; } } for ($ii = 1; $ii <= $num_auth; ++$ii) { { $num_a_split = (@a_split = split(' ', $authors[$ii], 9999)); if ($num_a_split >= 1 && $a_split[$num_a_split] eq '' && ' ' eq ' ') { --$num_a_split; } } $authors[$ii] = ''; if ($num_a_split > 0) { $authors[$ii] = $a_split[1]; for ($j = 2; $j <= $num_a_split; ++$j) { $authors[$ii] = ($authors[$ii] . ' ' . $a_split[$j]); } } if ($auth_convtext eq 'yes' || ($auth_convtext eq 'conditional' && $convtext eq 'yes')) { { # # produce a CIF-style name from a PDB name # # begin by applying typesetting codes if any # but always treat "-" and "'" as breaks for capitalization # in names # { $lx_tl = length($authors[$ii]); $tx_tl = $authors[$ii]; $lostr = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $lostr = ($lostr . $cx_tl); } } $lstr = length($lostr); $mystr = ''; $pchar = ' '; for ($qnsi = 1; $qnsi <= $lstr; ++$qnsi) { $mychar = substr($lostr, $qnsi, 1); if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.' || $pchar eq '-' || $pchar eq "'" || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') { $mychar = substr(($UCAZ . $mychar), index(($lcaz . 
$mychar), $mychar), 1); } if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/') || ($mychar eq $pchar)) { #??? $mystr = ($mystr . $mychar); } if ($pchar eq '/') { if ($mychar eq "\$" || $mychar eq '-') { $pchar = $mychar; } # end if( mychar == "$" || mychar == "-" ) ; } else { $pchar = $mychar; } # end if( pchar == "/" ) ; } # end for( qnsi=1; qnsi <= lstr; ++qnsi) # # See if a specific replacement was given # { $lx_tu = length($mystr); $tx_tu = $mystr; $name_temp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $name_temp = ($name_temp . $cx_tu); } } if ($rep_name{$name_temp} ne '') { $mystr = $rep_name{$name_temp}; # # See if there is a comma in place if so we are done # ; } if (index($mystr, $comma) != 0) { $ret_val = $mystr; } else { $nam_suf = ''; { $num_namp = (@x_namep = split(' ', $mystr, 9999)); if ($num_namp >= 1 && $x_namep[$num_namp] eq '' && ' ' eq ' ') { --$num_namp; } } if ($num_namp > 1) { { $lx_tu = length($x_namep[$num_namp]); $tx_tu = $x_namep[$num_namp]; $xtemp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $xtemp = ($xtemp . $cx_tu); } } if ($rep_suffix{$xtemp} ne '') { if ($junior_on_last eq 'yes') { $x_namep[$num_namp] = $rep_suffix{$xtemp}; } else { $nam_suf = (' ' . $rep_suffix{$xtemp}); --$num_namp; } } $mystr = $x_namep[1]; for ($knamp = 2; $knamp <= $num_namp; ++$knamp) { $mystr = ($mystr . ' ' . $x_namep[$knamp]); } } # end if (num_namp > 1) $llname = length($mystr); $cc = ''; for ($kc = $llname - 1; $kc > 1; --$kc) { $cp = $cc; $cc = substr($mystr, $kc, 1); if ($cc eq '.') { #??? if ($cp ne ' ') { $mystr = (substr($mystr, $kc + 1, $llname - $kc) . $comma . ' ' . substr($mystr, 1, $kc)); } else { $mystr = (substr($mystr, $kc + 2, $llname - $kc - 1) . $comma . ' ' . 
substr($mystr, 1, $kc)); } # if (cp != " ") $kc = 0; } # end if (cc == ".") ; } # for (kc=llname-1; kc>1; --kc) $mystr = ($mystr . $nam_suf); $ret_val = $mystr; } # end if (index(mystr,comma) != 0 ) $authors[$ii] = $ret_val; } } } if ($cit_auth_2{$i}) { ($num_auth = $num_auth - 1); } for ($j = 1; $j <= $num_auth; ++$j) { if (($primary) && $i eq '1') { #??? printf " primary '%s' \n", $authors[$j]; } else { printf " %3s '%s' \n", $i - $cit_decr, $authors[$j]; } } } if ($cit_auth_2{$i}) { { $num_auth = (@authors = split($comma, $cit_auth_2{$i}, 9999)); if ($num_auth >= 1 && $authors[$num_auth] eq '' && $comma eq ' ') { --$num_auth; } } for ($ii = 1; $ii <= $num_auth; ++$ii) { { $num_a_split = (@a_split = split(' ', $authors[$ii], 9999)); if ($num_a_split >= 1 && $a_split[$num_a_split] eq '' && ' ' eq ' ') { --$num_a_split; } } $authors[$ii] = ''; if ($num_a_split > 0) { $authors[$ii] = $a_split[1]; for ($j = 2; $j <= $num_a_split; ++$j) { $authors[$ii] = ($authors[$ii] . ' ' . $a_split[$j]); } } if ($auth_convtext eq 'yes' || ($auth_convtext eq 'conditional' && $convtext eq 'yes')) { { # # produce a CIF-style name from a PDB name # # begin by applying typesetting codes if any # but always treat "-" and "'" as breaks for capitalization # in names # { $lx_tl = length($authors[$ii]); $tx_tl = $authors[$ii]; $lostr = ''; for ($ix_tl = 1; $ix_tl <= $lx_tl; ++$ix_tl) { $cx_tl = substr($tx_tl, $ix_tl, 1); $cx_tl = substr(($lcaz . $cx_tl), index(($UCAZ . $cx_tl), $cx_tl), 1); $lostr = ($lostr . $cx_tl); } } $lstr = length($lostr); $mystr = ''; $pchar = ' '; for ($qnsi = 1; $qnsi <= $lstr; ++$qnsi) { $mychar = substr($lostr, $qnsi, 1); if ($pchar eq ' ' || $pchar eq ',' || $pchar eq '.' || $pchar eq '-' || $pchar eq "'" || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') { $mychar = substr(($UCAZ . $mychar), index(($lcaz . $mychar), $mychar), 1); } if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/') || ($mychar eq $pchar)) { #??? $mystr = ($mystr . 
$mychar); } if ($pchar eq '/') { if ($mychar eq "\$" || $mychar eq '-') { $pchar = $mychar; } # end if( mychar == "$" || mychar == "-" ) ; } else { $pchar = $mychar; } # end if( pchar == "/" ) ; } # end for( qnsi=1; qnsi <= lstr; ++qnsi) # # See if a specific replacement was given # { $lx_tu = length($mystr); $tx_tu = $mystr; $name_temp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $name_temp = ($name_temp . $cx_tu); } } if ($rep_name{$name_temp} ne '') { $mystr = $rep_name{$name_temp}; # # See if there is a comma in place if so we are done # ; } if (index($mystr, $comma) != 0) { $ret_val = $mystr; } else { $nam_suf = ''; { $num_namp = (@x_namep = split(' ', $mystr, 9999)); if ($num_namp >= 1 && $x_namep[$num_namp] eq '' && ' ' eq ' ') { --$num_namp; } } if ($num_namp > 1) { { $lx_tu = length($x_namep[$num_namp]); $tx_tu = $x_namep[$num_namp]; $xtemp = ''; for ($ix_tu = 1; $ix_tu <= $lx_tu; ++$ix_tu) { $cx_tu = substr($tx_tu, $ix_tu, 1); $cx_tu = substr(($UCAZ . $cx_tu), index(($lcaz . $cx_tu), $cx_tu), 1); $xtemp = ($xtemp . $cx_tu); } } if ($rep_suffix{$xtemp} ne '') { if ($junior_on_last eq 'yes') { $x_namep[$num_namp] = $rep_suffix{$xtemp}; } else { $nam_suf = (' ' . $rep_suffix{$xtemp}); --$num_namp; } } $mystr = $x_namep[1]; for ($knamp = 2; $knamp <= $num_namp; ++$knamp) { $mystr = ($mystr . ' ' . $x_namep[$knamp]); } } # end if (num_namp > 1) $llname = length($mystr); $cc = ''; for ($kc = $llname - 1; $kc > 1; --$kc) { $cp = $cc; $cc = substr($mystr, $kc, 1); if ($cc eq '.') { #??? if ($cp ne ' ') { $mystr = (substr($mystr, $kc + 1, $llname - $kc) . $comma . ' ' . substr($mystr, 1, $kc)); } else { $mystr = (substr($mystr, $kc + 2, $llname - $kc - 1) . $comma . ' ' . substr($mystr, 1, $kc)); } # if (cp != " ") $kc = 0; } # end if (cc == ".") ; } # for (kc=llname-1; kc>1; --kc) $mystr = ($mystr . 
$nam_suf);
$ret_val = $mystr;
}   # end if (index(mystr,comma) != 0 )
$authors[$ii] = $ret_val;
}
}
}
# One output row per author: the citation label, then the quoted name.
for ($j = 1; $j <= $num_auth; ++$j) {
    if (($primary) && $i eq '1') {
        printf " primary '%s' \n", $authors[$j];
    }
    else {
        printf " %3s '%s' \n", $i - $cit_decr, $authors[$j];
    }
}
}
}
$flush_refs = 0;
}
}
if ($first_field eq 'REMARK') {
    $xlat_flag = $xlat_save;
    ++$all_remarks;

    # parse record
    # (substr positions are 1-based: the script sets $[ = 1)
    $remark_number   = substr($_, 8, 3);
    $remark_cont     = substr($_, 17, 2);
    $jrnl_rec_type   = substr($_, 13, 4);
    $jrnl_refNum     = substr($_, 22, 49) + 0;
    $remark_text     = substr($_, 12, 60);
    $remark_cit_text = substr($_, 20, 51);

    if ($convtext eq 'yes' && $jrnl_rec_type eq 'TITL') {
        #
        # apply PDB typesetting codes if any to a line
        #
        # The text is lower-cased, then the character after a break
        # character is capitalised.  tr/// replaces the former
        # per-character table lookup; the scratch variables of that
        # expansion ($lx_tl, $tx_tl, $ix_tl, $cx_tl) are no longer set.
        #
        ($lostr = $remark_cit_text) =~ tr/A-Z/a-z/;
        $lstr  = length($lostr);
        $mystr = '';
        $pchar = ' ';
        for ($qtsi = 1; $qtsi <= $lstr; ++$qtsi) {
            $mychar = substr($lostr, $qtsi, 1);
            if (   $pchar eq ' ' || $pchar eq ',' || $pchar eq '.'
                || $pchar eq '(' || $pchar eq '*' || $pchar eq '/') {
                $mychar =~ tr/a-z/A-Z/;
            }
            # '*', '$' and '/' are dropped from the output unless the
            # same character appears twice in a row
            if (($mychar ne '*' && $mychar ne "\$" && $mychar ne '/')
                || ($mychar eq $pchar)) {
                $mystr .= $mychar;
            }
            # after a '/', only '$' or '-' replaces the remembered
            # previous character
            if ($pchar eq '/') {
                if ($mychar eq "\$" || $mychar eq '-') {
                    $pchar = $mychar;
                }
            }
            else {
                $pchar = $mychar;
            }
        }
        $ret_val = $mystr;
        $remark_cit_text = $ret_val;
    }

    # (this assignment used to appear twice in a row; once is enough)
    $remark_test = substr($_, 12, 3);

    # Deal with change of remark number
    # The change test is numeric, so the blank-padded text compares by value.
    if ($remark_number_old != $remark_number) {
        $remark_flag = '0';
        $remark_number_old = $remark_number;
        # $remark_number is a three-character field, so the literals it
        # is compared with must be three characters wide as well; a
        # shorter literal can never be equal to a full-width field.
        if ($remark_number ne '  3' && $remark_number ne '  2' &&
            $remark_number ne '  1') {
printf(";\n\n");
}
}
#
# NOTE on blank-padded literals in this section: every field tested here
# is cut from the record with a fixed width ($remark_number 3 characters,
# $remark_cont 2, $remark_test 3, ISSN/ISBN tag, volume and CSD code 4).
# A literal of a different width can never equal a full-width field, so
# the blank literals below are written at the width of the field.
#
#
# As of the February 1996 PDB format, Remark 4 contains text
# indicating the format with which the entry complies
#
if ($remark_number eq '  4') {
    if (substr($_, 17, 23) eq 'COMPLIES WITH FORMAT V.') {
        $compliance_level = substr($_, 41, 3);
    }
}
# type 1 remarks - additional references
# data items identical to JRNL
if ($remark_number eq '  1' && $remark_test eq 'REF') {
    # a new reference starts: open the next citation slot
    ++$cit_flag;
    $flush_refs = 1;
    $cit_refNum{$cit_flag} = $jrnl_refNum;
}
if ($remark_number eq '  1' && $remark_test ne 'REF' &&
    $remark_test ne '   ') {
    # In each group below a blank continuation field marks the first
    # line of the sub-record; anything else is a continuation line.

    # Assign TITL records
    if ($jrnl_rec_type eq 'TITL') {
        if ($remark_cont eq '  ') {
            $cit_title_1{$cit_flag} = $remark_cit_text;
            $cit_title_2{$cit_flag} = '';
        }
        elsif ($cit_title_2{$cit_flag} eq '') {
            $cit_title_2{$cit_flag} = $remark_cit_text;
        }
        else {
            $cit_title_2{$cit_flag} .= "\n " . $remark_cit_text;
        }
    }
    # Assign AUTH records
    if ($jrnl_rec_type eq 'AUTH') {
        if ($remark_cont eq '  ') {
            $cit_auth_1{$cit_flag} = $remark_cit_text;
            $cit_auth_2{$cit_flag} = '';
        }
        else {
            $cit_auth_2{$cit_flag} .= $remark_cit_text;
        }
    }
    # Assign EDIT records
    if ($jrnl_rec_type eq 'EDIT') {
        if ($remark_cont eq '  ') {
            $cit_edit_1{$cit_flag} = $remark_cit_text;
            $cit_edit_2{$cit_flag} = '';
        }
        else {
            $cit_edit_2{$cit_flag} .= $remark_cit_text;
        }
    }
    # Assign REF records
    if ($jrnl_rec_type eq 'REF ') {
        if ($remark_cont eq '  ') {
            $jour_1{$cit_flag} = substr($_, 20, 28);
            $jour_2{$cit_flag} = '';
            $volu{$cit_flag} = substr($_, 52, 4);
            $page{$cit_flag} = substr($_, 57, 5);
            $year{$cit_flag} = substr($_, 63, 4);
            $jrnl_pub_pub_1{$cit_flag} = '?';
        }
        elsif ($jour_2{$cit_flag} eq '') {
            $jour_2{$cit_flag} = substr($_, 20, 28);
        }
        else {
            $jour_2{$cit_flag} .= "\n " . substr($_, 20, 28);
        }
    }
    # Assign PUBL records
    if ($jrnl_rec_type eq 'PUBL') {
        if ($remark_cont eq '  ') {
            $jrnl_pub_pub_1{$cit_flag} = substr($_, 20, 51);
            $jrnl_pub_pub_2{$cit_flag} = '';
        }
        elsif ($jrnl_pub_pub_2{$cit_flag} eq '') {
            $jrnl_pub_pub_2{$cit_flag} = substr($_, 20, 51);
        }
        else {
            $jrnl_pub_pub_2{$cit_flag} .= "\n " . substr($_, 20, 51);
        }
    }
    # Assign REFN records
    if ($jrnl_rec_type eq 'REFN') {
        $astm{$cit_flag}      = substr($_, 25, 6);
        $country{$cit_flag}   = substr($_, 33, 2);
        $issn_isbn{$cit_flag} = substr($_, 36, 4);
        # No ISSN/ISBN tag on the record and not a "TO BE PUB" reference:
        # treat the number as an ISBN when a publisher was given or no
        # volume is known.
        if ($issn_isbn{$cit_flag} eq '    '
            && substr($jour_1{$cit_flag}, 1, 9) ne 'TO BE PUB') {
            if (   $jrnl_pub_pub_1{$cit_flag} ne '?'
                || $volu{$cit_flag} eq ''
                || $volu{$cit_flag} eq '?'
                || $volu{$cit_flag} eq '    ') {
                $issn_isbn{$cit_flag} = 'ISBN';
            }
        }
        if ($issn_isbn{$cit_flag} ne 'ISBN') {
            $isbn{$cit_flag} = '?';
            $issn{$cit_flag} = substr($_, 41, 25);
        }
        else {
            $issn{$cit_flag} = '?';
            $isbn{$cit_flag} = substr($_, 41, 25);
        }
        $csd{$cit_flag} = substr($_, 67, 4);
        if ($csd{$cit_flag} eq '    ') {
            $csd{$cit_flag} = '?';
        }
    }
    ++$remark_flag;
}
#
# type 2 remarks - resolution
#
if ($remark_number eq '  2' && $Fld[3] eq 'RESOLUTION.') {
    $resolution = substr($_, 23, 45);
    # with a limit, split may return a trailing empty field; drop it
    $num_split = (@res_split = split(' ', $resolution, 9999));
    if ($num_split >= 1 && $res_split[$num_split] eq '' && ' ' eq ' ') {
        --$num_split;
    }
    if ($res_split[1] ne 'NOT') {
        $res_flag = 1;
        printf "\n_reflns.entry_id %s \n", $head_PDB_code;
        # %g precision counts significant digits: the former %8.2g turned
        # a resolution of 1.85 into 1.9.  Four significant digits keep
        # values such as 1.85 or 12.50 intact, while values that already
        # fit in two digits print exactly as before.
        printf "_reflns.d_resolution_high %8.4g \n", $res_split[1];
    }
    ++$remark_flag;
    # Include _exptl templates
    if ($verbose eq 'yes') {
        printf("_exptl.absorpt_coefficient_mu ? \n");
        printf("_exptl.absorpt_correction_T_max ? \n");
        printf("_exptl.absorpt_correction_type ? \n");
        printf("_exptl.absorpt_process_details ? \n\n");
        printf("_exptl_crystal.colour ? \n");
        printf("_exptl_crystal.density_diffrn ? \n");
        printf("_exptl_crystal.density_meas ? \n");
        printf("_exptl_crystal.density_meas_temp ? \n");
        printf("_exptl_crystal.density_method ? \n");
        printf("_exptl_crystal.description ? \n");
        printf("_exptl_crystal.F_000 ? \n");
        printf("_exptl_crystal_face.diffr_chi ? \n");
        printf("_exptl_crystal_face.diffr_kappa ? \n");
        printf("_exptl_crystal_face.diffr_phi ? \n");
        printf("_exptl_crystal_face.diffr_psi ? \n");
        printf("_exptl_crystal_face.index_h ? \n");
        printf("_exptl_crystal_face.index_k ? \n");
        printf("_exptl_crystal_face.index_l ? \n");
        printf("_exptl_crystal_face.perp_dist ? \n");
        printf("_exptl_crystal.id ? \n");
        printf("_exptl_crystal.preparation ? \n");
        printf("_exptl_crystal.size_max ? \n");
        printf("_exptl_crystal.size_mid ? \n");
        printf("_exptl_crystal.size_min ? \n");
        printf("_exptl_crystal.size_rad ? \n");
        printf("_exptl.crystals_number ? \n");
        printf("_exptl_crystal_grow.apparatus ? \n");
        printf("_exptl_crystal_grow.atmosphere ? \n");
        printf("_exptl_crystal_grow.crystal_id ? \n");
        printf("_exptl_crystal_grow.details ? \n");
        printf("_exptl_crystal_grow.method ? \n");
        printf("_exptl_crystal_grow.method_ref ? \n");
        printf("_exptl_crystal_grow.pH ? \n");
        printf("_exptl_crystal_grow.pressure ? \n");
        printf("_exptl_crystal_grow.seeding ? \n");
        printf("_exptl_crystal_grow.seeding_ref ? \n");
        printf("_exptl_crystal_grow.temp ? \n");
        printf("_exptl_crystal_grow.time ? \n");
        printf("\nloop_\n");
        printf("_exptl_crystal_grow_comp.crystal_id \n");
        printf("_exptl_crystal_grow_comp.id \n");
        printf("_exptl_crystal_grow_comp.conc \n");
        printf("_exptl_crystal_grow_comp.details \n");
        printf("_exptl_crystal_grow_comp.name \n");
        printf("_exptl_crystal_grow_comp.sol_id \n");
        printf("_exptl_crystal_grow_comp.volume \n");
        printf(" ? ? ? ? ? ? ? \n\n");
    }
    # Include additional data items to be added on diffraction experiment.
# A rigourous treatment of REMARK 3 might be able to parse some of # this info. if ($verbose eq 'yes') { printf (("_diffrn.ambient_temp ? \n")); printf (("_diffrn.ambient_pressure ? \n")); printf (("_diffrn_attenuator.code ? \n")); printf (("_diffrn_attenuator.scale ? \n")); printf (("_diffrn.details ? \n\n")); printf (("_diffrn.ambient_environment ? \n")); printf (("_diffrn.crystal_support ? \n")); printf (("_diffrn.crystal_treatment ? \n\n")); printf (("_diffrn_measurement.method ? \n")); printf (("_diffrn_measurement.details ? \n")); printf (("_diffrn_measurement.device ? \n")); printf (("_diffrn_measurement.device_details ? \n")); printf (("_diffrn_measurement.device_type ? \n")); printf (("_diffrn_orient_matrix.type ? \n")); printf (("_diffrn_orient_matrix.UB[1][1] ? \n")); printf (("_diffrn_orient_matrix.UB[1][2] ? \n")); printf (("_diffrn_orient_matrix.UB[1][3] ? \n")); printf (("_diffrn_orient_matrix.UB[2][1] ? \n")); printf (("_diffrn_orient_matrix.UB[2][2] ? \n")); printf (("_diffrn_orient_matrix.UB[2][3] ? \n")); printf (("_diffrn_orient_matrix.UB[3][1] ? \n")); printf (("_diffrn_orient_matrix.UB[3][2] ? \n")); printf (("_diffrn_orient_matrix.UB[3][3] ? \n\n")); printf (("loop_\n")); printf (("_diffrn_orient_refln.index_h\n")); printf (("_diffrn_orient_refln.index_k\n")); printf (("_diffrn_orient_refln.index_l\n")); printf (("_diffrn_orient_refln.angle_chi\n")); printf