



// https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE129194&targ=self&form=text&view=quick

// Example GEO series record (accession GSE129194) in SOFT text format.
// Each non-empty line has the shape `<key> = <value>`; the entity line starts
// with "^" and attribute lines start with "!". Keys such as
// !Series_sample_id or !Series_type repeat, once per value.
// NOTE(review): not referenced in the visible part of this file — presumably a
// fixture / manual test input for the SOFT parser; confirm before removing.
const sample = `
^SERIES = GSE129194
!Series_title = m6A in mRNA coding regions promotes translation via the RNA helicase-containing YTHDC2
!Series_geo_accession = GSE129194
!Series_status = Public on Jul 01 2019
!Series_submission_date = Apr 02 2019
!Series_last_update_date = Dec 03 2019
!Series_pubmed_id = 31767846
!Series_summary = Dynamic mRNA modification in the form of N6-methyladenosine (m6A) adds considerable richness and sophistication to gene regulation. The m6A mark is asymmetrically distributed along mature mRNAs, with a strong preference around the stop codon and 3’UTR. Nevertheless, approximately 35% of m6A residues are located within the coding region (CDS). It has been suggested that methylation in CDS slows down translation elongation by interfering the decoding process. However, neither the decoding feature of endogenous mRNAs nor the physiological significance of CDS m6A has been clearly defined. By integrating Ribo-seq and m6A-seq data sets, we found that CDS m6A methylation leads to ribosome pausing in a codon-specific manner. Unexpectedly, removing CDS m6A modification from these transcripts results in a further decrease of translation. A systemic analysis of RNA structural datasets revealed that CDS m6A methylation positively regulates translation by resolving mRNA secondary structures. We further demonstrate that the elongation-promoting effect of CDS methylation requires the RNA helicase-containing m6A reader YTHDC2. Our findings established the physiological significance of CDS methylation and uncovered non-overlapping function of m6A reader proteins.
!Series_overall_design = Using Ribo-seq to investigate the effect of m6A/m6A reader on translation efficiency
!Series_type = Expression profiling by high throughput sequencing
!Series_type = Other
!Series_contributor = Yuanhui,,Mao
!Series_contributor = Shu-Bing,,Qian
!Series_sample_id = GSM3702256
!Series_sample_id = GSM3702257
!Series_sample_id = GSM3702258
!Series_sample_id = GSM3702259
!Series_sample_id = GSM3702260
!Series_sample_id = GSM3702261
!Series_sample_id = GSM3702262
!Series_sample_id = GSM3702263
!Series_sample_id = GSM3702264
!Series_sample_id = GSM3702265
!Series_sample_id = GSM3702266
!Series_sample_id = GSM3702267
!Series_contact_name = Yuanhui,,Mao
!Series_contact_email = maoyuanhui123@gmail.com
!Series_contact_institute = Cornell University
!Series_contact_address = 526 Campus Road, Cornell University
!Series_contact_city = Ithaca
!Series_contact_state = New York
!Series_contact_zip/postal_code = 14850
!Series_contact_country = USA
!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE129nnn/GSE129194/suppl/GSE129194_RAW.tar
!Series_platform_id = GPL16791
!Series_platform_id = GPL17021
!Series_platform_organism = Homo sapiens
!Series_platform_organism = Mus musculus
!Series_platform_taxid = 9606
!Series_platform_taxid = 10090
!Series_sample_organism = Homo sapiens
!Series_sample_organism = Mus musculus
!Series_sample_taxid = 9606
!Series_sample_taxid = 10090
!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA530508
!Series_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRP190145
`

/**
 * Parses a SOFT-style text document into a multimap of its attributes.
 *
 * Every line is split at its FIRST "=": the trimmed text before it becomes the
 * key and the trimmed text after it becomes the value (so values may themselves
 * contain "=", e.g. URLs with query strings). Lines without "=" are ignored.
 * Keys may repeat, so each key maps to the array of all its values, in order of
 * appearance.
 *
 * Implemented with built-ins only, so it does not depend on a lodash `_`
 * binding being in scope.
 *
 * @param {string} softDocInString - Document text with lines separated by "\n"
 *   (a trailing "\r" on a line is removed by the trimming).
 * @returns {Object<string, string[]>} Plain object mapping each key to the list
 *   of values found for it; empty object when no line contains "=".
 */
export const parseRiboSeqStudy = softDocInString => {
    // A Map keeps arbitrary keys (even "__proto__") away from the prototype
    // chain while the groups are being accumulated.
    const valuesByKey = new Map()

    for (const line of softDocInString.split("\n")) {
        const idxEq = line.indexOf("=")

        if (idxEq === -1) {
            continue
        }

        const key = line.slice(0, idxEq).trim()
        const value = line.slice(idxEq + 1).trim()
        const values = valuesByKey.get(key)

        if (values === undefined) {
            valuesByKey.set(key, [value])
        } else {
            values.push(value)
        }
    }

    // fromEntries defines own data properties, so the result is an ordinary
    // object with the same shape the previous groupBy/mapValues chain produced.
    return Object.fromEntries(valuesByKey)
}