Difference between revisions of "GPU610/TeamLean"

From CDOT Wiki
Jump to: navigation, search
(Assignment 1)
Line 7: Line 7:
 
== Progress ==
 
== Progress ==
 
=== Assignment 1 ===
 
=== Assignment 1 ===
 +
 +
'''For the first assignment, we each profiled an open source library: LAME, an audio compression utility, and Squish, an image compression utility.'''
 +
 +
'''
 +
'''
 +
 +
'''Alex - results for Lame'''
 +
 +
I have examined the LAME mp3 encoder to see if the process of encoding a wav file into an mp3 can be  parallelized.
 +
 +
The source code below, taken from the psymodel.c file, could potentially be parallelized.
 +
 +
From the sample runs and the resulting profiles, as the size of the WAV file gets larger, the percentage of the run time spent in the most time-consuming function actually goes down. This might mean that it is not worth parallelizing. If my group chooses this project, we will have to examine this carefully.
 +
 +
  '''<u>SOURCE CODE</u>'''
 +
int L3psycho_anal_vbr(lame_internal_flags * gfc, const sample_t * const buffer[2], int  gr_out,
 +
    III_psy_ratio masking_ratio[2][2],
 +
    III_psy_ratio  masking_MS_ratio[2][2],
 +
    FLOAT percep_entropy[2], FLOAT percep_MS_entropy[2],
 +
    FLOAT energy[4], int blocktype_d[2]){
 +
    SessionConfig_t const *const cfg = &gfc->cfg;
 +
    PsyStateVar_t *const psv = &gfc->sv_psy;
 +
    PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
 +
    PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
 +
    plotting_data *plt = cfg->analysis ? gfc->pinfo  : 0;
 +
    III_psy_xmin last_thm[4];
 +
    /* fft and energy calculation  */
 +
    FLOAT(*wsamp_l)[BLKSIZE];
 +
    FLOAT(*wsamp_s)[3][BLKSIZE_s];
 +
    FLOAT  fftenergy[HBLKSIZE];
 +
    FLOAT  fftenergy_s[3][HBLKSIZE_s];
 +
    FLOAT  wsamp_L[2][BLKSIZE];
 +
    FLOAT  wsamp_S[2][3][BLKSIZE_s];
 +
    FLOAT  eb[4][CBANDS], thr[4] [CBANDS];
 +
    FLOAT  sub_short_factor[4][3];
 +
    FLOAT  thmm;
 +
    FLOAT const pcfact = 0.6f;
 +
    FLOAT  const ath_factor = (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;
 +
    const  FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
 +
    const  FLOAT(*const_fftenergy_s) [HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;
 +
    /* block type  */
 +
    int    ns_attacks[4] [4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
 +
    int    uselongblock[2];
 +
    /*  usual variables like loop indices, etc..    */
 +
    int    chn, sb, sblock;
 +
    /* chn=2 and 3 = Mid and  Side channels */
 +
    int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : cfg->channels_out;
 +
    memcpy(&last_thm[0], &psv->thm[0], sizeof(last_thm));
 +
    vbrpsy_attack_detection(gfc, buffer, gr_out,  masking_ratio, masking_MS_ratio, energy,
 +
                            sub_short_factor, ns_attacks,  uselongblock);
 +
    vbrpsy_compute_block_type(cfg, uselongblock);
 +
    /* LONG BLOCK CASE */
 +
    {
 +
        for  (chn = 0; chn < n_chn_psy; chn++) {
 +
            int const ch01 = chn & 0x01;
 +
            wsamp_l = wsamp_L  + ch01;
 +
            vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l);
 +
            vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy);
 +
            vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn);
 +
        }
 +
        if (cfg->mode ==  JOINT_STEREO) {
 +
            if ((uselongblock[0] + uselongblock[1]) == 2) {
 +
                vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l,
 +
                                              ath_factor, cfg->msfix, gdl->npart);
 +
            }
 +
        }
 +
        /* TODO: apply adaptive  ATH masking here ?? */
 +
        for (chn = 0; chn < n_chn_psy; chn++) {
 +
            convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn);
 +
            convert_partition2scalefac_l_to_s (gfc, eb[chn], thr[chn], chn);
 +
        }
 +
    }
 +
    /* SHORT BLOCKS CASE */
 +
    {
 +
        int const  force_short_block_calc = gfc->cd_psy->force_short_block_calc;
 +
        for (sblock = 0; sblock < 3;  sblock++) {
 +
            for (chn = 0; chn < n_chn_psy; ++chn) {
 +
                int const ch01 = chn &  0x01;
 +
                if (uselongblock[ch01] && !force_short_block_calc) {
 +
                    vbrpsy_skip_masking_s(gfc, chn, sblock);
 +
                }
 +
                else {
 +
                    /*  compute masking thresholds for short blocks */
 +
                    wsamp_s = wsamp_S + ch01;
 +
                    vbrpsy_compute_fft_s(gfc, buffer, chn, sblock, fftenergy_s, wsamp_s);
 +
                    vbrpsy_compute_masking_s(gfc, const_fftenergy_s, eb[chn], thr[chn], chn,
 +
                                              sblock);
 +
                }
 +
            }
 +
            if (cfg->mode == JOINT_STEREO) {
 +
                if ((uselongblock[0] + uselongblock[1]) == 0) {
 +
                    vbrpsy_compute_MS_thresholds (const_eb, thr, gds->mld_cb, gfc->ATH->cb_s,
 +
                                                ath_factor,  cfg->msfix, gds->npart);
 +
                }
 +
            }
 +
            /* TODO: apply adaptive ATH masking  here ?? */
 +
            for (chn = 0; chn < n_chn_psy; ++chn) {
 +
                int const ch01 = chn &  0x01;
 +
                if (!uselongblock[ch01] || force_short_block_calc) {
 +
                    convert_partition2scalefac_s(gfc, eb[chn], thr[chn], chn, sblock);
 +
                }
 +
            }
 +
        }
 +
        /****  short block pre-echo control  ****/
 +
        for (chn = 0; chn < n_chn_psy; chn++) {
 +
            for (sb = 0; sb < SBMAX_s; sb++) {
 +
                FLOAT  new_thmm[3], prev_thm, t1, t2;
 +
                for (sblock = 0; sblock < 3; sblock++) {
 +
                    thmm = psv->thm[chn].s[sb][sblock];
 +
                    thmm *= NS_PREECHO_ATT0;
 +
                    t1 = t2 = thmm;
 +
                    if  (sblock > 0) {
 +
                        prev_thm = new_thmm[sblock - 1];
 +
                    }
 +
                    else {
 +
                        prev_thm = last_thm[chn].s[sb][2];
 +
                    }
 +
                    if (ns_attacks[chn][sblock] >= 2 || ns_attacks[chn][sblock + 1] == 1) {
 +
                        t1  = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT1 * pcfact);
 +
                    }
 +
                    thmm =  Min(t1, thmm);
 +
                    if (ns_attacks[chn][sblock] == 1) {
 +
                        t2 =  NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
 +
                    }
 +
                    else if  ((sblock == 0 && psv->last_attacks[chn] == 3)
 +
                            || (sblock > 0 && ns_attacks [chn][sblock - 1] == 3)) { /* 2nd preceeding block */
 +
                        switch (sblock) {
 +
                        case 0:
 +
                            prev_thm = last_thm[chn].s[sb][1];
 +
                            break;
 +
                        case 1:
 +
                            prev_thm = last_thm[chn].s[sb][2];
 +
                            break;
 +
                        case 2:
 +
                            prev_thm =  new_thmm[0];
 +
                            break;
 +
                        }
 +
                        t2 =  NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
 +
                    }
 +
                    thmm = Min (t1, thmm);
 +
                    thmm = Min(t2, thmm);
 +
                    /* pulse like signal detection  for fatboy.wav and so on */
 +
                    thmm *= sub_short_factor[chn][sblock];
 +
                    new_thmm[sblock] = thmm;
 +
                }
 +
                for (sblock = 0; sblock < 3; sblock++) {
 +
                    psv->thm[chn].s[sb][sblock] = new_thmm[sblock];
 +
                }
 +
            }
 +
        }
 +
    }
 +
    for (chn = 0; chn < n_chn_psy; chn++) {
 +
        psv->last_attacks[chn] = ns_attacks[chn][2];
 +
    }
 +
    /***************************************************************
 +
    * determine final block type
 +
    ***************************************************************/
 +
    vbrpsy_apply_block_type(psv, cfg- >channels_out, uselongblock, blocktype_d);
 +
    /*********************************************************************
 +
    * compute the value of PE to  return ... no delay and advance
 +
    *********************************************************************/
 +
    for (chn = 0; chn < n_chn_psy;  chn++) {
 +
        FLOAT  *ppe;
 +
        int    type;
 +
        III_psy_ratio const *mr;
 +
        if (chn > 1) {
 +
            ppe = percep_MS_entropy - 2;
 +
            type = NORM_TYPE;
 +
            if (blocktype_d[0] ==  SHORT_TYPE || blocktype_d[1] == SHORT_TYPE)
 +
                type = SHORT_TYPE;
 +
            mr =  &masking_MS_ratio[gr_out][chn - 2];
 +
        }
 +
        else {
 +
            ppe = percep_entropy;
 +
            type = blocktype_d[chn];
 +
            mr = &masking_ratio[gr_out][chn];
 +
        }
 +
        if (type ==  SHORT_TYPE) {
 +
            ppe[chn] = pecalc_s(mr, gfc->sv_qnt.masking_lower);
 +
        }
 +
        else {
 +
            ppe[chn] = pecalc_l(mr, gfc->sv_qnt.masking_lower);
 +
        }
 +
        if (plt) {
 +
            plt->pe [gr_out][chn] = ppe[chn];
 +
        }
 +
    }
 +
    return 0;
 +
}

Revision as of 18:49, 12 April 2013

Team Members

  1. Alex D'Amico
  2. Barry Tulchinsky

Email All

Progress

Assignment 1

For the first assignment, we each profiled an open source library: LAME, an audio compression utility, and Squish, an image compression utility.

Alex - results for Lame

I have examined the LAME mp3 encoder to see if the process of encoding a wav file into an mp3 can be parallelized.

The source code below, taken from the psymodel.c file, could potentially be parallelized.

From the sample runs and the resulting profiles, as the size of the WAV file gets larger, the percentage of the run time spent in the most time-consuming function actually goes down. This might mean that it is not worth parallelizing. If my group chooses this project, we will have to examine this carefully.

 SOURCE CODE

/*
 * L3psycho_anal_vbr
 *
 * Runs the psychoacoustic analysis steps visible below for one call:
 *   1. attack detection and block-type selection,
 *   2. long-block FFT / loudness / masking per analysed channel,
 *   3. short-block FFT / masking for each of the 3 sub-blocks,
 *   4. short-block pre-echo control on psv->thm[chn].s,
 *   5. final block-type decision (vbrpsy_apply_block_type),
 *   6. perceptual-entropy computation into the output arrays.
 *
 * Parameters:
 *   gfc                encoder state; cfg, sv_psy, cd_psy, ATH, pinfo and
 *                      sv_qnt.masking_lower are read, sv_psy is updated.
 *   buffer             input samples, forwarded to the attack-detection and
 *                      FFT helpers.
 *   gr_out             index used for masking_ratio[gr_out][..],
 *                      masking_MS_ratio[gr_out][..] and plt->pe[gr_out][..]
 *                      (presumably the granule index -- confirm with caller).
 *   masking_ratio      forwarded to attack detection; entry [gr_out][chn] is
 *                      the input of the PE calculation for chn 0 and 1.
 *   masking_MS_ratio   same, entry [gr_out][chn - 2] for chn 2 and 3.
 *   percep_entropy     output: element chn is written for chn 0 and 1.
 *   percep_MS_entropy  output: element chn - 2 is written for chn 2 and 3.
 *   energy             forwarded to vbrpsy_attack_detection.
 *   blocktype_d        passed to vbrpsy_apply_block_type and read afterwards
 *                      to choose between pecalc_s and pecalc_l.
 *
 * Returns: always 0.
 */
int L3psycho_anal_vbr(lame_internal_flags * gfc, const sample_t * const buffer[2], int gr_out,
                      III_psy_ratio masking_ratio[2][2],
                      III_psy_ratio masking_MS_ratio[2][2],
                      FLOAT percep_entropy[2], FLOAT percep_MS_entropy[2],
                      FLOAT energy[4], int blocktype_d[2])
{
    SessionConfig_t const *const cfg = &gfc->cfg;
    PsyStateVar_t *const psv = &gfc->sv_psy;
    PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
    PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
    /* only non-null when cfg->analysis is set; receives a copy of the PE values */
    plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;

    /* snapshot of psv->thm taken before this call modifies it (pre-echo control) */
    III_psy_xmin last_thm[4];

    /* fft and energy calculation */
    FLOAT(*wsamp_l)[BLKSIZE];
    FLOAT(*wsamp_s)[3][BLKSIZE_s];
    FLOAT   fftenergy[HBLKSIZE];
    FLOAT   fftenergy_s[3][HBLKSIZE_s];
    FLOAT   wsamp_L[2][BLKSIZE];
    FLOAT   wsamp_S[2][3][BLKSIZE_s];
    FLOAT   eb[4][CBANDS], thr[4][CBANDS];

    FLOAT   sub_short_factor[4][3];
    FLOAT   thmm;
    FLOAT const pcfact = 0.6f;
    FLOAT const ath_factor =
        (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;

    /* const-qualified views of eb / fftenergy_s for helpers taking const arrays */
    const FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
    const FLOAT(*const_fftenergy_s)[HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;

    /* block type */
    int     ns_attacks[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
    int     uselongblock[2];

    /* usual variables like loop indices, etc.. */
    int     chn, sb, sblock;

    /* chn=2 and 3 = Mid and Side channels */
    int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : cfg->channels_out;

    memcpy(&last_thm[0], &psv->thm[0], sizeof(last_thm));

    vbrpsy_attack_detection(gfc, buffer, gr_out, masking_ratio, masking_MS_ratio, energy,
                            sub_short_factor, ns_attacks, uselongblock);

    vbrpsy_compute_block_type(cfg, uselongblock);

    /* LONG BLOCK CASE */
    {
        for (chn = 0; chn < n_chn_psy; chn++) {
            /* chn 2/3 reuse the work buffers of chn 0/1 */
            int const ch01 = chn & 0x01;

            wsamp_l = wsamp_L + ch01;
            vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l);
            vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy);
            vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn);
        }
        if (cfg->mode == JOINT_STEREO) {
            /* only when both channels use long blocks */
            if ((uselongblock[0] + uselongblock[1]) == 2) {
                vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l,
                                             ath_factor, cfg->msfix, gdl->npart);
            }
        }
        /* TODO: apply adaptive ATH masking here ?? */
        for (chn = 0; chn < n_chn_psy; chn++) {
            convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn);
            convert_partition2scalefac_l_to_s(gfc, eb[chn], thr[chn], chn);
        }
    }

    /* SHORT BLOCKS CASE */
    {
        int const force_short_block_calc = gfc->cd_psy->force_short_block_calc;

        for (sblock = 0; sblock < 3; sblock++) {
            for (chn = 0; chn < n_chn_psy; ++chn) {
                int const ch01 = chn & 0x01;

                if (uselongblock[ch01] && !force_short_block_calc) {
                    vbrpsy_skip_masking_s(gfc, chn, sblock);
                }
                else {
                    /* compute masking thresholds for short blocks */
                    wsamp_s = wsamp_S + ch01;
                    vbrpsy_compute_fft_s(gfc, buffer, chn, sblock, fftenergy_s, wsamp_s);
                    vbrpsy_compute_masking_s(gfc, const_fftenergy_s, eb[chn], thr[chn], chn,
                                             sblock);
                }
            }
            if (cfg->mode == JOINT_STEREO) {
                /* only when both channels use short blocks */
                if ((uselongblock[0] + uselongblock[1]) == 0) {
                    vbrpsy_compute_MS_thresholds(const_eb, thr, gds->mld_cb, gfc->ATH->cb_s,
                                                 ath_factor, cfg->msfix, gds->npart);
                }
            }
            /* TODO: apply adaptive ATH masking here ?? */
            for (chn = 0; chn < n_chn_psy; ++chn) {
                int const ch01 = chn & 0x01;

                if (!uselongblock[ch01] || force_short_block_calc) {
                    convert_partition2scalefac_s(gfc, eb[chn], thr[chn], chn, sblock);
                }
            }
        }

        /**** short block pre-echo control ****/
        for (chn = 0; chn < n_chn_psy; chn++) {
            for (sb = 0; sb < SBMAX_s; sb++) {
                FLOAT   new_thmm[3], prev_thm, t1, t2;

                for (sblock = 0; sblock < 3; sblock++) {
                    thmm = psv->thm[chn].s[sb][sblock];
                    thmm *= NS_PREECHO_ATT0;

                    t1 = t2 = thmm;

                    /* previous sub-block: from this call, or the last one of the snapshot */
                    if (sblock > 0) {
                        prev_thm = new_thmm[sblock - 1];
                    }
                    else {
                        prev_thm = last_thm[chn].s[sb][2];
                    }
                    if (ns_attacks[chn][sblock] >= 2 || ns_attacks[chn][sblock + 1] == 1) {
                        t1 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT1 * pcfact);
                    }
                    thmm = Min(t1, thmm);

                    if (ns_attacks[chn][sblock] == 1) {
                        t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
                    }
                    else if ((sblock == 0 && psv->last_attacks[chn] == 3)
                             || (sblock > 0 && ns_attacks[chn][sblock - 1] == 3)) {
                        /* 2nd preceding block */
                        switch (sblock) {
                        case 0:
                            prev_thm = last_thm[chn].s[sb][1];
                            break;
                        case 1:
                            prev_thm = last_thm[chn].s[sb][2];
                            break;
                        case 2:
                            prev_thm = new_thmm[0];
                            break;
                        default:
                            /* not reachable: sblock is always 0..2 here */
                            break;
                        }
                        t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
                    }

                    thmm = Min(t1, thmm);
                    thmm = Min(t2, thmm);

                    /* pulse like signal detection for fatboy.wav and so on */
                    thmm *= sub_short_factor[chn][sblock];

                    new_thmm[sblock] = thmm;
                }
                /* commit only after all 3 sub-blocks have been computed */
                for (sblock = 0; sblock < 3; sblock++) {
                    psv->thm[chn].s[sb][sblock] = new_thmm[sblock];
                }
            }
        }
    }

    /* remember the attack state of the last sub-block for the next call */
    for (chn = 0; chn < n_chn_psy; chn++) {
        psv->last_attacks[chn] = ns_attacks[chn][2];
    }

    /***************************************************************
     * determine final block type
     ***************************************************************/
    vbrpsy_apply_block_type(psv, cfg->channels_out, uselongblock, blocktype_d);

    /*********************************************************************
     * compute the value of PE to return ... no delay and advance
     *********************************************************************/
    for (chn = 0; chn < n_chn_psy; chn++) {
        FLOAT  *ppe;            /* destination slot for this channel's PE */
        int     type;
        III_psy_ratio const *mr;

        if (chn > 1) {
            /*
             * Point directly at element chn - 2 instead of forming
             * (percep_MS_entropy - 2), which would be a pointer before the
             * start of the array if the caller passes its first element.
             */
            ppe = &percep_MS_entropy[chn - 2];
            type = NORM_TYPE;
            if (blocktype_d[0] == SHORT_TYPE || blocktype_d[1] == SHORT_TYPE) {
                type = SHORT_TYPE;
            }
            mr = &masking_MS_ratio[gr_out][chn - 2];
        }
        else {
            ppe = &percep_entropy[chn];
            type = blocktype_d[chn];
            mr = &masking_ratio[gr_out][chn];
        }

        if (type == SHORT_TYPE) {
            *ppe = pecalc_s(mr, gfc->sv_qnt.masking_lower);
        }
        else {
            *ppe = pecalc_l(mr, gfc->sv_qnt.masking_lower);
        }

        if (plt) {
            plt->pe[gr_out][chn] = *ppe;
        }
    }
    return 0;
}