0 1 23
play

What is Quality Estimation (QE)? - PDF document

Marco Turchi turchi@fbk.eu Statistical Machine Translation - ICT Doctoral School 19 March 2013 Quality Estimation Turchi, FBK 1 What is Quality Estimation (QE)? A bit of history


  1. Marco Turchi turchi@fbk.eu Statistical Machine Translation - ICT Doctoral School – 19 March 2013 Quality Estimation – Turchi, FBK 1 • What is Quality Estimation (QE)? – A bit of history – Quality Indicators – Quality Scores – Learning Algorithms – Dataset – Evaluation Metrics – Open Issues • Quality Estimation at: – Word Level – Document Level • How can we use QE in real applications? Quality Estimation – Turchi, FBK 2

  2. • Automatic Evaluation of MT output Translated SMT Test Set Source Lang. Test Set Quality Estimation – Turchi, FBK 3 • … more difficult if there are more SMT systems!!! SMT SMT SMT Test Sets SMT SMT Source Lang. Test Set Quality Estimation – Turchi, FBK 4

  3. • Human presence Translated SMT Test Set • … but it is: • subjective Source • time consuming Lang. Test Set • difficult • not replicable Quality Estimation – Turchi, FBK 5 Translated SMT Test Set BLEU WER Source Reference TER Lang. Score Test Set Meteor Test Set NIST … Test Set • Reference Test set: human translation of the source language test set into the target language. Quality Estimation – Turchi, FBK 6

  4. • Large amount of: – parallel test sets in different languages: • Workshop of Machine Translation (WMT) 2006, …, 2013, Europarl, Acquis Communitaire, NIST campaign, … – Automatic Scoring Methods (Bleu, TER, WER, Meteor): • low cost (wrt human evaluation) • objective (unbiased) • informative (for system developers): to profile system behavior • discriminative: to tell if and where improvements are • effective and replicable: to be computed quickly and often Quality Estimation – Turchi, FBK 7 • Perfect setting in the controlled scenario (lab tests, evaluation campaigns, …) where it is required to compare several systems on a given test set. • but … ? Quality Estimation – Turchi, FBK 8

  5. Source Translated SMT Sentence Sentence • In real applications, several questions: – Is the translated text good enough to be published? – Can we trust it? – Can a reader get the gist? – Is it worth post-editing it? • Quality Estimation (QE) can help us to find the right answers!! Quality Estimation – Turchi, FBK 9 • Quality Estimation (QE): – metrics that provide an estimate on the quality of unseen translated texts without the reference translations – a supervised learning task, where a machine tries to model how humans judge translated texts (learning from human labeled data) Quality Estimation – Turchi, FBK 10

  6. Translated Labeled SMT Test Set Data Source QE Quality Lang. Prediction* Estimator Test Set * score at sentence level Quality Estimation – Turchi, FBK 11 • Automatic Evaluation: – require parallel test sets – fundamental in controlled experiments – compare different system performances • Quality Estimation: – does not require parallel data, but labeled data – application oriented • Both very complex tasks Quality Estimation – Turchi, FBK 12

  7. • Example 1: • Source: – “In questa situazione, è possibile creare un salvataggio; al termine, il salvataggio è indipendente dal volume di produzione.” • Target: – “In this situation, you can create a backup, after the backup may be independent of the volume of production.” • Can a reader get the gist? • Is the translated text good enough to be published? Quality Estimation – Turchi, FBK 13 • Example 2: • Source: – “In questa situazione, è possibile creare un salvataggio; al termine, il salvataggio è indipendente dal volume di produzione.” • Target: – “In this situation, you create a salvataggio and after it completes, the salvataggio is independent of the production volume.” • Can a reader get the gist? • Is the translated text good enough to be published? Quality Estimation – Turchi, FBK 14

  8. • Example 3: • Source: – “In questa situazione, è possibile creare un salvataggio; al termine, il salvataggio è indipendente dal volume di produzione.” • Target: – “In this situation, you can create a backup, after the backup may be independent of the volume of production.” -1(Bad) 1 (Good) Quality Estimation – Turchi, FBK 15 • Example 4: • Source: – “In questa situazione, è possibile creare un salvataggio; al termine, il salvataggio è indipendente dal volume di produzione.” • Target: – “In this situation, you can create a backup, after the backup may be independent of the volume of production.” 1(Bad) 2(Fair) 3(Partially Good) 4(Good) Quality Estimation – Turchi, FBK 16

  9. • ;/)7:<D% – a80%Y)<'$#%'&$)#b&/0<V%c%C/''&5&*<%:7<#7<%)0% '#*@#$#>>&/d%#*%$<7.&0<V%&*%'#*@#$#>>&/%c%&09&C<09<0$<% 9#*%@/*).<%9&%C7/9)b&/0<He% • G#7><$D% – a80%$"&'%'&$)#-/0V%+/)%:#0%:7<#$<%#%5#:R)CV%#\<7%$"<% 5#:R)C%.#+%5<%&09<C<09<0$%/6%$"<%@/*).<%/6% C7/9):-/0Hf% • ]/'$g,9&$%;<0$<0:<D% – a80%$"&'%'&$)#-/0V%+/)%:7<#$<%#%5#:R)C%#09%#\<7%&$% :/.C*<$<'V%$"<%5#:R)C%&'%&09<C<09<0$%/6%$"<% C7/9):-/0%@/*).<Hf% Quality Estimation – Turchi, FBK 17 • !"#$%:#0%(,%C7<9&:$3% – 7<>7<''&/0%C7/5*<.D%#%7<#*%0).5<7%Y)#*&$+%':/7<%1<H>H% hTHHUi2% – :*#''&[:#-/0%C7/5*<.D%#%:#$<>/7+%*#5<*%1<H>H%5#9%/7%>//92 % – 7#0R&0>%C7/5*<.D%$"<%7#0R%/6%#%'<$%/6%'<0$<0:<'%1<H>H% ' U V' X V' j V%I2% • !"<7<%:#0%(,%5<%)'<93D% – '<0$<0:<%*<@<*% – F/79%*<@<*% – 9/:).<0$%*<@<*%%% Quality Estimation – Turchi, FBK 18

  10. • What is Quality Estimation (QE)? – A bit of history – Quality Indicators – Quality Scores – Learning Algorithms – Dataset – Evaluation Metrics – Open Issues • Quality Estimation at: – Word Level – Document Level • How can we use QE in real applications? Quality Estimation – Turchi, FBK 19 • Quality Estimation is an old/new topic. • JHU Workshop 2003 [JHU,2003]: – Predict BLEU, TER and WER – Bad MT output → hard to beat the baselines – No real use in applications Quality Estimation – Turchi, FBK 20

  11. • Specia et al, 2009 [Spe,2009]: – Better MT output – More attention in the private sector – Predict Post-Editing (PE) Effort • human effort to transform the MT output into a good text (Post-editing time, human scores, % errors to fix) – First dataset freely available Quality Estimation – Turchi, FBK 21 • Specia et al, 2010 [Spe,2010]: – New interpretation of QE: – Predict the MT evaluation: – Select the best translation from multiple MT-systems – Filtering out bad translations – Regression vs Binary Classification vs Multi-label Classification Quality Estimation – Turchi, FBK 22

  12. • Workshop on MT, 2012, shared task Quality Estimation [Cal,12] – Training/test data with human annotation (1..5) – 11 participants – Scoring and ranking tasks – New quality indicators (features) – Tested a large number of machine learning techniques • Workshop on MT, 2013, shared task Quality Estimation… – coming soon Quality Estimation – Turchi, FBK 23 • What is Quality Estimation (QE)? – A bit of history – Quality Indicators – Quality Scores – Learning Algorithms – Dataset – Evaluation Metrics – Open Issues • Quality Estimation at: – Word Level – Document Level • How can we use QE in real applications? Quality Estimation – Turchi, FBK 24

Download Presentation
Download Policy: The content available on the website is offered to you 'AS IS' for your personal information and use only. It cannot be commercialized, licensed, or distributed on other websites without prior consent from the author. To download a presentation, simply click this link. If you encounter any difficulties during the download process, it's possible that the publisher has removed the file from their server.

Recommend


More recommend