https://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&feed=atom&action=historyTraining the C&C Parser - Revision history2024-03-29T06:50:34ZRevision history for this page on the wikiMediaWiki 1.35.2https://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=11032&oldid=prevKEvang at 12:27, 21 April 20152015-04-21T12:27:49Z<p></p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 12:27, 21 April 2015</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l93" >Line 93:</td>
<td colspan="2" class="diff-lineno">Line 93:</td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Evaluate the parser model:</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Evaluate the parser model:</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> $SCRIPTS/cl07_table7 working/</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> $SCRIPTS/cl07_table7 working/</div></td></tr>
<tr><td colspan="2"> </td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div><ins style="font-weight: bold; text-decoration: none;"></ins></div></td></tr>
<tr><td colspan="2"> </td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div><ins style="font-weight: bold; text-decoration: none;">== References ==</ins></div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=11031&oldid=prevKEvang at 12:27, 21 April 20152015-04-21T12:27:34Z<p></p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 12:27, 21 April 2015</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l63" >Line 63:</td>
<td colspan="2" class="diff-lineno">Line 63:</td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> tar jxf mrmpi-22Apr09.tbz2</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> tar jxf mrmpi-22Apr09.tbz2</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cd mrmpi-22Apr09/src</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cd mrmpi-22Apr09/src</div></td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div> make -f Makefile.<del class="diffchange diffchange-inline">linux </del>clean</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div> make -f Makefile.<ins class="diffchange diffchange-inline">unix </ins>clean</div></td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div> make -f Makefile.<del class="diffchange diffchange-inline">linux</del></div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div> make -f Makefile.<ins class="diffchange diffchange-inline">unix</ins></div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cp *.h $EXT/include</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cp *.h $EXT/include</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cp libmrmpi.a $EXT/lib</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cp libmrmpi.a $EXT/lib</div></td></tr>
<tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l70" >Line 70:</td>
<td colspan="2" class="diff-lineno">Line 70:</td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Build C&C</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Build C&C</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cd $CANDC</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> cd $CANDC</div></td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div> make -f Makefile.<del class="diffchange diffchange-inline">linux </del>all train bin/generate</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div> make -f Makefile.<ins class="diffchange diffchange-inline">unix </ins>all train bin/generate</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> </div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> </div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Create data</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Create data</div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=10245&oldid=prevKEvang at 10:55, 10 September 20132013-09-10T10:55:08Z<p></p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 10:55, 10 September 2013</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l1" >Line 1:</td>
<td colspan="2" class="diff-lineno">Line 1:</td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (CCG). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine (which should have multiple cores and at least around 40 GB of main memory). The steps to take on other recent Linux distributions should be very similar.</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (CCG). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. 
Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine (which should have multiple cores and at least around 40 GB of main memory). The steps to take on other recent Linux distributions should be very similar.</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim <del class="diffchange diffchange-inline">Dawborn, </del>Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]] <ins class="diffchange diffchange-inline">based on instructions from Tim Dawborn</ins>; thanks are due to Tim <ins class="diffchange diffchange-inline">and also to </ins>Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Customize these variables:</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Customize these variables:</div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=10244&oldid=prevKEvang at 10:54, 10 September 20132013-09-10T10:54:11Z<p></p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 10:54, 10 September 2013</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l1" >Line 1:</td>
<td colspan="2" class="diff-lineno">Line 1:</td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (CCG). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine. The steps to take on other recent Linux distributions should be very similar.</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (CCG). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. 
The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine <ins class="diffchange diffchange-inline">(which should have multiple cores and at least around 40 GB of main memory)</ins>. The steps to take on other recent Linux distributions should be very similar.</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=10243&oldid=prevKEvang: typo2013-09-10T10:46:51Z<p>typo</p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 10:46, 10 September 2013</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l1" >Line 1:</td>
<td colspan="2" class="diff-lineno">Line 1:</td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (<del class="diffchange diffchange-inline">C&C</del>). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine. The steps to take on other recent Linux distributions should be very similar.</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (<ins class="diffchange diffchange-inline">CCG</ins>). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. 
Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine. The steps to take on other recent Linux distributions should be very similar.</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=10242&oldid=prevKEvang at 15:45, 9 September 20132013-09-09T15:45:16Z<p></p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 15:45, 9 September 2013</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l5" >Line 5:</td>
<td colspan="2" class="diff-lineno">Line 5:</td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Customize these variables:</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Customize these variables:</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export CANDC_PREFIX=$HOME</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export CANDC_PREFIX=$HOME</div></td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div> export CCGBANK=$HOME/data/CCGbank1.2 <del class="diffchange diffchange-inline"># </del></div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div> export CCGBANK=$HOME/data/CCGbank1.2</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export TMPDIR=$HOME/tmp # the default /tmp is often on a tiny filesystem</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export TMPDIR=$HOME/tmp # the default /tmp is often on a tiny filesystem</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export NUMNODES=32</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export NUMNODES=32</div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=10241&oldid=prevKEvang at 15:44, 9 September 20132013-09-09T15:44:28Z<p></p>
<table class="diff diff-contentalign-left diff-editfont-monospace" data-mw="interface">
<col class="diff-marker" />
<col class="diff-content" />
<col class="diff-marker" />
<col class="diff-content" />
<tr class="diff-title" lang="en">
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">← Older revision</td>
<td colspan="2" style="background-color: #fff; color: #202122; text-align: center;">Revision as of 15:44, 9 September 2013</td>
</tr><tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l1" >Line 1:</td>
<td colspan="2" class="diff-lineno">Line 1:</td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (C&C). It is quite easy to use with pre-trained models, but creating one's own models is a slightly <del class="diffchange diffchange-inline">differnt stories</del>. Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are <del class="diffchange diffchange-inline">terse but </del>detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine. The steps to take on other recent Linux distributions should be very similar.</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (C&C). It is quite easy to use with pre-trained models, but creating one's own models is a slightly <ins class="diffchange diffchange-inline">different story</ins>. 
Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark&Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine. The steps to take on other recent Linux distributions should be very similar.</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div>Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.</div></td></tr>
<tr><td colspan="2" class="diff-lineno" id="mw-diff-left-l5" >Line 5:</td>
<td colspan="2" class="diff-lineno">Line 5:</td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Customize these variables:</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> # Customize these variables:</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export CANDC_PREFIX=$HOME</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export CANDC_PREFIX=$HOME</div></td></tr>
<tr><td class='diff-marker'>−</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;"><div> export CCGBANK=$HOME/data/CCGbank1.2</div></td><td class='diff-marker'>+</td><td style="color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;"><div> export CCGBANK=$HOME/data/CCGbank1.2 <ins class="diffchange diffchange-inline"># </ins></div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export TMPDIR=$HOME/tmp # the default /tmp is often on a tiny filesystem</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export TMPDIR=$HOME/tmp # the default /tmp is often on a tiny filesystem</div></td></tr>
<tr><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export NUMNODES=32</div></td><td class='diff-marker'> </td><td style="background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;"><div> export NUMNODES=32</div></td></tr>
</table>KEvanghttps://aclweb.org/aclwiki/index.php?title=Training_the_C%26C_Parser&diff=10240&oldid=prevKEvang: Created page with "The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (C&C). It is quite easy to us..."2013-09-09T15:39:12Z<p>Created page with "The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (C&C). It is quite easy to us..."</p>
<p><b>New page</b></p><div>The [http://svn.ask.it.usyd.edu.au/trac/candc C&C Parser] is an advanced statistical parser using the framework of Combinatory Categorial Grammar (CCG). It is quite easy to use with pre-trained models, but creating one's own models is a slightly different story. Although the software is distributed with a wealth of scripts that should make training easy, differences between systems and dependencies on various libraries make the task of getting the training code to work a bit daunting. The following are terse but detailed step-by-step instructions to replicate the (almost) exact figures reported in Clark & Curran (2007)<ref>Stephen Clark and James Curran (2007): Wide-Coverage Efficient Statistical Parsing with CCG and Log-Linear Models. In <i>Computational Linguistics 33(4)</i>, http://aclweb.org/anthology-new/J/J07/J07-4004.pdf</ref> on a single '''64-bit Ubuntu 12.04''' machine. The steps to take on other recent Linux distributions should be very similar.<br />
<br />
Please extend the instructions with more detail, helpful hints and notes on other operating systems! They were initially written up by [[User:KEvang|Kilian Evang]]; thanks are due to Tim Dawborn, Stephen Clark and James Curran for advice without which I would probably never have gotten it to run.<br />
<br />
# Customize these variables:<br />
export CANDC_PREFIX=$HOME<br />
export CCGBANK=$HOME/data/CCGbank1.2<br />
export TMPDIR=$HOME/tmp # the default /tmp is often on a tiny filesystem<br />
export NUMNODES=32<br />
export LIB=/usr/lib<br />
<br />
# Some variables for use below:<br />
export CANDC=$CANDC_PREFIX/candc<br />
export SCRIPTS=$CANDC/src/scripts/ccg<br />
export EXT=$CANDC/ext<br />
<br />
# Package dependencies:<br />
sudo apt-get install g++ gawk libibumad-dev mpich2 subversion<br />
<br />
# Check out the C&C tools.<br />
# You need credentials for that, see<br />
# http://svn.ask.it.usyd.edu.au/trac/candc/wiki/Subversion<br />
cd $CANDC_PREFIX<br />
svn checkout http://svn.ask.it.usyd.edu.au/candc/trunk candc -r 2400<br />
<br />
# Some patches to fix various problems with the scripts provided:<br />
<br />
# Use a temp directory different from /tmp since that often doesn't have enough<br />
# space:<br />
sed -i -e "s|/tmp|$TMPDIR|" $SCRIPTS/*_model_*<br />
<br />
# Replace /bin/env by /usr/bin/env<br />
sed -i -e "s|/bin/env|/usr/bin/env|" $SCRIPTS/lexicon_features \<br />
$SCRIPTS/count_features<br />
<br />
# Work around non-portable sed -f shebang<br />
sed -i -e 's|$SCRIPTS/convert_brackets|sed -f $SCRIPTS/convert_brackets|g' \<br />
$SCRIPTS/create_data<br />
<br />
# TODO patches to make the scripts work with the LDC version of CCGbank should<br />
# go here.<br />
<br />
# Make ext directory<br />
mkdir $EXT<br />
<br />
# Install Boost library (Ubuntu doesn't seem to have a version that is compiled<br />
# against MPICH2).<br />
echo 'using mpi ;' > ~/user-config.jam # Boost's build script won't build MPI<br />
# library without this for some reason<br />
mkdir $EXT/install<br />
cd $EXT/install<br />
wget https://dl.dropboxusercontent.com/u/5358991/boost_1_53_0.tar.gz # or<br />
# get it from Sourceforge<br />
tar -xzf boost_1_53_0.tar.gz<br />
cd boost_1_53_0<br />
./bootstrap.sh --with-libraries=mpi --prefix=$EXT<br />
./b2 install<br />
<br />
 # Install the ancient version of MR-MPI that C&C depends on<br />
cd $EXT/install<br />
wget http://sydney.edu.au/it/~tdaw3088/misc/mrmpi-22Apr09.tbz2 # If this link is<br />
# dead, try http://dl.dropbox.com/u/5358991/mrmpi-22Apr09.tbz2<br />
tar jxf mrmpi-22Apr09.tbz2<br />
cd mrmpi-22Apr09/src<br />
make -f Makefile.linux clean<br />
make -f Makefile.linux<br />
cp *.h $EXT/include<br />
cp libmrmpi.a $EXT/lib<br />
<br />
# Build C&C<br />
cd $CANDC<br />
make -f Makefile.linux all train bin/generate<br />
<br />
# Create data<br />
 # Will only work with CCGbank 1.2 for now, not with the LDC version of CCGbank<br />
$SCRIPTS/create_data $CCGBANK $NUMNODES working/<br />
<br />
# Train the POS tagger and Supertagger:<br />
$SCRIPTS/train_taggers working/<br />
<br />
# Evaluate the supertagger model to ensure its results are sane:<br />
$SCRIPTS/cl07_table4 working/<br />
<br />
# Create the model_hybrid directory and empty config file:<br />
mkdir working/model_hybrid<br />
touch working/model_hybrid/config<br />
<br />
# Train a hybrid model:<br />
export LD_LIBRARY_PATH=$EXT/lib:$LIB<br />
$SCRIPTS/create_model_hybrid `pwd` $NUMNODES working/<br />
$SCRIPTS/train_model_hybrid `pwd` $NUMNODES working/<br />
<br />
# Evaluate the parser model:<br />
$SCRIPTS/cl07_table7 working/</div>KEvang