◀ Ballier, Pacquetet & Arnold | ▲ Proceedings |
ISBN: 978-2-9570549-0-9 e-ISBN: 978-2-9570549-1-6 Download (57.57 MB) | Vocalic and Consonantal Grapheme Classification through Spectral
Decomposition Patricia Thaine & Gerald Penn Download (352.36 kB) Abstract. We consider two related problems in this paper. Given an undeciphered alphabetic writing system or mono-alphabetic cipher, determine: (1) which of its letters correspond to vowels and which to consonants; and (2) whether the writing system is a vocalic alphabet or an abjad. We are able to show that a very simple spectral decomposition based on character co-occurrences provides nearly perfect performance with respect to answering both question types. DOI: https://doi.org/10.36824/2018-graf-thai
@COMMENT{VoynichManuscript repeats Gillogly2002Voynich without the URL note. Both keys are kept so that existing citations still resolve.}

@MISC{VoynichManuscript,
  AUTHOR = {Gillogly, Jim},
  TITLE  = {{Voynich Manuscript}},
  YEAR   = {2002},
}

@ARTICLE{berg12,
  AUTHOR  = {Berg, K.},
  TITLE   = {{Identifying Graphematic Units}},
  JOURNAL = {Written Language \& Literacy},
  YEAR    = {2012},
  VOLUME  = {15},
  NUMBER  = {1},
  PAGES   = {26--45},
}

@BOOK{adler2001cross,
  AUTHOR    = {Adler, Leonore Loeb and Gielen, Uwe Peter},
  TITLE     = {{Cross-Cultural Topics in Psychology}},
  PUBLISHER = {Greenwood Publishing Group},
  ADDRESS   = {Westport, CT},
  YEAR      = {2001},
}

@MISC{ager1omniglot,
  AUTHOR = {Ager, Simon},
  TITLE  = {{Writing direction index}},
  YEAR   = {2015},
  NOTE   = {\url{http://www.omniglot.com/writing/direction.htm} [Accessed: 2014-07-30]},
}

@BOOK{anderson1993writing,
  AUTHOR    = {Anderson, Lloyd B.},
  TITLE     = {{The Writing System of La Mojarra and Associated Monuments}},
  PUBLISHER = {Ecological Linguistics},
  YEAR      = {1993},
  VOLUME    = {1},
}

@INPROCEEDINGS{belkin2002using,
  AUTHOR    = {Belkin, Mikhail and Goldsmith, John},
  TITLE     = {{Using Eigenvectors of the Bigram Graph to Infer Morpheme Identity}},
  BOOKTITLE = {{Proceedings of the ACL-02 Workshop on Morphological and Phonological Learning---Volume 6}},
  YEAR      = {2002},
  PAGES     = {41--47},
}

@BOOK{chadwick1990decipherment,
  AUTHOR    = {Chadwick, John},
  TITLE     = {{The Decipherment of Linear B}},
  PUBLISHER = {Cambridge University Press},
  ADDRESS   = {Cambridge},
  YEAR      = {1990},
}

@MISC{Currier1976voynich,
  AUTHOR = {Currier, Captain Prescott H.},
  TITLE  = {{Papers on the Voynich Manuscript}},
  YEAR   = {2013},
  NOTE   = {\url{http://www.voynich.nu/extra/curr_pdfs.html} [Accessed: 2014-07-30]},
}

@BOOK{daniels1996world,
  AUTHOR    = {Daniels, Peter T. and Bright, William},
  TITLE     = {{The World's Writing Systems}},
  PUBLISHER = {Oxford University Press},
  ADDRESS   = {Oxford},
  YEAR      = {1996},
}

@ARTICLE{farmer2004collapse,
  AUTHOR  = {Farmer, Steve and Sproat, Richard and Witzel, Michael},
  TITLE   = {{The Collapse of the Indus-Script Thesis: The Myth of a Literate Harappan Civilization}},
  JOURNAL = {Electronic Journal of Vedic Studies},
  YEAR    = {2004},
  VOLUME  = {11},
  NUMBER  = {2},
  PAGES   = {19--57},
}

@MISC{Gillogly2002Voynich,
  AUTHOR = {Gillogly, Jim},
  TITLE  = {{Voynich manuscript}},
  YEAR   = {2002},
  NOTE   = {\url{http://www.ic.unicamp.br/~stolfi/voynich/mirror/gillogly/voynich.orig} [Accessed: 2014-07-30]},
}

@INPROCEEDINGS{goldsmith2004signatures,
  AUTHOR    = {Goldsmith, John and Hu, Yu},
  TITLE     = {{From Signatures to Finite State Automata}},
  BOOKTITLE = {{Midwest Computational Linguistics Colloquium. Bloomington, Indiana}},
  YEAR      = {2004},
}

@ARTICLE{goldsmith2001unsupervised,
  AUTHOR  = {Goldsmith, John},
  TITLE   = {{Unsupervised Learning of the Morphology of a Natural Language}},
  JOURNAL = {Computational Linguistics},
  YEAR    = {2001},
  VOLUME  = {27},
  NUMBER  = {2},
  PAGES   = {153--198},
}

@ARTICLE{guy1991statistical,
  AUTHOR  = {Guy, Jacques B. M.},
  TITLE   = {{Statistical Properties of Two Folios of the Voynich Manuscript}},
  JOURNAL = {Cryptologia},
  YEAR    = {1991},
  VOLUME  = {15},
  NUMBER  = {3},
  PAGES   = {207--218},
}

@ARTICLE{guy1991vowel,
  AUTHOR  = {Guy, Jacques B. M.},
  TITLE   = {{Vowel Identification: An Old (but Good) Algorithm}},
  JOURNAL = {Cryptologia},
  YEAR    = {1991},
  VOLUME  = {15},
  NUMBER  = {3},
  PAGES   = {258--262},
}

@ARTICLE{houston2003has,
  AUTHOR  = {Houston, Stephen D. and Coe, Michael D.},
  TITLE   = {{Has Isthmian Writing Been Deciphered?}},
  JOURNAL = {Mexicon},
  YEAR    = {2003},
  VOLUME  = {25},
  NUMBER  = {6},
  PAGES   = {151--161},
}

@INPROCEEDINGS{hu2005sed,
  AUTHOR    = {Hu, Yu and Matveeva, Irina and Goldsmith, John and Sprague, Colin},
  TITLE     = {{The SED Heuristic for Morpheme Discovery: A Look at Swahili}},
  BOOKTITLE = {{Proceedings of the Workshop on Psychocomputational Models of Human Language Acquisition}},
  YEAR      = {2005},
  PAGES     = {28--35},
}

@ARTICLE{justeson1993decipherment,
  AUTHOR  = {Justeson, John S. and Kaufman, Terrence},
  TITLE   = {{A Decipherment of Epi-Olmec Hieroglyphic Writing}},
  JOURNAL = {Science},
  YEAR    = {1993},
  VOLUME  = {259},
  NUMBER  = {5102},
  PAGES   = {1703--1711},
}

@INPROCEEDINGS{kim2013unsupervised,
  AUTHOR    = {Kim, Young-Bum and Snyder, Benjamin},
  TITLE     = {{Unsupervised Consonant-Vowel Prediction over Hundreds of Languages}},
  BOOKTITLE = {{Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics}},
  ADDRESS   = {Sofia, Bulgaria},
  YEAR      = {2013},
  PAGES     = {1527--1536},
}

@INPROCEEDINGS{kim2012universal,
  AUTHOR    = {Kim, Young-Bum and Snyder, Benjamin},
  TITLE     = {{Universal Grapheme-to-Phoneme Prediction over Latin Alphabets}},
  BOOKTITLE = {{Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning}},
  YEAR      = {2012},
  PAGES     = {332--343},
}

@INPROCEEDINGS{knight2011copiale,
  AUTHOR    = {Knight, Kevin and Megyesi, Beáta and Schaefer, Christiane},
  TITLE     = {{The Copiale Cipher}},
  BOOKTITLE = {{Proceedings of the 4th Workshop on Building and Using Comparable Corpora: Comparable Corpora and the Web}},
  YEAR      = {2011},
  PAGES     = {2--9},
}

@INPROCEEDINGS{knight2006unsupervised,
  AUTHOR    = {Knight, Kevin and Nair, Anish and Rathod, Nishit and Yamada, Kenji},
  TITLE     = {{Unsupervised Analysis for Decipherment Problems}},
  BOOKTITLE = {{Proceedings of the COLING/ACL Conference}},
  YEAR      = {2006},
  PAGES     = {499--506},
}
@INPROCEEDINGS{knight1999computational,
  AUTHOR    = {Knight, Kevin and Yamada, Kenji},
  TITLE     = {{A Computational Approach to Deciphering Unknown Scripts}},
  BOOKTITLE = {{ACL Workshop on Unsupervised Learning in Natural Language Processing}},
  YEAR      = {1999},
  PAGES     = {37--44},
}

@ARTICLE{kober1946inflection,
  AUTHOR  = {Kober, Alice E.},
  TITLE   = {{Inflection in Linear Class B: 1-Declension}},
  JOURNAL = {American Journal of Archaeology},
  YEAR    = {1946},
  VOLUME  = {50},
  NUMBER  = {2},
  PAGES   = {268--276},
}

@ARTICLE{de2006discovering,
  AUTHOR  = {Lin, Shou-de and Knight, Kevin},
  TITLE   = {{Discovering the Linear Writing Order of a Two-Dimensional Ancient Hieroglyphic Script}},
  JOURNAL = {Artificial Intelligence},
  YEAR    = {2006},
  VOLUME  = {170},
  NUMBER  = {4},
  PAGES   = {409--421},
}

@COMMENT{macri1996rongorongo is a chapter of the Daniels and Bright volume, so it is typed as a contribution to a collection and carries the book details itself.}

@INCOLLECTION{macri1996rongorongo,
  AUTHOR    = {Macri, Martha J.},
  TITLE     = {{RongoRongo of Easter Island}},
  BOOKTITLE = {{The World's Writing Systems}},
  EDITOR    = {Daniels, Peter T. and Bright, William},
  PUBLISHER = {Oxford University Press},
  ADDRESS   = {Oxford},
  YEAR      = {1996},
  PAGES     = {183--188},
}

@BOOK{melchert2003luwians,
  AUTHOR    = {Melchert, H. Craig},
  TITLE     = {{The Luwians}},
  PUBLISHER = {Brill},
  ADDRESS   = {Leiden},
  YEAR      = {2003},
}

@ARTICLE{moler1983singular,
  AUTHOR  = {Moler, Cleve and Morrison, Donald},
  TITLE   = {{Singular Value Analysis of Cryptograms}},
  JOURNAL = {American Mathematical Monthly},
  YEAR    = {1983},
  VOLUME  = {90},
  NUMBER  = {2},
  PAGES   = {78--87},
}

@INPROCEEDINGS{mukherjee2009discovering,
  AUTHOR    = {Mukherjee, Animesh and Choudhury, Monojit and Kannan, Ravi},
  TITLE     = {{Discovering Global Patterns in Linguistic Networks through Spectral Analysis: A Case Study of the Consonant Inventories}},
  BOOKTITLE = {{Proceedings of the 12th Conference of the European Chapter of the Association for Computational Linguistics}},
  YEAR      = {2009},
  PAGES     = {585--593},
}

@ARTICLE{mukherjee2007modeling,
  AUTHOR  = {Mukherjee, Animesh and Choudhury, Monojit and Basu, Anupam and Ganguly, Niloy},
  TITLE   = {{Modeling the Co-occurrence Principles of the Consonant Inventories: A Complex Network Approach}},
  JOURNAL = {International Journal of Modern Physics C},
  YEAR    = {2007},
  VOLUME  = {18},
  NUMBER  = {2},
  PAGES   = {281--295},
}

@BOOK{ohaver33,
  AUTHOR    = {Ohaver, Merle E.},
  TITLE     = {{Cryptogram Solving}},
  PUBLISHER = {Etcetera Press},
  ADDRESS   = {Columbus, OH},
  YEAR      = {1933},
}

@INPROCEEDINGS{penn2006quantitative,
  AUTHOR    = {Penn, Gerald and Choma, Travis},
  TITLE     = {{Quantitative Methods for Classifying Writing Systems}},
  BOOKTITLE = {{Proceedings of the Human Language Technology Conference of the NAACL, Companion Volume: Short Papers}},
  YEAR      = {2006},
  PAGES     = {117--120},
}

@ARTICLE{rao2010probabilistic,
  AUTHOR  = {Rao, Rajesh P. N.},
  TITLE   = {{Probabilistic Analysis of an Ancient Undeciphered Script}},
  JOURNAL = {IEEE Computer},
  YEAR    = {2010},
  VOLUME  = {43},
  NUMBER  = {4},
  PAGES   = {76--80},
}

@ARTICLE{rao2009markov,
  AUTHOR  = {Rao, Rajesh P. N. and Yadav, Nisha and Vahia, Mayank N. and Joglekar, Hrishikesh and Adhikari, R. and Mahadevan, Iravatham},
  TITLE   = {{A Markov Model of the Indus Script}},
  JOURNAL = {Proceedings of the National Academy of Sciences},
  YEAR    = {2009},
  VOLUME  = {106},
  NUMBER  = {33},
  PAGES   = {13685--13690},
}

@INPROCEEDINGS{ravi2011bayesian,
  AUTHOR    = {Ravi, Sujith and Knight, Kevin},
  TITLE     = {{Bayesian Inference for Zodiac and Other Homophonic Ciphers}},
  BOOKTITLE = {{Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies---Volume 1}},
  YEAR      = {2011},
  PAGES     = {239--247},
}

@INPROCEEDINGS{reddy2011we,
  AUTHOR    = {Reddy, Sravana and Knight, Kevin},
  TITLE     = {{What We Know about the Voynich Manuscript}},
  BOOKTITLE = {{Proceedings of the 5th ACL-HLT Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities}},
  ADDRESS   = {Portland, OR},
  YEAR      = {2011},
  PAGES     = {78--86},
}

@BOOK{robinson2002lost,
  AUTHOR    = {Robinson, Andrew},
  TITLE     = {{Lost Languages: The Enigma of the World's Undeciphered Scripts}},
  PUBLISHER = {McGraw-Hill},
  ADDRESS   = {New York},
  YEAR      = {2002},
}

@BOOK{saussure1966course,
  AUTHOR     = {Saussure, Ferdinand de},
  EDITOR     = {Bally, Charles and Sechehaye, Albert and Riedlinger, Albert},
  TRANSLATOR = {Baskin, Wade},
  TITLE      = {{Course in General Linguistics}},
  PUBLISHER  = {McGraw-Hill},
  ADDRESS    = {New York},
  YEAR       = {1966},
}

@INPROCEEDINGS{snyder2010statistical,
  AUTHOR    = {Snyder, Benjamin and Barzilay, Regina and Knight, Kevin},
  TITLE     = {{A Statistical Model for Lost Language Decipherment}},
  BOOKTITLE = {{Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics}},
  YEAR      = {2010},
  PAGES     = {1048--1057},
}

@BOOK{sproat2000computational,
  AUTHOR    = {Sproat, Richard William},
  TITLE     = {{A Computational Theory of Writing Systems}},
  PUBLISHER = {MIT Press},
  ADDRESS   = {Cambridge, MA},
  YEAR      = {2000},
}

@ARTICLE{sproat2010ancient,
  AUTHOR  = {Sproat, Richard},
  TITLE   = {{Ancient Symbols, Computational Linguistics, and the Reviewing Practices of the General Science Journals}},
  JOURNAL = {Computational Linguistics},
  YEAR    = {2010},
  VOLUME  = {36},
  NUMBER  = {3},
  PAGES   = {585--594},
}

@BOOK{Strang,
  AUTHOR    = {Strang, Gilbert},
  TITLE     = {{Linear Algebra and Its Applications}},
  EDITION   = {4},
  PUBLISHER = {Brooks/Cole Publishing Company},
  ADDRESS   = {Pacific Grove, CA},
  YEAR      = {2005},
}

@ARTICLE{stubbs-barth03,
  AUTHOR  = {Stubbs, Michael and Barth, Isabel},
  TITLE   = {{Using Recurrent Phrases as Text-Type Discriminators: A Quantitative Method and Some Findings}},
  JOURNAL = {Functions of Language},
  YEAR    = {2003},
  VOLUME  = {10},
  NUMBER  = {1},
  PAGES   = {61--104},
}

@COMMENT{NOTE(review) sukhotin62: PAGES 198--106 is a descending range and cannot be correct as written. It is left unchanged because the right value cannot be determined from this file (possibly 189--206 or 198--206); verify against the source before correcting.}

@ARTICLE{sukhotin62,
  AUTHOR          = {Sukhotin, B. V.},
  AUTHOR_ORIGINAL = {Сухотин, Б.В.},
  AUTHOR+AN       = {1=ru-Cyrl},
  TITLE           = {{Экспериментальное выделение классов букв с помощью электронной вычислительной ма\-ши\-ны [Experimental Selection of Letter Classes with the Help of Electronic Digital Machines]}},
  JOURNAL         = {Проблемы структурной лингвистики [Problems of Structural Linguistics]},
  YEAR            = {1962},
  VOLUME          = {234},
  PAGES           = {198--106},
}

@MISC{OED,
  AUTHOR = {Weiner, Edmund},
  TITLE  = {{Early Modern English Pronunciation and Spelling}},
  YEAR   = {2013},
  NOTE   = {\url{http://public.oed.com/aspects-of-english/english-in-time/early-modern-english-pronunciation-and-spelling/} [Accessed: 2014-07-29]},
}

@INPROCEEDINGS{wu2012corpora,
  AUTHOR    = {Wu, Katherine and Solman, Jennifer and Linehan, Ruth and Sproat, Richard},
  TITLE     = {{Corpora of Non-linguistic Symbol Systems}},
  BOOKTITLE = {{LSA Annual Meeting Extended Abstracts}},
  YEAR      = {2012},
}

@ARTICLE{goldsmith-xanthos09,
  AUTHOR  = {Goldsmith, John and Xanthos, Aris},
  TITLE   = {{Learning Phonological Categories}},
  JOURNAL = {Language},
  YEAR    = {2009},
  VOLUME  = {85},
  NUMBER  = {1},
  PAGES   = {4--38},
}

Patricia Thaine & Gerald Penn (2019), Vocalic and Consonantal Grapheme Classification through Spectral Decomposition, in Proceedings of Graphemics in the 21st Century, Brest 2018 (Yannis Haralambous, Ed.), Brest: Fluxus Editions, 367–386
@COMMENT{gla1-thai: DOI holds the bare identifier only. Styles prepend the resolver themselves, so a stored https://doi.org/ prefix would be doubled in the rendered link.}

@INPROCEEDINGS{gla1-thai,
  AUTHOR    = {Thaine, Patricia and Penn, Gerald},
  EDITOR    = {Haralambous, Yannis},
  TITLE     = {{Vocalic and Consonantal Grapheme Classification through Spectral Decomposition}},
  BOOKTITLE = {{Proceedings of Graphemics in the 21st Century, Brest 2018}},
  PUBLISHER = {Fluxus Editions},
  ADDRESS   = {Brest},
  YEAR      = {2019},
  PAGES     = {367--386},
  DOI       = {10.36824/2018-graf-thai},
}
|