ISSN: 2534-5192 (electronic) – 2681-8566 (print)| ◀ Ballier, Pacquetet & Arnold | ▲ Proceedings |
ISBN: 978-2-9570549-0-9 | e-ISBN: 978-2-9570549-1-6
Title: Vocalic and Consonantal Grapheme Classification through Spectral Decomposition
Authors: Patricia Thaine & Gerald Penn
Abstract. We consider two related problems in this paper. Given an undeciphered alphabetic writing system or mono-alphabetic cipher, determine: (1) which of its letters correspond to vowels and which to consonants; and (2) whether the writing system is a vocalic alphabet or an abjad. We are able to show that a very simple spectral decomposition based on character co-occurrences provides nearly perfect performance with respect to answering both question types.
DOI: https://doi.org/10.36824/2018-graf-thai
% Gillogly, "Voynich Manuscript" (2002). Same author, title and year as the
% Gillogly2002Voynich entry; this key is kept so existing citations still
% resolve, and the locator from that entry is repeated here so the reference
% is not a bare stub. NOTE(review): presumably the same source -- confirm.
@misc{VoynichManuscript,
  author = {Gillogly, Jim},
  title  = {{Voynich Manuscript}},
  year   = {2002},
  note   = {\url{http://www.ic.unicamp.br/~stolfi/voynich/mirror/gillogly/voynich.orig}
[Accessed: 2014-07-30]},
}
% Journal article: Berg (2012), Written Language and Literacy 15(1).
@article{berg12,
  author  = {Berg, K.},
  title   = {{Identifying Graphematic Units}},
  journal = {Written Language \& Literacy},
  year    = {2012},
  volume  = {15},
  number  = {1},
  pages   = {26--45},
}
% Book: Adler and Gielen (2001), Greenwood Publishing Group.
% Westport is in Connecticut, so the state abbreviation is CT (CO is Colorado).
@book{adler2001cross,
  author    = {Adler, Leonore Loeb and Gielen, Uwe Peter},
  title     = {{Cross-Cultural Topics in Psychology}},
  publisher = {Greenwood Publishing Group},
  address   = {Westport, CT},
  year      = {2001},
}
% Web page: Omniglot writing-direction index.
% The author is written in "Last, First" form with a space after the comma,
% like every other name in this file.
% NOTE(review): the year (2015) is later than the access date (2014-07-30)
% recorded in the note; one of the two is presumably wrong -- confirm.
@misc{ager1omniglot,
  author = {Ager, Simon},
  title  = {{Writing direction index}},
  year   = {2015},
  note   = {\url{http://www.omniglot.com/writing/direction.htm} [Accessed: 2014-07-30]},
}
% Book: Anderson (1993), volume 1, Ecological Linguistics.
@book{anderson1993writing,
  author    = {Anderson, Lloyd B.},
  title     = {{The Writing System of La Mojarra and Associated Monuments}},
  publisher = {Ecological Linguistics},
  year      = {1993},
  volume    = {1},
}
% Workshop paper: Belkin and Goldsmith (2002), ACL-02 workshop proceedings.
@inproceedings{belkin2002using,
  author    = {Belkin, Mikhail and Goldsmith, John},
  title     = {{Using Eigenvectors of the Bigram Graph to Infer Morpheme Identity}},
  booktitle = {{Proceedings of the ACL-02 Workshop on Morphological and Phonological
Learning---Volume 6}},
  year      = {2002},
  pages     = {41--47},
}
% Book: Chadwick (1990), Cambridge University Press.
@book{chadwick1990decipherment,
  author    = {Chadwick, John},
  title     = {{The Decipherment of Linear B}},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {1990},
}
% Web-hosted papers by Currier on the Voynich Manuscript.
% The rank "Captain" is not part of the name: left in the field it is parsed
% as a given name and abbreviated to "C." by initial-abbreviating styles.
% NOTE(review): the key says 1976 but the year field says 2013 -- confirm
% which date the citation should carry.
@misc{Currier1976voynich,
  author = {Currier, Prescott H.},
  title  = {{Papers on the Voynich Manuscript}},
  year   = {2013},
  note   = {\url{http://www.voynich.nu/extra/curr_pdfs.html} [Accessed: 2014-07-30]},
}
% Book: Daniels and Bright (1996), Oxford University Press.
@book{daniels1996world,
  author    = {Daniels, Peter T. and Bright, William},
  title     = {{The World's Writing Systems}},
  publisher = {Oxford University Press},
  address   = {Oxford},
  year      = {1996},
}
% Journal article: Farmer, Sproat and Witzel (2004), EJVS 11(2).
@article{farmer2004collapse,
  author  = {Farmer, Steve and Sproat, Richard and Witzel, Michael},
  title   = {{The Collapse of the Indus-Script Thesis: The Myth of a Literate Harappan
Civilization}},
  journal = {Electronic Journal of Vedic Studies},
  year    = {2004},
  volume  = {11},
  number  = {2},
  pages   = {19--57},
}
% Online resource: Gillogly (2002); URL and access date are kept in the note.
@misc{Gillogly2002Voynich,
  author = {Gillogly, Jim},
  title  = {{Voynich manuscript}},
  year   = {2002},
  note   = {\url{http://www.ic.unicamp.br/~stolfi/voynich/mirror/gillogly/voynich.orig}
[Accessed: 2014-07-30]},
}
% Colloquium paper: Goldsmith and Hu (2004); no page range is recorded.
@inproceedings{goldsmith2004signatures,
  author    = {Goldsmith, John and Hu, Yu},
  title     = {{From Signatures to Finite State Automata}},
  booktitle = {{Midwest Computational Linguistics Colloquium. Bloomington, Indiana}},
  year      = {2004},
}
% Journal article: Goldsmith (2001), Computational Linguistics 27(2).
% The journal name is capitalised exactly as in the sproat2010ancient entry so
% the same journal is not printed in two spellings.
@article{goldsmith2001unsupervised,
  author  = {Goldsmith, John},
  title   = {{Unsupervised Learning of the Morphology of a Natural Language}},
  journal = {Computational Linguistics},
  year    = {2001},
  volume  = {27},
  number  = {2},
  pages   = {153--198},
}
% Journal article: Guy (1991), Cryptologia 15(3), pp. 207--218.
% The initials are separated by a space: BibTeX treats "B.M." as a single
% name token, so styles that abbreviate given names would drop the "M.".
@article{guy1991statistical,
  author  = {Guy, Jacques B. M.},
  title   = {{Statistical Properties of Two Folios of the Voynich Manuscript}},
  journal = {Cryptologia},
  year    = {1991},
  volume  = {15},
  number  = {3},
  pages   = {207--218},
}
% Journal article: Guy (1991), Cryptologia 15(3), pp. 258--262.
% The initials are separated by a space: BibTeX treats "B.M." as a single
% name token, so styles that abbreviate given names would drop the "M.".
@article{guy1991vowel,
  author  = {Guy, Jacques B. M.},
  title   = {{Vowel Identification: An Old (but Good) Algorithm}},
  journal = {Cryptologia},
  year    = {1991},
  volume  = {15},
  number  = {3},
  pages   = {258--262},
}
% Journal article: Houston and Coe (2003), Mexicon 25(6).
% The title is a question, so it ends with a question mark.
@article{houston2003has,
  author  = {Houston, Stephen D. and Coe, Michael D.},
  title   = {{Has Isthmian Writing Been Deciphered?}},
  journal = {Mexicon},
  year    = {2003},
  volume  = {25},
  number  = {6},
  pages   = {151--161},
}
% Workshop paper: Hu, Matveeva, Goldsmith and Sprague (2005).
@inproceedings{hu2005sed,
  author    = {Hu, Yu and Matveeva, Irina and Goldsmith, John and Sprague, Colin},
  title     = {{The SED Heuristic for Morpheme Discovery: A Look at Swahili}},
  booktitle = {{Proceedings of the Workshop on Psychocomputational Models of Human
Language Acquisition}},
  year      = {2005},
  pages     = {28--35},
}
% Journal article: Justeson and Kaufman (1993), Science 259(5102).
@article{justeson1993decipherment,
  author  = {Justeson, John S. and Kaufman, Terrence},
  title   = {{A Decipherment of Epi-Olmec Hieroglyphic Writing}},
  journal = {Science},
  year    = {1993},
  volume  = {259},
  number  = {5102},
  pages   = {1703--1711},
}
% Conference paper: Kim and Snyder (2013), 51st ACL annual meeting.
@inproceedings{kim2013unsupervised,
  author    = {Kim, Young-Bum and Snyder, Benjamin},
  title     = {{Unsupervised Consonant-Vowel Prediction over Hundreds of Languages}},
  booktitle = {{Proceedings of the 51st Annual Meeting of the Association for
Computational Linguistics}},
  address   = {Sofia, Bulgaria},
  year      = {2013},
  pages     = {1527--1536},
}
% Conference paper: Kim and Snyder (2012), joint EMNLP/CoNLL proceedings.
@inproceedings{kim2012universal,
  author    = {Kim, Young-Bum and Snyder, Benjamin},
  title     = {{Universal Grapheme-to-Phoneme Prediction over Latin Alphabets}},
  booktitle = {{Proceedings of the 2012 Joint Conference on Empirical Methods in Natural
Language Processing and Computational Natural Language Learning}},
  year      = {2012},
  pages     = {332--343},
}
% Workshop paper: Knight, Megyesi and Schaefer (2011).
% The author field contains a UTF-8 character, written directly.
@inproceedings{knight2011copiale,
  author    = {Knight, Kevin and Megyesi, Beáta and Schaefer, Christiane},
  title     = {{The Copiale Cipher}},
  booktitle = {{Proceedings of the 4th Workshop on Building and Using Comparable Corpora:
Comparable Corpora and the Web}},
  year      = {2011},
  pages     = {2--9},
}
% Conference paper: Knight, Nair, Rathod and Yamada (2006), COLING/ACL.
@inproceedings{knight2006unsupervised,
  author    = {Knight, Kevin and Nair, Anish and Rathod, Nishit and Yamada, Kenji},
  title     = {{Unsupervised Analysis for Decipherment Problems}},
  booktitle = {{Proceedings of the COLING/ACL Conference}},
  year      = {2006},
  pages     = {499--506},
}
% Workshop paper: Knight and Yamada (1999), ACL workshop.
@inproceedings{knight1999computational,
  author    = {Knight, Kevin and Yamada, Kenji},
  title     = {{A Computational Approach to Deciphering Unknown Scripts}},
  booktitle = {{ACL Workshop on Unsupervised Learning in Natural Language Processing}},
  year      = {1999},
  pages     = {37--44},
}
% Journal article: Kober (1946), American Journal of Archaeology.
% Volume and issue were missing, leaving the page range without a container.
% TODO(review): 50(2) is supplied from the journal's public record -- confirm
% against the printed issue.
@article{kober1946inflection,
  author  = {Kober, Alice E.},
  title   = {{Inflection in Linear Class B: 1-Declension}},
  journal = {American Journal of Archaeology},
  year    = {1946},
  volume  = {50},
  number  = {2},
  pages   = {268--276},
}
% Journal article: Lin and Knight (2006), Artificial Intelligence 170(4).
% The first author's given name is hyphenated ("Shou-de"); the "de" survives
% only in the citation key, where an exporter mistook it for a name particle.
@article{de2006discovering,
  author  = {Lin, Shou-de and Knight, Kevin},
  title   = {{Discovering the Linear Writing Order of a Two-Dimensional Ancient
Hieroglyphic Script}},
  journal = {Artificial Intelligence},
  year    = {2006},
  volume  = {170},
  number  = {4},
  pages   = {409--421},
}
% Chapter in the edited volume "The World's Writing Systems" (the book itself
% is the daniels1996world entry). It is typed as a contribution to a
% collection because the container is a book, not a journal; the editor,
% publisher and address are the ones recorded for that book.
@incollection{macri1996rongorongo,
  author    = {Macri, Martha J.},
  title     = {{RongoRongo of Easter Island}},
  booktitle = {{The World's Writing Systems}},
  editor    = {Daniels, Peter T. and Bright, William},
  publisher = {Oxford University Press},
  address   = {Oxford},
  year      = {1996},
  pages     = {183--188},
}
% Book: Melchert (2003), Brill.
% Brill publishes from Leiden; Leuven is a different city (in Belgium).
@book{melchert2003luwians,
  author    = {Melchert, H. Craig},
  title     = {{The Luwians}},
  publisher = {Brill},
  address   = {Leiden},
  year      = {2003},
}
% Journal article: Moler and Morrison (1983), American Mathematical Monthly 90.
@article{moler1983singular,
  author  = {Moler, Cleve and Morrison, Donald},
  title   = {{Singular Value Analysis of Cryptograms}},
  journal = {American Mathematical Monthly},
  year    = {1983},
  volume  = {90},
  pages   = {78--87},
}
% Conference paper: Mukherjee, Choudhury and Kannan (2009), 12th EACL.
@inproceedings{mukherjee2009discovering,
  author    = {Mukherjee, Animesh and Choudhury, Monojit and Kannan, Ravi},
  title     = {{Discovering Global Patterns in Linguistic Networks through Spectral Analysis:
A Case Study of the Consonant Inventories}},
  booktitle = {{Proceedings of the 12th Conference of the European Chapter of the
Association for Computational Linguistics}},
  year      = {2009},
  pages     = {585--593},
}
% Journal article: Mukherjee, Choudhury, Basu and Ganguly (2007), IJMPC 18(2).
% The issue number is a plain integer; a zero-padded "02" would be printed
% literally and is inconsistent with every other number field in this file.
@article{mukherjee2007modeling,
  author  = {Mukherjee, Animesh and Choudhury, Monojit and Basu, Anupam and Ganguly, Niloy},
  title   = {{Modeling the Co-occurrence Principles of the Consonant Inventories: A Complex
Network Approach}},
  journal = {International Journal of Modern Physics C},
  year    = {2007},
  volume  = {18},
  number  = {2},
  pages   = {281--295},
}
% Book: Ohaver (1933), Etcetera Press.
@book{ohaver33,
  author    = {Ohaver, Merle E.},
  title     = {{Cryptogram Solving}},
  publisher = {Etcetera Press},
  address   = {Columbus, OH},
  year      = {1933},
}
% Conference short paper: Penn and Choma (2006), HLT-NAACL companion volume.
@inproceedings{penn2006quantitative,
  author    = {Penn, Gerald and Choma, Travis},
  title     = {{Quantitative Methods for Classifying Writing Systems}},
  booktitle = {{Proceedings of the Human Language Technology Conference of the NAACL,
Companion Volume: Short Papers}},
  year      = {2006},
  pages     = {117--120},
}
% Journal article: Rao (2010), IEEE Computer 43(4).
% The initials are separated by a space: BibTeX treats "P.N." as a single
% name token, so styles that abbreviate given names would drop the "N.".
@article{rao2010probabilistic,
  author  = {Rao, Rajesh P. N.},
  title   = {{Probabilistic Analysis of an Ancient Undeciphered Script}},
  journal = {IEEE Computer},
  year    = {2010},
  volume  = {43},
  number  = {4},
  pages   = {76--80},
}
% Journal article: Rao et al. (2009), PNAS 106(33).
% The first author's initials are separated by a space: BibTeX treats "P.N."
% as a single name token, so abbreviating styles would drop the "N.".
@article{rao2009markov,
  author  = {Rao, Rajesh P. N. and Yadav, Nisha and Vahia, Mayank N. and Joglekar,
Hrishikesh and Adhikari, R. and Mahadevan, Iravatham},
  title   = {{A Markov Model of the Indus Script}},
  journal = {Proceedings of the National Academy of Sciences},
  year    = {2009},
  volume  = {106},
  number  = {33},
  pages   = {13685--13690},
}
% Conference paper: Ravi and Knight (2011), 49th ACL annual meeting.
% "Computational" had lost its initial letter in the proceedings title. The
% volume separator is an em dash, as in the belkin2002using entry.
@inproceedings{ravi2011bayesian,
  author    = {Ravi, Sujith and Knight, Kevin},
  title     = {{Bayesian Inference for Zodiac and Other Homophonic Ciphers}},
  booktitle = {{Proceedings of the 49th Annual Meeting of the Association for
Computational Linguistics: Human Language Technologies---Volume 1}},
  year      = {2011},
  pages     = {239--247},
}
% Workshop paper: Reddy and Knight (2011), 5th ACL-HLT LaTeCH workshop.
@inproceedings{reddy2011we,
  author    = {Reddy, Sravana and Knight, Kevin},
  title     = {{What We Know about the Voynich Manuscript}},
  booktitle = {{Proceedings of the 5th ACL-HLT Workshop on Language Technology for
Cultural Heritage, Social Sciences, and Humanities}},
  address   = {Portland, OR},
  year      = {2011},
  pages     = {78--86},
}
% Book: Robinson (2002), McGraw-Hill.
@book{robinson2002lost,
  author    = {Robinson, Andrew},
  title     = {{Lost Languages: The Enigma of the World's Undeciphered Scripts}},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = {2002},
}
% Book: Saussure, Course in General Linguistics (1966 McGraw-Hill edition).
% The translator is Wade Baskin: in "Last, First" form the family name comes
% before the comma, so the field reads "Baskin, Wade".
@book{saussure1966course,
  author     = {Saussure, Ferdinand de},
  editor     = {Bally, Charles and Sechehaye, Albert and Riedlinger, Albert},
  translator = {Baskin, Wade},
  title      = {{Course in General Linguistics}},
  publisher  = {McGraw-Hill},
  address    = {New York},
  year       = {1966},
}
% Conference paper: Snyder, Barzilay and Knight (2010), 48th ACL annual meeting.
@inproceedings{snyder2010statistical,
  author    = {Snyder, Benjamin and Barzilay, Regina and Knight, Kevin},
  title     = {{A Statistical Model for Lost Language Decipherment}},
  booktitle = {{Proceedings of the 48th Annual Meeting of the Association for
Computational Linguistics}},
  year      = {2010},
  pages     = {1048--1057},
}
% Book: Sproat (2000), MIT Press.
@book{sproat2000computational,
  author    = {Sproat, Richard William},
  title     = {{A Computational Theory of Writing Systems}},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2000},
}
% Journal article: Sproat (2010), Computational Linguistics 36(3).
@article{sproat2010ancient,
  author  = {Sproat, Richard},
  title   = {{Ancient Symbols, Computational Linguistics, and the Reviewing
Practices of the General Science Journals}},
  journal = {Computational Linguistics},
  year    = {2010},
  volume  = {36},
  number  = {3},
  pages   = {585--594},
}
% Book: Strang (2005), 4th edition, Brooks/Cole.
% The author is written in "Last, First" form with a space after the comma,
% like every other name in this file.
@book{Strang,
  author    = {Strang, Gilbert},
  title     = {{Linear Algebra and Its Applications}},
  edition   = {4},
  publisher = {Brooks/Cole Publishing Company},
  address   = {Pacific Grove, CA},
  year      = {2005},
}
% Journal article: Stubbs and Barth (2003), Functions of Language 10(1).
% The title had been cut off after "Text-Type"; the full published title is
% restored and the journal name is capitalised as a proper title.
% TODO(review): confirm the restored wording against the journal's record.
@article{stubbs-barth03,
  author  = {Stubbs, Michael and Barth, Isabel},
  title   = {{Using Recurrent Phrases as Text-Type Discriminators: A Quantitative
Method and Some Findings}},
  journal = {Functions of Language},
  year    = {2003},
  volume  = {10},
  number  = {1},
  pages   = {61--104},
}
% Journal article: Sukhotin (1962), Russian-language original with an English
% gloss of the title and journal in square brackets.
% The initials are separated by a space so that abbreviating styles keep both.
% TODO(review): the page range was recorded as 198--106 (end before start);
% 198--206 is the single-digit correction -- confirm against the printed
% volume before relying on it.
@article{sukhotin62,
  author          = {Sukhotin, B. V.},
  author_original = {Сухотин, Б.В.},
  author+an       = {1=ru-Cyrl},
  title           = {{Экспериментальное выделение классов букв с помощью электронной вычислительной
ма\-ши\-ны [Experimental Selection of Letter Classes with the Help of Electronic
Digital Machines]}},
  journal         = {Проблемы структурной лингвистики [Problems of Structural Linguistics]},
  year            = {1962},
  volume          = {234},
  pages           = {198--206},
}
% Web page: Weiner (2013), hosted on public.oed.com; URL and access date are
% kept in the note.
@misc{OED,
  author = {Weiner, Edmund},
  title  = {{Early Modern English Pronunciation and Spelling}},
  year   = {2013},
  note   = {\url{http://public.oed.com/aspects-of-english/english-in-time/early-modern-english-pronunciation-and-spelling/} [Accessed: 2014-07-29]},
}
% Extended abstract: Wu, Solman, Linehan and Sproat (2012), LSA annual meeting.
@inproceedings{wu2012corpora,
  author    = {Wu, Katherine and Solman, Jennifer and Linehan, Ruth and Sproat, Richard},
  title     = {{Corpora of Non-linguistic Symbol Systems}},
  booktitle = {{LSA Annual Meeting Extended Abstracts}},
  year      = {2012},
}
% Journal article: Goldsmith and Xanthos (2009), Language 85(1).
% Given names are stored in full, as for "Goldsmith, John" in the other
% entries of this file, so the same person is not treated as two authors.
% TODO(review): "Aris" is supplied from the published byline -- confirm.
@article{goldsmith-xanthos09,
  author  = {Goldsmith, John and Xanthos, Aris},
  title   = {{Learning Phonological Categories}},
  journal = {Language},
  year    = {2009},
  volume  = {85},
  number  = {1},
  pages   = {4--38},
}
Patricia Thaine & Gerald Penn
% Conference paper: Thaine and Penn (2019), Graphemics in the 21st Century.
% The doi field holds the bare identifier; styles add the resolver prefix
% themselves, so a stored "https://doi.org/" would be printed twice.
@inproceedings{gla1-thai,
  author    = {Thaine, Patricia and Penn, Gerald},
  editor    = {Haralambous, Yannis},
  title     = {{Vocalic and Consonantal Grapheme Classification through Spectral
Decomposition}},
  booktitle = {{Proceedings of Graphemics in the 21st Century, Brest 2018}},
  publisher = {Fluxus Editions},
  address   = {Brest},
  year      = {2019},
  pages     = {367--386},
  doi       = {10.36824/2018-graf-thai},
}
|