Saturday, October 4, 2025

 Frequency analysis of Pyu corpus text


The simplest quantitative analysis of the Pyu corpus is a frequency analysis of its features. We will use the document-feature matrix (dfm) created in the previous analysis.

library(quanteda)
library(quanteda.textstats)
library(quanteda.textplots)
library(RColorBrewer)

# Relabel the documents: replace the default names (text1, text2, ...) with
# the inscription number of each Pyu inscription, then show the dfm.
quanteda::docnames(pyu_dfm) <- x1_df.1$InscriptionNumber
print(pyu_dfm)
Document-feature matrix of: 196 documents, 1,893 features (99.01% sparse) and 0 docvars.
     features
docs  @|| ḅay·ṁḥ dak·ṃ viy·ṃṁ tim·ṁ mlik· °o saḥ tgaṃ knon·
  001   1      2     1      1     1     1  3   1    1     1
  002   0      0     0      0     0     0  0   0    0     0
  003   1      0     0      0     0     0  4   0    0     0
  004   1      0     0      0     0     0  3   0    0     0
  005   1      0     0      0     0     0  3   0    0     0
  006   1      0     0      0     0     0  3   0    0     0
[ reached max_ndoc ... 190 more documents, reached max_nfeat ... 1,883 more features ]
# Tabulate feature frequencies across the dfm; the result is stored in
# tstat_freq for the table below.
tstat_freq <- quanteda.textstats::textstat_frequency(pyu_dfm)

# View the 30 most frequent features as a styled table.
# kableExtra provides kbl(), kable_styling(), column_spec() and row_spec();
# the %>% pipe used below is available once it is attached.
library(kableExtra)

# Never ask for more rows than textstat_frequency() returned: indexing 1:30
# on a shorter table would add NA rows and point row_spec() at rows that do
# not exist.
n_top <- min(30L, nrow(tstat_freq))

tstat_freq[seq_len(n_top), ] %>%
  kbl() %>%
  # All styling options in one call (the original split them over two
  # kable_styling() calls), and FALSE rather than the reassignable F.
  kable_styling(
    bootstrap_options = "condensed",
    full_width = FALSE,
    font_size = 10
  ) %>%
  # column_spec() expects a width with a unit (e.g. "10cm", "30em"); the
  # original "2" had none. NOTE(review): "2cm" is an assumed intent -- confirm.
  column_spec(1, width = "2cm") %>%
  row_spec(0, background = "lightgrey") %>%
  row_spec(seq_len(n_top), background = "lightblue")
feature    frequency  rank  docfreq  group
°o               320     1       48  all
tiṁ              190     2       32  all
ḅaṁḥ              93     3       27  all
ta                93     3       24  all
yaṁ               85     5       35  all
ḅiṁḥ              81     6       16  all
tin·ṁ             73     7       11  all
ḅay·ṁḥ            61     8       11  all
//                58     9        3  all
||                55    10       35  all
tar·              55    10       10  all
///               53    12       18  all
ḅin·ṁḥ            47    13       10  all
ḅa                45    14       19  all
saḥ               44    15       19  all
gi                42    16        6  all
tim·ṁ             41    17       11  all
ma                39    18       13  all
pau               37    19        6  all
tdav·ṃḥ           34    20       10  all
ḅaḥ               33    21       11  all
kdaṅ·             33    21       12  all
dav·ṃḥ            29    23        8  all
mra               28    24       16  all
ḅiṁ               28    24        8  all
tir·ṁ             27    26        6  all
priṅ·ḥ            26    27        8  all
traḥ              25    28        3  all
pay·ṁḥ            25    28        9  all
/                 24    30        2  all
 
# Draw a word cloud of the dfm features. The seed is fixed so the word
# layout is the same on every run; colours are the 10-class "Spectral"
# palette in reversed order.
set.seed(132)
textplot_wordcloud(
  pyu_dfm,
  max_size = 14,
  max_words = 200,
  color = rev(RColorBrewer::brewer.pal(n = 10, name = "Spectral"))
)

No comments:

Post a Comment