作者:熊子瑜
脚本ID:Praat.XZY20211214.021
上传时间:2021年12月14日
简介:此脚本可针对用户所指定的某个声音文件夹(全路径,以“\”结束),对每个声音文件所对应的PitchTier文件或Pitch文件进行分析(PitchTier文件优先,其次是Pitch文件),从这些音高数据文件中提取出该发音人的全部音高数据,然后将超出用户所设定的频率范围的音高数据予以剔除,还可以根据需要将频率数据转换为半音,最后测算出发音人的音高基本数据(如均值、标准差等),并绘制出音高数据的频数分布图。下面给出了某发音人的音高数据的频数分布图,上图是Hz标度,下图是半音标度,有效音高数据范围均设定为60Hz至240Hz,转换为半音时所采用的参考频率为发音人的音高中值。


#By XIONG Ziyu
#last modify: 2021/12/30
#Praat version: 6.1.05
#功能:针对用户所指定的某个声音文件夹(全路径,以“\”结束),
# 对每个声音文件所对应的PitchTier文件或Pitch文件进行分析,
# PitchTier文件优先,其次是Pitch文件,
# 从这些音高数据文件中提取出全部音高数据,并据此测算出发音人的音高基本数据:
# 音高均值、音高标准差、音高中值、音域上限(97.5%)、音域下限(2.5%)等;
# 脚本程序还会绘制出音高数据的频数分布图,以帮助用户确定发音人的音域范围,
# 可以选择采用Hz标度,还是采用半音标度,
# 如果选择半音标度,脚本程序会以发音人的音高中值为参考频率进行转换。
form 测算发音人的音域范围
	sentence Sound_FilePath_(声音路径) F:\语音库\北京\ABC\
	sentence Speaker_Code_(发音人编码)
	positive Pitch_Max_(上限频率:Hz) 600
	positive Pitch_Min_(下限频率:Hz) 60
	real Stylized_(简化音高点:半音) 0
	real Smooth_Pitch_(平滑处理:Hz) 10
	boolean Semitones_(作图时换算为半音) 1
endform

# Empty the object list. A throw-away TextGrid is created first so that
# "select all" / "Remove" always have at least one object to work on.
Create TextGrid: 0, 1, "Mary John bell", "bell"
select all
Remove

# Read and write text files as UTF-8 so that Chinese characters are not garbled.
Text reading preferences: "UTF-8"
Text writing preferences: "UTF-8"

# Normalise the folder path: backslashes only, and always a trailing backslash.
sound_FilePath$ = replace$ (sound_FilePath$, "/", "\", 0)
if right$ (sound_FilePath$, 1) <> "\"
	sound_FilePath$ = sound_FilePath$ + "\"
endif

# Unit label used in file names, on the figure and in the report.
if semitones = 1
	danwei$ = "St"
else
	danwei$ = "Hz"
endif

# List the .wav files of the folder; stop right away when there are none.
Create Strings as file list: "List", sound_FilePath$ + "*.wav"
selectObject: "Strings List"
fileNum = Get number of strings
if fileNum = 0
	exitScript: "没有发现声音文件!" + newline$ + newline$
endif

# The report file is named after the folder (without its trailing backslash)
# followed by the analysis settings.
folderNoSlash$ = left$ (sound_FilePath$, length (sound_FilePath$) - 1)
spkData$ = folderNoSlash$ + "('pitch_Min'-'pitch_Max')-'danwei$'-'stylized:1'-'smooth_Pitch:0'.ini"
call GETSPEAKERDATA

# Empty the object list again (same dummy-object trick) and finish.
Create TextGrid: 0, 1, "Mary John bell", "bell"
select all
Remove
exitScript: "操作过程已结束,请检查数据结果!" + newline$ + newline$
# GETSPEAKERDATA
# Goes through every file named in "Strings List" and pools its pitch points.
# Reads (script-level variables): sound_FilePath$, fileNum, pitch_Min, pitch_Max,
#   stylized, smooth_Pitch, danwei$.
# Builds: "Table ALL" (columns "value" = in-range point, "stylize" = value of the
#   stylized contour at the same time) and "Table STYLIZED" (column "value" =
#   points of the stylized contour).
# Accumulates: oriCount (points before range filtering), allCount (points left
#   after filtering), allDuration (sum of last-minus-first point time per file),
#   chafen / chafenNum (sum and count of absolute semitone steps between
#   neighbouring stylized points).
# Side effects on disk: writes a .PitchTier file when only a .Pitch file exists,
#   and writes a .Stylized cache file per sound file when none is readable.
# Ends by calling DRAWDATA on the STYLIZED table.
procedure GETSPEAKERDATA
	Create Table with column names: "ALL", 0, "value stylize"
	Create Table with column names: "STYLIZED", 0, "value"
	aNum = 0
	sNum = 0
	allCount = 0
	oriCount = 0
	allDuration = 0
	chafen = 0
	chafenNum = 0
	for f from 1 to fileNum
		selectObject: "Strings List"
		filename$ = Get string: f
		filename$ = sound_FilePath$ + filename$
		soundfile$ = filename$
		pitchfile$ = filename$ - ".wav" + ".Pitch"
		pitchtierfile$ = filename$ - ".wav" + ".PitchTier"
		stylefile$ = filename$ - ".wav" + "('pitch_Min'-'pitch_Max')-'danwei$'-'stylized:1'-'smooth_Pitch:0'.Stylized"
		echo 'f'/'fileNum''newline$''soundfile$'

		# Prefer an existing PitchTier file; otherwise derive one from the Pitch
		# file and save it next to the sound file.
		dataok = 0
		if fileReadable (pitchtierfile$)
			Read from file: pitchtierfile$
			dataok = 1
		endif
		if dataok = 0 and fileReadable (pitchfile$)
			Read from file: pitchfile$
			Down to PitchTier
			Save as text file: pitchtierfile$
			dataok = 1
		endif

		if dataok = 1
			sname$ = selected$ ("PitchTier")
			pNums = Get number of points
			oriCount = oriCount + pNums

			# Drop every point outside [pitch_Min, pitch_Max]. Walking backwards
			# keeps the indices of the points still to be visited unchanged.
			p = pNums
			while p >= 1
				v = Get value at index: p
				if v > pitch_Max or v < pitch_Min
					Remove point: p
				endif
				p = p - 1
			endwhile
			pNums = Get number of points
			allCount = allCount + pNums

			# A file with no in-range point contributes nothing further; without
			# this guard the code below would read the never-assigned variable t0.
			if pNums > 0
				if fileReadable (stylefile$) = 0
					selectObject: "PitchTier 'sname$'"
					Copy: "TMP"
					if stylized > 0 and smooth_Pitch > 0
						# First argument 0.01 is presumably the time step in seconds.
						To Pitch: 0.01, pitch_Min, pitch_Max
						# Use the bandwidth chosen in the form (was hard-coded to 10).
						Smooth: smooth_Pitch
						Down to PitchTier
					endif
					if stylized > 0
						Stylize: stylized, "Semitones"
					endif
					Rename: "STYLIZED"
					Save as text file: stylefile$
				else
					# Reuse the cached stylized contour for these settings.
					Read from file: stylefile$
					Rename: "STYLIZED"
				endif

				# Collect value and time of every remaining original point.
				selectObject: "PitchTier 'sname$'"
				pNums = Get number of points
				for p from 1 to pNums
					v1'p' = Get value at index: p
					t'p' = Get time from index: p
				endfor
				stime = t1
				etime = t'pNums'
				duration = etime - stime
				allDuration = allDuration + duration

				# Sample the stylized contour at the same times.
				selectObject: "PitchTier STYLIZED"
				for p from 1 to pNums
					t = t'p'
					v2'p' = Get value at time: t
				endfor

				# One row per original point: original value and stylized value,
				# both written with one decimal.
				selectObject: "Table ALL"
				for p from 1 to pNums
					v1 = v1'p'
					v2 = v2'p'
					Append row
					aNum = aNum + 1
					Set numeric value: aNum, "value", 'v1:1'
					Set numeric value: aNum, "stylize", 'v2:1'
				endfor

				# Points of the stylized contour, plus the absolute semitone step
				# between each pair of neighbouring points.
				# The v'p' names may overwrite v1'p' names (e.g. v11); that is
				# harmless here because the v1'p' values are no longer needed.
				selectObject: "PitchTier STYLIZED"
				prev = 0
				pNums = Get number of points
				for p from 1 to pNums
					v'p' = Get value at index: p
					if p > 1
						v = v'p'
						st = abs (12 * log10 (v / prev) / log10 (2))
						chafen = chafen + st
						chafenNum = chafenNum + 1
					endif
					prev = v'p'
				endfor
				selectObject: "Table STYLIZED"
				for p from 1 to pNums
					v = v'p'
					Append row
					sNum = sNum + 1
					Set numeric value: sNum, "value", 'v:1'
				endfor
			endif
		endif

		# Remove everything except the file list and the two result tables. The
		# dummy TextGrid guarantees that the selection is never empty.
		Create TextGrid: 0, 1, "Mary John bell", "bell"
		select all
		minus Strings List
		minus Table ALL
		minus Table STYLIZED
		Remove
	endfor
	call DRAWDATA STYLIZED
endproc
# DRAWDATA tName$
# Draws the frequency distribution of column "value" of "Table <tName$>", saves it
# as a PNG, and writes a text report to the Info window and to the file spkData$.
# Reads (script-level variables): spkData$, sound_FilePath$, speaker_Code$,
#   semitones, danwei$, pitch_Min, pitch_Max, smooth_Pitch, stylized, fileNum,
#   oriCount, allCount, allDuration, chafen, chafenNum, and "Table ALL".
# Side effects: clears the Picture and Info windows, deletes any previous spkData$
#   file, converts the table values to semitones when semitones = 1, removes the
#   table and the derived Matrix, and overwrites chafen with its mean.
# Nothing is drawn or reported when the table has no rows.
procedure DRAWDATA tName$
	Erase all
	clearinfo
	filedelete 'spkData$'
	selectObject: "Table 'tName$'"
	newCount = Get number of rows
	if newCount > 0
		# Correlation between the original points and the stylized contour.
		selectObject: "Table ALL"
		relation$ = Report correlation (Pearson r): "value", "stylize", 0.025
		rel = extractNumber (relation$, "=")
		rel = 'rel:3'

		# Median in Hz; kept in ospkMid because the table may be rescaled below.
		selectObject: "Table 'tName$'"
		pMid = Get quantile: "value", 0.5
		ospkMid = pMid
		if semitones = 1
			# Semitones relative to the speaker's median.
			Formula: "value", "12*log10(self/'pMid')/log10(2)"
		endif

		spkMean = Get mean: "value"
		spkStdv = Get standard deviation: "value"
		spkMax = Get quantile: "value", 0.975
		spkMid = Get quantile: "value", 0.50
		spkMin = Get quantile: "value", 0.025
		pmin = Get minimum: "value"
		pmax = Get maximum: "value"

		# Horizontal range, number of bins and tick distance of the histogram.
		if semitones = 0
			cmin = floor (pmin / 10) * 10
			cmax = ceiling (pmax / 10) * 10
		else
			cmin = -15
			cmax = 15
		endif
		cmin = 'cmin:0'
		cmax = 'cmax:0'
		if semitones = 0
			nums = ceiling ((cmax - cmin) / 10)
			step = 20
			if cmax - cmin > 200
				nums = ceiling ((cmax - cmin) / 15)
				step = 30
			endif
			if cmax - cmin > 280
				nums = ceiling ((cmax - cmin) / 20)
				step = 40
			endif
		else
			nums = ceiling ((cmax - cmin))
			step = 2
		endif

		# The table is still selected here; the distribution is drawn from a Matrix.
		Down to Matrix
		Black
		Axes: 0, 1, 0, 1
		Select outer viewport: 0, 6, 0, 3
		Draw rectangle: 0, 1, 0, 1
		Text left: "no", "Number / bin"
		Draw distribution: 0, 0, 0, 0, cmin, cmax, nums, 0, 0, "no"
		Marks right: 6, "yes", "no", "yes"
		Marks bottom every: 1, step, "yes", "no", "no"

		# Figure title: the folder path without its first two characters (the
		# drive prefix in the form's default path), cut after "\语音库\" when
		# present, with "|" as separator; the last component is replaced by the
		# speaker code if one was given.
		cFilePath$ = right$ (sound_FilePath$, length (sound_FilePath$) - 2)
		marker$ = "\语音库\"
		if index (cFilePath$, marker$) > 0
			cFilePath$ = right$ (cFilePath$, length (cFilePath$) - index (cFilePath$, marker$) - length (marker$) + 1)
		endif
		cFilePath$ = left$ (cFilePath$, length (cFilePath$) - 1)
		cFilePath$ = replace$ (cFilePath$, "\", "|", 0)
		if speaker_Code$ != ""
			cFilePath$ = left$ (cFilePath$, rindex (cFilePath$, "|")) + speaker_Code$
		endif
		percent = round (newCount / allCount * 1000) / 10
		Text top: "no", "音高数据分布图:'cFilePath$'"
		Text bottom: "yes", "[横坐标单位:'danwei$',Smooth:'smooth_Pitch'Hz,Stylized:'stylized'St,剩余音高点数占比:'percent',r='rel']"
		Axes: 0, 1, 0, 1
		Text special: 0.5, "Centre", -0.22, "Half", "Times", 10, "0", "[取值范围:'pitch_Min'Hz-'pitch_Max'Hz,中值:'ospkMid:1'Hz,总音高点数:'oriCount'个,有效音高点数:'allCount'个]"
		Axes: 0, 1, 0, 1
		Select outer viewport: 0, 0.5, 0, 0.5
		Paint rectangle: "white", 0, 1, 0, 1
		png$ = spkData$ - ".ini" + ".png"
		Axes: 0, 1, 0, 1
		Select outer viewport: 0, 6.3, 0, 3.2
		Save as 300-dpi PNG file: png$

		selectObject: "Table 'tName$'"
		plusObject: "Matrix 'tName$'"
		Remove

		# Text report: input settings first, then the results.
		info$ = newline$
		info$ = info$ + "输入控制参数信息如下:'newline$'"
		info$ = info$ + "1.声音文件路径:'sound_FilePath$''newline$'"
		info$ = info$ + "2.用户设定的发音人代码:'speaker_Code$''newline$'"
		info$ = info$ + "3.用户设定的有效音高上限:'pitch_Max:0'Hz'newline$'"
		info$ = info$ + "4.用户设定的有效音高下限:'pitch_Min:0'Hz'newline$'"
		info$ = info$ + "5.是否转换为半音标度(1为是,0为否):'semitones''newline$'"
		info$ = info$ + "6.Smooth平滑处理的阈值:'smooth_Pitch'Hz'newline$'"
		info$ = info$ + "7.Stylize简化处理的阈值:'stylized'St'newline$'"
		info$ = info$ + "'newline$'音高数据分析结果如下:'newline$'"
		info$ = info$ + "1.声音文件个数:'fileNum:0'个'newline$'"
		# NOTE(review): allDuration is summed from first-to-last pitch point times,
		# not from the sound files themselves - confirm the label is intended.
		info$ = info$ + "2.声音总时长:'allDuration:3'秒'newline$'"
		# Each valid point is counted as 0.01 s of voicing.
		percent = round (allCount * 0.01 / allDuration * 1000) / 10
		info$ = info$ + "3.带音段时长占比:'percent:1'%'newline$'"
		info$ = info$ + "4.全部音高数据点个数(oriCount):'oriCount:0'个'newline$'"
		percent = round (allCount / oriCount * 1000) / 10
		info$ = info$ + "5.['pitch_Min'Hz-'pitch_Max'Hz]内的有效音高数据点个数(allCount):'allCount:0'个,占oriCount的'percent:1'%'newline$'"
		percent = round (newCount / allCount * 1000) / 10
		info$ = info$ + "6.Stylize后的音高数据点个数:'newCount:0'个,占allCount的'percent:1'%'newline$'"
		info$ = info$ + "7.stylize后的音高数据和有效音高数据的相关系数:'rel''newline$'"
		info$ = info$ + "8.stylize后的发音人音高数据均值:'spkMean:1''danwei$''newline$'"
		info$ = info$ + "9.stylize后的发音人音高数据标准差:'spkStdv:1''danwei$''newline$'"
		info$ = info$ + "10.stylize后的发音人音高数据上限(97.5%):'spkMax:1''danwei$''newline$'"
		info$ = info$ + "11.stylize后的发音人音高数据中值(50.0%):'ospkMid:1'Hz'newline$'"
		info$ = info$ + "12.stylize后的发音人音高数据下限(2.5%):'spkMin:1''danwei$''newline$'"
		info$ = info$ + "13.stylize后的发音人音高数据最大值:'pmax:1''danwei$''newline$'"
		info$ = info$ + "14.stylize后的发音人音高数据最小值:'pmin:1''danwei$''newline$'"
		chafen = (chafen / chafenNum)
		# The step size is accumulated in semitones whatever the display unit is,
		# so it is labelled "St" rather than with danwei$.
		info$ = info$ + "15.stylize后的发音人音高数据相邻两点的平均音高变化:'chafen:1'St'newline$'"
		info$ = info$ + "'newline$'图片文件:'png$''newline$'"
		info$ = info$ + "数据文件:'spkData$''newline$'"
		appendInfo: info$
		fileappend "'spkData$'" 'info$'
	endif
endproc