-
Notifications
You must be signed in to change notification settings - Fork 0
メーリングリスト・データファイルの作成手順
hideki kuno edited this page Sep 14, 2023
·
20 revisions
https://osdn.net/projects/gauche/lists/archive/devel-jp/
より提供されるアーカイブファイルから記事単位でファイル分割を行う手順を示す。
# Download the devel-jp archive index and every archive it links to.
#
# Requires: WHERE - working directory (created if missing). Meant to be run
# as a script: it exits on failure so that later steps never run in the
# wrong directory.
mkdir -p -- "${WHERE:?WHERE must name the working directory}" || exit 1
cd -- "${WHERE}" || exit 1

base_url='https://osdn.net/projects/gauche/lists/archive/devel-jp/'

# --fail keeps an HTTP error page from being saved as if it were the index.
curl -fsS -o index.html -L "${base_url}" || exit 1

# Each index line containing "Gzip" carries one href; the sed expressions
# strip everything up to and including the opening quote, then everything
# from the closing quote onward, leaving the bare link target.
grep Gzip index.html \
  | sed -e 's/^.*href..//' -e 's/".*$//' \
  | while IFS= read -r F; do
      curl -fsS -O -L "${base_url}${F}" \
        || printf 'warning: failed to download %s\n' "${F}" >&2
    done
# Decompress every downloaded *.gz archive in the current directory and run
# the resulting text file through nkf, replacing the file with nkf's output.
# NOTE(review): nkf is called without options, so the output encoding is
# whatever this nkf build defaults to - confirm it is the one you expect.
TMPFILE=$(mktemp) || exit 1
for F in *.gz; do
  # An unmatched glob stays literal; skip it instead of feeding it to gzip.
  [ -e "$F" ] || continue
  gzip -d -- "$F" || continue
  TXT=${F%.gz}
  # Only replace the text when nkf succeeded, so a failed conversion cannot
  # overwrite the archive with empty or partial output.
  if nkf "$TXT" > "$TMPFILE"; then
    mv -- "$TMPFILE" "$TXT"
  else
    printf 'warning: nkf failed on %s, file left unconverted\n' "$TXT" >&2
  fi
done
# Remove the temp file if it is still around (no archives, or nkf failed).
rm -f -- "$TMPFILE"
# Split the decompressed archives into one file per article.
#
# Archive names are taken from index.html (same extraction as the download
# step, minus the .gz suffix) and processed in the reverse of index order.
# NOTE(review): presumably the index lists newest first, so reversing gives
# oldest-first numbering - confirm against the live index.
#
# Output files are named 00001, 00002, ... in the current directory. Every
# line starting with "From " begins a new article and is itself not written.
count=1
archives=$(
  grep Gzip index.html \
    | sed -e 's/^.*href..//' -e 's/".*$//' -e 's/\.gz$//' \
    | awk '{ buf[NR] = $0 } END { for (i = NR; 0 < i; i--) print buf[i] }'
)

# The list is fed through a here-document (not a pipe) so that the running
# article counter survives in the current shell.
while IFS= read -r F; do
  [ -n "$F" ] || continue
  if [ ! -f "$F" ]; then
    printf 'warning: archive %s not found, skipped\n' "$F" >&2
    continue
  fi
  # close() the previous article before opening the next one; otherwise an
  # archive with many articles exhausts the open-file limit in most awks.
  # Lines that precede the first separator have no article and are skipped.
  awk -v cnt="$count" '
    /^From / {
      if (filename != "") close(filename)
      filename = sprintf("%05d", cnt++)
      next
    }
    filename != "" { print > filename }
  ' < "$F"
  # Advance the counter by the number of separators seen in this archive.
  sepcnt=$(grep -a -c -e '^From ' -- "$F")
  count=$((count + sepcnt))
done <<EOF
${archives}
EOF

# The monthly text files and the index are no longer needed.
rm -- *.txt *.html
https://lists.ubuntu.com/archives/ubuntu-jp.mbox/ubuntu-jp.mbox から記事単位でファイル分割を行う手順を示す。
# Download the ubuntu-jp mbox and hand it to the external splitter script.
#
# Requires: WHERE - working directory (created if missing). Meant to be run
# as a script: each step exits on failure so that the mbox is only removed
# after it has been split.
# NOTE(review): split_mbox.sh is not part of this page; presumably it writes
# one file per article into the current directory - confirm.
mkdir -p -- "${WHERE:?WHERE must name the working directory}" || exit 1
cd -- "${WHERE}" || exit 1

# --fail keeps an HTTP error page from being saved as the mbox.
curl -fsS -O \
  -H 'Referer: https://lists.ubuntu.com/mailman/listinfo/ubuntu-jp' \
  https://lists.ubuntu.com/archives/ubuntu-jp.mbox/ubuntu-jp.mbox || exit 1

"${HOME}/el-ml-file/split_mbox.sh" ubuntu-jp.mbox || exit 1
rm -- ubuntu-jp.mbox
#######################################
# Join a Subject header that was broken across two lines.
#
# In every file of the current directory (except idx1) that has a line
# matching the given pattern, each "Subject: " line ending in "]" or "Re:"
# (optionally followed by spaces) is concatenated with the line after it.
# Other Subject lines are left untouched, and a Subject line with nothing
# to join (end of file, or directly followed by another Subject line) is
# written back unchanged rather than dropped.
# Arguments: $1 - extended regex used by grep to select the files
# Returns:   non-zero only if the temp file cannot be created
#######################################
unfold_subject() {
  local pattern=$1 tmp f
  tmp=$(mktemp) || return
  grep -l --exclude=idx1 -E -e "$pattern" -- * | while IFS= read -r f; do
    awk '
      function emit_pending() { if (pending) { print buf; pending = 0 } }
      /^Subject: / {
        emit_pending()
        if ($0 ~ /(\]|Re:) *$/) { buf = $0; pending = 1 } else { print }
        next
      }
      {
        # Glue the held Subject line onto this line (no newline between).
        if (pending) { printf("%s", buf); pending = 0 }
        print
      }
      END { emit_pending() }
    ' < "$f" > "$tmp" && mv -- "$tmp" "$f"
  done
  rm -f -- "$tmp"
}

# Two passes, as in the original procedure: first subjects ending exactly in
# "]" or "Re:", then also those with trailing spaces. The second pass also
# picks up subjects that still end that way after the first join.
unfold_subject '^Subject.*(]|Re:)$'
unfold_subject '^Subject.*(]|Re:) *$'
#!/usr/bin/python
"""Print the monthly archive files that exist, in chronological order.

For every year from 2013 through 2021 and every month name, the file
"<year>-<MonthName>.txt" is looked up in the current directory and its
name is printed if it is a regular file.
"""
import os

# English month names in calendar order; they form the middle part of the
# file names that are probed below.
month = ("January",
         "February",
         "March",
         "April",
         "May",
         "June",
         "July",
         "August",
         "September",
         "October",
         "November",
         "December",)

# Years in the outer loop and months in the inner loop give oldest-first
# output.
for y in range(2013, 2022):
    for m in month:
        f = str(y) + '-' + m + ".txt"
        if os.path.isfile(f):
            print(f)