很拙劣,献丑了。
我分成几个部分:
readnews.c (取新闻列表,写入目录结构中,这样用户就可以看到新闻的标题、时间、地址了)
fetchnews.c (把新闻的具体内容填上)
另外若干小工具:
nohtml.c
getbody.c
/*
 *
 * 自动取新闻
 *
 * http://news.sina.com.cn/news1000.shtml
 *
 * readnews.c by ZV 2002.5.15
 *
 */
#include "/root/bbssrc/include/bbs.h"

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
/* Append-only log: one "<board> <article file> <url>" line per posted item. */
#define LOGFILE "/home/bbs/readnews.log"
/* URLs that have already been posted, one per line; read at start-up and
 * appended to whenever a new item is posted. */
#define OLDNEWSFILE "/home/bbs/oldnews.log"
/* Local copy of the news list page that gets parsed. */
#define NEWSFILE "/home/bbs/news1000.shtml"
/* Board that receives the generated articles. */
#define BOARD "News"
/* Index file of BOARD.  Adjacent string literals are concatenated by the
 * compiler; the `##` operator must not be used here because pasting an
 * identifier or a string literal onto another string literal does not
 * produce a valid preprocessing token. */
#define DIR BBSHOME "/boards/" BOARD "/.DIR"
/* Maximum number of already-posted URLs kept in memory. */
#define MAXLOG 10000
int main()
{
int fd, filesize, pos1, pos2, i, j, k;
int isurl;
FILE *fp, *fpout, *fplog, *fpold;
char buf[50000], title[1024], url[1000], fname[100];
char oldnews[MAXLOG][200];
int oldptr = 0, id = 0;
struct stat filestat;
struct fileheader info;
time_t mytime = time(0);
if ((fplog = fopen(LOGFILE, "a")) == NULL) {
perror(LOGFILE);
exit(-1);
}
flock(fileno(fplog), LOCK_EX);
if ((fp = fopen(NEWSFILE, "r")) == NULL) {
perror(NEWSFILE);
exit(-1);
}
if ((fpold = fopen(OLDNEWSFILE, "r+")) == NULL) {
perror(OLDNEWSFILE);
exit(-1);
}
while (!feof(fpold)) {
fscanf(fpold, "%s\n", url);
if (strstr(url, "http") && strstr(url, "html")) {
printf("{%s}\n", url);
strcpy(oldnews[oldptr++], url);
}
}
stat(NEWSFILE, &filestat);
filesize = filestat.st_size;
if (filesize > 50000) {
printf("File too large.\n");
exit(-1);
}
fread(buf, filesize, 1, fp);
fclose(fp);
if ((fd = open(DIR, O_RDWR | O_CREAT, 0644)) == -1) {
perror(DIR);
exit(-1);
}
flock(fd, LOCK_EX);
lseek(fd, 0, SEEK_END);
pos1 = 0;
while ((pos1<filesize) && (buf[pos1] != '[')) pos1++;
pos2 = pos1+1;
for(i=pos2; i<filesize; i++)
if (buf[i]=='\n') break;
buf[i] = 0;
while (buf[pos2++]) {
while ((buf[pos2])&&(buf[pos2] != '[')) pos2++;
isurl = 1; k=0;
for (i=pos1, j=0; i<pos2; i++)
if (buf[i] == '<') {
i++;
if (isurl) { // 分析url
// a href="http://sports.sina.com.
cn/k/2002-05-14/14273010.shtml" TARGET=_blank
while ((i<pos2) && (buf[i] != '"')) i++;
i++;
while ((i<pos2) && (buf[i] != '"')) {
url[k++] = buf[i];
i++;
}
url[k] = 0;
while ((i<pos2) && (buf[i] != '>')) i++;
isurl = 0;
} else {
while ((i<pos2) && (buf[i] != '>')) i++;
}
} else {
title[j] = buf[i];
j++;
}
title[j] =0;
pos1 = pos2;
for (i=0; i<oldptr; i++)
if (strcmp(oldnews[i], url) == 0) {
printf("Old:%s\n", url);
break;
}
if (i<oldptr) continue;
bzero(&info, sizeof(info));
sprintf(info.filename, "M.%d.A", mytime++);
for (i=0; (i<80) && (title[i+7])/* && (title[i+7] != '(')*/;
i++)
if (title[i+7])
info.title[i] = title[i+7];
info.title[i] = 0;
info.title[79] = 0;
if ((strlen(info.title)==0) || (strlen(url)<15))
continue;
sprintf(buf, "0;1;33;41mK发信人: deliver (自动发信系统), 信
区: "BOARD"\n"
"0;37;40m发信站: 一网深情3m自动发信系统\n"
"\n0;1;33;44mK新闻地址: \n"
"0;1;37;40m%s\n"
"\n0;1;38;42m 新闻提要:\n"
"0;1;37;40m%s\n"
"\n0;1;33;44mK新闻内容:\n"
"0;1;37;40m",
url, title);
strcpy(info.owner, "deliver");
sprintf(fname, BBSHOME"/boards/"BOARD"/%s", info.filename);
printf("[%d]%s\n", ++id, fname);
if ((fpout = fopen(fname, "w")) != NULL) {
fprintf(fpout, "%s", buf);
fclose(fpout);
// 写目录信息
write(fd, &info, sizeof(info));
// old news
fprintf(fpold, "%s\n", url);
// log
fprintf(fplog, BOARD" %s %s\n", info.filename, url);
}
}
flock(fd, LOCK_UN);
close(fd);
flock(fileno(fplog), LOCK_UN);
fclose(fplog);
return 0;
}
--
FROM AfterDark