User-agent: *
Disallow: /snapshots/
Disallow: /cgi-bin/
Disallow: /cgi2-bin/
在 $HOME/.wgetrc 中加入
robots = off
一切OK,镜像sina?
wget -c -m -p -q -b http://www.sina.com
-c 是继续传送, -m 是镜像, -p 是下载媒体文件,比如图片, -q 是安静模式, -b 是后台运行
User-agent: *
Disallow: /snapshots/
Disallow: /cgi-bin/
Disallow: /cgi2-bin/
robots = off
wget -c -m -p -q -b http://www.sina.com
import re
import urllib
import os
import time
from urllib import urlretrieve
def spider_url(url, pattern=r'''href=["']?(http://[^"'\s>]+)''', dest="C:/temp/"):
    """Fetch *url* and save every page it links to below *dest*.

    Each link is stored as ``dest + host/path``; a link whose path is empty
    or ends in "/" is stored as ``index.html`` in that directory.  Failures
    on one link are reported and do not stop the remaining downloads.

    url     -- page whose links are mirrored (Python 2 ``urllib`` API).
    pattern -- regex with exactly one group capturing the link URL.
               NOTE(review): the original pattern literal was empty (it looks
               like it was lost when the snippet was published), so this
               default is a reconstruction -- confirm it matches the intent.
    dest    -- directory prefix the mirror is written under; it is
               concatenated as-is, so it must end with a path separator.

    Returns None.
    """
    link_re = re.compile(pattern)
    page = urllib.urlopen(url).read()
    for x in link_re.findall(page):
        try:
            scheme, rest = urllib.splittype(x)
            host, path = urllib.splithost(rest)
            if not host:
                # Relative or malformed link: there is no host to file it under.
                continue
            # splithost() keeps the leading "/" and os.path.join() throws away
            # everything before an absolute component, so strip it first.
            path = path.lstrip("/")
            if not path or path[-1] == "/":
                path = path + "index.html"
            path = os.path.join(host, path)
            makedirs(dest + os.path.dirname(path))
            data = urllib.urlopen(x).read()
            with open(dest + path, "wb") as f:
                f.write(data)
            print("saved %s" % x)
        except (IOError, OSError) as err:
            # Best effort: report the link that failed and carry on.
            print("failed %s: %s" % (x, err))
def makedirs(dir):
    """Create directory *dir* together with any missing parent directories.

    - An empty *dir* is ignored.
    - Trailing path separators are ignored, so "a/b/" is treated as "a/b".
    - If *dir* already exists as a directory nothing happens.
    - If *dir* exists as a plain file, the file is moved to
      ``dir/index.html`` so the name can become a directory; errors during
      that move are ignored.
    - If the path has no final component to create (for example a root that
      does not exist) a message is printed and nothing is created.

    Returns None.
    """
    if not dir:
        return
    # os.path.split("a/b/") yields an empty tail, which used to be reported
    # as an error; drop trailing separators unless that would empty the path.
    trimmed = dir.rstrip("/" + os.sep)
    if trimmed:
        dir = trimmed
    if os.path.exists(dir):
        if not os.path.isdir(dir):
            try:
                os.rename(dir, dir + ".bak")
                os.mkdir(dir)
                os.rename(dir + ".bak", os.path.join(dir, "index.html"))
            except os.error:
                pass
        return
    head, tail = os.path.split(dir)
    if not tail:
        print("Huh? Don't know how to make dir" + " " + dir)
        return
    makedirs(head)
    # 0o777 is the same mode as the old 0777 literal and parses on Python 3.
    os.mkdir(dir, 0o777)
# -*- encoding:UTF-8 -*-
'''
This is geturl.
Written by yuzebin : yuzebin@gmail.com
Important: this script runs in cygwin or linux; if you run it on windows
you need curl and wget for windows.
'''
# Summary of the classes defined below.  It used to be assigned to the name
# ``class``, which is a reserved word and therefore a SyntaxError.
classes='''
CGetPage is charge of to get a url , it have three methods to get a page : urllib,curl and wget;
CParsePage is charge of to parse the page , and return the match;
CGetMatch is the forcad class to wrap the CGetPage and CParsePage.
'''
# Change log, newest entry first.
history='''
2006.07.10 version 0.0.0.9 :
Publish this code to internet , ;-)
2006.06.27 version 0.0.0.7 :
refrectoring class CParsePage : return re.match only
refrectoring class CGetCount : rename to CGetMatch
2006.06.26 version 0.0.0.3 :
modify class CParsePage , return re.match
2006.06.22 version 0.0.0.2 :
add class CGetCount
this version is the first workable version.
add cnsky.
2006.06.21 initial version 0.0.0.1 :
add class CGetPage and CParsePage
cannot work ;-)
'''
import string,re,os,fnmatch,sys,copy,gzip,time,datetime,urllib
from types import *
# Switch consulted by funcDebugInfo: debug messages are printed only when set.
isDebugMode = False


def funcUrlRead(url):
    """Return whatever ``urllib.urlopen(url).read()`` yields for *url*."""
    return urllib.urlopen(url).read()
def funcOutputMessage(msg):
    """Print *msg*, converted with ``str()``, on standard output."""
    # Single-argument call form: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print(str(msg))
def funcDebugInfo(msg):
    """Print *msg* (via ``str()``) only while the global ``isDebugMode`` is set."""
    if isDebugMode:
        # Single-argument call form keeps the output identical on Python 2
        # and valid on Python 3.
        print(str(msg))
class CGetPage:
    """Fetch one URL by urllib (getPage), curl (curlPage) or wget (wgetPage).

    Attributes:
      url  -- the address to fetch.
      page -- body stored by getPage()/curlPage(); None until one of them ran.
      path -- download directory used by wgetPage(); None until setPath().
    """

    def __init__(self, url):
        """Remember *url*; raise ValueError if urlCheck() rejects it."""
        if not self.urlCheck(url):
            # Returning None from __init__ still handed back a half-built
            # object without a url attribute; fail loudly instead.
            raise ValueError("invalid url: %r" % (url,))
        self.url = url
        self.page = None
        self.path = None

    def urlCheck(self, url):
        """Return True if *url* is acceptable (currently always True)."""
        #todo , check the url is valid url.
        return True

    def getPage(self):
        """Fetch self.url through funcUrlRead and store the result in self.page."""
        self.page = funcUrlRead(self.url)

    def curlPage(self):
        """Fetch self.url with the external ``curl`` program into self.page.

        Runs ``curl -A "" -s <url>``.  The arguments are passed as a list, so
        no shell is involved and the URL is really substituted (the previous
        command string contained the literal text ``+ self.url +``).
        """
        #call curl to get a page,this requir curl is installed.
        import subprocess
        proc = subprocess.Popen(["curl", "-A", "", "-s", self.url],
                                stdout=subprocess.PIPE)
        self.page = proc.communicate()[0]

    def setPath(self, path):
        """Set the directory wgetPage() downloads into."""
        self.path = path

    def wgetPage(self):
        """Download self.url with ``wget -c``.

        When a path was set the process working directory is changed to it
        first (and stays changed afterwards); otherwise the download goes to
        the current directory.
        """
        #call wget to download a url to path,this requir wget is installed.
        import subprocess
        if self.path is not None:
            os.chdir(self.path)
        subprocess.call(["wget", "-c", self.url])
class CParsePage:
    """Search a fetched page with a regular expression.

    Attributes:
      rule  -- compiled pattern, or None when the rule given to the
               constructor did not compile (callers may also assign a
               pattern string directly).
      page  -- the page content to search (bytes or text).
      match -- result of the last parsePage() call; None before that or when
               nothing matched.
    """

    def __init__(self, rule, page):
        """Store *page* and compile *rule*; an invalid rule leaves rule=None."""
        self.match = None
        self.page = page
        if not self.ruleCompile(rule):
            self.rule = None

    def ruleCompile(self, rule):
        """Compile *rule* into self.rule.

        Returns True on success and False when *rule* is not a valid pattern.
        """
        #compile the rule
        try:
            self.rule = re.compile(rule)
        except (re.error, TypeError):
            return False
        return True

    def parsePage(self):
        """Search self.page with self.rule and store the result in self.match.

        Byte content is decoded with the charset reported by getCharset();
        content that is already text is searched as it is.
        """
        text = self.page
        if isinstance(text, bytes):
            text = text.decode(self.getCharset(text))
        self.match = re.search(self.rule, text)
        funcDebugInfo(type(self.match))

    def getCharset(self, string):
        """Return the encoding name chardet detects for the bytes *string*.

        Falls back to 'utf-8' when chardet is not installed or cannot decide
        (chardet reports an encoding of None in that case).
        """
        try:
            import chardet
        except ImportError:
            return 'utf-8'
        return chardet.detect(string)['encoding'] or 'utf-8'
class CGetMatch:
    """Facade that fetches a URL with CGetPage and searches it with CParsePage."""

    def __init__(self, url, rule):
        """Fetch *url* right away and prepare a parser for *rule*."""
        self.url, self.rule = url, rule
        fetcher = CGetPage(url)
        self.cgetpage = fetcher
        fetcher.getPage()
        self.page = fetcher.page
        self.cparsepage = CParsePage(rule, fetcher.page)

    def getMatch(self, url, rule):
        """Fetch *url*, search it for *rule* and leave the result in self.match."""
        fetcher, parser = self.cgetpage, self.cparsepage
        # Point this object and both helpers at the new target.
        self.url = fetcher.url = url
        self.rule = parser.rule = rule
        fetcher.getPage()
        self.page = parser.page = fetcher.page
        parser.parsePage()
        self.match = parser.match
def runTest():
    """Fetch the download counter of each listed site and print one line each.

    Every line has the form "<n>.<site name padded to 12>:<result>", where
    the result is group 1 of the site's pattern.  A site that cannot be
    fetched or whose page does not match is reported on its own line and
    does not stop the remaining sites.
    """
    # (site name, url, regex whose group 1 captures the counter)
    sites = [
        ('huajun',
         'http://www.onlinedown.net/soft/hitjs/hits47.js',
         # Brackets are escaped and the literal is double-quoted so that it
         # matches the text  hit[587]='47588,<count>  (the previous literal
         # was a syntax error because of its embedded single quote).
         r"hit\[587\]='47588,([0-9]+)"),
        ('skycn',
         'http://www.skycn.com/soft/23265.html',
         u'下载次数: ([0-9]+)'),
    ]
    #initialization
    ccount = CGetMatch('http://www.sina.com.cn','')
    i = 0
    for sitename, url, rule in sites:
        i += 1
        label = str(i).rjust(2) + '.' + sitename.ljust(12) + ':'
        try:
            ccount.getMatch(url, rule)
            if ccount.match is None:
                funcOutputMessage(label + 'no match')
            else:
                funcOutputMessage(label + str(ccount.match.group(1)))
        except Exception as err:
            # Best effort per site: report the failure and keep going.
            funcOutputMessage(label + 'failed (%s)' % (err,))


# The entry point comes after runTest so the name exists when it is called.
if __name__ == '__main__':
    funcOutputMessage('===This is a get url script===')
    runTest()
#apt-get install prolog-el
C-x C-f /path/to/python-mode.el RET
M-x byte-compile-file RET
M-x locate-library RET python-mode RET
(setq load-path (cons "/dir/of/python-mode/" load-path))
(setq auto-mode-alist
(cons '("\\.py$" . python-mode) auto-mode-alist))
(setq interpreter-mode-alist
(cons '("python" . python-mode)
interpreter-mode-alist))
(autoload 'python-mode "python-mode" "Python editing mode." t)
;;; add these lines if you like color-based syntax highlighting
(global-font-lock-mode t)
(setq font-lock-maximum-decoration t)
(set-language-environment 'Chinese-GB)
(set-keyboard-coding-system 'euc-cn)
(set-clipboard-coding-system 'euc-cn)
(set-terminal-coding-system 'euc-cn)
(set-buffer-file-coding-system 'euc-cn)
(set-selection-coding-system 'euc-cn)
(modify-coding-system-alist 'process "*" 'euc-cn)
(setq default-process-coding-system
'(euc-cn . euc-cn))
(setq-default pathname-coding-system 'euc-cn)
$ emacs abc.py
import urllib2
from sgmllib import SGMLParser
class URLLister(SGMLParser):
    """SGMLParser subclass that gathers the href of every <a> tag in self.urls."""

    def reset(self):
        """Reset the parser state and start with an empty URL list."""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Append each ``href`` value found in *attrs* ((name, value) pairs)."""
        for name, value in attrs:
            if name == 'href':
                self.urls.append(value)
# Print every link found on the front page of www.donews.com.
f = urllib2.urlopen("http://www.donews.com")
try:
    if f.code == 200:
        parser = URLLister()
        parser.feed(f.read())
        # close() makes the parser process any input it is still buffering.
        parser.close()
        for url in parser.urls:
            print(url)
finally:
    # Release the connection whatever the status code or parse outcome.
    f.close()
import win32com.client, pythoncom
import time
# Drive Internet Explorer through COM, load a page and print the source URL,
# width and height of every image in the loaded document.
ie = win32com.client.DispatchEx('InternetExplorer.Application.1')
ie.Visible = 1
ie.Navigate("http://news.sina.com.cn")
# Busy alone can drop to False before the document is available; also wait
# for ReadyState 4 (READYSTATE_COMPLETE).
while ie.Busy or ie.ReadyState != 4:
    time.sleep(0.05)
doc = ie.Document
for img in doc.images:
    print("%s %s %s" % (img.src, img.width, img.height))
// Append the 'ed_next' button, which inserts the <!--nextpage--> marker, to
// the end of the edButtons list.
edButtons.push(
    new edButton('ed_next', 'page', '<!--nextpage-->', '', 'p', -1)
);
/**
 * Return the toolbar HTML for the "wordpress" control, or '' for any other
 * control name.
 *
 * NOTE(review): every "wordpress" in this snippet had been mangled to
 * "wordcodess" (the same "pre" -> "code" substitution seen elsewhere in this
 * file); the identifiers are restored here -- confirm against the plugin.
 */
function TinyMCE_wordpress_getControlHTML(control_name) {
    switch (control_name) {
        case "wordpress":
            var titleMore = tinyMCE.getLang('lang_wordpress_more_button');
            var titlePage = tinyMCE.getLang('lang_wordpress_page_button');
            var titleHelp = tinyMCE.getLang('lang_wordpress_help_button');
            // TODO(review): the button markup that used these titles was
            // stripped from the snippet, leaving an unterminated two-line
            // string literal; restore it from the original plugin source.
            var buttons = '';
            return buttons;
    }
    return '';
}








Google发布了其2006年度的各种热门搜索,除了一般的年度热门关键字搜索和热门新闻搜索,这次还包括了“热门”,“时事”,“重要事件”,“娱乐”,“体育”这几类的搜索排名。其中在“时事”中的三个TOP关键字就是“伊拉克”、“北韩”和“伊朗”。在娱乐方面,看来美国人都比较喜欢他们的"Live Show",TOP的三个关键词是"Dancing With the Stars vs. American Idol vs. Project Runway",虽然这些只是英文世界的TREND,不过有的东西还是可以借鉴一下的,BAIDU的2006年度搜索报告呢?不会在那上面出现广告吧? ;)
Array ( [0] => Array ( [HelpClassName] => 常见问题 [HelpClassID] => 1
[Help] => Array ( [0] => Array ( [HelpSubjectName] => PERL! [HelpSubjectID] => 1 ) [1] => Array ( [HelpSubjectName] => PYTHON[HelpSubjectID] => 4 )[2] => Array ( [HelpSubjectName] => RUBY? [HelpSubjectID] => 14 ) ) )
[1] => Array ( [HelpClassName] => 关于概念 [HelpClassID] => 3 [Help] => Array ( [0] => Array ( [HelpSubjectName] => 到底什么是PHP [HelpSubjectID] => 6 ) [1] => Array ( [HelpSubjectName] => php与PYTHON的区别是什么? [HelpSubjectID] => 7 ) ) )
[2] => Array ( [HelpClassName] => 新人必读 [HelpClassID] => 4 [Help] => Array ( [0] => Array ( [HelpSubjectName] => 怎样在PHP? [HelpSubjectID] => 13 ) [1] => Array ( [HelpSubjectName] => PHP使用说明(一) [HelpSubjectID] => 15 ) [2] => Array ( [HelpSubjectName] => PYTHON使用说明(二) [HelpSubjectID] => 16 ) [3] => Array ( [HelpSubjectName] => 如何PHP? [HelpSubjectID] => 17 ) [4] => Array ( [HelpSubjectName] => RUBY(三) [HelpSubjectID] => 18 ) ) ) );
?>




安装Skype For Debian的最新版本(我安装的是1.3.0.53)
$wget http://skype.com/go/getskype-linux-deb
$sudo dpkg -i skype_debian-1.3.0.53-1_i386.deb
$ export LANG=c
$ QT_IM_MODULE=xim skype
// Store cookie `c_name` with the escaped `value`.  When `expiredays` is not
// null/undefined an ";expires=" attribute that many days from now is added.
function setCookie(c_name,value,expiredays)
{
    var expiry = new Date();
    expiry.setDate(expiry.getDate() + expiredays);
    var suffix = "";
    if (expiredays != null)
    {
        suffix = ";expires=" + expiry.toGMTString();
    }
    document.cookie = c_name + "=" + escape(value) + suffix;
}
// Return the unescaped value of the cookie named `c_name`, or "" when no
// such cookie exists.
//
// Each "name=value" pair is compared by its full name, so looking up
// "username" no longer returns the value of a cookie such as "xusername".
function getCookie(c_name)
{
    var pairs = document.cookie.length > 0 ? document.cookie.split(";") : [];
    var prefix = c_name + "=";
    for (var i = 0; i < pairs.length; i++)
    {
        // Pairs are separated by "; ", so drop the leading blank.
        var pair = pairs[i].replace(/^\s+/, "");
        if (pair.indexOf(prefix) === 0)
        {
            return unescape(pair.substring(prefix.length));
        }
    }
    return "";
}
// Greet a returning visitor by the name stored in the 'username' cookie;
// otherwise ask for a name and, if one is entered, store it for 365 days.
function checkCookie()
{
    // Declared locally: the name used to leak into the global scope.
    var username = getCookie('username');
    if (username != null && username != "")
    {
        alert('Welcome again ' + username + '!');
        return;
    }
    username = prompt('Please enter your name:', "");
    if (username != null && username != "")
    {
        setCookie('username', username, 365);
    }
}
// Send a "Cache-control: private" response header.
header("Cache-control: private");
// NOTE(review): the three calls below read like alternatives to pick from;
// run in sequence as written, presumably only the last setting stays in effect.
session_cache_limiter('nocache'); // clears the form
session_cache_limiter('private'); // does not clear the form; only while the session is valid
session_cache_limiter('public'); // does not clear the form; as if no session were used
// Return the unescaped cookie text that starts at `offset` and runs up to the
// next ";" (or to the end of document.cookie when there is none).
function GetCookieVal(offset) {
    var jar = document.cookie;
    var stop = jar.indexOf(";", offset);
    return unescape(jar.substring(offset, stop == -1 ? jar.length : stop));
}
// Set cookie `name` to the escaped `value`.
// Optional extra arguments, in order:
//   expires - lifetime in seconds from now (null: no expires attribute)
//   path    - value of the path attribute
//   domain  - value of the domain attribute
//   secure  - true to add the secure flag
function SetCookie(name, value) {
    // The function-local `arguments` object replaces the deprecated
    // SetCookie.arguments, which throws in strict mode.
    var argv = arguments;
    var argc = arguments.length;
    var expires = (argc > 2) ? argv[2] : null;
    var path = (argc > 3) ? argv[3] : null;
    var domain = (argc > 4) ? argv[4] : null;
    var secure = (argc > 5) ? argv[5] : false;
    var expdate = new Date();
    if (expires != null) expdate.setTime(expdate.getTime() + (expires * 1000));
    // toUTCString() produces the same text as the deprecated toGMTString().
    document.cookie = name + "=" + escape(value)
        + ((expires == null) ? "" : ("; expires=" + expdate.toUTCString()))
        + ((path == null) ? "" : ("; path=" + path))
        + ((domain == null) ? "" : ("; domain=" + domain))
        + ((secure == true) ? "; secure" : "");
}
// Delete cookie `name` by rewriting it with an expiry date one millisecond in
// the past.
function DelCookie(name) {
    var past = new Date();
    past.setTime(past.getTime() - 1);
    document.cookie = name + "=" + GetCookie(name) + "; expires=" + past.toGMTString();
}
// Return the value of cookie `name` (as produced by GetCookieVal), or null
// when document.cookie holds no entry starting with "name=".
function GetCookie(name) {
    var prefix = name + "=";
    var total = document.cookie.length;
    for (var pos = 0; pos < total; ) {
        var end = pos + prefix.length;
        if (document.cookie.substring(pos, end) == prefix) {
            return GetCookieVal(end);
        }
        // Jump to the character after the next blank, i.e. the next entry.
        pos = document.cookie.indexOf(" ", pos) + 1;
        if (pos == 0) break;
    }
    return null;
}
在Windows上安装Ruby on Rails,使用gem安装Rails的时候遇到这样的问题
gem install rails --include-dependencies
$ perl -MCPAN -e shell
[....]
Can't exec "mysql_config": No such file or directory at Makefile.PL line 76.
Cannot find the file 'mysql_config'! Your execution PATH doesn't seem
not contain the path to mysql_config. Resorting to guessed values!
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Failed to determine directory of mysql.h. Use
[....]
$ cd $HOME/.cpan/build/DBD-mysql-3.0008
$ perl Makefile.PL --mysql_config=$MYSQL_PREFIX_DIR/bin/mysql_config
$ make
$ make test
$ make install
在测试服务器上安装perl5.8.8
$./configure.gnu --prefix=$HOME/local
make
make[1]: Leaving directory `/home/.salk/ndw/src/perl-5.8.6/x2p'
Making B (dynamic)
opendir(./../../../../..): Permission denied at ../../lib/File/Find.pm line 597
Use of chdir('') or chdir(undef) as chdir() is deprecated at ../../lib/File/Find.pm line 741.
opendir(./..): Permission denied at ../../lib/ExtUtils/MakeMaker.pm line 165
Couldn't change to directory /home/.salk/ndw/C: No such file or directory at Makefile.PL line 18
Writing Makefile for B
Warning: No Makefile!
make[1]: Entering directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make[1]: *** No rule to make target `config'. Stop.
make[1]: Leaving directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make config failed, continuing anyway...
make[1]: Entering directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make[1]: *** No rule to make target `all'. Stop.
make[1]: Leaving directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make: *** [lib/auto/B/B.so] Error 2
$make minitest
/tmp$ ./configure.gnu --prefix=$HOME/local && make && make test && make install
今天遇到一个PHP下文本的换行替换问题,好无聊,在Windows或者Linux上有区别?不用管,用这个就好了
preg_replace("~(?:\r)?\n~s", "", $str);
QQ2006 Mobile是腾讯最近发布的QQ手机版本,与以前的不同,这个版本几乎拥有的PC版本QQ的所有功能,不过前提是你的手机型号是有要求的,一般来说智能手机是可以用上的,我的N3230当然可以了,现在就在用,在昨天下载的时候却遇到问题,怎么都下不到N3230版本的,后来在ZJ的帮助下,我得到了这个N3230的QQ2006test MOBILE,不需要支持其他费用,只需要支持手机上网的WAP或者GPRS费用就可以了。主要功能:
最近KAKA在DH上搭建了一个Django的测试环境,感觉Python还是很强大,不过似乎做有些小而快速的WEB应用就不那么实在。今天在 Dream Host 上测试 Ruby on Rails,非常成功,安装非常简单,只要在DH的控制面板中把你Domain的FastCGI支持打开就可以了,我使用的是子目录的形式建立Ruby on Rails应用的。Application error (Rails)ruby script/server
周杰伦的新歌《黄金甲》最近发布了,可以点这里观看MV,再按这里收听电台的首播。
Let your Dreamhost hosting as a Youtube? You can upload AVI/MOV/MPEG/MPG media files and Dreamhost can convert it to flv format for you! Like the Youtube, but, you can give faster speed to visitors, and upload media files as much as your Dreamhost space limited! I have 200G~(+1.0 GB / week). BTW, you can get more information about the media tool on Dreamhost, now!

@ECHO OFF
REM Silently (/s) re-registers a set of system DLLs with regsvr32, printing a
REM banner first and waiting for a key press at the end.
ECHO.如果你msn中修复不了就运行吧.
ECHO *************************************
ECHO * This file is provided by msn-problems.com *
ECHO * Copyright (C) 2004-2005 *
ECHO *************************************
ECHO.
ECHO 等一下.
REM Following files only needed in windows XP
REM (skipped when %os% is not Windows_NT)
IF NOT "%os%"=="Windows_NT" GOTO WIN9X
regsvr32 Dssenh.dll /s
regsvr32 Gpkcsp.dll /s
regsvr32 Slbcsp.dll /s
regsvr32 Sccbase.dll /s
:WIN9X
REM When %os% is not Windows_NT, switch to %windir%\System first.
IF NOT "%os%"=="Windows_NT" CD %windir%\System\
REM Needed by both XP and 9X
regsvr32 Softpub.dll /s
regsvr32 Wintrust.dll /s
regsvr32 Initpki.dll /s
regsvr32 Rsaenh.dll /s
regsvr32 Mssip32.dll /s
regsvr32 Cryptdlg.dll /s
regsvr32 Msxml3.dll /s
ECHO.
ECHO 搞定,还不可以到这网站的论坛询问 www.msn-problems.com
ECHO.
REM Keep the window open until a key is pressed.
pause
After a few hours of trial and error using advice from many different sites/posts, this is the process that I found successful in getting Ruby on Rails working with the Apache 2 fcgi module on linux. I hope this helps someone hang onto a few more hair follicles.
curl -O ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.4.tar.gz
tar xvfz ruby-1.8.4.tar.gz
cd ruby-1.8.4
./configure --prefix=/usr/local
make
make install
cd ..
ruby --version
ruby 1.8.4 (2005-12-24) [i686-linux]
curl -O http://rubyforge.org/frs/download.php/5207/rubygems-0.8.11.tgz
tar xvfz rubygems-0.8.11.tgz
cd rubygems-0.8.11
ruby setup.rb
cd ..
curl -O http://www.fastcgi.com/dist/fcgi-2.4.0.tar.gz
tar xvfz fcgi-2.4.0.tar.gz
cd fcgi-2.4.0
./configure --prefix=/usr/local
make
make install
cd ..