User-agent: *
Disallow: /snapshots/
Disallow: /cgi-bin/
Disallow: /cgi2-bin/
在 $HOME/.wgetrc 中加入
robots = off
一切OK,镜像sina?
wget -c -m -p -q -b http://www.sina.com
-c 是继续传送, -m 是镜像, -p 是下载媒体文件,比如图片, -q 是安静模式, -b 是后台运行
User-agent: *
Disallow: /snapshots/
Disallow: /cgi-bin/
Disallow: /cgi2-bin/
robots = off
wget -c -m -p -q -b http://www.sina.com
import re
import urllib
import os
import time
from urllib import urlretrieve
def spider_url(url):
    """Download every link found on the page at *url* below ``C:/temp/``.

    The page is fetched, links are extracted with a regular expression, and
    each link is written to ``C:/temp/<host>/<path>``; missing directories
    are created with ``makedirs``.  A link that cannot be saved is reported
    on stderr and skipped, so one bad link does not stop the run.

    url -- address of the page whose links should be mirrored.
    """
    import sys

    # NOTE(review): the pattern is empty in the source, so findall() only
    # yields empty strings and nothing is downloaded.  Presumably the
    # link-matching expression was lost -- restore it before relying on this.
    link_pattern = re.compile('')
    page = urllib.urlopen(url).read()
    for link in link_pattern.findall(page):
        if not link:
            continue
        try:
            rest = urllib.splittype(link)[1]
            host, path = urllib.splithost(rest)
            if not host:
                # Relative link or no network location: nowhere to file it.
                continue
            # os.path.join() throws the host away when the second part is
            # absolute, so strip the leading slash first.
            path = path.lstrip("/")
            if not path or path.endswith("/"):
                # A directory-style URL needs a file name to be saved under.
                path = path + "index.html"
            path = os.path.join(host, path)
            makedirs("C:/temp/" + os.path.dirname(path))
            # Download before opening the target so that a failed request
            # does not leave an empty file behind.
            data = urllib.urlopen(link).read()
            with open("C:/temp/" + path, "wb") as out:
                out.write(data)
            print("saved %s" % (link,))
        except Exception as exc:
            # Best effort per link: report and carry on with the next one
            # (KeyboardInterrupt / SystemExit are no longer swallowed).
            sys.stderr.write("failed %s: %s\n" % (link, exc))
def makedirs(dir):
    """Create directory *dir*, including any missing parent directories.

    An empty path is ignored and an existing directory is left alone.  If a
    regular file already occupies the path, that file is moved into the newly
    created directory as ``index.html``; errors during this swap are ignored.
    A path whose last component is empty (for example one ending in a
    separator) is reported on stdout and not created.

    dir -- path of the directory to create.

    Raises OSError when a missing directory cannot be created.
    """
    if not dir:
        return
    if os.path.exists(dir):
        if not os.path.isdir(dir):
            # A plain file is in the way: park it, create the directory,
            # then move the file inside it as index.html.
            try:
                os.rename(dir, dir + ".bak")
                os.mkdir(dir)
                os.rename(dir + ".bak", os.path.join(dir, "index.html"))
            except os.error:
                pass
        return
    head, tail = os.path.split(dir)
    if not tail:
        print("Huh? Don't know how to make dir %s" % (dir,))
        return
    # Parents first, then the directory itself.
    makedirs(head)
    os.mkdir(dir, 0o777)
# -*- encoding:UTF-8 -*-
'''
This is geturl.
Written by yuzebin : yuzebin@gmail.com
Important: this script runs under cygwin or linux; if you run it on windows
you need curl and wget for windows.
'''
# Overview of the classes defined below.  The source assigned this text to
# the name `class`, which is a reserved word and therefore a syntax error.
# NOTE(review): the original identifier is unknown -- `classes` is a stand-in.
classes='''
CGetPage is charge of to get a url , it have three methods to get a page : urllib,curl and wget;
CParsePage is charge of to parse the page , and return the match;
CGetMatch is the forcad class to wrap the CGetPage and CParsePage.
'''
# Change log, newest entry first.
history='''
2006.07.10 version 0.0.0.9 :
Publish this code to internet , ;-)
2006.06.27 version 0.0.0.7 :
refrectoring class CParsePage : return re.match only
refrectoring class CGetCount : rename to CGetMatch
2006.06.26 version 0.0.0.3 :
modify class CParsePage , return re.match
2006.06.22 version 0.0.0.2 :
add class CGetCount
this version is the first workable version.
add cnsky.
2006.06.21 initial version 0.0.0.1 :
add class CGetPage and CParsePage
cannot work ;-)
'''
import string,re,os,fnmatch,sys,copy,gzip,time,datetime,urllib
from types import *
# Set to True to make funcDebugInfo() print its messages.
isDebugMode = False


def funcUrlRead(url):
    """Return the raw content fetched from *url* with urllib."""
    return urllib.urlopen(url).read()


def funcOutputMessage(msg):
    """Print *msg* (converted with str()) on stdout."""
    print(str(msg))


def funcDebugInfo(msg):
    """Print *msg* on stdout, but only while isDebugMode is enabled."""
    if isDebugMode:
        print(str(msg))
class CGetPage:
    """Fetch one URL, either through urllib or the external curl/wget tools.

    Attributes:
      url  -- address to fetch (set by the constructor).
      page -- content stored by getPage() / curlPage().
      path -- download directory used by wgetPage() (set by setPath()).
    """

    def __init__(self, url):
        """Remember *url*; raise ValueError if urlCheck() rejects it."""
        # Returning None from __init__ does not stop construction, so a
        # rejected url used to leave an object without a `url` attribute.
        if not self.urlCheck(url):
            raise ValueError("invalid url: %r" % (url,))
        self.url = url

    def urlCheck(self, url):
        """Return True if *url* is acceptable (currently always True)."""
        # TODO: check that the url is a valid url.
        return True

    def getPage(self):
        """Fetch self.url with funcUrlRead() and store the result in self.page."""
        self.page = funcUrlRead(self.url)

    def curlPage(self):
        """Fetch self.url with curl and store its output in self.page.

        Requires curl to be installed; OSError is raised when it cannot be
        started.
        """
        import subprocess

        # NOTE(review): the quoting of the original command string was lost,
        # so the literal text "+ self.url +" was sent to the shell.  This is
        # presumably the intended call: empty User-Agent (-A ""), silent (-s).
        # An argument list keeps the url away from the shell.
        proc = subprocess.Popen(["curl", "-A", "", "-s", self.url],
                                stdout=subprocess.PIPE)
        self.page = proc.communicate()[0]

    def setPath(self, path):
        """Set the directory wgetPage() downloads into."""
        self.path = path

    def wgetPage(self):
        """Download self.url into self.path with ``wget -c``.

        Requires wget to be installed.  As before, the working directory of
        the current process is changed to self.path.
        """
        import subprocess

        os.chdir(self.path)
        # An argument list keeps the url from being interpreted by a shell.
        subprocess.call(["wget", "-c", self.url])
class CParsePage:
    """Search a downloaded page for a regular expression.

    Attributes:
      rule  -- pattern to search for; compiled by ruleCompile(), although
               callers in this file also assign a plain pattern string.
      page  -- raw (encoded) page content to search.
      match -- result of the most recent parsePage() call.
    """

    def __init__(self, rule, page):
        """Compile *rule* and, if that succeeds, remember *page*."""
        self.rule = None
        self.page = None
        self.match = None
        if self.ruleCompile(rule):
            self.page = page

    def ruleCompile(self, rule):
        """Compile *rule* into self.rule.

        Returns True on success and False when *rule* is not a usable
        pattern; in that case self.rule is left unchanged.
        """
        try:
            self.rule = re.compile(rule)
        except (re.error, TypeError, ValueError, OverflowError):
            return False
        return True

    def parsePage(self):
        """Decode self.page and store the first match of self.rule in self.match."""
        # chardet reports None when it cannot guess; fall back to UTF-8
        # instead of failing on the decode call.
        charset = self.getCharset(self.page) or 'utf-8'
        text = self.page.decode(charset)
        # re.search() accepts both a compiled pattern and a pattern string,
        # which matters because self.rule may be either (see class docstring).
        self.match = re.search(self.rule, text)
        funcDebugInfo(type(self.match))

    def getCharset(self, string):
        """Return the encoding name chardet detects for *string* (may be None)."""
        import chardet
        charset = chardet.detect(string)
        return charset['encoding']
class CGetMatch:
    """Facade over CGetPage and CParsePage: fetch a URL and search it.

    Attributes:
      url, rule  -- the most recently requested address and pattern.
      page       -- content of the most recently fetched page.
      match      -- result of the most recent getMatch() call (None before
                    the first call).
      cgetpage   -- the CGetPage helper.
      cparsepage -- the CParsePage helper.
    """

    def __init__(self, url, rule):
        """Create the helpers and fetch *url* once; no search is run yet."""
        self.url = url
        self.rule = rule
        # Defined up front so reading `match` before getMatch() is safe.
        self.match = None
        self.cgetpage = CGetPage(self.url)
        self.cgetpage.getPage()
        self.page = self.cgetpage.page
        self.cparsepage = CParsePage(self.rule, self.cgetpage.page)

    def getMatch(self, url, rule):
        """Fetch *url*, search it for *rule* and store the result in self.match."""
        self.url = url
        self.rule = rule
        # Re-target both helpers, then fetch and parse again.  The rule is
        # handed over as given (not compiled).
        self.cgetpage.url = url
        self.cparsepage.rule = rule
        self.cgetpage.getPage()
        self.page = self.cgetpage.page
        self.cparsepage.page = self.cgetpage.page
        self.cparsepage.parsePage()
        self.match = self.cparsepage.match
def runTest():
    """Fetch the download counter of each known site and print it.

    Every site is tried independently: a site whose page cannot be fetched
    or whose pattern does not match is skipped (the reason is passed to
    funcDebugInfo) and the remaining sites are still processed.
    """
    # (site name, pattern whose group 1 is the counter, page carrying it)
    # NOTE(review): the quoting of the first pattern was broken in the
    # source (hit[587]='47588,...); the escaped form below is presumably
    # what was intended -- confirm against the target page.
    sites = (
        ('huajun',
         r"hit\[587\]='47588,([0-9]+)",
         'http://www.onlinedown.net/soft/hitjs/hits47.js'),
        ('skycn',
         u'下载次数: ([0-9]+)',
         'http://www.skycn.com/soft/23265.html'),
    )
    # initialization
    ccount = CGetMatch('http://www.sina.com.cn', '')
    for i, (sitename, rule, url) in enumerate(sites, 1):
        try:
            ccount.getMatch(url, rule)
            funcOutputMessage(str(i).rjust(2) + '.' + sitename.ljust(12) + ':' + str(ccount.match.group(1)))
        except Exception as exc:
            # Best effort per site; keep going with the next one.
            funcDebugInfo('%s failed: %s' % (sitename, exc))


# The entry point has to come after runTest() is defined; calling it from a
# guard placed above the definition raises NameError.
if __name__ == '__main__':
    funcOutputMessage('===This is a get url script===')
    runTest()
#apt-get install prolog-el
C-x C-f /path/to/python-mode.el RET
M-x byte-compile-file RET
M-x locate-library RET python-mode RET
(setq load-path (cons "/dir/of/python-mode/" load-path))
(setq auto-mode-alist
(cons '("\\.py$" . python-mode) auto-mode-alist))
(setq interpreter-mode-alist
(cons '("python" . python-mode)
interpreter-mode-alist))
(autoload 'python-mode "python-mode" "Python editing mode." t)
;;; add these lines if you like color-based syntax highlighting
(global-font-lock-mode t)
(setq font-lock-maximum-decoration t)
(set-language-environment 'Chinese-GB)
(set-keyboard-coding-system 'euc-cn)
(set-clipboard-coding-system 'euc-cn)
(set-terminal-coding-system 'euc-cn)
(set-buffer-file-coding-system 'euc-cn)
(set-selection-coding-system 'euc-cn)
(modify-coding-system-alist 'process "*" 'euc-cn)
(setq default-process-coding-system
'(euc-cn . euc-cn))
(setq-default pathname-coding-system 'euc-cn)
$ emacs abc.py
import urllib2
from sgmllib import SGMLParser
class URLLister(SGMLParser):
    """SGML parser that collects the href value of every <a> tag it is fed.

    The collected values are available in the ``urls`` list.
    """

    def reset(self):
        """Reset the parser state and start with an empty ``urls`` list."""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Record the href attribute(s) of an opening <a> tag.

        attrs -- list of (name, value) pairs for the tag's attributes.
        """
        self.urls.extend(value for name, value in attrs if name == 'href')
# Print every link found on the donews.com front page.
f = urllib2.urlopen("http://www.donews.com")
try:
    if f.code == 200:
        parser = URLLister()
        parser.feed(f.read())
        # Flush whatever the parser still has buffered.
        parser.close()
        for url in parser.urls:
            print(url)
finally:
    # Close the response whatever the status was.
    f.close()
import win32com.client, pythoncom
import time
# Drive Internet Explorer through COM and list the images of a page.
ie = win32com.client.DispatchEx('InternetExplorer.Application.1')
ie.Visible = 1
ie.Navigate("http://news.sina.com.cn")
# Poll until the browser reports it is no longer busy.
while ie.Busy:
    time.sleep(0.05)
doc = ie.Document
# One line per image: source address, width, height.
for i in doc.images:
    print("%s %s %s" % (i.src, i.width, i.height))
// Append a new edButton at index edButtons.length, i.e. after the current
// last entry of edButtons.
// NOTE(review): the meaning of the six arguments is defined by the edButton
// constructor, which is not visible here; presumably id, label, opening tag,
// closing tag, access key and open flag -- confirm against its definition.
edButtons[edButtons.length] =
new edButton('ed_next'
,'page'
,'<!--nextpage-->'
,''
,'p'
,-1
);
/**
 * Return the toolbar HTML for the given control name.
 *
 * Only the "wordcodess" control is recognised; its button titles are looked
 * up through tinyMCE.getLang(), but the markup returned is currently an
 * empty string.  Any other control name also yields an empty string.
 *
 * NOTE(review): "wordcodess" (function name, control name and language
 * keys) looks like a garbled "wordpress" -- confirm before wiring this in.
 *
 * @param {string} control_name name of the control being rendered
 * @return {string} HTML for the control
 */
function TinyMCE_wordcodess_getControlHTML(control_name) {
    if (control_name !== "wordcodess") {
        return '';
    }

    var moreLabel = tinyMCE.getLang('lang_wordcodess_more_button');
    var pageLabel = tinyMCE.getLang('lang_wordcodess_page_button');
    var helpLabel = tinyMCE.getLang('lang_wordcodess_help_button');
    var markup = '';
    return markup;
}
Array ( [0] => Array ( [HelpClassName] => 常见问题 [HelpClassID] => 1
[Help] => Array ( [0] => Array ( [HelpSubjectName] => PERL! [HelpSubjectID] => 1 ) [1] => Array ( [HelpSubjectName] => PYTHON[HelpSubjectID] => 4 )[2] => Array ( [HelpSubjectName] => RUBY? [HelpSubjectID] => 14 ) ) )
[1] => Array ( [HelpClassName] => 关于概念 [HelpClassID] => 3 [Help] => Array ( [0] => Array ( [HelpSubjectName] => 到底什么是PHP [HelpSubjectID] => 6 ) [1] => Array ( [HelpSubjectName] => php与PYTHON的区别是什么? [HelpSubjectID] => 7 ) ) )
[2] => Array ( [HelpClassName] => 新人必读 [HelpClassID] => 4 [Help] => Array ( [0] => Array ( [HelpSubjectName] => 怎样在PHP? [HelpSubjectID] => 13 ) [1] => Array ( [HelpSubjectName] => PHP使用说明(一) [HelpSubjectID] => 15 ) [2] => Array ( [HelpSubjectName] => PYTHON使用说明(二) [HelpSubjectID] => 16 ) [3] => Array ( [HelpSubjectName] => 如何PHP? [HelpSubjectID] => 17 ) [4] => Array ( [HelpSubjectName] => RUBY(三) [HelpSubjectID] => 18 ) ) ) );
?>
$wget http://skype.com/go/getskype-linux-deb
$sudo dpkg -i skype_debian-1.3.0.53-1_i386.deb
$ export LANG=C
$ QT_IM_MODULE=xim skype
/**
 * Store a cookie.
 *
 * @param c_name     cookie name
 * @param value      cookie value (encoded with escape())
 * @param expiredays lifetime in days; null/undefined omits the expires part
 */
function setCookie(c_name, value, expiredays) {
    var cookieText = c_name + "=" + escape(value);
    if (expiredays != null) {
        // Expiry is "now" moved forward by the requested number of days.
        var expiry = new Date();
        expiry.setDate(expiry.getDate() + expiredays);
        cookieText += ";expires=" + expiry.toGMTString();
    }
    document.cookie = cookieText;
}
/**
 * Return the decoded value of the cookie named c_name.
 *
 * The cookie string is split into its "name=value" entries and only an
 * entry whose name equals c_name is accepted, so asking for "username" no
 * longer returns the value of a cookie such as "xusername".
 *
 * @param c_name cookie name to look up
 * @return the unescape()d value, or "" when no such cookie exists
 */
function getCookie(c_name) {
    var wanted = c_name + "=";
    var entries = document.cookie.split(";");
    for (var i = 0; i < entries.length; i++) {
        // Entries are separated by "; ", so drop the leading whitespace.
        var entry = entries[i].replace(/^\s+/, "");
        if (entry.indexOf(wanted) === 0) {
            return unescape(entry.substring(wanted.length));
        }
    }
    return "";
}
/**
 * Greet a returning visitor, or ask a new one for a name and remember it.
 *
 * Reads the "username" cookie through getCookie(); when it is empty the
 * visitor is prompted and a non-empty answer is stored for 365 days with
 * setCookie().
 */
function checkCookie() {
    // Declared locally: the bare assignment used to create a global.
    var username = getCookie('username');
    if (username != null && username != "") {
        alert('Welcome again ' + username + '!');
        return;
    }

    username = prompt('Please enter your name:', "");
    if (username != null && username != "") {
        setCookie('username', username, 365);
    }
}
// Cache settings that decide whether a submitted form keeps its contents.
// NOTE(review): the three session_cache_limiter() calls look like
// alternatives shown side by side -- presumably only one is meant to be
// used at a time; confirm.
header("Cache-control: private");
session_cache_limiter('nocache'); // the form is cleared
session_cache_limiter('private'); // the form is not cleared, only while the session is valid
session_cache_limiter('public'); // the form is not cleared, as if no session were used
// Return the decoded cookie value that starts at `offset` in document.cookie.
// The value runs up to the next ";" or, failing that, to the end of the string.
function GetCookieVal(offset) {
    var jar = document.cookie;
    var stop = jar.indexOf(";", offset);
    return unescape(jar.substring(offset, stop == -1 ? jar.length : stop));
}
// Set a cookie.
//   name, value -- cookie name and value (the value is escape()d)
// Optional extra arguments, in this order:
//   expires -- lifetime in seconds from now
//   path    -- cookie path
//   domain  -- cookie domain
//   secure  -- true to add the "secure" flag
function SetCookie(name, value) {
    // Read the optional arguments from `arguments`; the function-object
    // form SetCookie.arguments is deprecated and throws in strict mode.
    var argc = arguments.length;
    var expires = (argc > 2) ? arguments[2] : null;
    var path = (argc > 3) ? arguments[3] : null;
    var domain = (argc > 4) ? arguments[4] : null;
    var secure = (argc > 5) ? arguments[5] : false;

    var cookie = name + "=" + escape(value);
    if (expires != null) {
        var expdate = new Date();
        expdate.setTime(expdate.getTime() + (expires * 1000));
        cookie += "; expires=" + expdate.toGMTString();
    }
    if (path != null) {
        cookie += "; path=" + path;
    }
    if (domain != null) {
        cookie += "; domain=" + domain;
    }
    if (secure == true) {
        cookie += "; secure";
    }
    document.cookie = cookie;
}
// Delete a cookie by rewriting it with an expiry time that is already past.
function DelCookie(name) {
    // One millisecond ago is enough to count as expired.
    var past = new Date(new Date().getTime() - 1);
    var current = GetCookie(name);
    document.cookie = name + "=" + current + "; expires=" + past.toGMTString();
}
// Look up the cookie called `name` and return its value via GetCookieVal(),
// or null when no such cookie is found.
function GetCookie(name) {
    var prefix = name + "=";
    var jar = document.cookie;
    for (var pos = 0; pos < jar.length; ) {
        var valueStart = pos + prefix.length;
        if (jar.substring(pos, valueStart) == prefix) {
            return GetCookieVal(valueStart);
        }
        // Jump to the character after the next space (the start of the next
        // entry); indexOf() returning -1 means there are no more entries.
        pos = jar.indexOf(" ", pos) + 1;
        if (pos == 0) {
            break;
        }
    }
    return null;
}
gem install rails --include-dependencies
$ perl -MCPAN -e shell
[....]
Can't exec "mysql_config": No such file or directory at Makefile.PL line 76.
Cannot find the file 'mysql_config'! Your execution PATH doesn't seem
not contain the path to mysql_config. Resorting to guessed values!
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Can't exec "mysql_config": No such file or directory at Makefile.PL line 454.
Failed to determine directory of mysql.h. Use
[....]
$ cd $HOME/.cpan/build/DBD-mysql-3.0008
$ perl Makefile.PL --mysql_config=$MYSQL_PREFIX_DIR/bin/mysql_config
$ make
$ make test
$ make install
$./configure.gnu --prefix=$HOME/local
make
make[1]: Leaving directory `/home/.salk/ndw/src/perl-5.8.6/x2p'
Making B (dynamic)
opendir(./../../../../..): Permission denied at ../../lib/File/Find.pm line 597
Use of chdir('') or chdir(undef) as chdir() is deprecated at ../../lib/File/Find.pm line 741.
opendir(./..): Permission denied at ../../lib/ExtUtils/MakeMaker.pm line 165
Couldn't change to directory /home/.salk/ndw/C: No such file or directory at Makefile.PL line 18
Writing Makefile for B
Warning: No Makefile!
make[1]: Entering directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make[1]: *** No rule to make target `config'. Stop.
make[1]: Leaving directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make config failed, continuing anyway...
make[1]: Entering directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make[1]: *** No rule to make target `all'. Stop.
make[1]: Leaving directory `/home/.salk/ndw/src/perl-5.8.6/ext/B'
make: *** [lib/auto/B/B.so] Error 2
$make minitest
/tmp$ ./configure.gnu --prefix=$HOME/local && make && make test && make install
preg_replace("~(?:\r)?\n~s", "", $str);
Application error (Rails)
ruby script/server
REM Re-registers a set of system DLLs with regsvr32.
REM The Chinese ECHO lines are user-facing messages; in English they read:
REM   "Run this if the repair inside MSN does not fix the problem."
REM   "Please wait."
REM   "Done. If it still does not work, ask on this site's forum."
@ECHO OFF
ECHO.如果你msn中修复不了就运行吧.
ECHO *************************************
ECHO * This file is provided by msn-problems.com *
ECHO * Copyright (C) 2004-2005 *
ECHO *************************************
ECHO.
ECHO 等一下.
REM Following files only needed in windows XP
REM When %os% is not Windows_NT, skip the next four registrations.
IF NOT "%os%"=="Windows_NT" GOTO WIN9X
regsvr32 Dssenh.dll /s
regsvr32 Gpkcsp.dll /s
regsvr32 Slbcsp.dll /s
regsvr32 Sccbase.dll /s
:WIN9X
REM When %os% is not Windows_NT, change into %windir%\System first.
IF NOT "%os%"=="Windows_NT" CD %windir%\System\
REM Needed by both XP and 9X
regsvr32 Softpub.dll /s
regsvr32 Wintrust.dll /s
regsvr32 Initpki.dll /s
regsvr32 Rsaenh.dll /s
regsvr32 Mssip32.dll /s
regsvr32 Cryptdlg.dll /s
regsvr32 Msxml3.dll /s
ECHO.
ECHO 搞定,还不可以到这网站的论坛询问 www.msn-problems.com
ECHO.
REM Keep the window open until a key is pressed.
pause
curl -O ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.4.tar.gz
tar xvfz ruby-1.8.4.tar.gz
cd ruby-1.8.4
./configure --prefix=/usr/local
make
make install
cd ..
ruby --version
ruby 1.8.4 (2005-12-24) [i686-linux]
curl -O http://rubyforge.org/frs/download.php/5207/rubygems-0.8.11.tgz
tar xvfz rubygems-0.8.11.tgz
cd rubygems-0.8.11
ruby setup.rb
cd ..
curl -O http://www.fastcgi.com/dist/fcgi-2.4.0.tar.gz
tar xvfz fcgi-2.4.0.tar.gz
cd fcgi-2.4.0
./configure --prefix=/usr/local
make
make install
cd ..