历经几次更换域名,以及从GAE迁移到wordpress,造成不少历史遗留的图片分布在不同域名或gae目录下。
主要分布
http://oldres.fengsage.com
http://www.fengsage.com
http://fensageblog.appspot.com
原来使用代码实现图片代理访问,但自己越看越不爽,索性写个脚本,一次性把原来的图片下载到wordpress本地,并且更新数据库。主要是圣诞节闲得蛋疼。
代码有不少问题:
- 不用php,因为懒得看wordpress插件制作。
- 因为是python脚本,所以必须有执行python的权限
- 没有生成wordpress media记录。不能后台编辑多媒体
wordpress插件实现:
http://wordpress.org/extend/plugins/velvet-blues-update-urls/ 等发现的时候脚本已经写好了。悲哀~~
脚本比较简单,分享一下。
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
#!/usr/bin/env python
# --*-- encoding:utf-8 --*--
'''
Created on 2011-12-25
@author: fred <fredzhu.info>
'''
import MySQLdb
import re
import urllib
import time
import os
# Table the posts are read from and written back to.
WP_POST_CONTENT = u"wp_posts"
# Matches self-closing <img ... src="..." ... /> tags and captures the src
# value. Tags containing a '/' between '<img' and 'src', or between the src
# attribute and the closing '/>', are not matched.
SIMPLE_IMG_URL = r'<img[^/]*src="([^"]*)"[^/]*/>'
# Prefix put in front of the local file path when post content is rewritten;
# left empty, the rewritten URLs start with '/'.
NEW_IMG_URL = u''
# Relative directory the images are saved to, suffixed with the month/day on
# which the script runs.
# NOTE(review): WordPress itself usually lays uploads out as year/month --
# confirm that '%m/%d' is intended.
DOWNLOAD_PATH = u'wp-content/uploads/%s'%(time.strftime('%m/%d', time.localtime(time.time())))
# An image URL containing any of these substrings is downloaded and rewritten.
REPLACE_LIST = ['http://oldres.fengsage.com','../media','http://www.fengsage.com']
# Database connection shared by every function below; it is opened as soon as
# the module is loaded.
# NOTE(review): credentials are hard-coded here -- adjust before running.
conn = MySQLdb.connect(host="localhost",
user="root",
passwd="zhufeng",
db="wordpress",
charset='utf8')
def get_post_list():
    """Return every row of the posts table.

    Rows come back as tuples in table column order (``select *``); callers
    index them by position, so the column list is deliberately left as is.

    Returns:
        The sequence of row tuples produced by ``cursor.fetchall()``.
    """
    cursor = conn.cursor()
    try:
        cursor.execute("SET NAMES utf8")
        # A table name cannot be passed as a bound parameter, so it is
        # interpolated; WP_POST_CONTENT is a module constant, not user input.
        cursor.execute("select * from %s" % WP_POST_CONTENT)
        return cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
def find_img_urls(str):
    """Extract the src values of all <img .../> tags found in ``str``.

    Args:
        str: HTML text of a post.

    Returns:
        A list with one captured src value per match of SIMPLE_IMG_URL, or
        None when the text contains no match.
    """
    urls = re.findall(SIMPLE_IMG_URL, str)
    return urls if urls else None
def process_imgs(img_urls, replace, downloader=None):
    """Download the images that live on one of the legacy locations.

    Args:
        img_urls: Iterable of image URLs taken from a post.
        replace: Substrings identifying URLs that must be migrated; a URL
            qualifies when it contains at least one of them.
        downloader: Optional callable taking a URL and returning the local
            path of the saved file, or a false value on failure. Defaults to
            ``download_img``.

    Returns:
        Dict mapping each successfully downloaded URL to its local path.
        URLs whose download failed are left out, so the caller never
        rewrites a post to point at a file that does not exist.
    """
    if downloader is None:
        downloader = download_img
    result = {}
    for img in img_urls:
        # The same URL may occur several times in one post; fetch it once.
        if img in result:
            continue
        # A URL may contain more than one of the markers; one hit is enough.
        if not any(rp in img for rp in replace):
            continue
        print("\tdownload picture")
        new_img_path = downloader(img)
        if new_img_path:
            result[img] = new_img_path
    return result
def download_img(img_url):
    """Download one image into DOWNLOAD_PATH and return its local path.

    The file name is taken from the last path segment of the URL (any query
    string or fragment is ignored). Only png, jpg, jpeg, gif and bmp files
    are handled; the extension check is case-insensitive.

    Args:
        img_url: URL of the image to fetch.

    Returns:
        The path of the saved file (``DOWNLOAD_PATH/<name>.<ext>``), or None
        when the URL does not name a supported image or the download failed.
    """
    # Anchor on the end of the URL so that a host such as "www.gifs.com"
    # cannot be mistaken for the file name, and escape the dot so it only
    # matches a real extension separator.
    match = re.search(r'([^/?#]+)\.(png|jpe?g|gif|bmp)(?:[?#].*)?$',
                      img_url, re.IGNORECASE)
    if not match:
        return None
    name, suf = match.groups()
    file_path = u'%s/%s.%s' % (DOWNLOAD_PATH, name, suf)
    local_path = file_path.encode('utf-8')
    wrote_file = False
    try:
        if not os.path.isdir(DOWNLOAD_PATH):
            os.makedirs(DOWNLOAD_PATH)
        # Open the remote side first: if it fails, no empty file is created.
        image_on_web = urllib.urlopen(img_url.encode('utf-8'))
        try:
            # urllib.urlopen does not raise on HTTP errors; without this
            # check a 404 page would be saved as if it were the image.
            # getcode() is None for non-HTTP URLs.
            status = image_on_web.getcode()
            if status is not None and status != 200:
                raise IOError('unexpected HTTP status %s' % status)
            wrote_file = True
            with open(local_path, "wb") as downloaded_image:
                while True:
                    buf = image_on_web.read(65536)
                    if not buf:
                        break
                    downloaded_image.write(buf)
        finally:
            image_on_web.close()
        return file_path
    except Exception:
        # Best effort: report the failure and let the caller carry on with
        # the remaining images.
        print('\tdownload img failture:%s' % img_url)
        if wrote_file:
            # Do not leave a truncated file behind.
            try:
                os.remove(local_path)
            except OSError:
                pass
        return None
def refresh_content(post_content, result):
    """Rewrite the image URLs of a post to their new local location.

    Args:
        post_content: HTML text of the post.
        result: Dict mapping an original image URL to the local file path.

    Returns:
        The post text with every occurrence of each original URL replaced by
        ``NEW_IMG_URL + '/' + local path``.
    """
    for old_url, local_path in result.items():
        replacement = '%s/%s' % (NEW_IMG_URL, local_path)
        post_content = post_content.replace(old_url, replacement)
    return post_content
def update_content(post_id, post_content):
    """Store the rewritten content of one post and commit it.

    Args:
        post_id: Value of the ID column of the row to update.
        post_content: New text for the post_content column.

    Raises:
        Whatever the database driver raises; the transaction is rolled back
        before the exception is passed on to the caller.
    """
    # Only the table name is concatenated (it is a module constant); the
    # values are passed as bound parameters.
    sql = "update " + WP_POST_CONTENT + " set post_content=%s where ID=%s"
    cursor = conn.cursor()
    try:
        cursor.execute("SET NAMES utf8")
        cursor.execute(sql, (post_content, post_id))
        # The connection does not autocommit; without an explicit commit the
        # change is discarded on transactional tables when the script exits.
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        cursor.close()
def go():
    """Migrate the images of every post.

    For each row that has an id, content and title: collect the image URLs,
    download those matching REPLACE_LIST, rewrite the content to the local
    copies and write it back. A failing database update is reported and the
    loop continues with the next post.
    """
    for post in get_post_list():
        # Columns are addressed by position in the ``select *`` result.
        post_id, post_content, post_title = post[0], post[4], post[5]
        if not (post_id and post_content and post_title):
            continue
        print("process article ID:%s" % (post_id))

        img_urls = find_img_urls(post_content)
        if not img_urls:
            continue

        result = process_imgs(img_urls, REPLACE_LIST)
        if not result:
            continue

        post_content = refresh_content(post_content, result)
        try:
            update_content(post_id, post_content)
        except Exception as e:
            print('update database failture:%s' % e)
if __name__ == '__main__':
    # go() has no return value, so it is called rather than printed.
    try:
        go()
    finally:
        # Release the module-level database connection when the run ends.
        conn.close()
近期评论