随笔

我只是占个tag坑…😂

用Python抓取王垠的博客

直接上代码(由于他的博客是异步加载的,所以我们要用 mechanize):

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2017年5月24日

@author: BirdZhang
'''
from bs4 import BeautifulSoup
import mechanize
import cookielib
from wyblog import BLOG_URL

class NoHistory(object):
    """History stand-in that records nothing.

    Provides ``add`` and ``clear`` as no-ops, so whatever object is given an
    instance of this class as its history never stores anything in it.
    """

    def add(self, *args, **kwargs):
        """Accept any positional/keyword arguments and discard them."""
        return None

    def clear(self):
        """Do nothing; nothing is ever stored, so there is nothing to clear."""
        return None

def getBrowers():
    """Build and return a configured ``mechanize.Browser``.

    The browser is created with a ``NoHistory`` history object, gets a fresh
    ``cookielib.LWPCookieJar``, has all three debug switches turned off and
    sends a desktop Chrome ``User-agent`` header.

    Returns:
        The configured ``mechanize.Browser`` instance.
    """
    user_agent = ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36")

    browser = mechanize.Browser(history=NoHistory())

    # Handler options. The gzip option is deliberately not set here (it was
    # disabled in the original snippet as well).
    browser.set_handle_equiv(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)

    # Associate a cookie jar with the browser.
    cookie_jar = cookielib.LWPCookieJar()
    browser.set_cookiejar(cookie_jar)

    refresh_processor = mechanize._http.HTTPRefreshProcessor()
    browser.set_handle_refresh(refresh_processor, max_time=1)

    # Switch every debug output off.
    for set_debug in (browser.set_debug_http,
                      browser.set_debug_redirects,
                      browser.set_debug_responses):
        set_debug(False)

    browser.addheaders = [("User-agent", user_agent)]
    return browser


if __name__ == "__main__":
    # Fetch the page at BLOG_URL and parse it with the html5lib parser.
    browser = getBrowers()
    response = browser.open(BLOG_URL)
    soup = BeautifulSoup(response.read(), "html5lib")

    # Posts are looked up as <li class="list-group-item title"> elements.
    entries = soup.find_all(name='li',
                            attrs={"class": "list-group-item title"})
    for entry in entries:
        link = entry.a
        # Skip list items that have no <a> or no href instead of crashing
        # with TypeError/KeyError on them.
        href = link.get("href") if link is not None else None
        if href is None:
            continue
        title = " ".join(link.contents)
        # A single pre-formatted argument prints "href title" identically
        # under Python 2 (print statement) and Python 3 (print function).
        print("%s %s" % (href, title))

剩下的自己该干嘛干嘛吧

后面干脆撸了一个旗鱼的客户端

https://github.com/0312birdzhang/harbour-blogofwy

最靠谱的Tomcat日志切割教程

强大的logrotate

更多logrotate的说明见 http://www.linuxcommand.org/man_pages/logrotate8.html

或者这里 https://linux.cn/article-4126-1.html

在 /etc/logrotate.d/tomcat 里写入下面的内容:
# Rotation rules for the Tomcat catalina-daemon.out log.
/data/logs/logpath/catalina-daemon.out {
# Time-based schedule: rotate once a day.
# NOTE(review): "size" is also set below; per the logrotate man page the two
# criteria are not combined -- confirm which one is actually intended.
daily
# Keep 3 rotated files.
rotate 3
# Size criterion: rotate once the log is larger than 100M.
size 100M
# Leave rotated files uncompressed.
nocompress
# Do not rotate an empty log.
notifempty
# Do not report an error if the log file does not exist.
missingok
# Copy the log and truncate the original in place rather than moving it.
copytruncate
# Mode/owner/group for a newly created log file.
# NOTE(review): the man page says "create" has no effect together with
# "copytruncate" -- confirm whether this line is needed.
create 0600 root root
}

测试

logrotate --force /etc/logrotate.d/tomcat

查看是否有一个/data/logs/logpath/catalina-daemon.out.1生成

为什么不用cronolog?

很简单,cronolog对jsvc启动的tomcat不友好

python备份文件到谷歌Drive

原文 https://developers.google.com/drive/v3/web/quickstart/python

安装依赖

pip install --upgrade google-api-python-client

下载认证文件 client_secret.json

谷歌上的步骤:

1. Use this wizard to create or select a project in the Google Developers Console and automatically turn on the API. Click Continue, then Go to credentials.
2. On the Add credentials to your project page, click the Cancel button.
3. At the top of the page, select the OAuth consent screen tab. Select an Email address, enter a Product name if not already set, and click the Save button.
4. Select the Credentials tab, click the Create credentials button and select OAuth client ID.
5. Select the application type Other, enter the name "Drive API Quickstart", and click the Create button.
6. Click OK to dismiss the resulting dialog.
7. Click the file_download (Download JSON) button to the right of the client ID.
8. Move this file to your working directory and rename it client_secret.json.

内容大概如下:
{"installed":{"client_id":"334720361216-xxxxx.apps.googleusercontent.com","project_id":"analog-fastness-167807","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://accounts.google.com/o/oauth2/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"xxxxxx","redirect_uris":["urn:ietf:wg:oauth:2.0:oob","http://localhost"]}}

代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Upload the files listed in FILES to Google Drive.

Created on 2017-05-16
@author: debo.zhang
'''

from __future__ import print_function
import os

from apiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from googleapiclient.http import MediaFileUpload
try:
    import argparse
    flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args()
except ImportError:
    # Without argparse, fall back to the flag-less auth call below.
    flags = None

SCOPES = 'https://www.googleapis.com/auth/drive.file'

# Credentials are stored in storage.json in the current directory.
store = file.Storage('storage.json')
creds = store.get()
if not creds or creds.invalid:
    # No usable stored credentials: run the OAuth flow.
    # NOTE: client_secret.json is read from ~/.credentials, not from the
    # current working directory.
    home_dir = os.path.expanduser('~')
    credential_dir = os.path.join(home_dir, '.credentials')
    if not os.path.exists(credential_dir):
        os.makedirs(credential_dir)
    credential_path = os.path.join(credential_dir, 'client_secret.json')
    flow = client.flow_from_clientsecrets(credential_path, SCOPES)
    if flags:
        creds = tools.run_flow(flow, store, flags)
    else:
        creds = tools.run(flow, store)

http = creds.authorize(Http())
DRIVE = build('drive', 'v3', http=http)

# (filename, convert) pairs; the second element is not used by this script.
FILES = (
    ('tmp.jpg', False),
)

for filename, _convert in FILES:
    # Use the loop's filename for both the Drive name and the local path
    # (these were previously hard-coded to 'tmp.jpg', ignoring FILES).
    file_metadata = {'name': filename}
    # The MIME type is still fixed to JPEG, matching the single example file.
    media = MediaFileUpload(filename, mimetype='image/jpeg')
    res = DRIVE.files().create(body=file_metadata,
                               media_body=media,
                               fields='id').execute()
    print(res)

irc 加回频道op

用以下命令:

/cs op #jolla-cn +o birdzhang