python
python2 编码修改
def to_unicode(unicode_or_str):
''' 将str字符串转换为unicode类型
'''
if isinstance(unicode_or_str, str):
value = unicode_or_str.decode('utf-8')
else:
value = unicode_or_str
return value
def to_str(unicode_or_str):
''' 将unicode字符串转换为str类型
'''
if isinstance(unicode_or_str, unicode):
value = unicode_or_str.encode('utf-8')
else:
value = unicode_or_str
return value
基础语法
conditions
# list init
[x for x in range(1, 10)]
[x for x in range(1, 10) if x % 2]
[ x if x%2 else x*100 for x in range(1, 10) ]
# oneline
'Yes' if fruit == 'Apple' else 'No'
if i > 3: print("We are done.")
count = 0 if count == N else N+1
loop
[i ** 3 for i in range(0, x+1, 3)]
input
input([prompt])
# input:
a=input('input:')
# 以空格输入
a_list=a.split(' ')
Functions
def sign(x):
if x > 0:
return 'positive'
elif x < 0:
return 'negative'
else:
return 'zero'
for x in [-1, 0, 1]:
print(sign(x))
# Prints "negative", "zero", "positive"
# while
i = 1
while i < 6:
print(i)
i += 1
import
# /home/el/foo/fox.py
def what_does_the_fox_say():
print('cry')
# /home/el/foo/main.py
import fox
fox.what_does_the_fox_say()
# import parent dir py
import sys
import os
sys.path.append(os.path.abspath('..'))
print(sys.path)
import water_config
range
range(start, stop, step)
运算符
# 按位取反
~
小数点取后两位
# 80.23
round(80.23456, 2)
Extended Slices
L = range(10)
# [0, 2, 4, 6, 8]
L[::2]
s='abcd'
# 'ac'
s[::2]
#'dcba'
s[::-1]
a = range(3)
#[0, 1, 2]
a
a[1:3] = [4, 5, 6]
# [0, 4, 5, 6]
a
a=range(10)
#[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
a
#[0, 1, 2, 3, 4, 5, 6, 7, 8]
a[:-1]
Magic Methods
# 动态获取对象的所有属性
__dict__
# 打印__str__的返回值
__str__
SimpleServer
python -m http.server 8080
nohup
nohup python -u ./cmd.py > cmd.log 2>&1 &
dict
thisdict = {
"brand": "Ford",
"model": "Mustang",
"year": 1964
}
# Check if Key Exists
if "model" in thisdict:
print("yes")
# add
thisdict["color"] = "red"
# remove
thisdict.pop("model")
# 获取属性, 可迭代
# 用元祖来存放key和vlaue
dict.items()
# 只包含key
dict.keys()
# 只包含value
dict.values()
# loop
for key, value in d.items():
python 执行bash命令
import subprocess
subprocess.call(['ls','-l'])
process = subprocess.Popen(['ls', '-l'])
(output, err) = process.communicate()
tuple
# A tuple is a collection which is ordered and unchangeable.
thistuple = ("apple", "banana", "cherry")
print(thistuple[1])
# Change Tuple Values
x = ("apple", "banana", "cherry")
y = list(x)
y[1] = "kiwi"
x = tuple(y)
判断变量是否存在
# To check the existence of a local variable:
if 'myVar' in locals():
# myVar exists.
# To check the existence of a global variable:
if 'myVar' in globals():
# myVar exists.
判断变量类型
x = 12
>>> isinstance(x, int)
True
python 默认执行
if __name__ == '__main__':
List comprehensions can also contain conditions:
nums = [0, 1, 2, 3, 4]
even_squares = [x ** 2 for x in nums if x % 2 == 0]
print(even_squares) # Prints "[0, 4, 16]"
zip() in conjunction with the * operator can be used to unzip a list:
>>> x = [1, 2, 3]
>>> y = [4, 5, 6]
>>> zipped = zip(x, y)
>>> list(zipped)
[(1, 4), (2, 5), (3, 6)]
>>> x2, y2 = zip(*zip(x, y))
>>> x == list(x2) and y == list(y2)
True
面向对象
Classes
class Greeter(object):
# Constructor
def __init__(self, name):
self.name = name # Create an instance variable
# 保护变量
self._say = "haha"
# 私有变量
self.__data = "test"
# 私有方法
def __say():
print(self._say)
# Instance method
def greet(self, loud=False):
if loud:
print('HELLO, %s!' % self.name.upper())
else:
print('Hello, %s' % self.name)
# call 方法
def __call__(a, b):
self.a = a
self.b = b
g = Greeter('Fred') # Construct an instance of the Greeter class
g.greet() # Call an instance method; prints "Hello, Fred"
g.greet(loud=True) # Call an instance method; prints "HELLO, FRED!"
# oncall 方法
g(a, b)
call & init
class Foo:
def __init__(self, a, b, c):
# ...
x = Foo(1, 2, 3) # __init__
class Foo:
def __call__(self, a, b, c):
# ...
x = Foo()
x(1, 2, 3) # __call__
@property装饰器
Python
内置的@property
装饰器可以把类的方法伪装成属性调用的方式。也就是本来是Foo.func()
的调用方法,变成Foo.func
的方式。在很多场合下,这是一种非常有用的机制。
class People:
def __init__(self, name, age):
self.__name = name
self.__age = age
@property
def age(self):
return self.__age
@age.setter
def age(self, age):
if isinstance(age, int):
self.__age = age
else:
raise ValueError
@age.deleter
def age(self):
print("删除年龄数据!")
obj = People("jack", 18)
print(obj.age)
obj.age = 19
print("obj.age: ", obj.age)
del obj.age
---------------------------
打印结果:
18
obj.age: 19
删除年龄数据!
# 使用方法
obj = People("jack", 18)
a = obj.age # 获取值
obj.age = 19 # 重新赋值
del obj.age # 删除属性
setattr
# The setattr() function sets the value of the specified attribute of the specified object.
class Person:
name = "John"
age = 36
country = "Norway"
setattr(Person, 'age', 40)
vars
# The vars() function returns the __dic__ attribute of an object.
# The __dict__ attribute is a dictionary containing the object's changeable attributes.
class Person:
name = "John"
age = 36
country = "norway"
x = vars(Person)
高级用法
lambda
# Program to double each item in a list using map()
my_list = [1, 5, 4, 6, 8, 11, 3, 12]
new_list = list(map(lambda x: x * 2 , my_list))
# Output: [2, 10, 8, 12, 16, 22, 6, 24]
print(new_list)
# 冒号表示匿名函数的参数
f = lambda x: x*x
# 25
f(5)
map
#Map applies a function to all the items in an input_list.
map(function_to_apply, list_of_inputs)
items = [1, 2, 3, 4, 5]
squared = list(map(lambda x: x**2, items))
#Most of the times we use lambdas with map so I did the same. Instead of a list of inputs we can even have a list of functions!
def multiply(x):
return (x*x)
def add(x):
return (x+x)
funcs = [multiply, add]
for i in range(5):
value = list(map(lambda x: x(i), funcs))
print(value)
# Output:
# [0, 0]
# [1, 2]
# [4, 4]
# [9, 6]
# [16, 8]
collections
from collections import namedtuple
# Python code to demonstrate namedtuple() and
# Access by name, index and getattr()
# importing "collections" for namedtuple()
import collections
# Declaring namedtuple()
Student = collections.namedtuple('Student',['name','age','DOB'])
# Adding values
S = Student('Nandini','19','2541997')
# Access using index
print ("The Student age using index is : ",end ="")
print (S[1])
# Access using name
print ("The Student name using keyname is : ",end ="")
print (S.name)
# Access using getattr()
print ("The Student DOB using getattr() is : ",end ="")
print (getattr(S,'DOB'))
slice
# A slice object is used to specify how to slice a sequence. You can specify where to start the slicing, and where to end. You can also specify the step, which allows you to e.g. slice only every other item.
# slice(start, end, step)
a = ("a", "b", "c", "d", "e", "f", "g", "h")
x = slice(3, 5)
# ('d', 'e')
print(a[x])
kwargs
# You can use **kwargs to let your functions take an arbitrary number of keyword arguments
>>> def print_keyword_args(**kwargs):
... # kwargs is a dict of the keyword args passed to the function
... for key, value in kwargs.items():
... print "%s = %s" % (key, value)
...
>>> print_keyword_args(first_name="John", last_name="Doe")
first_name = John
last_name = Doe
def some_kwargs(kwarg_1, kwarg_2, kwarg_3):
print("kwarg_1:", kwarg_1)
print("kwarg_2:", kwarg_2)
print("kwarg_3:", kwarg_3)
kwargs = {"kwarg_1": "Val", "kwarg_2": "Harper", "kwarg_3": "Remy"}
some_kwargs(**kwargs)
yield
def createGenerator():
mylist = range(3)
for i in mylist:
yield i*i
# create a generator
mygenerator = createGenerator()
# mygenerator is an object!
print(mygenerator)
# <generator object createGenerator at 0xb7555c34>
for i in mygenerator:
print(i)
# 0
# 1
# 4
# get generator content
next(mygenerator)
getopt
def parseArgs(self):
try:
# 短参数后面跟:代表可以接收值,长参数后面跟=代表接收一个值
options, args = getopt.getopt(sys.argv[1:], 'cgf:', ['common', 'sg', 'filename='])
for opt, value in options:
if opt in ('-c', '--common'):
self.water_name = "beijing_do_s"
self.csv_file = "../csv/Beijing_DO_s.csv"
if opt in ('-g', '--sg'):
self.water_name = "beijing_do_s_savgol"
self.csv_file = "../csv/Beijing_DO_s_savgol.csv"
if opt in ('-f','--filename'):
fileName = value
print("[*] Filename is ",fileName)
# do something
exit()
except getopt.GetoptError as msg:
print(msg)
sys.exit(2)
# 调用
# 短参数使用的时候是参数名[空格]参数值
# 长参数使用的时候是参数名=参值
进度条
import time
from tqdm import tqdm
with tqdm(total=200) as pbar:
pbar.set_description('Processing:')
# total表示总的项目, 循环的次数20*10(每次更新数目) = 200(total)
for i in range(20):
# 进行动作, 这里是过0.1s
time.sleep(0.1)
# 进行进度更新, 这里设置10个
pbar.update(10)
Processing:: 100%|██████████| 200/200 [00:02<00:00, 91.94it/s]
计时
import time
# 开始时间
start_time = time.time()
# 需要计算的方法
time.sleep(1)
end_time = time.time()
print("proc time: {:.2f}s".format(end_time - start_time))
itertools
import itertools
# groupby1
groups = []
uniquekeys = []
data = sorted(data, key=keyfunc)
for k, g in groupby(data, keyfunc):
groups.append(list(g)) # Store group iterator as a list
uniquekeys.append(k)
# groupby2
for (uid, group) in itertools.groupby(iter([(yield line.strip().split('\t')) for line in sys.stdin]),lambda item: item[0]):
try:
if not uid or len(uid) > 20 or uid == 'null':
continue
except:
continue
sort
list.sort(key=..., reverse=...)
sorted(list, key=..., reverse=...)
enumerate
for counter, value in enumerate(some_list):
print(counter, value)
Anaconda
anaconda2
# Create a conda environment named tensorflow to run a version of Python by invoking the following command:
conda create -n tensorflow pip python=2.7
# remove conda environment
conda remove -n tensorflow --all
# Activate the conda environment by issuing the following command:
source activate tensorflow
# win activate
activate tensorflow
# deactivate
source deactivate
# 列出所有环境
conda info --envs
# 换源
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/
conda config --set show_channel_urls yes
# site-packages
/home/joseph/anaconda3/envs/neo4j/lib/python3.6/site-packages
# 查看conda配置
conda config --show
pip
# 列出已安装的包
pip freeze or pip list
# 导出包到requirements.txt
pip freeze > requirements.txt
# 安装
pip install <包名> or pip install -r requirements.txt
# 卸载包
pip uninstall <包名> 或 pip uninstall -r requirements.txt
# 升级包
pip install -U <包名>
# 显示包所在的目录
pip show -f <包名>
# 安装指定版本
pip install numpy==1.13.1
# 搜索包
pip search <搜索关键字>
# 显示已安装包的版本
pip show <搜索关键字>
# 换源
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple <包名>
# 阿里源
pip install -i https://mirrors.aliyun.com/pypi/simple/ xx
查看包的所有依赖版本
# main.py
from pip._vendor import pkg_resources
def find_dependencies(package_name):
package = pkg_resources.working_set.by_key[package_name]
print([str(dependency) for dependency in package.requires()])
find_dependencies('requests')
jupyter
# 添加核
source activate <tensorflow>
pip install ipykernel
# list kernel
jupyter kernelspec list
# add kernel(处于想添加kernel的环境中)
python -m ipykernel install --name <tensorflow> --user
# remove kernel
jupyter kernelspec remove <kernelname>
# 生成配置文件
jupyter notebook --generate-config
# 生成密码
jupyter notebook password
# 修改文件
# 外网可访问
c.NotebookApp.ip='*'
c.NotebookApp.password = u'sha:ce...刚才复制的那个密文'
c.NotebookApp.open_browser = False
c.NotebookApp.port =8888 #可自行指定一个端口, 访问时使用该端口
# 后台挂起
nohup jupyter notebook > jupyter.log 2>&1 &
文件管理
path
os.path.join('/home/build/test/sandboxes/', todaystr, '/new_sandbox/')
# 获取当前目录
os.getcwd()
# create dir if not exist
import os
if not os.path.exists(directory):
os.makedirs(directory)
# remove file
os.remove("demofile.txt")
# remove folder
os.rmdir("myfolder")
# 列出文件夹中的文件
for filename in os.listdir(csv_dir):
if filename.endswith(".csv"):
filenames.append(filename)
continue
else:
continue
# 合并文件
with open(target_file, 'w', encoding='UTF-8') as outfile:
for fname in filenames:
with open(csv_dir+fname, encoding='UTF-8') as infile:
for line in infile:
outfile.write(line)
file io
#'a' is for append, or use
#'w' to write with truncation
#'r' to read the content of the file
with open('somefile.txt', 'a', encoding='UTF-8') as the_file:
the_file.write('Hello\n')
contents = the_file.read()
# return the 5 first characters of the file
f.read(5)
# return one line of the file
f1 = f.readline()
for x in f1:
print(x)
# looping through the lines of the file
for x in f:
print(x)
# or
f = open('myfile', 'w', encoding='UTF-8')
f.write('hi there\n') # python will convert \n to os.linesep
f.close() # you can omit in most cases as the destructor will call it
# create file
f = open("myfile.txt", "x")
# format
with open("results/gen.{}_epoch{}.json".format(config.graph_name, num_epoch), 'w') as fout:
fout.write(json.dumps(results))
# Double precision numbers
print("%.2f" % a)
数据处理
json
import json
# some JSON:
x = '{ "name":"John", "age":30, "city":"New York"}'
# parse x:
y = json.loads(x)
# the result is a Python dictionary:
print(y["age"])
# a Python object (dict):
x = {
"name": "John",
"age": 30,
"city": "New York"
}
# convert into JSON:
y = json.dumps(x)
# the result is a JSON string:
print(y)
pickle
import pickle
a = ['test value','test value 2','test value 3']
file_Name = "testfile.pkl"
# open the file for writing
fileObject = open(file_Name,'wb')
# this writes the object a to the
# file named 'testfile.pkl'
pickle.dump(a,fileObject)
# here we close the fileObject
fileObject.close()
# we open the file for reading
fileObject = open(file_Name.pkl,'rb')
# load the object from the file into var b
b = pickle.load(fileObject, encoding='latin1')
b
['test value','test value 2','test value 3']
logger
import logging
# 定义
def get_logger(log_dir, name, log_filename='info.log', level=logging.INFO):
logger = logging.getLogger(name)
logger.setLevel(level)
# Add file handler and stdout handler
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler = logging.FileHandler(os.path.join(log_dir, log_filename))
file_handler.setFormatter(formatter)
# Add console handler.
console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setFormatter(console_formatter)
logger.addHandler(file_handler)
logger.addHandler(console_handler)
# Add google cloud log handler
logger.info('Log directory: %s', log_dir)
return logger
# 使用
logger = get_logger(self._log_dir, __name__, 'info.log', level=log_level)
logger.info(kwargs)
datetime
import datetime
x = datetime.datetime.now()
# 2020-05-17 00:00:00
x = datetime.datetime(2020, 5, 17)
x = datetime.datetime(2018, 6, 1, 4)
#June
print(x.strftime("%B"))
#3点12分
x = datetime.time(3,12,0)
# using - as separator
datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# str to datetime
from datetime import datetime
datetime_object = datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p')
regular expression正则表达式
import re
# re.match只匹配字符串的开始,如果字符串开始不符合正则表达式,则匹配失败,函数返回None;而re.search匹配整个字符串,直到找到一个匹配。
# 使用group(num) 或 groups() 匹配对象函数来获取匹配表达式。
re.match(pattern, string, flags=0)
re.search(pattern, string, flags=0)
# 非贪婪模式
(.*?)
re.I #忽略大小写
re.M #多行模式
line = "Cats are smarter than dogs";
# Python的r前缀,就不用考虑转义的问题了
searchObj = re.search( r'(.*) are (.*?) .*', line, re.M|re.I)
searchObj.group() #Cats are smarter than dogs
searchObj.group(1) #Cats
searchObj.group(2) #smarter
math
# Return the ceiling of x as a float, the smallest integer value greater than or equal to x.
math.ceil(x)
# Return the floor of x as a float, the largest integer value less than or equal to x.
math.floor(x)
random
# seed
random.seed(5)
# get random number
random.random()
# returns a randomly selected element from the specified range
random.randrange(start, stop, step)
Set
# A set is a collection which is unordered and unindexed. In Python sets are written with curly brackets.
thisset = {"apple", "banana", "cherry"}
print(thisset)
# To add one item to a set use the add() method.
thisset.add("orange")
# Add multiple items to a set, using the update() method:
thisset.update(["orange", "mango", "grapes"])
# To remove an item in a set, use the remove(), or the discard() method.
thisset.remove("banana")
# The intersection() method returns a set that contains the similarity between two or more sets.
# set.intersection(set1, set2 ... etc)
y = {"c", "d", "e"}
z = {"f", "g", "c"}
#{'c'}
result = x.intersection(y, z)
keyword argument unpacking syntax
# argument unpacking
*args
# keyword argument unpacking
**kwargs
# usually a tuple, always an iterable[1]
args = (1, 2, 3)
# f(*args) → f(1, 2, 3)
# usually a dict, always a mapping*
kwargs = {"a": 1, "b": 2, "c": 3}
# f(**kwargs) -> f(a=1, b=2, c=3)
argparse
import argparse
parser = argparse.ArgumentParser(description='命令行中传入一个数字')
#type是要传入的参数的数据类型 help是该参数的提示信息
parser.add_argument('integers', type=str, help='传入的数字')
args = parser.parse_args()
print(args.integers)
parser = argparse.ArgumentParser(description='姓名')
parser.add_argument('--family', type=str, default='张',help='姓')
parser.add_argument('--name', type=str, default='三', help='名')
args = parser.parse_args()
#打印姓名
print(args.family+args.name)
raise
x = -1
if x < 0:
raise Exception("Sorry, no numbers below zero")
assert
x = "hello"
#if condition returns True, then nothing happens:
assert x == "hello"
#if condition returns False, AssertionError is raised:
assert x == "goodbye", "x should be 'hello'"
Raise an exception 抛出异常
if x < 0:
raise Exception("Sorry, no numbers below zero")
多线程
multiprocessing
from multiprocessing import Process
import os
# 子进程要执行的代码
def run_proc(name):
print 'Run child process %s (%s)...' % (name, os.getpid())
if __name__=='__main__':
print 'Parent process %s.' % os.getpid()
p = Process(target=run_proc, args=('test',))
print 'Process will start.'
p.start()
p.join()
print 'Process end.'
# 进程池
from multiprocessing import Pool
import os, time, random
def long_time_task(name):
print 'Run task %s (%s)...' % (name, os.getpid())
start = time.time()
time.sleep(random.random() * 3)
end = time.time()
print 'Task %s runs %0.2f seconds.' % (name, (end - start))
if __name__=='__main__':
print 'Parent process %s.' % os.getpid()
p = Pool()
for i in range(5):
p.apply_async(long_time_task, args=(i,))
print 'Waiting for all subprocesses done...'
p.close()
p.join()
print 'All subprocesses done.'
XML解析
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
import xml.etree.ElementTree as ET
tree = ET.parse('country_data.xml')
root = tree.getroot()
# parse from string
root = ET.fromstring(country_data_as_string)
# root
>>> root.tag
'data'
>>> root.attrib
{}
# loop
>>> for child in root:
... print(child.tag, child.attrib)
...
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}
# get text
>>> root[0][1].text
'2008'
单元测试
测试模块
class Dict(dict):
def __init__(self, **kw):
super().__init__(**kw)
def __getattr__(self, key):
try:
return self[key]
except KeyError:
raise AttributeError(r"'Dict' object has no attribute '%s'" % key)
def __setattr__(self, key, value):
self[key] = value
单元测试模块
import unittest
from mydict import Dict
class TestDict(unittest.TestCase):
def test_init(self):
d = Dict(a=1, b='test')
self.assertEqual(d.a, 1)
self.assertEqual(d.b, 'test')
self.assertTrue(isinstance(d, dict))
def test_key(self):
d = Dict()
d['key'] = 'value'
self.assertEqual(d.key, 'value')
def test_attr(self):
d = Dict()
d.key = 'value'
self.assertTrue('key' in d)
self.assertEqual(d['key'], 'value')
def test_keyerror(self):
d = Dict()
with self.assertRaises(KeyError):
value = d['empty']
def test_attrerror(self):
d = Dict()
with self.assertRaises(AttributeError):
value = d.empty
运行单元测试
if __name__ == '__main__':
unittest.main()