attachment:cqp-code.txt of LabmeetingSP09w8

Attachment 'cqp-code.txt'

   1 #To start:
   2 $ cqp -e
   3 
   4 #To see available corpora:
   5  show corpora
   6 
   7 #Show information about a given corpus:
   8  info BNC-XML
   9 
  10 #Activate corpus:
  11  BNC-XML
  12 
  13 #Show corpus attributes:
  14  show cd
  15 
  16 #Search for words (via regular expressions) and sort:
  17  "spe(ech|aks?(ing)?)"
  18  sort by word
  19 
  20 #Set the left context to 8 words, the right context to 2 sentences, or both left and right context to 1 sentence:
  21  set lc 8 words
  22  set rc 2s
  23  set c s
  24 
  25 #Redisplay matches:
  26  cat
  27 
  28 #Display or hide POS and lemma annotation:
  29  show +pos
  30  show +lemma
  31  show -pos -lemma
  32 
  33 #Search by lemma:
  34  [lemma = "speak_VERB"]
  35  [lemma = "speech_SUBST"]
  36  [lemma = "(speak_VERB|speech_SUBST)"]
  37 
  38 #See size of last query:
  39  size Last
  40 
  41 #Show structural attributes (shown as XML tags):
  42  show +s
  43 
  44 #Create .cqprc file with favorite settings:
  45 set ProgressBar on;
  46 set HistoryFile "/tmp/cqphistory.jdegen";
  47 set WriteHistory yes;
  48 set c s;
  49 
  50 #Searching for POS information:
  51  "work"
  52  [word="work" & pos="N.*"]
  53  [word="work" & pos="V.*"]
  54  [word="work" & pos !="V.*"]
  55 
  56 #Use the /codist[] macro to get frequency distributions of POS tags/lemmas for a given word:
  57  /codist["work",  pos]
  58  /codist[lemma, "speak_VERB", word]
  59 
  60 
  61 #Search for sequences, search within a context:
  62  [lemma="work_VERB"][]*[word="day"]
  63  [lemma="work_VERB"][]*[word="day"] within s
  64  [lemma="work_VERB"][]*[word="day"] within 2 words
  65  [lemma="work_VERB"][]{2}[word="day"]
  66 
  67 #Count matches by word form or by lemma:
  68  count by word
  69  count by lemma
  70 
  71 #Set frequency thresholds:
  72  [pos="VVB" & word = "w.*"]
  73  count by lemma cut 50
  74 
  75 #Save query results:
  76  Some = [word = "some" %c] [pos="NN2*"]
  77  set DataDirectory "."
  78  BNC-XML
  79  save Some
  80  cat Some > "some.txt"
  81  cat Some > "| gzip > some.txt.gz"
  82  sort Some by word
  83 
  84 #Anchor points:
  85  A = [pos="(AT.*|DT.*)"] @[pos="AJ.*" & word="f.*"] [pos="N.*"]
  86  sort by word
  87 
  88 #Display corpus positions of anchor points in tabular format:
  89  dump A
  90  dump A 10 20
  91 
  92 #Frequency distributions:
  93  group A matchend word by target word cut 100
  94  group A match word by target lemma cut 100
  95 
  96 #Reduce data randomly:
  97  reduce A to 10%

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.